From 9179916d6aca2fac50b2b2026cdac9479ac53974 Mon Sep 17 00:00:00 2001 From: Casey Date: Fri, 8 Jul 2022 12:59:26 -0700 Subject: [PATCH 1/9] export comments --- backend/data_export/celery_tasks.py | 10 +++++---- backend/data_export/models.py | 14 +++++++++++- backend/data_export/pipeline/comments.py | 26 +++++++++++++++++++++++ backend/data_export/pipeline/dataset.py | 9 ++++++-- backend/data_export/pipeline/factories.py | 10 ++++++++- 5 files changed, 61 insertions(+), 8 deletions(-) create mode 100644 backend/data_export/pipeline/comments.py diff --git a/backend/data_export/celery_tasks.py b/backend/data_export/celery_tasks.py index 2e273e7a..5ad93e42 100644 --- a/backend/data_export/celery_tasks.py +++ b/backend/data_export/celery_tasks.py @@ -8,9 +8,9 @@ from django.conf import settings from django.shortcuts import get_object_or_404 from .pipeline.dataset import Dataset -from .pipeline.factories import create_formatter, create_labels, create_writer +from .pipeline.factories import create_formatter, create_labels, create_writer, create_comment from .pipeline.services import ExportApplicationService -from data_export.models import ExportedExample +from data_export.models import ExportedExample, ExportedComment from projects.models import Member, Project logger = get_task_logger(__name__) @@ -23,7 +23,8 @@ def create_collaborative_dataset(project: Project, dirpath: str, confirmed_only: else: examples = ExportedExample.objects.filter(project=project) labels = create_labels(project, examples) - dataset = Dataset(examples, labels, is_text_project) + comments = create_comment(examples) + dataset = Dataset(examples, labels, comments, is_text_project) service = ExportApplicationService(dataset, formatters, writer) @@ -40,7 +41,8 @@ def create_individual_dataset(project: Project, dirpath: str, confirmed_only: bo else: examples = ExportedExample.objects.filter(project=project) labels = create_labels(project, examples, member.user) - dataset = Dataset(examples, labels, is_text_project) + comments = create_comment(examples, member.user) + dataset = Dataset(examples, labels, comments, is_text_project) service = ExportApplicationService(dataset, formatters, writer) diff --git a/backend/data_export/models.py b/backend/data_export/models.py index f7492dfc..af4cd937 100644 --- a/backend/data_export/models.py +++ b/backend/data_export/models.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Protocol, Tuple from django.db import models -from examples.models import Example +from examples.models import Example, Comment from labels.models import Category, Relation, Span, TextLabel from projects.models import Project @@ -79,3 +79,15 @@ class ExportedText(TextLabel): class Meta: proxy = True + + +class ExportedComment(Comment): + def to_string(self) -> str: + return self.text + + def to_dict(self): + return {"id": self.example.id, "comment": self.text} + + class Meta: + proxy = True + diff --git a/backend/data_export/pipeline/comments.py b/backend/data_export/pipeline/comments.py new file mode 100644 index 00000000..4aeb6bab --- /dev/null +++ b/backend/data_export/pipeline/comments.py @@ -0,0 +1,26 @@ +import abc +from collections import defaultdict +from typing import Dict, List, Tuple + +from django.db.models import QuerySet + +from data_export.models import ( + ExportedExample, + ExportedComment, +) + +class Comments(abc.ABC): + comment_class = ExportedComment + column = "Comments" + fields: Tuple[str, ...] = ("example", "user") # To boost performance + + def __init__(self, examples: QuerySet[ExportedExample], user=None): + self.comment_groups = defaultdict(list) + comments = self.comment_class.objects.filter(example__in=examples) + if user: + comments = comments.filter(user=user) + for comment in comments.select_related(*self.fields): + self.comment_groups[comment.example.id].append(comment) + + def find_by(self, example_id: int) -> Dict[str, List[ExportedComment]]: + return {self.column: self.comment_groups[example_id]} diff --git a/backend/data_export/pipeline/dataset.py b/backend/data_export/pipeline/dataset.py index 9d184704..0bee9179 100644 --- a/backend/data_export/pipeline/dataset.py +++ b/backend/data_export/pipeline/dataset.py @@ -4,20 +4,25 @@ import pandas as pd from django.db.models.query import QuerySet from .labels import Labels -from data_export.models import ExportedExample +from .comments import Comments +from data_export.models import ExportedExample, ExportedComment class Dataset: - def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], is_text_project=True): + def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], comments: List[Comments], + is_text_project=True): self.examples = examples self.labels = labels self.is_text_project = is_text_project + self.comments = comments def __iter__(self) -> Iterator[Dict[str, Any]]: for example in self.examples: data = example.to_dict(self.is_text_project) for labels in self.labels: data.update(**labels.find_by(example.id)) + for comment in self.comments: + data.update(**comment.find_by(example.id)) yield data def to_dataframe(self) -> pd.DataFrame: diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py index 76f3bc7b..12e4afce 100644 --- a/backend/data_export/pipeline/factories.py +++ b/backend/data_export/pipeline/factories.py @@ -14,7 +14,8 @@ from .formatters import ( TupledSpanFormatter, ) from .labels import Categories, Labels, Relations, Spans, Texts -from data_export.models import DATA, ExportedExample +from .comments import Comments +from data_export.models import DATA, ExportedExample, ExportedComment from projects.models import ( DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, @@ -51,14 +52,17 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]: DOCUMENT_CLASSIFICATION: { CSV.name: [ JoinedCategoryFormatter(Categories.column), + JoinedCategoryFormatter(Comments.column), RenameFormatter(**mapper_text_classification), ], JSON.name: [ ListedCategoryFormatter(Categories.column), + ListedCategoryFormatter(Comments.column), RenameFormatter(**mapper_text_classification), ], JSONL.name: [ ListedCategoryFormatter(Categories.column), + ListedCategoryFormatter(Comments.column), RenameFormatter(**mapper_text_classification), ], FastText.name: [FastTextCategoryFormatter(Categories.column)], @@ -114,3 +118,7 @@ def create_labels(project: Project, examples: QuerySet[ExportedExample], user=No label_collections = select_label_collection(project) labels = [label_collection(examples=examples, user=user) for label_collection in label_collections] return labels + + +def create_comment(examples: QuerySet[ExportedExample], user=None) -> List[Comments]: + return [Comments(examples=examples, user=user)] From 6a445470851a3427eb4bc085f0ef79a15e74ef05 Mon Sep 17 00:00:00 2001 From: Casey Date: Fri, 8 Jul 2022 13:09:32 -0700 Subject: [PATCH 2/9] delete unused to_dict --- backend/data_export/models.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/backend/data_export/models.py b/backend/data_export/models.py index af4cd937..b4248699 100644 --- a/backend/data_export/models.py +++ b/backend/data_export/models.py @@ -85,9 +85,6 @@ class ExportedComment(Comment): def to_string(self) -> str: return self.text - def to_dict(self): - return {"id": self.example.id, "comment": self.text} - class Meta: proxy = True From 73b03673635d734a4887d5d054fce216c5348902 Mon Sep 17 00:00:00 2001 From: Casey Date: Fri, 8 Jul 2022 14:09:19 -0700 Subject: [PATCH 3/9] fix fasttext --- backend/data_export/pipeline/formatters.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/data_export/pipeline/formatters.py b/backend/data_export/pipeline/formatters.py index 56b9144e..41efa19d 100644 --- a/backend/data_export/pipeline/formatters.py +++ b/backend/data_export/pipeline/formatters.py @@ -46,12 +46,15 @@ class FastTextCategoryFormatter(Formatter): """Format the label column to `__label__LabelA __label__LabelB` format. Also, drop the columns except for `data` and `self.target_column`. """ - dataset = dataset[[DATA, self.target_column]] + dataset = dataset[[DATA, self.target_column, "Comments"]] dataset[self.target_column] = dataset[self.target_column].apply( lambda labels: " ".join(sorted(f"__label__{label.to_string()}" for label in labels)) ) dataset[self.target_column] = dataset[self.target_column].fillna("") - dataset = dataset[self.target_column] + " " + dataset[DATA] + dataset["Comments"] = dataset["Comments"].apply( + lambda comments: "#".join(comment.to_string() for comment in comments) + ) + dataset = dataset[self.target_column] + " " + dataset[DATA] + " " + dataset["Comments"] return dataset From 1537d515312819d5f1f3c7c230a0850dca61ba12 Mon Sep 17 00:00:00 2001 From: Hironsan Date: Mon, 11 Jul 2022 10:57:13 +0900 Subject: [PATCH 4/9] Format codes according to flake8, black, and isort --- backend/data_export/celery_tasks.py | 9 +++++++-- backend/data_export/models.py | 3 +-- backend/data_export/pipeline/comments.py | 6 ++---- backend/data_export/pipeline/dataset.py | 9 +++++---- backend/data_export/pipeline/factories.py | 4 ++-- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/backend/data_export/celery_tasks.py b/backend/data_export/celery_tasks.py index 5ad93e42..d5f0b8cc 100644 --- a/backend/data_export/celery_tasks.py +++ b/backend/data_export/celery_tasks.py @@ -8,9 +8,14 @@ from django.conf import settings from django.shortcuts import get_object_or_404 from .pipeline.dataset import Dataset -from .pipeline.factories import create_formatter, create_labels, create_writer, create_comment +from .pipeline.factories import ( + create_comment, + create_formatter, + create_labels, + create_writer, +) from .pipeline.services import ExportApplicationService -from data_export.models import ExportedExample, ExportedComment +from data_export.models import ExportedExample from projects.models import Member, Project logger = get_task_logger(__name__) diff --git a/backend/data_export/models.py b/backend/data_export/models.py index b4248699..ae6dd201 100644 --- a/backend/data_export/models.py +++ b/backend/data_export/models.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Protocol, Tuple from django.db import models -from examples.models import Example, Comment +from examples.models import Comment, Example from labels.models import Category, Relation, Span, TextLabel from projects.models import Project @@ -87,4 +87,3 @@ class ExportedComment(Comment): class Meta: proxy = True - diff --git a/backend/data_export/pipeline/comments.py b/backend/data_export/pipeline/comments.py index 4aeb6bab..1e42428e 100644 --- a/backend/data_export/pipeline/comments.py +++ b/backend/data_export/pipeline/comments.py @@ -4,10 +4,8 @@ from typing import Dict, List, Tuple from django.db.models import QuerySet -from data_export.models import ( - ExportedExample, - ExportedComment, -) +from data_export.models import ExportedComment, ExportedExample + class Comments(abc.ABC): comment_class = ExportedComment diff --git a/backend/data_export/pipeline/dataset.py b/backend/data_export/pipeline/dataset.py index 0bee9179..c2c85c84 100644 --- a/backend/data_export/pipeline/dataset.py +++ b/backend/data_export/pipeline/dataset.py @@ -3,14 +3,15 @@ from typing import Any, Dict, Iterator, List import pandas as pd from django.db.models.query import QuerySet -from .labels import Labels from .comments import Comments -from data_export.models import ExportedExample, ExportedComment +from .labels import Labels +from data_export.models import ExportedExample class Dataset: - def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], comments: List[Comments], - is_text_project=True): + def __init__( + self, examples: QuerySet[ExportedExample], labels: List[Labels], comments: List[Comments], is_text_project=True + ): self.examples = examples self.labels = labels self.is_text_project = is_text_project diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py index 12e4afce..be139819 100644 --- a/backend/data_export/pipeline/factories.py +++ b/backend/data_export/pipeline/factories.py @@ -4,6 +4,7 @@ from django.db.models import QuerySet from . import writers from .catalog import CSV, JSON, JSONL, FastText +from .comments import Comments from .formatters import ( DictFormatter, FastTextCategoryFormatter, @@ -14,8 +15,7 @@ from .formatters import ( TupledSpanFormatter, ) from .labels import Categories, Labels, Relations, Spans, Texts -from .comments import Comments -from data_export.models import DATA, ExportedExample, ExportedComment +from data_export.models import DATA, ExportedExample from projects.models import ( DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, From 7d34acf66a20d9394597dbd9035efd0fd5b24355 Mon Sep 17 00:00:00 2001 From: Casey Date: Wed, 13 Jul 2022 15:27:16 -0700 Subject: [PATCH 5/9] update test cases --- backend/data_export/tests/test_dataset.py | 6 +- backend/data_export/tests/test_formatters.py | 4 +- backend/data_export/tests/test_task.py | 121 ++++++++++++------- 3 files changed, 85 insertions(+), 46 deletions(-) diff --git a/backend/data_export/tests/test_dataset.py b/backend/data_export/tests/test_dataset.py index f7029ebf..df9eea57 100644 --- a/backend/data_export/tests/test_dataset.py +++ b/backend/data_export/tests/test_dataset.py @@ -17,9 +17,13 @@ class TestDataset(unittest.TestCase): label.find_by.return_value = {"labels": ["label"]} self.labels = MagicMock() self.labels.__iter__.return_value = [label] + comment = MagicMock() + comment.find_by.return_value = {"Comments": ["comment"]} + self.comments = MagicMock() + self.comments.__iter__.return_value = [comment] def test_to_dataframe(self): dataset = Dataset(self.examples, self.labels) df = dataset.to_dataframe() - expected = pd.DataFrame([{"data": "example", "labels": ["label"]}]) + expected = pd.DataFrame([{"data": "example", "labels": ["label"], "Comments": ["comment"]}]) assert_frame_equal(df, expected) diff --git a/backend/data_export/tests/test_formatters.py b/backend/data_export/tests/test_formatters.py index cb371b3a..09deb97b 100644 --- a/backend/data_export/tests/test_formatters.py +++ b/backend/data_export/tests/test_formatters.py @@ -78,12 +78,12 @@ class TestFastTextFormatter(unittest.TestCase): self.return_value = "Label" label = MagicMock() label.to_string.return_value = self.return_value - self.dataset = pd.DataFrame([{TARGET_COLUMN: [label], DATA: "example"}]) + self.dataset = pd.DataFrame([{TARGET_COLUMN: [label], DATA: "example", "Comments": "comment"}]) def test_format(self): formatter = FastTextCategoryFormatter(TARGET_COLUMN) dataset = formatter.format(self.dataset) - expected_dataset = pd.DataFrame([f"__label__{self.return_value} example"]) + expected_dataset = pd.DataFrame([f"__label__{self.return_value} example comment"]) self.assertEqual(dataset.to_csv(index=False, header=None), expected_dataset.to_csv(index=False, header=None)) diff --git a/backend/data_export/tests/test_task.py b/backend/data_export/tests/test_task.py index d1d11976..a60e2d78 100644 --- a/backend/data_export/tests/test_task.py +++ b/backend/data_export/tests/test_task.py @@ -61,6 +61,8 @@ class TestExportCategory(TestExport): self.example2 = mommy.make("ExportedExample", project=self.project.item, text="example2") self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin) self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator) + self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) + self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.data1 = self.data_to_text(self.example1) self.data2 = self.data_to_text(self.example2) @@ -70,16 +72,16 @@ class TestExportCategory(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [self.category1.to_string()]}, - {**self.data2, "label": []}, + {**self.data1, "label": [self.category1.to_string()], "Comments": self.comment1}, + {**self.data2, "label": [], "Comments": []}, ], self.project.approver.username: [ - {**self.data1, "label": []}, - {**self.data2, "label": []}, + {**self.data1, "label": [], "Comments": []}, + {**self.data2, "label": [], "Comments": []}, ], self.project.annotator.username: [ - {**self.data1, "label": [self.category2.to_string()]}, - {**self.data2, "label": []}, + {**self.data1, "label": [self.category2.to_string()], "Comments": self.comment2}, + {**self.data2, "label": [], "Comments": []}, ], } for username, dataset in expected_datasets.items(): @@ -92,6 +94,7 @@ class TestExportCategory(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), + "Comments": [self.comment1, self.comment2] }, {**self.data2, "label": []}, ] @@ -100,7 +103,7 @@ class TestExportCategory(TestExport): def test_confirmed_and_non_collaborative(self): self.prepare_data() datasets = self.export_dataset(confirmed_only=True) - expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()]}]} + expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()], "Comments": self.comment1}]} for username, dataset in expected_datasets.items(): self.assertEqual(datasets[username], dataset) @@ -111,6 +114,7 @@ class TestExportCategory(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), + "Comments": [self.comment1, self.comment2] } ] self.assertEqual(dataset, expected_dataset) @@ -123,6 +127,8 @@ class TestExportSeq2seq(TestExport): self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed") self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin) self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator) + self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) + self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.data1 = self.data_to_text(self.example1) self.data2 = self.data_to_text(self.example2) @@ -132,16 +138,16 @@ class TestExportSeq2seq(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [self.text1.text]}, - {**self.data2, "label": []}, + {**self.data1, "label": [self.text1.text], "Comments": self.comment1}, + {**self.data2, "label": [], "Comments": []}, ], self.project.approver.username: [ - {**self.data1, "label": []}, - {**self.data2, "label": []}, + {**self.data1, "label": [], "Comments": []}, + {**self.data2, "label": [], "Comments": []}, ], self.project.annotator.username: [ - {**self.data1, "label": [self.text2.text]}, - {**self.data2, "label": []}, + {**self.data1, "label": [self.text2.text], "Comments": self.comment2}, + {**self.data2, "label": [], "Comments": []}, ], } for username, dataset in expected_datasets.items(): @@ -154,6 +160,7 @@ class TestExportSeq2seq(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), + "Comments": [self.comment1, self.comment2] }, {**self.data2, "label": []}, ] @@ -164,7 +171,7 @@ class TestExportSeq2seq(TestExport): datasets = self.export_dataset(confirmed_only=True) expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [self.text1.text]}, + {**self.data1, "label": [self.text1.text], "Comments": self.comment1}, ], self.project.approver.username: [], self.project.annotator.username: [], @@ -179,6 +186,7 @@ class TestExportSeq2seq(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), + "Comments": [self.comment1, self.comment2] } ] self.assertEqual(dataset, expected_dataset) @@ -191,6 +199,8 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed") self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin) self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator) + self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) + self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) self.span = mommy.make( "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1 ) @@ -207,20 +217,22 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": [self.category1.to_string()], + "Comments": self.comment1 }, - {**self.data2, "entities": [], "cats": []}, + {**self.data2, "entities": [], "cats": [], "Comments": []}, ], self.project.annotator.username: [ { **self.data1, "entities": [], "cats": [self.category2.to_string()], + "Comments": self.comment2 }, - {**self.data2, "entities": [], "cats": []}, + {**self.data2, "entities": [], "cats": [], "Comments": []}, ], self.project.approver.username: [ - {**self.data1, "entities": [], "cats": []}, - {**self.data2, "entities": [], "cats": []}, + {**self.data1, "entities": [], "cats": [], "Comments": []}, + {**self.data2, "entities": [], "cats": [], "Comments": []}, ], } for username, dataset in expected_datasets.items(): @@ -234,8 +246,9 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": sorted([self.category1.to_string(), self.category2.to_string()]), + "Comments": self.comment1 }, - {**self.data2, "entities": [], "cats": []}, + {**self.data2, "entities": [], "cats": [], "Comments": []}, ] self.assertEqual(dataset, expected_dataset) @@ -248,6 +261,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": [self.category1.to_string()], + "Comments": self.comment1 }, ], self.project.annotator.username: [], @@ -264,6 +278,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": sorted([self.category1.to_string(), self.category2.to_string()]), + "Comments": [self.comment1, self.comment2] }, ] self.assertEqual(dataset, expected_dataset) @@ -281,6 +296,8 @@ class TestExportSequenceLabeling(TestExport): ) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed") + self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) + self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) self.data1 = self.data_to_text(self.example1) self.data2 = self.data_to_text(self.example2) @@ -289,16 +306,16 @@ class TestExportSequenceLabeling(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [list(self.span1.to_tuple())]}, - {**self.data2, "label": []}, + {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": self.comment1}, + {**self.data2, "label": [], "Comments": []}, ], self.project.annotator.username: [ - {**self.data1, "label": [list(self.span2.to_tuple())]}, - {**self.data2, "label": []}, + {**self.data1, "label": [list(self.span2.to_tuple())], "Comments": self.comment2}, + {**self.data2, "label": [], "Comments": []}, ], self.project.approver.username: [ - {**self.data1, "label": []}, - {**self.data2, "label": []}, + {**self.data1, "label": [], "Comments": []}, + {**self.data2, "label": [], "Comments": []}, ], } for username, dataset in expected_datasets.items(): @@ -311,6 +328,7 @@ class TestExportSequenceLabeling(TestExport): { **self.data1, "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())], + "Comments": [self.comment1, self.comment2] }, {**self.data2, "label": []}, ] @@ -321,7 +339,7 @@ class TestExportSequenceLabeling(TestExport): datasets = self.export_dataset(confirmed_only=True) expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [list(self.span1.to_tuple())]}, + {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": self.comment1}, ], self.project.annotator.username: [], self.project.approver.username: [], @@ -336,6 +354,7 @@ class TestExportSequenceLabeling(TestExport): { **self.data1, "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())], + "Comments": [self.comment1, self.comment2] }, ] self.assertEqual(dataset, expected_dataset) @@ -348,6 +367,8 @@ class TestExportSpeechToText(TestExport): self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed") self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin) self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator) + self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) + self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.data1 = self.data_to_filename(self.example1) self.data2 = self.data_to_filename(self.example2) @@ -357,16 +378,16 @@ class TestExportSpeechToText(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [self.text1.text]}, - {**self.data2, "label": []}, + {**self.data1, "label": [self.text1.text], "Comments": self.comment1}, + {**self.data2, "label": [], "Comments": []}, ], self.project.approver.username: [ - {**self.data1, "label": []}, - {**self.data2, "label": []}, + {**self.data1, "label": [], "Comments": []}, + {**self.data2, "label": [], "Comments": []}, ], self.project.annotator.username: [ - {**self.data1, "label": [self.text2.text]}, - {**self.data2, "label": []}, + {**self.data1, "label": [self.text2.text], "Comments": self.comment2}, + {**self.data2, "label": [], "Comments": []}, ], } for username, dataset in expected_datasets.items(): @@ -379,6 +400,7 @@ class TestExportSpeechToText(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), + "Comments": [self.comment1, self.comment2] }, {**self.data2, "label": []}, ] @@ -389,7 +411,7 @@ class TestExportSpeechToText(TestExport): datasets = self.export_dataset(confirmed_only=True) expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [self.text1.text]}, + {**self.data1, "label": [self.text1.text], "Comments": self.comment1}, ], self.project.annotator.username: [], self.project.approver.username: [], @@ -404,6 +426,7 @@ class TestExportSpeechToText(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), + "Comments": [self.comment1, self.comment2] } ] self.assertEqual(dataset, expected_dataset) @@ -416,6 +439,8 @@ class TestExportImageClassification(TestExport): self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed") self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin) self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator) + self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) + self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.data1 = self.data_to_filename(self.example1) self.data2 = self.data_to_filename(self.example2) @@ -428,19 +453,21 @@ class TestExportImageClassification(TestExport): { **self.data1, "label": [self.category1.to_string()], + "Comments": self.comment1 }, - {**self.data2, "label": []}, + {**self.data2, "label": [], "Comments": self.comment2}, ], self.project.approver.username: [ - {**self.data1, "label": []}, - {**self.data2, "label": []}, + {**self.data1, "label": [], "Comments": []}, + {**self.data2, "label": [], "Comments": []}, ], self.project.annotator.username: [ { **self.data1, "label": [self.category2.to_string()], + "Comments": self.comment2 }, - {**self.data2, "label": []}, + {**self.data2, "label": [], "Comments": []}, ], } for username, dataset in expected_datasets.items(): @@ -453,8 +480,9 @@ class TestExportImageClassification(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), + "Comments": [self.comment1, self.comment2] }, - {**self.data2, "label": []}, + {**self.data2, "label": [], "Comments": []}, ] self.assertEqual(dataset, expected_dataset) @@ -494,6 +522,8 @@ class TestExportRelation(TestExport): self.relation = mommy.make( "ExportedRelation", from_id=self.span1, to_id=self.span2, example=self.example1, user=self.project.admin ) + self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) + self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.data1 = self.data_to_text(self.example1) self.data2 = self.data_to_text(self.example2) @@ -507,20 +537,22 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict()], "relations": [self.relation.to_dict()], + "Comments": self.comment1 }, - {**self.data2, "entities": [], "relations": []}, + {**self.data2, "entities": [], "relations": [], "Comments": []}, ], self.project.annotator.username: [ { **self.data1, "entities": [self.span3.to_dict()], "relations": [], + "Comments": self.comment2 }, - {**self.data2, "entities": [], "relations": []}, + {**self.data2, "entities": [], "relations": [], "Comments": self.comment2}, ], self.project.approver.username: [ - {**self.data1, "entities": [], "relations": []}, - {**self.data2, "entities": [], "relations": []}, + {**self.data1, "entities": [], "relations": [], "Comments": []}, + {**self.data2, "entities": [], "relations": [], "Comments": []}, ], } for username, dataset in expected_datasets.items(): @@ -534,8 +566,9 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()], "relations": [self.relation.to_dict()], + "Comments": [self.comment1, self.comment2] }, - {**self.data2, "entities": [], "relations": []}, + {**self.data2, "entities": [], "relations": [], "Comments": []}, ] self.assertEqual(dataset, expected_dataset) @@ -548,6 +581,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict()], "relations": [self.relation.to_dict()], + "Comments": self.comment1 }, ], self.project.annotator.username: [], @@ -564,6 +598,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()], "relations": [self.relation.to_dict()], + "Comments": [self.comment1, self.comment2] } ] self.assertEqual(dataset, expected_dataset) From c236f81ba60e2f37f115050d9bb813c60c72a496 Mon Sep 17 00:00:00 2001 From: Casey Date: Fri, 15 Jul 2022 16:11:57 -0700 Subject: [PATCH 6/9] update test cases --- backend/data_export/models.py | 3 + backend/data_export/pipeline/factories.py | 33 +++++- backend/data_export/pipeline/formatters.py | 2 +- backend/data_export/tests/test_dataset.py | 6 +- backend/data_export/tests/test_formatters.py | 13 ++- backend/data_export/tests/test_task.py | 103 ++++++++++--------- 6 files changed, 100 insertions(+), 60 deletions(-) diff --git a/backend/data_export/models.py b/backend/data_export/models.py index ae6dd201..197bb6be 100644 --- a/backend/data_export/models.py +++ b/backend/data_export/models.py @@ -85,5 +85,8 @@ class ExportedComment(Comment): def to_string(self) -> str: return self.text + def to_dict(self): + return {"id": self.id, "comment": self.text} + class Meta: proxy = True diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py index be139819..2b2e2e96 100644 --- a/backend/data_export/pipeline/factories.py +++ b/backend/data_export/pipeline/factories.py @@ -71,29 +71,52 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]: JSONL.name: [ DictFormatter(Spans.column), DictFormatter(Relations.column), + DictFormatter(Comments.column), RenameFormatter(**mapper_relation_extraction), ] if use_relation - else [TupledSpanFormatter(Spans.column), RenameFormatter(**mapper_sequence_labeling)] + else [ + TupledSpanFormatter(Spans.column), + ListedCategoryFormatter(Comments.column), + RenameFormatter(**mapper_sequence_labeling) + ] }, SEQ2SEQ: { - CSV.name: [JoinedCategoryFormatter(Texts.column), RenameFormatter(**mapper_seq2seq)], - JSON.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_seq2seq)], - JSONL.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_seq2seq)], + CSV.name: [ + JoinedCategoryFormatter(Texts.column), + JoinedCategoryFormatter(Comments.column), + RenameFormatter(**mapper_seq2seq) + ], + JSON.name: [ + ListedCategoryFormatter(Texts.column), + ListedCategoryFormatter(Comments.column), + RenameFormatter(**mapper_seq2seq) + ], + JSONL.name: [ + ListedCategoryFormatter(Texts.column), + ListedCategoryFormatter(Comments.column), + RenameFormatter(**mapper_seq2seq) + ], }, IMAGE_CLASSIFICATION: { JSONL.name: [ ListedCategoryFormatter(Categories.column), + ListedCategoryFormatter(Comments.column), RenameFormatter(**mapper_image_classification), ], }, SPEECH2TEXT: { - JSONL.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_speech2text)], + JSONL.name: [ + ListedCategoryFormatter(Texts.column), + ListedCategoryFormatter(Comments.column), + RenameFormatter(**mapper_speech2text) + ], }, INTENT_DETECTION_AND_SLOT_FILLING: { JSONL.name: [ ListedCategoryFormatter(Categories.column), TupledSpanFormatter(Spans.column), + ListedCategoryFormatter(Comments.column), RenameFormatter(**mapper_intent_detection), ] }, diff --git a/backend/data_export/pipeline/formatters.py b/backend/data_export/pipeline/formatters.py index 41efa19d..1ce74270 100644 --- a/backend/data_export/pipeline/formatters.py +++ b/backend/data_export/pipeline/formatters.py @@ -52,7 +52,7 @@ class FastTextCategoryFormatter(Formatter): ) dataset[self.target_column] = dataset[self.target_column].fillna("") dataset["Comments"] = dataset["Comments"].apply( - lambda comments: "#".join(comment.to_string() for comment in comments) + lambda comments: " ".join(f"__comment__{comment.to_string()}" for comment in comments) ) dataset = dataset[self.target_column] + " " + dataset[DATA] + " " + dataset["Comments"] return dataset diff --git a/backend/data_export/tests/test_dataset.py b/backend/data_export/tests/test_dataset.py index df9eea57..9ec57a37 100644 --- a/backend/data_export/tests/test_dataset.py +++ b/backend/data_export/tests/test_dataset.py @@ -18,12 +18,12 @@ class TestDataset(unittest.TestCase): self.labels = MagicMock() self.labels.__iter__.return_value = [label] comment = MagicMock() - comment.find_by.return_value = {"Comments": ["comment"]} + comment.find_by.return_value = {"comments": ["comment"]} self.comments = MagicMock() self.comments.__iter__.return_value = [comment] def test_to_dataframe(self): - dataset = Dataset(self.examples, self.labels) + dataset = Dataset(self.examples, self.labels, self.comments) df = dataset.to_dataframe() - expected = pd.DataFrame([{"data": "example", "labels": ["label"], "Comments": ["comment"]}]) + expected = pd.DataFrame([{"data": "example", "labels": ["label"], "comments": ["comment"]}]) assert_frame_equal(df, expected) diff --git a/backend/data_export/tests/test_formatters.py b/backend/data_export/tests/test_formatters.py index 09deb97b..c1c31661 100644 --- a/backend/data_export/tests/test_formatters.py +++ b/backend/data_export/tests/test_formatters.py @@ -75,15 +75,20 @@ class TestTupledSpanFormatter(unittest.TestCase): class TestFastTextFormatter(unittest.TestCase): def setUp(self): - self.return_value = "Label" + self.return_value_label = "Label" + self.return_value_comment = "Comment" label = MagicMock() - label.to_string.return_value = self.return_value - self.dataset = pd.DataFrame([{TARGET_COLUMN: [label], DATA: "example", "Comments": "comment"}]) + comment = MagicMock() + label.to_string.return_value = self.return_value_label + comment.to_string.return_value = self.return_value_comment + self.dataset = pd.DataFrame([{TARGET_COLUMN: [label], DATA: "example", "Comments": [comment]}]) def test_format(self): formatter = FastTextCategoryFormatter(TARGET_COLUMN) dataset = formatter.format(self.dataset) - expected_dataset = pd.DataFrame([f"__label__{self.return_value} example comment"]) + expected_dataset = pd.DataFrame( + [f"__label__{self.return_value_label} example __comment__{self.return_value_comment}"] + ) self.assertEqual(dataset.to_csv(index=False, header=None), expected_dataset.to_csv(index=False, header=None)) diff --git a/backend/data_export/tests/test_task.py b/backend/data_export/tests/test_task.py index a60e2d78..458f0317 100644 --- a/backend/data_export/tests/test_task.py +++ b/backend/data_export/tests/test_task.py @@ -62,7 +62,7 @@ class TestExportCategory(TestExport): self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin) self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator) self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) - self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) + self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.data1 = self.data_to_text(self.example1) self.data2 = self.data_to_text(self.example2) @@ -72,7 +72,7 @@ class TestExportCategory(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [self.category1.to_string()], "Comments": self.comment1}, + {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]}, {**self.data2, "label": [], "Comments": []}, ], self.project.approver.username: [ @@ -80,7 +80,7 @@ class TestExportCategory(TestExport): {**self.data2, "label": [], "Comments": []}, ], self.project.annotator.username: [ - {**self.data1, "label": [self.category2.to_string()], "Comments": self.comment2}, + {**self.data1, "label": [self.category2.to_string()], "Comments": [self.comment2.to_string()]}, {**self.data2, "label": [], "Comments": []}, ], } @@ -94,16 +94,20 @@ class TestExportCategory(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": [self.comment1, self.comment2] + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) }, - {**self.data2, "label": []}, + {**self.data2, "label": [], "Comments": []}, ] self.assertEqual(dataset, expected_dataset) def test_confirmed_and_non_collaborative(self): self.prepare_data() datasets = self.export_dataset(confirmed_only=True) - expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()], "Comments": self.comment1}]} + expected_datasets = { + self.project.admin.username: [ + {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]} + ] + } for username, dataset in expected_datasets.items(): self.assertEqual(datasets[username], dataset) @@ -114,7 +118,7 @@ class TestExportCategory(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": [self.comment1, self.comment2] + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) } ] self.assertEqual(dataset, expected_dataset) @@ -128,7 +132,7 @@ class TestExportSeq2seq(TestExport): self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin) self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator) self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) - self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) + self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.data1 = self.data_to_text(self.example1) self.data2 = self.data_to_text(self.example2) @@ -138,7 +142,7 @@ class TestExportSeq2seq(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [self.text1.text], "Comments": self.comment1}, + {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]}, {**self.data2, "label": [], "Comments": []}, ], self.project.approver.username: [ @@ -146,7 +150,7 @@ class TestExportSeq2seq(TestExport): {**self.data2, "label": [], "Comments": []}, ], self.project.annotator.username: [ - {**self.data1, "label": [self.text2.text], "Comments": self.comment2}, + {**self.data1, "label": [self.text2.text], "Comments": [self.comment2.to_string()]}, {**self.data2, "label": [], "Comments": []}, ], } @@ -160,9 +164,9 @@ class TestExportSeq2seq(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), - "Comments": [self.comment1, self.comment2] + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) }, - {**self.data2, "label": []}, + {**self.data2, "label": [], "Comments": []}, ] self.assertEqual(dataset, expected_dataset) @@ -171,7 +175,7 @@ class TestExportSeq2seq(TestExport): datasets = self.export_dataset(confirmed_only=True) expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [self.text1.text], "Comments": self.comment1}, + {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]}, ], self.project.approver.username: [], self.project.annotator.username: [], @@ -186,7 +190,7 @@ class TestExportSeq2seq(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), - "Comments": [self.comment1, self.comment2] + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) } ] self.assertEqual(dataset, expected_dataset) @@ -200,7 +204,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin) self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator) self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) - self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) + self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator) self.span = mommy.make( "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1 ) @@ -217,7 +221,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": [self.category1.to_string()], - "Comments": self.comment1 + "Comments": [self.comment1.to_string()] }, {**self.data2, "entities": [], "cats": [], "Comments": []}, ], @@ -226,7 +230,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [], "cats": [self.category2.to_string()], - "Comments": self.comment2 + "Comments": [self.comment2.to_string()] }, {**self.data2, "entities": [], "cats": [], "Comments": []}, ], @@ -246,7 +250,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": self.comment1 + "Comments": [self.comment1.to_string(), self.comment2.to_string()] }, {**self.data2, "entities": [], "cats": [], "Comments": []}, ] @@ -261,7 +265,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": [self.category1.to_string()], - "Comments": self.comment1 + "Comments": [self.comment1.to_string()] }, ], self.project.annotator.username: [], @@ -278,7 +282,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": [self.comment1, self.comment2] + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) }, ] self.assertEqual(dataset, expected_dataset) @@ -297,7 +301,7 @@ class TestExportSequenceLabeling(TestExport): mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed") self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) - self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) + self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator) self.data1 = self.data_to_text(self.example1) self.data2 = self.data_to_text(self.example2) @@ -306,11 +310,11 @@ class TestExportSequenceLabeling(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": self.comment1}, + {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": [self.comment1.to_string()]}, {**self.data2, "label": [], "Comments": []}, ], self.project.annotator.username: [ - {**self.data1, "label": [list(self.span2.to_tuple())], "Comments": self.comment2}, + {**self.data1, "label": [list(self.span2.to_tuple())], "Comments": [self.comment2.to_string()]}, {**self.data2, "label": [], "Comments": []}, ], self.project.approver.username: [ @@ -328,9 +332,9 @@ class TestExportSequenceLabeling(TestExport): { **self.data1, "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())], - "Comments": [self.comment1, self.comment2] + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) }, - {**self.data2, "label": []}, + {**self.data2, "label": [], "Comments": []}, ] self.assertEqual(dataset, expected_dataset) @@ -339,7 +343,7 @@ class TestExportSequenceLabeling(TestExport): datasets = self.export_dataset(confirmed_only=True) expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": self.comment1}, + {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": [self.comment1.to_string()]}, ], self.project.annotator.username: [], self.project.approver.username: [], @@ -354,7 +358,7 @@ class TestExportSequenceLabeling(TestExport): { **self.data1, "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())], - "Comments": [self.comment1, self.comment2] + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) }, ] self.assertEqual(dataset, expected_dataset) @@ -368,7 +372,7 @@ class TestExportSpeechToText(TestExport): self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin) self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator) self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) - self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) + self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.data1 = self.data_to_filename(self.example1) self.data2 = self.data_to_filename(self.example2) @@ -378,7 +382,7 @@ class TestExportSpeechToText(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [self.text1.text], "Comments": self.comment1}, + {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]}, {**self.data2, "label": [], "Comments": []}, ], self.project.approver.username: [ @@ -386,7 +390,7 @@ class TestExportSpeechToText(TestExport): {**self.data2, "label": [], "Comments": []}, ], self.project.annotator.username: [ - {**self.data1, "label": [self.text2.text], "Comments": self.comment2}, + {**self.data1, "label": [self.text2.text], "Comments": [self.comment2.to_string()]}, {**self.data2, "label": [], "Comments": []}, ], } @@ -400,9 +404,9 @@ class TestExportSpeechToText(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), - "Comments": [self.comment1, self.comment2] + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) }, - {**self.data2, "label": []}, + {**self.data2, "label": [], "Comments": []}, ] self.assertEqual(dataset, expected_dataset) @@ -411,7 +415,7 @@ class TestExportSpeechToText(TestExport): datasets = self.export_dataset(confirmed_only=True) expected_datasets = { self.project.admin.username: [ - {**self.data1, "label": [self.text1.text], "Comments": self.comment1}, + {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]}, ], self.project.annotator.username: [], self.project.approver.username: [], @@ -426,7 +430,7 @@ class TestExportSpeechToText(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), - "Comments": [self.comment1, self.comment2] + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) } ] self.assertEqual(dataset, expected_dataset) @@ -440,7 +444,7 @@ class TestExportImageClassification(TestExport): self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin) self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator) self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) - self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) + self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.data1 = self.data_to_filename(self.example1) self.data2 = self.data_to_filename(self.example2) @@ -453,9 +457,9 @@ class TestExportImageClassification(TestExport): { **self.data1, "label": [self.category1.to_string()], - "Comments": self.comment1 + "Comments": [self.comment1.to_string()] }, - {**self.data2, "label": [], "Comments": self.comment2}, + {**self.data2, "label": [], "Comments": []}, ], self.project.approver.username: [ {**self.data1, "label": [], "Comments": []}, @@ -465,7 +469,7 @@ class TestExportImageClassification(TestExport): { **self.data1, "label": [self.category2.to_string()], - "Comments": self.comment2 + "Comments": [self.comment2.to_string()] }, {**self.data2, "label": [], "Comments": []}, ], @@ -480,7 +484,7 @@ class TestExportImageClassification(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": [self.comment1, self.comment2] + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) }, {**self.data2, "label": [], "Comments": []}, ] @@ -489,7 +493,11 @@ class TestExportImageClassification(TestExport): def test_confirmed_and_non_collaborative(self): self.prepare_data() datasets = self.export_dataset(confirmed_only=True) - expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()]}]} + expected_datasets = { + self.project.admin.username: [ + {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]} + ] + } for username, dataset in expected_datasets.items(): self.assertEqual(datasets[username], dataset) @@ -500,6 +508,7 @@ class TestExportImageClassification(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) } ] self.assertEqual(dataset, expected_dataset) @@ -523,7 +532,7 @@ class TestExportRelation(TestExport): "ExportedRelation", from_id=self.span1, to_id=self.span2, example=self.example1, user=self.project.admin ) self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) - self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator) + self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) self.data1 = self.data_to_text(self.example1) self.data2 = self.data_to_text(self.example2) @@ -537,7 +546,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict()], "relations": [self.relation.to_dict()], - "Comments": self.comment1 + "Comments": [self.comment1.to_dict()] }, {**self.data2, "entities": [], "relations": [], "Comments": []}, ], @@ -546,9 +555,9 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span3.to_dict()], "relations": [], - "Comments": self.comment2 + "Comments": [self.comment2.to_dict()] }, - {**self.data2, "entities": [], "relations": [], "Comments": self.comment2}, + {**self.data2, "entities": [], "relations": [], "Comments": []}, ], self.project.approver.username: [ {**self.data1, "entities": [], "relations": [], "Comments": []}, @@ -566,7 +575,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()], "relations": [self.relation.to_dict()], - "Comments": [self.comment1, self.comment2] + "Comments": [self.comment1.to_dict(), self.comment2.to_dict()] }, {**self.data2, "entities": [], "relations": [], "Comments": []}, ] @@ -581,7 +590,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict()], "relations": [self.relation.to_dict()], - "Comments": self.comment1 + "Comments": [self.comment1.to_dict()] }, ], self.project.annotator.username: [], @@ -598,7 +607,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()], "relations": [self.relation.to_dict()], - "Comments": [self.comment1, self.comment2] + "Comments": [self.comment1.to_dict(), self.comment2.to_dict()] } ] self.assertEqual(dataset, expected_dataset) From add5d4c71a722c1a47fb5e50c6a83109a6430ffa Mon Sep 17 00:00:00 2001 From: Hironsan Date: Mon, 25 Jul 2022 14:03:48 +0900 Subject: [PATCH 7/9] Apply black to fix codes --- backend/data_export/pipeline/factories.py | 10 ++--- backend/data_export/tests/test_task.py | 52 ++++++++++------------- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py index f895df4a..614c01df 100644 --- a/backend/data_export/pipeline/factories.py +++ b/backend/data_export/pipeline/factories.py @@ -90,24 +90,24 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]: else [ TupledSpanFormatter(Spans.column), ListedCategoryFormatter(Comments.column), - RenameFormatter(**mapper_sequence_labeling) + RenameFormatter(**mapper_sequence_labeling), ] }, SEQ2SEQ: { CSV.name: [ JoinedCategoryFormatter(Texts.column), JoinedCategoryFormatter(Comments.column), - RenameFormatter(**mapper_seq2seq) + RenameFormatter(**mapper_seq2seq), ], JSON.name: [ ListedCategoryFormatter(Texts.column), ListedCategoryFormatter(Comments.column), - RenameFormatter(**mapper_seq2seq) + RenameFormatter(**mapper_seq2seq), ], JSONL.name: [ ListedCategoryFormatter(Texts.column), ListedCategoryFormatter(Comments.column), - RenameFormatter(**mapper_seq2seq) + RenameFormatter(**mapper_seq2seq), ], }, IMAGE_CLASSIFICATION: { @@ -121,7 +121,7 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]: JSONL.name: [ ListedCategoryFormatter(Texts.column), ListedCategoryFormatter(Comments.column), - RenameFormatter(**mapper_speech2text) + RenameFormatter(**mapper_speech2text), ], }, INTENT_DETECTION_AND_SLOT_FILLING: { diff --git a/backend/data_export/tests/test_task.py b/backend/data_export/tests/test_task.py index cf9df979..8e25dbc2 100644 --- a/backend/data_export/tests/test_task.py +++ b/backend/data_export/tests/test_task.py @@ -97,7 +97,7 @@ class TestExportCategory(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), }, {**self.data2, "label": [], "Comments": []}, ] @@ -121,7 +121,7 @@ class TestExportCategory(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), } ] self.assertEqual(dataset, expected_dataset) @@ -167,7 +167,7 @@ class TestExportSeq2seq(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), }, {**self.data2, "label": [], "Comments": []}, ] @@ -193,7 +193,7 @@ class TestExportSeq2seq(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), } ] self.assertEqual(dataset, expected_dataset) @@ -224,7 +224,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": [self.category1.to_string()], - "Comments": [self.comment1.to_string()] + "Comments": [self.comment1.to_string()], }, {**self.data2, "entities": [], "cats": [], "Comments": []}, ], @@ -233,7 +233,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [], "cats": [self.category2.to_string()], - "Comments": [self.comment2.to_string()] + "Comments": [self.comment2.to_string()], }, {**self.data2, "entities": [], "cats": [], "Comments": []}, ], @@ -253,7 +253,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": [self.comment1.to_string(), self.comment2.to_string()] + "Comments": [self.comment1.to_string(), self.comment2.to_string()], }, {**self.data2, "entities": [], "cats": [], "Comments": []}, ] @@ -268,7 +268,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": [self.category1.to_string()], - "Comments": [self.comment1.to_string()] + "Comments": [self.comment1.to_string()], }, ], self.project.annotator.username: [], @@ -285,7 +285,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), }, ] self.assertEqual(dataset, expected_dataset) @@ -335,7 +335,7 @@ class TestExportSequenceLabeling(TestExport): { **self.data1, "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())], - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), }, {**self.data2, "label": [], "Comments": []}, ] @@ -361,7 +361,7 @@ class TestExportSequenceLabeling(TestExport): { **self.data1, "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())], - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), }, ] self.assertEqual(dataset, expected_dataset) @@ -407,7 +407,7 @@ class TestExportSpeechToText(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), }, {**self.data2, "label": [], "Comments": []}, ] @@ -433,7 +433,7 @@ class TestExportSpeechToText(TestExport): { **self.data1, "label": sorted([self.text1.text, self.text2.text]), - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), } ] self.assertEqual(dataset, expected_dataset) @@ -457,11 +457,7 @@ class TestExportImageClassification(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - { - **self.data1, - "label": [self.category1.to_string()], - "Comments": [self.comment1.to_string()] - }, + {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]}, {**self.data2, "label": [], "Comments": []}, ], self.project.approver.username: [ @@ -469,11 +465,7 @@ class TestExportImageClassification(TestExport): {**self.data2, "label": [], "Comments": []}, ], self.project.annotator.username: [ - { - **self.data1, - "label": [self.category2.to_string()], - "Comments": [self.comment2.to_string()] - }, + {**self.data1, "label": [self.category2.to_string()], "Comments": [self.comment2.to_string()]}, {**self.data2, "label": [], "Comments": []}, ], } @@ -487,7 +479,7 @@ class TestExportImageClassification(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), }, {**self.data2, "label": [], "Comments": []}, ] @@ -511,7 +503,7 @@ class TestExportImageClassification(TestExport): { **self.data1, "label": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]) + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), } ] self.assertEqual(dataset, expected_dataset) @@ -761,7 +753,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict()], "relations": [self.relation.to_dict()], - "Comments": [self.comment1.to_dict()] + "Comments": [self.comment1.to_dict()], }, {**self.data2, "entities": [], "relations": [], "Comments": []}, ], @@ -770,7 +762,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span3.to_dict()], "relations": [], - "Comments": [self.comment2.to_dict()] + "Comments": [self.comment2.to_dict()], }, {**self.data2, "entities": [], "relations": [], "Comments": []}, ], @@ -790,7 +782,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()], "relations": [self.relation.to_dict()], - "Comments": [self.comment1.to_dict(), self.comment2.to_dict()] + "Comments": [self.comment1.to_dict(), self.comment2.to_dict()], }, {**self.data2, "entities": [], "relations": [], "Comments": []}, ] @@ -805,7 +797,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict()], "relations": [self.relation.to_dict()], - "Comments": [self.comment1.to_dict()] + "Comments": [self.comment1.to_dict()], }, ], self.project.annotator.username: [], @@ -822,7 +814,7 @@ class TestExportRelation(TestExport): **self.data1, "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()], "relations": [self.relation.to_dict()], - "Comments": [self.comment1.to_dict(), self.comment2.to_dict()] + "Comments": [self.comment1.to_dict(), self.comment2.to_dict()], } ] self.assertEqual(dataset, expected_dataset) From b795bb358a307ce0eefadf4d8267586a859c9e0c Mon Sep 17 00:00:00 2001 From: Hironsan Date: Mon, 25 Jul 2022 14:11:13 +0900 Subject: [PATCH 8/9] Add a migration file for exported comment --- .../migrations/0004_exportedcomment.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 backend/data_export/migrations/0004_exportedcomment.py diff --git a/backend/data_export/migrations/0004_exportedcomment.py b/backend/data_export/migrations/0004_exportedcomment.py new file mode 100644 index 00000000..04201d27 --- /dev/null +++ b/backend/data_export/migrations/0004_exportedcomment.py @@ -0,0 +1,24 @@ +# Generated by Django 4.0.4 on 2022-07-25 05:07 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("examples", "0006_alter_example_upload_name"), + ("data_export", "0003_exportedsegmentation"), + ] + + operations = [ + migrations.CreateModel( + name="ExportedComment", + fields=[], + options={ + "proxy": True, + "indexes": [], + "constraints": [], + }, + bases=("examples.comment",), + ), + ] From ac7dc796fa16e962195d002f3320e006cd26f397 Mon Sep 17 00:00:00 2001 From: Casey Date: Mon, 25 Jul 2022 16:33:27 -0700 Subject: [PATCH 9/9] update test cases for new methods --- backend/data_export/pipeline/factories.py | 22 ++++++- backend/data_export/tests/test_task.py | 78 +++++++++++++---------- 2 files changed, 64 insertions(+), 36 deletions(-) diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py index 614c01df..65b226d8 100644 --- a/backend/data_export/pipeline/factories.py +++ b/backend/data_export/pipeline/factories.py @@ -132,10 +132,26 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]: RenameFormatter(**mapper_intent_detection), ] }, - BOUNDING_BOX: {JSONL.name: [DictFormatter(BoundingBoxes.column), RenameFormatter(**mapper_bounding_box)]}, - SEGMENTATION: {JSONL.name: [DictFormatter(Segments.column), RenameFormatter(**mapper_segmentation)]}, + BOUNDING_BOX: { + JSONL.name: [ + DictFormatter(BoundingBoxes.column), + DictFormatter(Comments.column), + RenameFormatter(**mapper_bounding_box), + ] + }, + SEGMENTATION: { + JSONL.name: [ + DictFormatter(Segments.column), + DictFormatter(Comments.column), + RenameFormatter(**mapper_segmentation), + ] + }, IMAGE_CAPTIONING: { - JSONL.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_image_captioning)] + JSONL.name: [ + ListedCategoryFormatter(Texts.column), + ListedCategoryFormatter(Comments.column), + RenameFormatter(**mapper_image_captioning), + ] }, } return mapping[project.project_type][file_format] diff --git a/backend/data_export/tests/test_task.py b/backend/data_export/tests/test_task.py index 8e25dbc2..31b65687 100644 --- a/backend/data_export/tests/test_task.py +++ b/backend/data_export/tests/test_task.py @@ -253,7 +253,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport): **self.data1, "entities": [list(self.span.to_tuple())], "cats": sorted([self.category1.to_string(), self.category2.to_string()]), - "Comments": [self.comment1.to_string(), self.comment2.to_string()], + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), }, {**self.data2, "entities": [], "cats": [], "Comments": []}, ] @@ -514,6 +514,8 @@ class TestExportBoundingBox(TestExport): self.project = prepare_project(BOUNDING_BOX, collaborative_annotation=collaborative) self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed") self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed") + self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) + self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator) self.bbox1 = mommy.make( "ExportedBoundingBox", example=self.example1, user=self.project.admin, x=0, y=0, width=10, height=10 ) @@ -532,19 +534,17 @@ class TestExportBoundingBox(TestExport): { **self.data1, "bbox": [self.bbox1.to_dict()], + "Comments": [self.comment1.to_dict()], }, - {**self.data2, "bbox": []}, + {**self.data2, "bbox": [], "Comments": []}, ], self.project.approver.username: [ - {**self.data1, "bbox": []}, - {**self.data2, "bbox": []}, + {**self.data1, "bbox": [], "Comments": []}, + {**self.data2, "bbox": [], "Comments": []}, ], self.project.annotator.username: [ - { - **self.data1, - "bbox": [self.bbox2.to_dict()], - }, - {**self.data2, "bbox": []}, + {**self.data1, "bbox": [self.bbox2.to_dict()], "Comments": [self.comment2.to_dict()]}, + {**self.data2, "bbox": [], "Comments": []}, ], } for username, dataset in expected_datasets.items(): @@ -557,15 +557,20 @@ class TestExportBoundingBox(TestExport): { **self.data1, "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()], + "Comments": [self.comment1.to_dict(), self.comment2.to_dict()], }, - {**self.data2, "bbox": []}, + {**self.data2, "bbox": [], "Comments": []}, ] self.assertEqual(dataset, expected_dataset) def test_confirmed_and_non_collaborative(self): self.prepare_data() datasets = self.export_dataset(confirmed_only=True) - expected_datasets = {self.project.admin.username: [{**self.data1, "bbox": [self.bbox1.to_dict()]}]} + expected_datasets = { + self.project.admin.username: [ + {**self.data1, "bbox": [self.bbox1.to_dict()], "Comments": [self.comment1.to_dict()]} + ] + } for username, dataset in expected_datasets.items(): self.assertEqual(datasets[username], dataset) @@ -576,6 +581,7 @@ class TestExportBoundingBox(TestExport): { **self.data1, "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()], + "Comments": [self.comment1.to_dict(), self.comment2.to_dict()], } ] self.assertEqual(dataset, expected_dataset) @@ -586,6 +592,8 @@ class TestExportSegmentation(TestExport): self.project = prepare_project(SEGMENTATION, collaborative_annotation=collaborative) self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed") self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed") + self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) + self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator) self.seg1 = mommy.make("ExportedSegmentation", example=self.example1, user=self.project.admin, points=[0, 1]) self.seg2 = mommy.make( "ExportedSegmentation", example=self.example1, user=self.project.annotator, points=[1, 2] @@ -600,22 +608,16 @@ class TestExportSegmentation(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - { - **self.data1, - self.column: [self.seg1.to_dict()], - }, - {**self.data2, self.column: []}, + {**self.data1, self.column: [self.seg1.to_dict()], "Comments": [self.comment1.to_dict()]}, + {**self.data2, self.column: [], "Comments": []}, ], self.project.approver.username: [ - {**self.data1, self.column: []}, - {**self.data2, self.column: []}, + {**self.data1, self.column: [], "Comments": []}, + {**self.data2, self.column: [], "Comments": []}, ], self.project.annotator.username: [ - { - **self.data1, - self.column: [self.seg2.to_dict()], - }, - {**self.data2, self.column: []}, + {**self.data1, self.column: [self.seg2.to_dict()], "Comments": [self.comment2.to_dict()]}, + {**self.data2, self.column: [], "Comments": []}, ], } for username, dataset in expected_datasets.items(): @@ -628,15 +630,20 @@ class TestExportSegmentation(TestExport): { **self.data1, self.column: [self.seg1.to_dict(), self.seg2.to_dict()], + "Comments": [self.comment1.to_dict(), self.comment2.to_dict()], }, - {**self.data2, self.column: []}, + {**self.data2, self.column: [], "Comments": []}, ] self.assertEqual(dataset, expected_dataset) def test_confirmed_and_non_collaborative(self): self.prepare_data() datasets = self.export_dataset(confirmed_only=True) - expected_datasets = {self.project.admin.username: [{**self.data1, self.column: [self.seg1.to_dict()]}]} + expected_datasets = { + self.project.admin.username: [ + {**self.data1, self.column: [self.seg1.to_dict()], "Comments": [self.comment1.to_dict()]} + ] + } for username, dataset in expected_datasets.items(): self.assertEqual(datasets[username], dataset) @@ -647,6 +654,7 @@ class TestExportSegmentation(TestExport): { **self.data1, self.column: [self.seg1.to_dict(), self.seg2.to_dict()], + "Comments": [self.comment1.to_dict(), self.comment2.to_dict()], } ] self.assertEqual(dataset, expected_dataset) @@ -657,6 +665,8 @@ class TestExportImageCaptioning(TestExport): self.project = prepare_project(IMAGE_CAPTIONING, collaborative_annotation=collaborative) self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed") self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed") + self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin) + self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator) self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin) self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator) mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin) @@ -669,16 +679,16 @@ class TestExportImageCaptioning(TestExport): datasets = self.export_dataset() expected_datasets = { self.project.admin.username: [ - {**self.data1, self.column: [self.text1.text]}, - {**self.data2, self.column: []}, + {**self.data1, self.column: [self.text1.text], "Comments": [self.comment1.to_string()]}, + {**self.data2, self.column: [], "Comments": []}, ], self.project.approver.username: [ - {**self.data1, self.column: []}, - {**self.data2, self.column: []}, + {**self.data1, self.column: [], "Comments": []}, + {**self.data2, self.column: [], "Comments": []}, ], self.project.annotator.username: [ - {**self.data1, self.column: [self.text2.text]}, - {**self.data2, self.column: []}, + {**self.data1, self.column: [self.text2.text], "Comments": [self.comment2.to_string()]}, + {**self.data2, self.column: [], "Comments": []}, ], } for username, dataset in expected_datasets.items(): @@ -691,8 +701,9 @@ class TestExportImageCaptioning(TestExport): { **self.data1, self.column: sorted([self.text1.text, self.text2.text]), + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), }, - {**self.data2, self.column: []}, + {**self.data2, self.column: [], "Comments": []}, ] self.assertEqual(dataset, expected_dataset) @@ -701,7 +712,7 @@ class TestExportImageCaptioning(TestExport): datasets = self.export_dataset(confirmed_only=True) expected_datasets = { self.project.admin.username: [ - {**self.data1, self.column: [self.text1.text]}, + {**self.data1, self.column: [self.text1.text], "Comments": [self.comment1.to_string()]}, ], self.project.approver.username: [], self.project.annotator.username: [], @@ -716,6 +727,7 @@ class TestExportImageCaptioning(TestExport): { **self.data1, self.column: sorted([self.text1.text, self.text2.text]), + "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]), } ] self.assertEqual(dataset, expected_dataset)