From 9179916d6aca2fac50b2b2026cdac9479ac53974 Mon Sep 17 00:00:00 2001
From: Casey <jiaqzhang@expediagroup.com>
Date: Fri, 8 Jul 2022 12:59:26 -0700
Subject: [PATCH 1/9] Export comments alongside labels in dataset exports

---
 backend/data_export/celery_tasks.py       | 10 +++++----
 backend/data_export/models.py             | 14 +++++++++++-
 backend/data_export/pipeline/comments.py  | 26 +++++++++++++++++++++++
 backend/data_export/pipeline/dataset.py   |  9 ++++++--
 backend/data_export/pipeline/factories.py | 10 ++++++++-
 5 files changed, 61 insertions(+), 8 deletions(-)
 create mode 100644 backend/data_export/pipeline/comments.py

diff --git a/backend/data_export/celery_tasks.py b/backend/data_export/celery_tasks.py
index 2e273e7a..5ad93e42 100644
--- a/backend/data_export/celery_tasks.py
+++ b/backend/data_export/celery_tasks.py
@@ -8,9 +8,9 @@ from django.conf import settings
 from django.shortcuts import get_object_or_404
 
 from .pipeline.dataset import Dataset
-from .pipeline.factories import create_formatter, create_labels, create_writer
+from .pipeline.factories import create_formatter, create_labels, create_writer, create_comment
 from .pipeline.services import ExportApplicationService
-from data_export.models import ExportedExample
+from data_export.models import ExportedExample, ExportedComment
 from projects.models import Member, Project
 
 logger = get_task_logger(__name__)
@@ -23,7 +23,8 @@ def create_collaborative_dataset(project: Project, dirpath: str, confirmed_only:
     else:
         examples = ExportedExample.objects.filter(project=project)
     labels = create_labels(project, examples)
-    dataset = Dataset(examples, labels, is_text_project)
+    comments = create_comment(examples)
+    dataset = Dataset(examples, labels, comments, is_text_project)
 
     service = ExportApplicationService(dataset, formatters, writer)
 
@@ -40,7 +41,8 @@ def create_individual_dataset(project: Project, dirpath: str, confirmed_only: bo
         else:
             examples = ExportedExample.objects.filter(project=project)
         labels = create_labels(project, examples, member.user)
-        dataset = Dataset(examples, labels, is_text_project)
+        comments = create_comment(examples, member.user)
+        dataset = Dataset(examples, labels, comments, is_text_project)
 
         service = ExportApplicationService(dataset, formatters, writer)
 
diff --git a/backend/data_export/models.py b/backend/data_export/models.py
index f7492dfc..af4cd937 100644
--- a/backend/data_export/models.py
+++ b/backend/data_export/models.py
@@ -2,7 +2,7 @@ from typing import Any, Dict, Protocol, Tuple
 
 from django.db import models
 
-from examples.models import Example
+from examples.models import Example, Comment
 from labels.models import Category, Relation, Span, TextLabel
 from projects.models import Project
 
@@ -79,3 +79,15 @@ class ExportedText(TextLabel):
 
     class Meta:
         proxy = True
+
+
+class ExportedComment(Comment):
+    def to_string(self) -> str:
+        return self.text
+
+    def to_dict(self):
+        return {"id": self.example.id, "comment": self.text}
+
+    class Meta:
+        proxy = True
+
diff --git a/backend/data_export/pipeline/comments.py b/backend/data_export/pipeline/comments.py
new file mode 100644
index 00000000..4aeb6bab
--- /dev/null
+++ b/backend/data_export/pipeline/comments.py
@@ -0,0 +1,26 @@
+import abc
+from collections import defaultdict
+from typing import Dict, List, Tuple
+
+from django.db.models import QuerySet
+
+from data_export.models import (
+    ExportedExample,
+    ExportedComment,
+)
+
+class Comments(abc.ABC):
+    comment_class = ExportedComment
+    column = "Comments"
+    fields: Tuple[str, ...] = ("example", "user")  # To boost performance
+
+    def __init__(self, examples: QuerySet[ExportedExample], user=None):
+        self.comment_groups = defaultdict(list)
+        comments = self.comment_class.objects.filter(example__in=examples)
+        if user:
+            comments = comments.filter(user=user)
+        for comment in comments.select_related(*self.fields):
+            self.comment_groups[comment.example.id].append(comment)
+
+    def find_by(self, example_id: int) -> Dict[str, List[ExportedComment]]:
+        return {self.column: self.comment_groups[example_id]}
diff --git a/backend/data_export/pipeline/dataset.py b/backend/data_export/pipeline/dataset.py
index 9d184704..0bee9179 100644
--- a/backend/data_export/pipeline/dataset.py
+++ b/backend/data_export/pipeline/dataset.py
@@ -4,20 +4,25 @@ import pandas as pd
 from django.db.models.query import QuerySet
 
 from .labels import Labels
-from data_export.models import ExportedExample
+from .comments import Comments
+from data_export.models import ExportedExample, ExportedComment
 
 
 class Dataset:
-    def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], is_text_project=True):
+    def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], comments: List[Comments],
+                 is_text_project=True):
         self.examples = examples
         self.labels = labels
         self.is_text_project = is_text_project
+        self.comments = comments
 
     def __iter__(self) -> Iterator[Dict[str, Any]]:
         for example in self.examples:
             data = example.to_dict(self.is_text_project)
             for labels in self.labels:
                 data.update(**labels.find_by(example.id))
+            for comment in self.comments:
+                data.update(**comment.find_by(example.id))
             yield data
 
     def to_dataframe(self) -> pd.DataFrame:
diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py
index 76f3bc7b..12e4afce 100644
--- a/backend/data_export/pipeline/factories.py
+++ b/backend/data_export/pipeline/factories.py
@@ -14,7 +14,8 @@ from .formatters import (
     TupledSpanFormatter,
 )
 from .labels import Categories, Labels, Relations, Spans, Texts
-from data_export.models import DATA, ExportedExample
+from .comments import Comments
+from data_export.models import DATA, ExportedExample, ExportedComment
 from projects.models import (
     DOCUMENT_CLASSIFICATION,
     IMAGE_CLASSIFICATION,
@@ -51,14 +52,17 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
         DOCUMENT_CLASSIFICATION: {
             CSV.name: [
                 JoinedCategoryFormatter(Categories.column),
+                JoinedCategoryFormatter(Comments.column),
                 RenameFormatter(**mapper_text_classification),
             ],
             JSON.name: [
                 ListedCategoryFormatter(Categories.column),
+                ListedCategoryFormatter(Comments.column),
                 RenameFormatter(**mapper_text_classification),
             ],
             JSONL.name: [
                 ListedCategoryFormatter(Categories.column),
+                ListedCategoryFormatter(Comments.column),
                 RenameFormatter(**mapper_text_classification),
             ],
             FastText.name: [FastTextCategoryFormatter(Categories.column)],
@@ -114,3 +118,7 @@ def create_labels(project: Project, examples: QuerySet[ExportedExample], user=No
     label_collections = select_label_collection(project)
     labels = [label_collection(examples=examples, user=user) for label_collection in label_collections]
     return labels
+
+
+def create_comment(examples: QuerySet[ExportedExample], user=None) -> List[Comments]:
+    return [Comments(examples=examples, user=user)]

From 6a445470851a3427eb4bc085f0ef79a15e74ef05 Mon Sep 17 00:00:00 2001
From: Casey <jiaqzhang@expediagroup.com>
Date: Fri, 8 Jul 2022 13:09:32 -0700
Subject: [PATCH 2/9] delete unused to_dict

---
 backend/data_export/models.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/backend/data_export/models.py b/backend/data_export/models.py
index af4cd937..b4248699 100644
--- a/backend/data_export/models.py
+++ b/backend/data_export/models.py
@@ -85,9 +85,6 @@ class ExportedComment(Comment):
     def to_string(self) -> str:
         return self.text
 
-    def to_dict(self):
-        return {"id": self.example.id, "comment": self.text}
-
     class Meta:
         proxy = True
 

From 73b03673635d734a4887d5d054fce216c5348902 Mon Sep 17 00:00:00 2001
From: Casey <jiaqzhang@expediagroup.com>
Date: Fri, 8 Jul 2022 14:09:19 -0700
Subject: [PATCH 3/9] Append comments to FastText export lines

---
 backend/data_export/pipeline/formatters.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/backend/data_export/pipeline/formatters.py b/backend/data_export/pipeline/formatters.py
index 56b9144e..41efa19d 100644
--- a/backend/data_export/pipeline/formatters.py
+++ b/backend/data_export/pipeline/formatters.py
@@ -46,12 +46,15 @@ class FastTextCategoryFormatter(Formatter):
         """Format the label column to `__label__LabelA __label__LabelB` format.
         Also, drop the columns except for `data` and `self.target_column`.
         """
-        dataset = dataset[[DATA, self.target_column]]
+        dataset = dataset[[DATA, self.target_column, "Comments"]]
         dataset[self.target_column] = dataset[self.target_column].apply(
             lambda labels: " ".join(sorted(f"__label__{label.to_string()}" for label in labels))
         )
         dataset[self.target_column] = dataset[self.target_column].fillna("")
-        dataset = dataset[self.target_column] + " " + dataset[DATA]
+        dataset["Comments"] = dataset["Comments"].apply(
+            lambda comments: "#".join(comment.to_string() for comment in comments)
+        )
+        dataset = dataset[self.target_column] + " " + dataset[DATA] + " " + dataset["Comments"]
         return dataset
 
 

From 1537d515312819d5f1f3c7c230a0850dca61ba12 Mon Sep 17 00:00:00 2001
From: Hironsan <light.tree.1.13@gmail.com>
Date: Mon, 11 Jul 2022 10:57:13 +0900
Subject: [PATCH 4/9] Format code according to flake8, black, and isort

---
 backend/data_export/celery_tasks.py       | 9 +++++++--
 backend/data_export/models.py             | 3 +--
 backend/data_export/pipeline/comments.py  | 6 ++----
 backend/data_export/pipeline/dataset.py   | 9 +++++----
 backend/data_export/pipeline/factories.py | 4 ++--
 5 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/backend/data_export/celery_tasks.py b/backend/data_export/celery_tasks.py
index 5ad93e42..d5f0b8cc 100644
--- a/backend/data_export/celery_tasks.py
+++ b/backend/data_export/celery_tasks.py
@@ -8,9 +8,14 @@ from django.conf import settings
 from django.shortcuts import get_object_or_404
 
 from .pipeline.dataset import Dataset
-from .pipeline.factories import create_formatter, create_labels, create_writer, create_comment
+from .pipeline.factories import (
+    create_comment,
+    create_formatter,
+    create_labels,
+    create_writer,
+)
 from .pipeline.services import ExportApplicationService
-from data_export.models import ExportedExample, ExportedComment
+from data_export.models import ExportedExample
 from projects.models import Member, Project
 
 logger = get_task_logger(__name__)
diff --git a/backend/data_export/models.py b/backend/data_export/models.py
index b4248699..ae6dd201 100644
--- a/backend/data_export/models.py
+++ b/backend/data_export/models.py
@@ -2,7 +2,7 @@ from typing import Any, Dict, Protocol, Tuple
 
 from django.db import models
 
-from examples.models import Example, Comment
+from examples.models import Comment, Example
 from labels.models import Category, Relation, Span, TextLabel
 from projects.models import Project
 
@@ -87,4 +87,3 @@ class ExportedComment(Comment):
 
     class Meta:
         proxy = True
-
diff --git a/backend/data_export/pipeline/comments.py b/backend/data_export/pipeline/comments.py
index 4aeb6bab..1e42428e 100644
--- a/backend/data_export/pipeline/comments.py
+++ b/backend/data_export/pipeline/comments.py
@@ -4,10 +4,8 @@ from typing import Dict, List, Tuple
 
 from django.db.models import QuerySet
 
-from data_export.models import (
-    ExportedExample,
-    ExportedComment,
-)
+from data_export.models import ExportedComment, ExportedExample
+
 
 class Comments(abc.ABC):
     comment_class = ExportedComment
diff --git a/backend/data_export/pipeline/dataset.py b/backend/data_export/pipeline/dataset.py
index 0bee9179..c2c85c84 100644
--- a/backend/data_export/pipeline/dataset.py
+++ b/backend/data_export/pipeline/dataset.py
@@ -3,14 +3,15 @@ from typing import Any, Dict, Iterator, List
 import pandas as pd
 from django.db.models.query import QuerySet
 
-from .labels import Labels
 from .comments import Comments
-from data_export.models import ExportedExample, ExportedComment
+from .labels import Labels
+from data_export.models import ExportedExample
 
 
 class Dataset:
-    def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], comments: List[Comments],
-                 is_text_project=True):
+    def __init__(
+        self, examples: QuerySet[ExportedExample], labels: List[Labels], comments: List[Comments], is_text_project=True
+    ):
         self.examples = examples
         self.labels = labels
         self.is_text_project = is_text_project
diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py
index 12e4afce..be139819 100644
--- a/backend/data_export/pipeline/factories.py
+++ b/backend/data_export/pipeline/factories.py
@@ -4,6 +4,7 @@ from django.db.models import QuerySet
 
 from . import writers
 from .catalog import CSV, JSON, JSONL, FastText
+from .comments import Comments
 from .formatters import (
     DictFormatter,
     FastTextCategoryFormatter,
@@ -14,8 +15,7 @@ from .formatters import (
     TupledSpanFormatter,
 )
 from .labels import Categories, Labels, Relations, Spans, Texts
-from .comments import Comments
-from data_export.models import DATA, ExportedExample, ExportedComment
+from data_export.models import DATA, ExportedExample
 from projects.models import (
     DOCUMENT_CLASSIFICATION,
     IMAGE_CLASSIFICATION,

From 7d34acf66a20d9394597dbd9035efd0fd5b24355 Mon Sep 17 00:00:00 2001
From: Casey <jiaqzhang@expediagroup.com>
Date: Wed, 13 Jul 2022 15:27:16 -0700
Subject: [PATCH 5/9] Update export tests to expect the Comments column

---
 backend/data_export/tests/test_dataset.py    |   6 +-
 backend/data_export/tests/test_formatters.py |   4 +-
 backend/data_export/tests/test_task.py       | 121 ++++++++++++-------
 3 files changed, 85 insertions(+), 46 deletions(-)

diff --git a/backend/data_export/tests/test_dataset.py b/backend/data_export/tests/test_dataset.py
index f7029ebf..df9eea57 100644
--- a/backend/data_export/tests/test_dataset.py
+++ b/backend/data_export/tests/test_dataset.py
@@ -17,9 +17,13 @@ class TestDataset(unittest.TestCase):
         label.find_by.return_value = {"labels": ["label"]}
         self.labels = MagicMock()
         self.labels.__iter__.return_value = [label]
+        comment = MagicMock()
+        comment.find_by.return_value = {"Comments": ["comment"]}
+        self.comments = MagicMock()
+        self.comments.__iter__.return_value = [comment]
 
     def test_to_dataframe(self):
-        dataset = Dataset(self.examples, self.labels)
+        dataset = Dataset(self.examples, self.labels, self.comments)
         df = dataset.to_dataframe()
-        expected = pd.DataFrame([{"data": "example", "labels": ["label"]}])
+        expected = pd.DataFrame([{"data": "example", "labels": ["label"], "Comments": ["comment"]}])
         assert_frame_equal(df, expected)
diff --git a/backend/data_export/tests/test_formatters.py b/backend/data_export/tests/test_formatters.py
index cb371b3a..09deb97b 100644
--- a/backend/data_export/tests/test_formatters.py
+++ b/backend/data_export/tests/test_formatters.py
@@ -78,12 +78,14 @@ class TestFastTextFormatter(unittest.TestCase):
         self.return_value = "Label"
         label = MagicMock()
         label.to_string.return_value = self.return_value
-        self.dataset = pd.DataFrame([{TARGET_COLUMN: [label], DATA: "example"}])
+        comment = MagicMock()
+        comment.to_string.return_value = "comment"
+        self.dataset = pd.DataFrame([{TARGET_COLUMN: [label], DATA: "example", "Comments": [comment]}])
 
     def test_format(self):
         formatter = FastTextCategoryFormatter(TARGET_COLUMN)
         dataset = formatter.format(self.dataset)
-        expected_dataset = pd.DataFrame([f"__label__{self.return_value} example"])
+        expected_dataset = pd.DataFrame([f"__label__{self.return_value} example comment"])
         self.assertEqual(dataset.to_csv(index=False, header=None), expected_dataset.to_csv(index=False, header=None))
 
 
diff --git a/backend/data_export/tests/test_task.py b/backend/data_export/tests/test_task.py
index d1d11976..a60e2d78 100644
--- a/backend/data_export/tests/test_task.py
+++ b/backend/data_export/tests/test_task.py
@@ -61,6 +61,8 @@ class TestExportCategory(TestExport):
         self.example2 = mommy.make("ExportedExample", project=self.project.item, text="example2")
         self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
         self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
+        self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
+        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.data1 = self.data_to_text(self.example1)
         self.data2 = self.data_to_text(self.example2)
@@ -70,16 +72,16 @@ class TestExportCategory(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [self.category1.to_string()]},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [self.category1.to_string()], "Comments": self.comment1},
+                {**self.data2, "label": [], "Comments": []},
             ],
             self.project.approver.username: [
-                {**self.data1, "label": []},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [], "Comments": []},
+                {**self.data2, "label": [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {**self.data1, "label": [self.category2.to_string()]},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [self.category2.to_string()], "Comments": self.comment2},
+                {**self.data2, "label": [], "Comments": []},
             ],
         }
         for username, dataset in expected_datasets.items():
@@ -92,6 +94,7 @@ class TestExportCategory(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
+                "Comments": [self.comment1, self.comment2]
             },
             {**self.data2, "label": []},
         ]
@@ -100,7 +103,7 @@ class TestExportCategory(TestExport):
     def test_confirmed_and_non_collaborative(self):
         self.prepare_data()
         datasets = self.export_dataset(confirmed_only=True)
-        expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()]}]}
+        expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()], "Comments": self.comment1}]}
         for username, dataset in expected_datasets.items():
             self.assertEqual(datasets[username], dataset)
 
@@ -111,6 +114,7 @@ class TestExportCategory(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
+                "Comments": [self.comment1, self.comment2]
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -123,6 +127,8 @@ class TestExportSeq2seq(TestExport):
         self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
         self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
         self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
+        self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
+        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.data1 = self.data_to_text(self.example1)
         self.data2 = self.data_to_text(self.example2)
@@ -132,16 +138,16 @@ class TestExportSeq2seq(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [self.text1.text]},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [self.text1.text], "Comments": self.comment1},
+                {**self.data2, "label": [], "Comments": []},
             ],
             self.project.approver.username: [
-                {**self.data1, "label": []},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [], "Comments": []},
+                {**self.data2, "label": [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {**self.data1, "label": [self.text2.text]},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [self.text2.text], "Comments": self.comment2},
+                {**self.data2, "label": [], "Comments": []},
             ],
         }
         for username, dataset in expected_datasets.items():
@@ -154,6 +160,7 @@ class TestExportSeq2seq(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
+                "Comments": [self.comment1, self.comment2]
             },
             {**self.data2, "label": []},
         ]
@@ -164,7 +171,7 @@ class TestExportSeq2seq(TestExport):
         datasets = self.export_dataset(confirmed_only=True)
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [self.text1.text]},
+                {**self.data1, "label": [self.text1.text], "Comments": self.comment1},
             ],
             self.project.approver.username: [],
             self.project.annotator.username: [],
@@ -179,6 +186,7 @@ class TestExportSeq2seq(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
+                "Comments": [self.comment1, self.comment2]
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -191,6 +199,8 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
         self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
         self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
         self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
+        self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
+        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
         self.span = mommy.make(
             "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
         )
@@ -207,20 +217,22 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                     **self.data1,
                     "entities": [list(self.span.to_tuple())],
                     "cats": [self.category1.to_string()],
+                    "Comments": self.comment1
                 },
-                {**self.data2, "entities": [], "cats": []},
+                {**self.data2, "entities": [], "cats": [], "Comments": []},
             ],
             self.project.annotator.username: [
                 {
                     **self.data1,
                     "entities": [],
                     "cats": [self.category2.to_string()],
+                    "Comments": self.comment2
                 },
-                {**self.data2, "entities": [], "cats": []},
+                {**self.data2, "entities": [], "cats": [], "Comments": []},
             ],
             self.project.approver.username: [
-                {**self.data1, "entities": [], "cats": []},
-                {**self.data2, "entities": [], "cats": []},
+                {**self.data1, "entities": [], "cats": [], "Comments": []},
+                {**self.data2, "entities": [], "cats": [], "Comments": []},
             ],
         }
         for username, dataset in expected_datasets.items():
@@ -234,8 +246,9 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                 **self.data1,
                 "entities": [list(self.span.to_tuple())],
                 "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
+                "Comments": self.comment1
             },
-            {**self.data2, "entities": [], "cats": []},
+            {**self.data2, "entities": [], "cats": [], "Comments": []},
         ]
         self.assertEqual(dataset, expected_dataset)
 
@@ -248,6 +261,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                     **self.data1,
                     "entities": [list(self.span.to_tuple())],
                     "cats": [self.category1.to_string()],
+                    "Comments": self.comment1
                 },
             ],
             self.project.annotator.username: [],
@@ -264,6 +278,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                 **self.data1,
                 "entities": [list(self.span.to_tuple())],
                 "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
+                "Comments": [self.comment1, self.comment2]
             },
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -281,6 +296,8 @@ class TestExportSequenceLabeling(TestExport):
         )
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
+        self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
+        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
         self.data1 = self.data_to_text(self.example1)
         self.data2 = self.data_to_text(self.example2)
 
@@ -289,16 +306,16 @@ class TestExportSequenceLabeling(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [list(self.span1.to_tuple())]},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": self.comment1},
+                {**self.data2, "label": [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {**self.data1, "label": [list(self.span2.to_tuple())]},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [list(self.span2.to_tuple())], "Comments": self.comment2},
+                {**self.data2, "label": [], "Comments": []},
             ],
             self.project.approver.username: [
-                {**self.data1, "label": []},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [], "Comments": []},
+                {**self.data2, "label": [], "Comments": []},
             ],
         }
         for username, dataset in expected_datasets.items():
@@ -311,6 +328,7 @@ class TestExportSequenceLabeling(TestExport):
             {
                 **self.data1,
                 "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
+                "Comments": [self.comment1, self.comment2]
             },
             {**self.data2, "label": []},
         ]
@@ -321,7 +339,7 @@ class TestExportSequenceLabeling(TestExport):
         datasets = self.export_dataset(confirmed_only=True)
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [list(self.span1.to_tuple())]},
+                {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": self.comment1},
             ],
             self.project.annotator.username: [],
             self.project.approver.username: [],
@@ -336,6 +354,7 @@ class TestExportSequenceLabeling(TestExport):
             {
                 **self.data1,
                 "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
+                "Comments": [self.comment1, self.comment2]
             },
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -348,6 +367,8 @@ class TestExportSpeechToText(TestExport):
         self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
         self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
         self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
+        self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
+        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.data1 = self.data_to_filename(self.example1)
         self.data2 = self.data_to_filename(self.example2)
@@ -357,16 +378,16 @@ class TestExportSpeechToText(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [self.text1.text]},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [self.text1.text], "Comments": self.comment1},
+                {**self.data2, "label": [], "Comments": []},
             ],
             self.project.approver.username: [
-                {**self.data1, "label": []},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [], "Comments": []},
+                {**self.data2, "label": [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {**self.data1, "label": [self.text2.text]},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [self.text2.text], "Comments": self.comment2},
+                {**self.data2, "label": [], "Comments": []},
             ],
         }
         for username, dataset in expected_datasets.items():
@@ -379,6 +400,7 @@ class TestExportSpeechToText(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
+                "Comments": [self.comment1, self.comment2]
             },
             {**self.data2, "label": []},
         ]
@@ -389,7 +411,7 @@ class TestExportSpeechToText(TestExport):
         datasets = self.export_dataset(confirmed_only=True)
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [self.text1.text]},
+                {**self.data1, "label": [self.text1.text], "Comments": self.comment1},
             ],
             self.project.annotator.username: [],
             self.project.approver.username: [],
@@ -404,6 +426,7 @@ class TestExportSpeechToText(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
+                "Comments": [self.comment1, self.comment2]
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -416,6 +439,8 @@ class TestExportImageClassification(TestExport):
         self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
         self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
         self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
+        self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
+        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.data1 = self.data_to_filename(self.example1)
         self.data2 = self.data_to_filename(self.example2)
@@ -428,19 +453,21 @@ class TestExportImageClassification(TestExport):
                 {
                     **self.data1,
                     "label": [self.category1.to_string()],
+                    "Comments": self.comment1
                 },
-                {**self.data2, "label": []},
+                {**self.data2, "label": [], "Comments": self.comment2},
             ],
             self.project.approver.username: [
-                {**self.data1, "label": []},
-                {**self.data2, "label": []},
+                {**self.data1, "label": [], "Comments": []},
+                {**self.data2, "label": [], "Comments": []},
             ],
             self.project.annotator.username: [
                 {
                     **self.data1,
                     "label": [self.category2.to_string()],
+                    "Comments": self.comment2
                 },
-                {**self.data2, "label": []},
+                {**self.data2, "label": [], "Comments": []},
             ],
         }
         for username, dataset in expected_datasets.items():
@@ -453,8 +480,9 @@ class TestExportImageClassification(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
+                "Comments": [self.comment1, self.comment2]
             },
-            {**self.data2, "label": []},
+            {**self.data2, "label": [], "Comments": []},
         ]
         self.assertEqual(dataset, expected_dataset)
 
@@ -494,6 +522,8 @@ class TestExportRelation(TestExport):
         self.relation = mommy.make(
             "ExportedRelation", from_id=self.span1, to_id=self.span2, example=self.example1, user=self.project.admin
         )
+        self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
+        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.data1 = self.data_to_text(self.example1)
         self.data2 = self.data_to_text(self.example2)
@@ -507,20 +537,22 @@ class TestExportRelation(TestExport):
                     **self.data1,
                     "entities": [self.span1.to_dict(), self.span2.to_dict()],
                     "relations": [self.relation.to_dict()],
+                    "Comments": self.comment1
                 },
-                {**self.data2, "entities": [], "relations": []},
+                {**self.data2, "entities": [], "relations": [], "Comments": []},
             ],
             self.project.annotator.username: [
                 {
                     **self.data1,
                     "entities": [self.span3.to_dict()],
                     "relations": [],
+                    "Comments": self.comment2
                 },
-                {**self.data2, "entities": [], "relations": []},
+                {**self.data2, "entities": [], "relations": [], "Comments": self.comment2},
             ],
             self.project.approver.username: [
-                {**self.data1, "entities": [], "relations": []},
-                {**self.data2, "entities": [], "relations": []},
+                {**self.data1, "entities": [], "relations": [], "Comments": []},
+                {**self.data2, "entities": [], "relations": [], "Comments": []},
             ],
         }
         for username, dataset in expected_datasets.items():
@@ -534,8 +566,9 @@ class TestExportRelation(TestExport):
                 **self.data1,
                 "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
                 "relations": [self.relation.to_dict()],
+                "Comments": [self.comment1, self.comment2]
             },
-            {**self.data2, "entities": [], "relations": []},
+            {**self.data2, "entities": [], "relations": [], "Comments": []},
         ]
         self.assertEqual(dataset, expected_dataset)
 
@@ -548,6 +581,7 @@ class TestExportRelation(TestExport):
                     **self.data1,
                     "entities": [self.span1.to_dict(), self.span2.to_dict()],
                     "relations": [self.relation.to_dict()],
+                    "Comments": self.comment1
                 },
             ],
             self.project.annotator.username: [],
@@ -564,6 +598,7 @@ class TestExportRelation(TestExport):
                 **self.data1,
                 "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
                 "relations": [self.relation.to_dict()],
+                "Comments": [self.comment1, self.comment2]
             }
         ]
         self.assertEqual(dataset, expected_dataset)

From c236f81ba60e2f37f115050d9bb813c60c72a496 Mon Sep 17 00:00:00 2001
From: Casey <jiaqzhang@expediagroup.com>
Date: Fri, 15 Jul 2022 16:11:57 -0700
Subject: [PATCH 6/9] update test cases

---
 backend/data_export/models.py                |   3 +
 backend/data_export/pipeline/factories.py    |  33 +++++-
 backend/data_export/pipeline/formatters.py   |   2 +-
 backend/data_export/tests/test_dataset.py    |   6 +-
 backend/data_export/tests/test_formatters.py |  13 ++-
 backend/data_export/tests/test_task.py       | 103 ++++++++++---------
 6 files changed, 100 insertions(+), 60 deletions(-)

diff --git a/backend/data_export/models.py b/backend/data_export/models.py
index ae6dd201..197bb6be 100644
--- a/backend/data_export/models.py
+++ b/backend/data_export/models.py
@@ -85,5 +85,8 @@ class ExportedComment(Comment):
     def to_string(self) -> str:
         return self.text
 
+    def to_dict(self):
+        return {"id": self.id, "comment": self.text}
+
     class Meta:
         proxy = True
diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py
index be139819..2b2e2e96 100644
--- a/backend/data_export/pipeline/factories.py
+++ b/backend/data_export/pipeline/factories.py
@@ -71,29 +71,52 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
             JSONL.name: [
                 DictFormatter(Spans.column),
                 DictFormatter(Relations.column),
+                DictFormatter(Comments.column),
                 RenameFormatter(**mapper_relation_extraction),
             ]
             if use_relation
-            else [TupledSpanFormatter(Spans.column), RenameFormatter(**mapper_sequence_labeling)]
+            else [
+                TupledSpanFormatter(Spans.column),
+                ListedCategoryFormatter(Comments.column),
+                RenameFormatter(**mapper_sequence_labeling)
+            ]
         },
         SEQ2SEQ: {
-            CSV.name: [JoinedCategoryFormatter(Texts.column), RenameFormatter(**mapper_seq2seq)],
-            JSON.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_seq2seq)],
-            JSONL.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_seq2seq)],
+            CSV.name: [
+                JoinedCategoryFormatter(Texts.column),
+                JoinedCategoryFormatter(Comments.column),
+                RenameFormatter(**mapper_seq2seq)
+            ],
+            JSON.name: [
+                ListedCategoryFormatter(Texts.column),
+                ListedCategoryFormatter(Comments.column),
+                RenameFormatter(**mapper_seq2seq)
+            ],
+            JSONL.name: [
+                ListedCategoryFormatter(Texts.column),
+                ListedCategoryFormatter(Comments.column),
+                RenameFormatter(**mapper_seq2seq)
+            ],
         },
         IMAGE_CLASSIFICATION: {
             JSONL.name: [
                 ListedCategoryFormatter(Categories.column),
+                ListedCategoryFormatter(Comments.column),
                 RenameFormatter(**mapper_image_classification),
             ],
         },
         SPEECH2TEXT: {
-            JSONL.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_speech2text)],
+            JSONL.name: [
+                ListedCategoryFormatter(Texts.column),
+                ListedCategoryFormatter(Comments.column),
+                RenameFormatter(**mapper_speech2text)
+            ],
         },
         INTENT_DETECTION_AND_SLOT_FILLING: {
             JSONL.name: [
                 ListedCategoryFormatter(Categories.column),
                 TupledSpanFormatter(Spans.column),
+                ListedCategoryFormatter(Comments.column),
                 RenameFormatter(**mapper_intent_detection),
             ]
         },
diff --git a/backend/data_export/pipeline/formatters.py b/backend/data_export/pipeline/formatters.py
index 41efa19d..1ce74270 100644
--- a/backend/data_export/pipeline/formatters.py
+++ b/backend/data_export/pipeline/formatters.py
@@ -52,7 +52,7 @@ class FastTextCategoryFormatter(Formatter):
         )
         dataset[self.target_column] = dataset[self.target_column].fillna("")
         dataset["Comments"] = dataset["Comments"].apply(
-            lambda comments: "#".join(comment.to_string() for comment in comments)
+            lambda comments: " ".join(f"__comment__{comment.to_string()}" for comment in comments)
         )
         dataset = dataset[self.target_column] + " " + dataset[DATA] + " " + dataset["Comments"]
         return dataset
diff --git a/backend/data_export/tests/test_dataset.py b/backend/data_export/tests/test_dataset.py
index df9eea57..9ec57a37 100644
--- a/backend/data_export/tests/test_dataset.py
+++ b/backend/data_export/tests/test_dataset.py
@@ -18,12 +18,12 @@ class TestDataset(unittest.TestCase):
         self.labels = MagicMock()
         self.labels.__iter__.return_value = [label]
         comment = MagicMock()
-        comment.find_by.return_value = {"Comments": ["comment"]}
+        comment.find_by.return_value = {"comments": ["comment"]}
         self.comments = MagicMock()
         self.comments.__iter__.return_value = [comment]
 
     def test_to_dataframe(self):
-        dataset = Dataset(self.examples, self.labels)
+        dataset = Dataset(self.examples, self.labels, self.comments)
         df = dataset.to_dataframe()
-        expected = pd.DataFrame([{"data": "example", "labels": ["label"], "Comments": ["comment"]}])
+        expected = pd.DataFrame([{"data": "example", "labels": ["label"], "comments": ["comment"]}])
         assert_frame_equal(df, expected)
diff --git a/backend/data_export/tests/test_formatters.py b/backend/data_export/tests/test_formatters.py
index 09deb97b..c1c31661 100644
--- a/backend/data_export/tests/test_formatters.py
+++ b/backend/data_export/tests/test_formatters.py
@@ -75,15 +75,20 @@ class TestTupledSpanFormatter(unittest.TestCase):
 
 class TestFastTextFormatter(unittest.TestCase):
     def setUp(self):
-        self.return_value = "Label"
+        self.return_value_label = "Label"
+        self.return_value_comment = "Comment"
         label = MagicMock()
-        label.to_string.return_value = self.return_value
-        self.dataset = pd.DataFrame([{TARGET_COLUMN: [label], DATA: "example", "Comments": "comment"}])
+        comment = MagicMock()
+        label.to_string.return_value = self.return_value_label
+        comment.to_string.return_value = self.return_value_comment
+        self.dataset = pd.DataFrame([{TARGET_COLUMN: [label], DATA: "example", "Comments": [comment]}])
 
     def test_format(self):
         formatter = FastTextCategoryFormatter(TARGET_COLUMN)
         dataset = formatter.format(self.dataset)
-        expected_dataset = pd.DataFrame([f"__label__{self.return_value} example comment"])
+        expected_dataset = pd.DataFrame(
+            [f"__label__{self.return_value_label} example __comment__{self.return_value_comment}"]
+        )
         self.assertEqual(dataset.to_csv(index=False, header=None), expected_dataset.to_csv(index=False, header=None))
 
 
diff --git a/backend/data_export/tests/test_task.py b/backend/data_export/tests/test_task.py
index a60e2d78..458f0317 100644
--- a/backend/data_export/tests/test_task.py
+++ b/backend/data_export/tests/test_task.py
@@ -62,7 +62,7 @@ class TestExportCategory(TestExport):
         self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
         self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
         self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
-        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
+        self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.data1 = self.data_to_text(self.example1)
         self.data2 = self.data_to_text(self.example2)
@@ -72,7 +72,7 @@ class TestExportCategory(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [self.category1.to_string()], "Comments": self.comment1},
+                {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]},
                 {**self.data2, "label": [], "Comments": []},
             ],
             self.project.approver.username: [
@@ -80,7 +80,7 @@ class TestExportCategory(TestExport):
                 {**self.data2, "label": [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {**self.data1, "label": [self.category2.to_string()], "Comments": self.comment2},
+                {**self.data1, "label": [self.category2.to_string()], "Comments": [self.comment2.to_string()]},
                 {**self.data2, "label": [], "Comments": []},
             ],
         }
@@ -94,16 +94,20 @@ class TestExportCategory(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": [self.comment1, self.comment2]
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             },
-            {**self.data2, "label": []},
+            {**self.data2, "label": [], "Comments": []},
         ]
         self.assertEqual(dataset, expected_dataset)
 
     def test_confirmed_and_non_collaborative(self):
         self.prepare_data()
         datasets = self.export_dataset(confirmed_only=True)
-        expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()], "Comments": self.comment1}]}
+        expected_datasets = {
+            self.project.admin.username: [
+                {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]}
+            ]
+        }
         for username, dataset in expected_datasets.items():
             self.assertEqual(datasets[username], dataset)
 
@@ -114,7 +118,7 @@ class TestExportCategory(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": [self.comment1, self.comment2]
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -128,7 +132,7 @@ class TestExportSeq2seq(TestExport):
         self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
         self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
         self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
-        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
+        self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.data1 = self.data_to_text(self.example1)
         self.data2 = self.data_to_text(self.example2)
@@ -138,7 +142,7 @@ class TestExportSeq2seq(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [self.text1.text], "Comments": self.comment1},
+                {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
                 {**self.data2, "label": [], "Comments": []},
             ],
             self.project.approver.username: [
@@ -146,7 +150,7 @@ class TestExportSeq2seq(TestExport):
                 {**self.data2, "label": [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {**self.data1, "label": [self.text2.text], "Comments": self.comment2},
+                {**self.data1, "label": [self.text2.text], "Comments": [self.comment2.to_string()]},
                 {**self.data2, "label": [], "Comments": []},
             ],
         }
@@ -160,9 +164,9 @@ class TestExportSeq2seq(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
-                "Comments": [self.comment1, self.comment2]
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             },
-            {**self.data2, "label": []},
+            {**self.data2, "label": [], "Comments": []},
         ]
         self.assertEqual(dataset, expected_dataset)
 
@@ -171,7 +175,7 @@ class TestExportSeq2seq(TestExport):
         datasets = self.export_dataset(confirmed_only=True)
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [self.text1.text], "Comments": self.comment1},
+                {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
             ],
             self.project.approver.username: [],
             self.project.annotator.username: [],
@@ -186,7 +190,7 @@ class TestExportSeq2seq(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
-                "Comments": [self.comment1, self.comment2]
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -200,7 +204,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
         self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
         self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
         self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
-        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
+        self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
         self.span = mommy.make(
             "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
         )
@@ -217,7 +221,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                     **self.data1,
                     "entities": [list(self.span.to_tuple())],
                     "cats": [self.category1.to_string()],
-                    "Comments": self.comment1
+                    "Comments": [self.comment1.to_string()]
                 },
                 {**self.data2, "entities": [], "cats": [], "Comments": []},
             ],
@@ -226,7 +230,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                     **self.data1,
                     "entities": [],
                     "cats": [self.category2.to_string()],
-                    "Comments": self.comment2
+                    "Comments": [self.comment2.to_string()]
                 },
                 {**self.data2, "entities": [], "cats": [], "Comments": []},
             ],
@@ -246,7 +250,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                 **self.data1,
                 "entities": [list(self.span.to_tuple())],
                 "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": self.comment1
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             },
             {**self.data2, "entities": [], "cats": [], "Comments": []},
         ]
@@ -261,7 +265,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                     **self.data1,
                     "entities": [list(self.span.to_tuple())],
                     "cats": [self.category1.to_string()],
-                    "Comments": self.comment1
+                    "Comments": [self.comment1.to_string()]
                 },
             ],
             self.project.annotator.username: [],
@@ -278,7 +282,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                 **self.data1,
                 "entities": [list(self.span.to_tuple())],
                 "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": [self.comment1, self.comment2]
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             },
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -297,7 +301,7 @@ class TestExportSequenceLabeling(TestExport):
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
         self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
-        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
+        self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
         self.data1 = self.data_to_text(self.example1)
         self.data2 = self.data_to_text(self.example2)
 
@@ -306,11 +310,11 @@ class TestExportSequenceLabeling(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": self.comment1},
+                {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": [self.comment1.to_string()]},
                 {**self.data2, "label": [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {**self.data1, "label": [list(self.span2.to_tuple())], "Comments": self.comment2},
+                {**self.data1, "label": [list(self.span2.to_tuple())], "Comments": [self.comment2.to_string()]},
                 {**self.data2, "label": [], "Comments": []},
             ],
             self.project.approver.username: [
@@ -328,9 +332,9 @@ class TestExportSequenceLabeling(TestExport):
             {
                 **self.data1,
                 "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
-                "Comments": [self.comment1, self.comment2]
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             },
-            {**self.data2, "label": []},
+            {**self.data2, "label": [], "Comments": []},
         ]
         self.assertEqual(dataset, expected_dataset)
 
@@ -339,7 +343,7 @@ class TestExportSequenceLabeling(TestExport):
         datasets = self.export_dataset(confirmed_only=True)
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": self.comment1},
+                {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": [self.comment1.to_string()]},
             ],
             self.project.annotator.username: [],
             self.project.approver.username: [],
@@ -354,7 +358,7 @@ class TestExportSequenceLabeling(TestExport):
             {
                 **self.data1,
                 "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
-                "Comments": [self.comment1, self.comment2]
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             },
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -368,7 +372,7 @@ class TestExportSpeechToText(TestExport):
         self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
         self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
         self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
-        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
+        self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.data1 = self.data_to_filename(self.example1)
         self.data2 = self.data_to_filename(self.example2)
@@ -378,7 +382,7 @@ class TestExportSpeechToText(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [self.text1.text], "Comments": self.comment1},
+                {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
                 {**self.data2, "label": [], "Comments": []},
             ],
             self.project.approver.username: [
@@ -386,7 +390,7 @@ class TestExportSpeechToText(TestExport):
                 {**self.data2, "label": [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {**self.data1, "label": [self.text2.text], "Comments": self.comment2},
+                {**self.data1, "label": [self.text2.text], "Comments": [self.comment2.to_string()]},
                 {**self.data2, "label": [], "Comments": []},
             ],
         }
@@ -400,9 +404,9 @@ class TestExportSpeechToText(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
-                "Comments": [self.comment1, self.comment2]
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             },
-            {**self.data2, "label": []},
+            {**self.data2, "label": [], "Comments": []},
         ]
         self.assertEqual(dataset, expected_dataset)
 
@@ -411,7 +415,7 @@ class TestExportSpeechToText(TestExport):
         datasets = self.export_dataset(confirmed_only=True)
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, "label": [self.text1.text], "Comments": self.comment1},
+                {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
             ],
             self.project.annotator.username: [],
             self.project.approver.username: [],
@@ -426,7 +430,7 @@ class TestExportSpeechToText(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
-                "Comments": [self.comment1, self.comment2]
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -440,7 +444,7 @@ class TestExportImageClassification(TestExport):
         self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
         self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
         self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
-        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
+        self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.data1 = self.data_to_filename(self.example1)
         self.data2 = self.data_to_filename(self.example2)
@@ -453,9 +457,9 @@ class TestExportImageClassification(TestExport):
                 {
                     **self.data1,
                     "label": [self.category1.to_string()],
-                    "Comments": self.comment1
+                    "Comments": [self.comment1.to_string()]
                 },
-                {**self.data2, "label": [], "Comments": self.comment2},
+                {**self.data2, "label": [], "Comments": []},
             ],
             self.project.approver.username: [
                 {**self.data1, "label": [], "Comments": []},
@@ -465,7 +469,7 @@ class TestExportImageClassification(TestExport):
                 {
                     **self.data1,
                     "label": [self.category2.to_string()],
-                    "Comments": self.comment2
+                    "Comments": [self.comment2.to_string()]
                 },
                 {**self.data2, "label": [], "Comments": []},
             ],
@@ -480,7 +484,7 @@ class TestExportImageClassification(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": [self.comment1, self.comment2]
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             },
             {**self.data2, "label": [], "Comments": []},
         ]
@@ -489,7 +493,11 @@ class TestExportImageClassification(TestExport):
     def test_confirmed_and_non_collaborative(self):
         self.prepare_data()
         datasets = self.export_dataset(confirmed_only=True)
-        expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()]}]}
+        expected_datasets = {
+            self.project.admin.username: [
+                {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]}
+            ]
+        }
         for username, dataset in expected_datasets.items():
             self.assertEqual(datasets[username], dataset)
 
@@ -500,6 +508,7 @@ class TestExportImageClassification(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -523,7 +532,7 @@ class TestExportRelation(TestExport):
             "ExportedRelation", from_id=self.span1, to_id=self.span2, example=self.example1, user=self.project.admin
         )
         self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
-        self.comment2 = mommy.make("ExportedComment", example=self.example2, user=self.project.annotator)
+        self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
         self.data1 = self.data_to_text(self.example1)
         self.data2 = self.data_to_text(self.example2)
@@ -537,7 +546,7 @@ class TestExportRelation(TestExport):
                     **self.data1,
                     "entities": [self.span1.to_dict(), self.span2.to_dict()],
                     "relations": [self.relation.to_dict()],
-                    "Comments": self.comment1
+                    "Comments": [self.comment1.to_dict()]
                 },
                 {**self.data2, "entities": [], "relations": [], "Comments": []},
             ],
@@ -546,9 +555,9 @@ class TestExportRelation(TestExport):
                     **self.data1,
                     "entities": [self.span3.to_dict()],
                     "relations": [],
-                    "Comments": self.comment2
+                    "Comments": [self.comment2.to_dict()]
                 },
-                {**self.data2, "entities": [], "relations": [], "Comments": self.comment2},
+                {**self.data2, "entities": [], "relations": [], "Comments": []},
             ],
             self.project.approver.username: [
                 {**self.data1, "entities": [], "relations": [], "Comments": []},
@@ -566,7 +575,7 @@ class TestExportRelation(TestExport):
                 **self.data1,
                 "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
                 "relations": [self.relation.to_dict()],
-                "Comments": [self.comment1, self.comment2]
+                "Comments": [self.comment1.to_dict(), self.comment2.to_dict()]
             },
             {**self.data2, "entities": [], "relations": [], "Comments": []},
         ]
@@ -581,7 +590,7 @@ class TestExportRelation(TestExport):
                     **self.data1,
                     "entities": [self.span1.to_dict(), self.span2.to_dict()],
                     "relations": [self.relation.to_dict()],
-                    "Comments": self.comment1
+                    "Comments": [self.comment1.to_dict()]
                 },
             ],
             self.project.annotator.username: [],
@@ -598,7 +607,7 @@ class TestExportRelation(TestExport):
                 **self.data1,
                 "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
                 "relations": [self.relation.to_dict()],
-                "Comments": [self.comment1, self.comment2]
+                "Comments": [self.comment1.to_dict(), self.comment2.to_dict()]
             }
         ]
         self.assertEqual(dataset, expected_dataset)

From add5d4c71a722c1a47fb5e50c6a83109a6430ffa Mon Sep 17 00:00:00 2001
From: Hironsan <light.tree.1.13@gmail.com>
Date: Mon, 25 Jul 2022 14:03:48 +0900
Subject: [PATCH 7/9] Apply black formatting

---
 backend/data_export/pipeline/factories.py | 10 ++---
 backend/data_export/tests/test_task.py    | 52 ++++++++++-------------
 2 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py
index f895df4a..614c01df 100644
--- a/backend/data_export/pipeline/factories.py
+++ b/backend/data_export/pipeline/factories.py
@@ -90,24 +90,24 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
             else [
                 TupledSpanFormatter(Spans.column),
                 ListedCategoryFormatter(Comments.column),
-                RenameFormatter(**mapper_sequence_labeling)
+                RenameFormatter(**mapper_sequence_labeling),
             ]
         },
         SEQ2SEQ: {
             CSV.name: [
                 JoinedCategoryFormatter(Texts.column),
                 JoinedCategoryFormatter(Comments.column),
-                RenameFormatter(**mapper_seq2seq)
+                RenameFormatter(**mapper_seq2seq),
             ],
             JSON.name: [
                 ListedCategoryFormatter(Texts.column),
                 ListedCategoryFormatter(Comments.column),
-                RenameFormatter(**mapper_seq2seq)
+                RenameFormatter(**mapper_seq2seq),
             ],
             JSONL.name: [
                 ListedCategoryFormatter(Texts.column),
                 ListedCategoryFormatter(Comments.column),
-                RenameFormatter(**mapper_seq2seq)
+                RenameFormatter(**mapper_seq2seq),
             ],
         },
         IMAGE_CLASSIFICATION: {
@@ -121,7 +121,7 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
             JSONL.name: [
                 ListedCategoryFormatter(Texts.column),
                 ListedCategoryFormatter(Comments.column),
-                RenameFormatter(**mapper_speech2text)
+                RenameFormatter(**mapper_speech2text),
             ],
         },
         INTENT_DETECTION_AND_SLOT_FILLING: {
diff --git a/backend/data_export/tests/test_task.py b/backend/data_export/tests/test_task.py
index cf9df979..8e25dbc2 100644
--- a/backend/data_export/tests/test_task.py
+++ b/backend/data_export/tests/test_task.py
@@ -97,7 +97,7 @@ class TestExportCategory(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             },
             {**self.data2, "label": [], "Comments": []},
         ]
@@ -121,7 +121,7 @@ class TestExportCategory(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -167,7 +167,7 @@ class TestExportSeq2seq(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             },
             {**self.data2, "label": [], "Comments": []},
         ]
@@ -193,7 +193,7 @@ class TestExportSeq2seq(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -224,7 +224,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                     **self.data1,
                     "entities": [list(self.span.to_tuple())],
                     "cats": [self.category1.to_string()],
-                    "Comments": [self.comment1.to_string()]
+                    "Comments": [self.comment1.to_string()],
                 },
                 {**self.data2, "entities": [], "cats": [], "Comments": []},
             ],
@@ -233,7 +233,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                     **self.data1,
                     "entities": [],
                     "cats": [self.category2.to_string()],
-                    "Comments": [self.comment2.to_string()]
+                    "Comments": [self.comment2.to_string()],
                 },
                 {**self.data2, "entities": [], "cats": [], "Comments": []},
             ],
@@ -253,7 +253,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                 **self.data1,
                 "entities": [list(self.span.to_tuple())],
                 "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": [self.comment1.to_string(), self.comment2.to_string()]
+                "Comments": [self.comment1.to_string(), self.comment2.to_string()],
             },
             {**self.data2, "entities": [], "cats": [], "Comments": []},
         ]
@@ -268,7 +268,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                     **self.data1,
                     "entities": [list(self.span.to_tuple())],
                     "cats": [self.category1.to_string()],
-                    "Comments": [self.comment1.to_string()]
+                    "Comments": [self.comment1.to_string()],
                 },
             ],
             self.project.annotator.username: [],
@@ -285,7 +285,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                 **self.data1,
                 "entities": [list(self.span.to_tuple())],
                 "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             },
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -335,7 +335,7 @@ class TestExportSequenceLabeling(TestExport):
             {
                 **self.data1,
                 "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             },
             {**self.data2, "label": [], "Comments": []},
         ]
@@ -361,7 +361,7 @@ class TestExportSequenceLabeling(TestExport):
             {
                 **self.data1,
                 "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             },
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -407,7 +407,7 @@ class TestExportSpeechToText(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             },
             {**self.data2, "label": [], "Comments": []},
         ]
@@ -433,7 +433,7 @@ class TestExportSpeechToText(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.text1.text, self.text2.text]),
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -457,11 +457,7 @@ class TestExportImageClassification(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {
-                    **self.data1,
-                    "label": [self.category1.to_string()],
-                    "Comments": [self.comment1.to_string()]
-                },
+                {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]},
                 {**self.data2, "label": [], "Comments": []},
             ],
             self.project.approver.username: [
@@ -469,11 +465,7 @@ class TestExportImageClassification(TestExport):
                 {**self.data2, "label": [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {
-                    **self.data1,
-                    "label": [self.category2.to_string()],
-                    "Comments": [self.comment2.to_string()]
-                },
+                {**self.data1, "label": [self.category2.to_string()], "Comments": [self.comment2.to_string()]},
                 {**self.data2, "label": [], "Comments": []},
             ],
         }
@@ -487,7 +479,7 @@ class TestExportImageClassification(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             },
             {**self.data2, "label": [], "Comments": []},
         ]
@@ -511,7 +503,7 @@ class TestExportImageClassification(TestExport):
             {
                 **self.data1,
                 "label": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()])
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -761,7 +753,7 @@ class TestExportRelation(TestExport):
                     **self.data1,
                     "entities": [self.span1.to_dict(), self.span2.to_dict()],
                     "relations": [self.relation.to_dict()],
-                    "Comments": [self.comment1.to_dict()]
+                    "Comments": [self.comment1.to_dict()],
                 },
                 {**self.data2, "entities": [], "relations": [], "Comments": []},
             ],
@@ -770,7 +762,7 @@ class TestExportRelation(TestExport):
                     **self.data1,
                     "entities": [self.span3.to_dict()],
                     "relations": [],
-                    "Comments": [self.comment2.to_dict()]
+                    "Comments": [self.comment2.to_dict()],
                 },
                 {**self.data2, "entities": [], "relations": [], "Comments": []},
             ],
@@ -790,7 +782,7 @@ class TestExportRelation(TestExport):
                 **self.data1,
                 "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
                 "relations": [self.relation.to_dict()],
-                "Comments": [self.comment1.to_dict(), self.comment2.to_dict()]
+                "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
             },
             {**self.data2, "entities": [], "relations": [], "Comments": []},
         ]
@@ -805,7 +797,7 @@ class TestExportRelation(TestExport):
                     **self.data1,
                     "entities": [self.span1.to_dict(), self.span2.to_dict()],
                     "relations": [self.relation.to_dict()],
-                    "Comments": [self.comment1.to_dict()]
+                    "Comments": [self.comment1.to_dict()],
                 },
             ],
             self.project.annotator.username: [],
@@ -822,7 +814,7 @@ class TestExportRelation(TestExport):
                 **self.data1,
                 "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
                 "relations": [self.relation.to_dict()],
-                "Comments": [self.comment1.to_dict(), self.comment2.to_dict()]
+                "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
             }
         ]
         self.assertEqual(dataset, expected_dataset)

From b795bb358a307ce0eefadf4d8267586a859c9e0c Mon Sep 17 00:00:00 2001
From: Hironsan <light.tree.1.13@gmail.com>
Date: Mon, 25 Jul 2022 14:11:13 +0900
Subject: [PATCH 8/9] Add a migration file for exported comment

---
 .../migrations/0004_exportedcomment.py        | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 backend/data_export/migrations/0004_exportedcomment.py

diff --git a/backend/data_export/migrations/0004_exportedcomment.py b/backend/data_export/migrations/0004_exportedcomment.py
new file mode 100644
index 00000000..04201d27
--- /dev/null
+++ b/backend/data_export/migrations/0004_exportedcomment.py
@@ -0,0 +1,24 @@
+# Generated by Django 4.0.4 on 2022-07-25 05:07
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    # Registers ExportedComment as a proxy of examples.comment; fields=[] means no columns are added.
+    dependencies = [
+        ("examples", "0006_alter_example_upload_name"),
+        ("data_export", "0003_exportedsegmentation"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="ExportedComment",
+            fields=[],
+            options={
+                "proxy": True,
+                "indexes": [],
+                "constraints": [],
+            },
+            bases=("examples.comment",),
+        ),
+    ]

From ac7dc796fa16e962195d002f3320e006cd26f397 Mon Sep 17 00:00:00 2001
From: Casey <jiaqzhang@expediagroup.com>
Date: Mon, 25 Jul 2022 16:33:27 -0700
Subject: [PATCH 9/9] update test cases for new methods

---
 backend/data_export/pipeline/factories.py | 22 ++++++-
 backend/data_export/tests/test_task.py    | 78 +++++++++++++----------
 2 files changed, 64 insertions(+), 36 deletions(-)

diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py
index 614c01df..65b226d8 100644
--- a/backend/data_export/pipeline/factories.py
+++ b/backend/data_export/pipeline/factories.py
@@ -132,10 +132,26 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
                 RenameFormatter(**mapper_intent_detection),
             ]
         },
-        BOUNDING_BOX: {JSONL.name: [DictFormatter(BoundingBoxes.column), RenameFormatter(**mapper_bounding_box)]},
-        SEGMENTATION: {JSONL.name: [DictFormatter(Segments.column), RenameFormatter(**mapper_segmentation)]},
+        BOUNDING_BOX: {
+            JSONL.name: [
+                DictFormatter(BoundingBoxes.column),
+                DictFormatter(Comments.column),
+                RenameFormatter(**mapper_bounding_box),
+            ]
+        },
+        SEGMENTATION: {
+            JSONL.name: [
+                DictFormatter(Segments.column),
+                DictFormatter(Comments.column),
+                RenameFormatter(**mapper_segmentation),
+            ]
+        },
         IMAGE_CAPTIONING: {
-            JSONL.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_image_captioning)]
+            JSONL.name: [
+                ListedCategoryFormatter(Texts.column),
+                ListedCategoryFormatter(Comments.column),
+                RenameFormatter(**mapper_image_captioning),
+            ]
         },
     }
     return mapping[project.project_type][file_format]
diff --git a/backend/data_export/tests/test_task.py b/backend/data_export/tests/test_task.py
index 8e25dbc2..31b65687 100644
--- a/backend/data_export/tests/test_task.py
+++ b/backend/data_export/tests/test_task.py
@@ -253,7 +253,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
                 **self.data1,
                 "entities": [list(self.span.to_tuple())],
                 "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
-                "Comments": [self.comment1.to_string(), self.comment2.to_string()],
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             },
             {**self.data2, "entities": [], "cats": [], "Comments": []},
         ]
@@ -514,6 +514,8 @@ class TestExportBoundingBox(TestExport):
         self.project = prepare_project(BOUNDING_BOX, collaborative_annotation=collaborative)
         self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
         self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
+        self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
+        self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
         self.bbox1 = mommy.make(
             "ExportedBoundingBox", example=self.example1, user=self.project.admin, x=0, y=0, width=10, height=10
         )
@@ -532,19 +534,17 @@ class TestExportBoundingBox(TestExport):
                 {
                     **self.data1,
                     "bbox": [self.bbox1.to_dict()],
+                    "Comments": [self.comment1.to_dict()],
                 },
-                {**self.data2, "bbox": []},
+                {**self.data2, "bbox": [], "Comments": []},
             ],
             self.project.approver.username: [
-                {**self.data1, "bbox": []},
-                {**self.data2, "bbox": []},
+                {**self.data1, "bbox": [], "Comments": []},
+                {**self.data2, "bbox": [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {
-                    **self.data1,
-                    "bbox": [self.bbox2.to_dict()],
-                },
-                {**self.data2, "bbox": []},
+                {**self.data1, "bbox": [self.bbox2.to_dict()], "Comments": [self.comment2.to_dict()]},
+                {**self.data2, "bbox": [], "Comments": []},
             ],
         }
         for username, dataset in expected_datasets.items():
@@ -557,15 +557,20 @@ class TestExportBoundingBox(TestExport):
             {
                 **self.data1,
                 "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
+                "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
             },
-            {**self.data2, "bbox": []},
+            {**self.data2, "bbox": [], "Comments": []},
         ]
         self.assertEqual(dataset, expected_dataset)
 
     def test_confirmed_and_non_collaborative(self):
         self.prepare_data()
         datasets = self.export_dataset(confirmed_only=True)
-        expected_datasets = {self.project.admin.username: [{**self.data1, "bbox": [self.bbox1.to_dict()]}]}
+        expected_datasets = {
+            self.project.admin.username: [
+                {**self.data1, "bbox": [self.bbox1.to_dict()], "Comments": [self.comment1.to_dict()]}
+            ]
+        }
         for username, dataset in expected_datasets.items():
             self.assertEqual(datasets[username], dataset)
 
@@ -576,6 +581,7 @@ class TestExportBoundingBox(TestExport):
             {
                 **self.data1,
                 "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
+                "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -586,6 +592,8 @@ class TestExportSegmentation(TestExport):
         self.project = prepare_project(SEGMENTATION, collaborative_annotation=collaborative)
         self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
         self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
+        self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
+        self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
         self.seg1 = mommy.make("ExportedSegmentation", example=self.example1, user=self.project.admin, points=[0, 1])
         self.seg2 = mommy.make(
             "ExportedSegmentation", example=self.example1, user=self.project.annotator, points=[1, 2]
@@ -600,22 +608,16 @@ class TestExportSegmentation(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {
-                    **self.data1,
-                    self.column: [self.seg1.to_dict()],
-                },
-                {**self.data2, self.column: []},
+                {**self.data1, self.column: [self.seg1.to_dict()], "Comments": [self.comment1.to_dict()]},
+                {**self.data2, self.column: [], "Comments": []},
             ],
             self.project.approver.username: [
-                {**self.data1, self.column: []},
-                {**self.data2, self.column: []},
+                {**self.data1, self.column: [], "Comments": []},
+                {**self.data2, self.column: [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {
-                    **self.data1,
-                    self.column: [self.seg2.to_dict()],
-                },
-                {**self.data2, self.column: []},
+                {**self.data1, self.column: [self.seg2.to_dict()], "Comments": [self.comment2.to_dict()]},
+                {**self.data2, self.column: [], "Comments": []},
             ],
         }
         for username, dataset in expected_datasets.items():
@@ -628,15 +630,20 @@ class TestExportSegmentation(TestExport):
             {
                 **self.data1,
                 self.column: [self.seg1.to_dict(), self.seg2.to_dict()],
+                "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
             },
-            {**self.data2, self.column: []},
+            {**self.data2, self.column: [], "Comments": []},
         ]
         self.assertEqual(dataset, expected_dataset)
 
     def test_confirmed_and_non_collaborative(self):
         self.prepare_data()
         datasets = self.export_dataset(confirmed_only=True)
-        expected_datasets = {self.project.admin.username: [{**self.data1, self.column: [self.seg1.to_dict()]}]}
+        expected_datasets = {
+            self.project.admin.username: [
+                {**self.data1, self.column: [self.seg1.to_dict()], "Comments": [self.comment1.to_dict()]}
+            ]
+        }
         for username, dataset in expected_datasets.items():
             self.assertEqual(datasets[username], dataset)
 
@@ -647,6 +654,7 @@ class TestExportSegmentation(TestExport):
             {
                 **self.data1,
                 self.column: [self.seg1.to_dict(), self.seg2.to_dict()],
+                "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
             }
         ]
         self.assertEqual(dataset, expected_dataset)
@@ -657,6 +665,8 @@ class TestExportImageCaptioning(TestExport):
         self.project = prepare_project(IMAGE_CAPTIONING, collaborative_annotation=collaborative)
         self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
         self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
+        self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
+        self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
         self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
         self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
         mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
@@ -669,16 +679,16 @@ class TestExportImageCaptioning(TestExport):
         datasets = self.export_dataset()
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, self.column: [self.text1.text]},
-                {**self.data2, self.column: []},
+                {**self.data1, self.column: [self.text1.text], "Comments": [self.comment1.to_string()]},
+                {**self.data2, self.column: [], "Comments": []},
             ],
             self.project.approver.username: [
-                {**self.data1, self.column: []},
-                {**self.data2, self.column: []},
+                {**self.data1, self.column: [], "Comments": []},
+                {**self.data2, self.column: [], "Comments": []},
             ],
             self.project.annotator.username: [
-                {**self.data1, self.column: [self.text2.text]},
-                {**self.data2, self.column: []},
+                {**self.data1, self.column: [self.text2.text], "Comments": [self.comment2.to_string()]},
+                {**self.data2, self.column: [], "Comments": []},
             ],
         }
         for username, dataset in expected_datasets.items():
@@ -691,8 +701,9 @@ class TestExportImageCaptioning(TestExport):
             {
                 **self.data1,
                 self.column: sorted([self.text1.text, self.text2.text]),
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             },
-            {**self.data2, self.column: []},
+            {**self.data2, self.column: [], "Comments": []},
         ]
         self.assertEqual(dataset, expected_dataset)
 
@@ -701,7 +712,7 @@ class TestExportImageCaptioning(TestExport):
         datasets = self.export_dataset(confirmed_only=True)
         expected_datasets = {
             self.project.admin.username: [
-                {**self.data1, self.column: [self.text1.text]},
+                {**self.data1, self.column: [self.text1.text], "Comments": [self.comment1.to_string()]},
             ],
             self.project.approver.username: [],
             self.project.annotator.username: [],
@@ -716,6 +727,7 @@ class TestExportImageCaptioning(TestExport):
             {
                 **self.data1,
                 self.column: sorted([self.text1.text, self.text2.text]),
+                "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
             }
         ]
         self.assertEqual(dataset, expected_dataset)