export comments

2 years ago · 9179916d6a
5 changed files with 61 additions and 8 deletions
--- a/backend/data_export/celery_tasks.py
+++ b/backend/data_export/celery_tasks.py
@ -8,9 +8,9 @@ from django.conf import settings
 from django.shortcuts import get_object_or_404

 from .pipeline.dataset import Dataset
-from .pipeline.factories import create_formatter, create_labels, create_writer
+from .pipeline.factories import create_formatter, create_labels, create_writer, create_comment
 from .pipeline.services import ExportApplicationService
-from data_export.models import ExportedExample
+from data_export.models import ExportedExample, ExportedComment
 from projects.models import Member, Project

 logger = get_task_logger(__name__)
@ -23,7 +23,8 @@ def create_collaborative_dataset(project: Project, dirpath: str, confirmed_only:
    else:
        examples = ExportedExample.objects.filter(project=project)
    labels = create_labels(project, examples)
-    dataset = Dataset(examples, labels, is_text_project)
+    comments = create_comment(examples)
+    dataset = Dataset(examples, labels, comments, is_text_project)

    service = ExportApplicationService(dataset, formatters, writer)

@ -40,7 +41,8 @@ def create_individual_dataset(project: Project, dirpath: str, confirmed_only: bo
        else:
            examples = ExportedExample.objects.filter(project=project)
        labels = create_labels(project, examples, member.user)
-        dataset = Dataset(examples, labels, is_text_project)
+        comments = create_comment(examples, member.user)
+        dataset = Dataset(examples, labels, comments, is_text_project)

        service = ExportApplicationService(dataset, formatters, writer)

--- a/backend/data_export/models.py
+++ b/backend/data_export/models.py
@ -2,7 +2,7 @@ from typing import Any, Dict, Protocol, Tuple

 from django.db import models

-from examples.models import Example
+from examples.models import Example, Comment
 from labels.models import Category, Relation, Span, TextLabel
 from projects.models import Project

@ -79,3 +79,15 @@ class ExportedText(TextLabel):

    class Meta:
        proxy = True
+
+
+class ExportedComment(Comment):
+    """Proxy of ``Comment`` that adds the serialization helpers used when exporting."""
+
+    def to_string(self) -> str:
+        # The exported string form is just the comment text.
+        return self.text
+
+    def to_dict(self):
+        # NOTE: "id" is the id of the commented example, not the id of the comment itself.
+        return {"id": self.example.id, "comment": self.text}
+
+    class Meta:
+        # Django proxy model: only Python-level behaviour is added; no fields are declared here.
+        proxy = True
+
--- a/backend/data_export/pipeline/comments.py
+++ b/backend/data_export/pipeline/comments.py
@ -0,0 +1,26 @@
+import abc
+from collections import defaultdict
+from typing import Dict, List, Tuple
+
+from django.db.models import QuerySet
+
+from data_export.models import (
+    ExportedExample,
+    ExportedComment,
+)
+
+class Comments(abc.ABC):
+    """Comments of a set of examples, grouped by example id for lookup during export."""
+
+    comment_class = ExportedComment
+    # Key under which ``find_by`` returns the comments of an example.
+    column = "Comments"
+    fields: Tuple[str, ...] = ("example", "user")  # To boost performance
+
+    def __init__(self, examples: QuerySet[ExportedExample], user=None):
+        """Load the comments attached to ``examples`` and group them by example id.
+
+        Args:
+            examples: Examples whose comments are collected.
+            user: When truthy, only comments whose ``user`` equals this value are kept.
+        """
+        self.comment_groups = defaultdict(list)
+        comments = self.comment_class.objects.filter(example__in=examples)
+        if user:
+            comments = comments.filter(user=user)
+        # The relations named in ``fields`` are requested via select_related with the comments.
+        for comment in comments.select_related(*self.fields):
+            self.comment_groups[comment.example.id].append(comment)
+
+    def find_by(self, example_id: int) -> Dict[str, List[ExportedComment]]:
+        """Return ``{column: comments}`` for the given example id.
+
+        ``comment_groups`` is a ``defaultdict(list)``, so an id without comments
+        yields an empty list (and the lookup inserts that id into the mapping).
+        """
+        return {self.column: self.comment_groups[example_id]}
--- a/backend/data_export/pipeline/dataset.py
+++ b/backend/data_export/pipeline/dataset.py
@ -4,20 +4,25 @@ import pandas as pd
 from django.db.models.query import QuerySet

 from .labels import Labels
-from data_export.models import ExportedExample
+from .comments import Comments
+from data_export.models import ExportedExample, ExportedComment


 class Dataset:
-    def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], is_text_project=True):
+    def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], comments: List[Comments],
+                 is_text_project=True):
+        """Store the examples plus the label and comment collections merged into each row.
+
+        ``comments`` is a required positional parameter placed before
+        ``is_text_project``, so positional callers must pass it as the third argument.
+        """
        self.examples = examples
        self.labels = labels
        self.is_text_project = is_text_project
+        self.comments = comments

    def __iter__(self) -> Iterator[Dict[str, Any]]:
+        """Yield one dict per example: its ``to_dict`` output merged with its labels and comments."""
        for example in self.examples:
            data = example.to_dict(self.is_text_project)
            for labels in self.labels:
                data.update(**labels.find_by(example.id))
+            # Merged after the labels, so a comment column with the same key would overwrite a label column.
+            for comment in self.comments:
+                data.update(**comment.find_by(example.id))
            yield data

    def to_dataframe(self) -> pd.DataFrame:
--- a/backend/data_export/pipeline/factories.py
+++ b/backend/data_export/pipeline/factories.py
@ -14,7 +14,8 @@ from .formatters import (
    TupledSpanFormatter,
 )
 from .labels import Categories, Labels, Relations, Spans, Texts
-from data_export.models import DATA, ExportedExample
+from .comments import Comments
+from data_export.models import DATA, ExportedExample, ExportedComment
 from projects.models import (
    DOCUMENT_CLASSIFICATION,
    IMAGE_CLASSIFICATION,
@ -51,14 +52,17 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
        DOCUMENT_CLASSIFICATION: {
            CSV.name: [
                JoinedCategoryFormatter(Categories.column),
+                JoinedCategoryFormatter(Comments.column),
                RenameFormatter(**mapper_text_classification),
            ],
            JSON.name: [
                ListedCategoryFormatter(Categories.column),
+                ListedCategoryFormatter(Comments.column),
                RenameFormatter(**mapper_text_classification),
            ],
            JSONL.name: [
                ListedCategoryFormatter(Categories.column),
+                ListedCategoryFormatter(Comments.column),
                RenameFormatter(**mapper_text_classification),
            ],
            FastText.name: [FastTextCategoryFormatter(Categories.column)],
@ -114,3 +118,7 @@ def create_labels(project: Project, examples: QuerySet[ExportedExample], user=No
    label_collections = select_label_collection(project)
    labels = [label_collection(examples=examples, user=user) for label_collection in label_collections]
    return labels
+
+
+def create_comment(examples: QuerySet[ExportedExample], user=None) -> List[Comments]:
+    """Build the comment collections for ``examples``, optionally restricted to ``user``.
+
+    Always returns a one-element list wrapping a single ``Comments`` instance.
+    """
+    return [Comments(examples=examples, user=user)]