Browse Source

export comments

pull/1897/head
Casey 2 years ago
parent
commit
9179916d6a
5 changed files with 61 additions and 8 deletions
  1. 10
      backend/data_export/celery_tasks.py
  2. 14
      backend/data_export/models.py
  3. 26
      backend/data_export/pipeline/comments.py
  4. 9
      backend/data_export/pipeline/dataset.py
  5. 10
      backend/data_export/pipeline/factories.py

10
backend/data_export/celery_tasks.py

@ -8,9 +8,9 @@ from django.conf import settings
from django.shortcuts import get_object_or_404
from .pipeline.dataset import Dataset
from .pipeline.factories import create_formatter, create_labels, create_writer
from .pipeline.factories import create_formatter, create_labels, create_writer, create_comment
from .pipeline.services import ExportApplicationService
from data_export.models import ExportedExample
from data_export.models import ExportedExample, ExportedComment
from projects.models import Member, Project
logger = get_task_logger(__name__)
@ -23,7 +23,8 @@ def create_collaborative_dataset(project: Project, dirpath: str, confirmed_only:
else:
examples = ExportedExample.objects.filter(project=project)
labels = create_labels(project, examples)
dataset = Dataset(examples, labels, is_text_project)
comments = create_comment(examples)
dataset = Dataset(examples, labels, comments, is_text_project)
service = ExportApplicationService(dataset, formatters, writer)
@ -40,7 +41,8 @@ def create_individual_dataset(project: Project, dirpath: str, confirmed_only: bo
else:
examples = ExportedExample.objects.filter(project=project)
labels = create_labels(project, examples, member.user)
dataset = Dataset(examples, labels, is_text_project)
comments = create_comment(examples, member.user)
dataset = Dataset(examples, labels, comments, is_text_project)
service = ExportApplicationService(dataset, formatters, writer)

14
backend/data_export/models.py

@ -2,7 +2,7 @@ from typing import Any, Dict, Protocol, Tuple
from django.db import models
from examples.models import Example
from examples.models import Example, Comment
from labels.models import Category, Relation, Span, TextLabel
from projects.models import Project
@ -79,3 +79,15 @@ class ExportedText(TextLabel):
class Meta:
proxy = True
class ExportedComment(Comment):
    """Proxy over ``Comment`` that adds the serialization helpers used by the export pipeline."""

    def to_string(self) -> str:
        """Return the comment text as-is."""
        return self.text

    def to_dict(self) -> Dict[str, Any]:
        """Return the comment as a mapping with the owning example's id and the comment text."""
        # ``example_id`` is the stored foreign-key value, so reading it never
        # triggers a query for the related Example (``self.example.id`` would
        # when the relation has not been prefetched).
        return {"id": self.example_id, "comment": self.text}

    class Meta:
        proxy = True

26
backend/data_export/pipeline/comments.py

@ -0,0 +1,26 @@
import abc
from collections import defaultdict
from typing import Dict, List, Tuple
from django.db.models import QuerySet
from data_export.models import (
ExportedExample,
ExportedComment,
)
class Comments(abc.ABC):
    """In-memory index of exported comments, grouped by the id of their example.

    The comments for the given examples are fetched when the object is
    constructed; ``find_by`` then serves lookups from the in-memory groups.
    """

    comment_class = ExportedComment
    column = "Comments"
    fields: Tuple[str, ...] = ("example", "user")  # To boost performance

    def __init__(self, examples: QuerySet[ExportedExample], user=None):
        """Load and group the comments attached to ``examples``.

        Args:
            examples: Examples whose comments should be exported.
            user: When given (truthy), only comments written by this user are kept.
        """
        self.comment_groups: Dict[int, List[ExportedComment]] = defaultdict(list)
        comments = self.comment_class.objects.filter(example__in=examples)
        if user:
            comments = comments.filter(user=user)
        for comment in comments.select_related(*self.fields):
            # The raw foreign-key value is enough for grouping and does not
            # depend on the related Example having been joined in.
            self.comment_groups[comment.example_id].append(comment)

    def find_by(self, example_id: int) -> Dict[str, List[ExportedComment]]:
        """Return ``{column: comments}`` for one example; the list is empty when it has none."""
        # Use .get() rather than indexing: indexing a defaultdict would insert
        # a new empty list for every example without comments, growing the
        # index as a side effect of a read.
        return {self.column: self.comment_groups.get(example_id, [])}

9
backend/data_export/pipeline/dataset.py

@ -4,20 +4,25 @@ import pandas as pd
from django.db.models.query import QuerySet
from .labels import Labels
from data_export.models import ExportedExample
from .comments import Comments
from data_export.models import ExportedExample, ExportedComment
class Dataset:
def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], is_text_project=True):
def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], comments: List[Comments],
             is_text_project=True):
    """Store the examples and the annotation collections that are merged into each exported row.

    Args:
        examples: Examples to export.
        labels: Label collections queried per example via ``find_by``.
        comments: Comment collections queried per example via ``find_by``.
        is_text_project: Forwarded to ``example.to_dict`` when rows are built.
    """
    self.is_text_project = is_text_project
    self.examples = examples
    self.labels = labels
    self.comments = comments
def __iter__(self) -> Iterator[Dict[str, Any]]:
    """Yield one flat record per example, with label columns merged first and comment columns after."""
    for example in self.examples:
        record = example.to_dict(self.is_text_project)
        # Labels are merged before comments, so on a column-name clash the
        # comment value wins.
        for collections in (self.labels, self.comments):
            for collection in collections:
                record.update(**collection.find_by(example.id))
        yield record
def to_dataframe(self) -> pd.DataFrame:

10
backend/data_export/pipeline/factories.py

@ -14,7 +14,8 @@ from .formatters import (
TupledSpanFormatter,
)
from .labels import Categories, Labels, Relations, Spans, Texts
from data_export.models import DATA, ExportedExample
from .comments import Comments
from data_export.models import DATA, ExportedExample, ExportedComment
from projects.models import (
DOCUMENT_CLASSIFICATION,
IMAGE_CLASSIFICATION,
@ -51,14 +52,17 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
DOCUMENT_CLASSIFICATION: {
CSV.name: [
JoinedCategoryFormatter(Categories.column),
JoinedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_text_classification),
],
JSON.name: [
ListedCategoryFormatter(Categories.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_text_classification),
],
JSONL.name: [
ListedCategoryFormatter(Categories.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_text_classification),
],
FastText.name: [FastTextCategoryFormatter(Categories.column)],
@ -114,3 +118,7 @@ def create_labels(project: Project, examples: QuerySet[ExportedExample], user=No
label_collections = select_label_collection(project)
labels = [label_collection(examples=examples, user=user) for label_collection in label_collections]
return labels
def create_comment(examples: QuerySet[ExportedExample], user=None) -> List[Comments]:
    """Build the comment collections for an export.

    Returns a single-element list holding one ``Comments`` object built from
    ``examples`` and ``user``.
    """
    comment_collection = Comments(examples=examples, user=user)
    return [comment_collection]
Loading…
Cancel
Save