Browse Source

Merge pull request #1897 from CaseyZhang11/export-annotator-comment

Export annotator comment
pull/1927/head
Hiroki Nakayama 2 years ago
committed by GitHub
parent
commit
57c727b0c3
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 288 additions and 109 deletions
  1. 13
      backend/data_export/celery_tasks.py
  2. 24
      backend/data_export/migrations/0004_exportedcomment.py
  3. 13
      backend/data_export/models.py
  4. 24
      backend/data_export/pipeline/comments.py
  5. 8
      backend/data_export/pipeline/dataset.py
  6. 63
      backend/data_export/pipeline/factories.py
  7. 7
      backend/data_export/pipeline/formatters.py
  8. 8
      backend/data_export/tests/test_dataset.py
  9. 13
      backend/data_export/tests/test_formatters.py
  10. 224
      backend/data_export/tests/test_task.py

13
backend/data_export/celery_tasks.py

@ -8,7 +8,12 @@ from django.conf import settings
from django.shortcuts import get_object_or_404
from .pipeline.dataset import Dataset
from .pipeline.factories import create_formatter, create_labels, create_writer
from .pipeline.factories import (
create_comment,
create_formatter,
create_labels,
create_writer,
)
from .pipeline.services import ExportApplicationService
from data_export.models import ExportedExample
from projects.models import Member, Project
@ -23,7 +28,8 @@ def create_collaborative_dataset(project: Project, dirpath: str, confirmed_only:
else:
examples = ExportedExample.objects.filter(project=project)
labels = create_labels(project, examples)
dataset = Dataset(examples, labels, is_text_project)
comments = create_comment(examples)
dataset = Dataset(examples, labels, comments, is_text_project)
service = ExportApplicationService(dataset, formatters, writer)
@ -40,7 +46,8 @@ def create_individual_dataset(project: Project, dirpath: str, confirmed_only: bo
else:
examples = ExportedExample.objects.filter(project=project)
labels = create_labels(project, examples, member.user)
dataset = Dataset(examples, labels, is_text_project)
comments = create_comment(examples, member.user)
dataset = Dataset(examples, labels, comments, is_text_project)
service = ExportApplicationService(dataset, formatters, writer)

24
backend/data_export/migrations/0004_exportedcomment.py

@ -0,0 +1,24 @@
# Generated by Django 4.0.4 on 2022-07-25 05:07
from django.db import migrations
# Adds the ExportedComment proxy model (based on examples.comment) to the
# data_export app's migration state.
class Migration(migrations.Migration):
# Migrations this one is declared to depend on.
dependencies = [
("examples", "0006_alter_example_upload_name"),
("data_export", "0003_exportedsegmentation"),
]
operations = [
migrations.CreateModel(
name="ExportedComment",
# The proxy declares no fields of its own.
fields=[],
options={
"proxy": True,
"indexes": [],
"constraints": [],
},
# Base model referenced by its "app_label.model_name" string.
bases=("examples.comment",),
),
]

13
backend/data_export/models.py

@ -2,7 +2,7 @@ from typing import Any, Dict, Protocol, Tuple
from django.db import models
from examples.models import Example
from examples.models import Comment, Example
from labels.models import BoundingBox, Category, Relation, Segmentation, Span, TextLabel
from projects.models import Project
@ -81,6 +81,17 @@ class ExportedText(TextLabel):
proxy = True
class ExportedComment(Comment):
    """Proxy over ``Comment`` that adds the serializers used by the exporters."""

    class Meta:
        proxy = True

    def to_dict(self):
        """Return the comment as a mapping with ``id`` and ``comment`` keys."""
        payload = {"id": self.id}
        payload["comment"] = self.text
        return payload

    def to_string(self) -> str:
        """Return the comment text."""
        body = self.text
        return body
class ExportedBoundingBox(BoundingBox):
def to_dict(self):
return {

24
backend/data_export/pipeline/comments.py

@ -0,0 +1,24 @@
import abc
from collections import defaultdict
from typing import Dict, List, Tuple
from django.db.models import QuerySet
from data_export.models import ExportedComment, ExportedExample
class Comments(abc.ABC):
    """Collect exported comments and group them by the id of their example.

    Attributes:
        comment_class: Model whose manager is queried for comments.
        column: Key under which ``find_by`` returns the comments.
        fields: Relations passed to ``select_related`` when loading comments.
    """

    comment_class = ExportedComment
    column = "Comments"
    fields: Tuple[str, ...] = ("example", "user")  # To boost performance

    def __init__(self, examples: QuerySet[ExportedExample], user=None):
        """Load the comments attached to ``examples`` and index them.

        Args:
            examples: Examples whose comments should be collected.
            user: When given (truthy), only this user's comments are kept.
        """
        self.comment_groups: Dict[int, List[ExportedComment]] = defaultdict(list)
        comments = self.comment_class.objects.filter(example__in=examples)
        if user:
            comments = comments.filter(user=user)
        for comment in comments.select_related(*self.fields):
            self.comment_groups[comment.example.id].append(comment)

    def find_by(self, example_id: int) -> Dict[str, List[ExportedComment]]:
        """Return ``{column: comments}`` for the given example id.

        An example without comments maps to an empty list.
        """
        # Use .get() instead of indexing: indexing a defaultdict would insert a
        # new empty list for every example that has no comments, so a plain
        # lookup would silently grow ``comment_groups``.
        return {self.column: self.comment_groups.get(example_id, [])}

8
backend/data_export/pipeline/dataset.py

@ -3,21 +3,27 @@ from typing import Any, Dict, Iterator, List
import pandas as pd
from django.db.models.query import QuerySet
from .comments import Comments
from .labels import Labels
from data_export.models import ExportedExample
class Dataset:
def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], is_text_project=True):
def __init__(
self, examples: QuerySet[ExportedExample], labels: List[Labels], comments: List[Comments], is_text_project=True
):
self.examples = examples
self.labels = labels
self.is_text_project = is_text_project
self.comments = comments
def __iter__(self) -> Iterator[Dict[str, Any]]:
    """Yield one merged record per example.

    Each record starts from ``example.to_dict(self.is_text_project)`` and is
    then updated with the ``find_by`` result of every label collection,
    followed by the ``find_by`` result of every comment collection.
    """
    for item in self.examples:
        record = item.to_dict(self.is_text_project)
        for label_source in self.labels:
            found = label_source.find_by(item.id)
            record.update(**found)
        for comment_source in self.comments:
            found = comment_source.find_by(item.id)
            record.update(**found)
        yield record
def to_dataframe(self) -> pd.DataFrame:

63
backend/data_export/pipeline/factories.py

@ -4,6 +4,7 @@ from django.db.models import QuerySet
from . import writers
from .catalog import CSV, JSON, JSONL, FastText
from .comments import Comments
from .formatters import (
DictFormatter,
FastTextCategoryFormatter,
@ -63,14 +64,17 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
DOCUMENT_CLASSIFICATION: {
CSV.name: [
JoinedCategoryFormatter(Categories.column),
JoinedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_text_classification),
],
JSON.name: [
ListedCategoryFormatter(Categories.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_text_classification),
],
JSONL.name: [
ListedCategoryFormatter(Categories.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_text_classification),
],
FastText.name: [FastTextCategoryFormatter(Categories.column)],
@ -79,36 +83,75 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
JSONL.name: [
DictFormatter(Spans.column),
DictFormatter(Relations.column),
DictFormatter(Comments.column),
RenameFormatter(**mapper_relation_extraction),
]
if use_relation
else [TupledSpanFormatter(Spans.column), RenameFormatter(**mapper_sequence_labeling)]
else [
TupledSpanFormatter(Spans.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_sequence_labeling),
]
},
SEQ2SEQ: {
CSV.name: [JoinedCategoryFormatter(Texts.column), RenameFormatter(**mapper_seq2seq)],
JSON.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_seq2seq)],
JSONL.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_seq2seq)],
CSV.name: [
JoinedCategoryFormatter(Texts.column),
JoinedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_seq2seq),
],
JSON.name: [
ListedCategoryFormatter(Texts.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_seq2seq),
],
JSONL.name: [
ListedCategoryFormatter(Texts.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_seq2seq),
],
},
IMAGE_CLASSIFICATION: {
JSONL.name: [
ListedCategoryFormatter(Categories.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_image_classification),
],
},
SPEECH2TEXT: {
JSONL.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_speech2text)],
JSONL.name: [
ListedCategoryFormatter(Texts.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_speech2text),
],
},
INTENT_DETECTION_AND_SLOT_FILLING: {
JSONL.name: [
ListedCategoryFormatter(Categories.column),
TupledSpanFormatter(Spans.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_intent_detection),
]
},
BOUNDING_BOX: {JSONL.name: [DictFormatter(BoundingBoxes.column), RenameFormatter(**mapper_bounding_box)]},
SEGMENTATION: {JSONL.name: [DictFormatter(Segments.column), RenameFormatter(**mapper_segmentation)]},
BOUNDING_BOX: {
JSONL.name: [
DictFormatter(BoundingBoxes.column),
DictFormatter(Comments.column),
RenameFormatter(**mapper_bounding_box),
]
},
SEGMENTATION: {
JSONL.name: [
DictFormatter(Segments.column),
DictFormatter(Comments.column),
RenameFormatter(**mapper_segmentation),
]
},
IMAGE_CAPTIONING: {
JSONL.name: [ListedCategoryFormatter(Texts.column), RenameFormatter(**mapper_image_captioning)]
JSONL.name: [
ListedCategoryFormatter(Texts.column),
ListedCategoryFormatter(Comments.column),
RenameFormatter(**mapper_image_captioning),
]
},
}
return mapping[project.project_type][file_format]
@ -134,3 +177,7 @@ def create_labels(project: Project, examples: QuerySet[ExportedExample], user=No
label_collections = select_label_collection(project)
labels = [label_collection(examples=examples, user=user) for label_collection in label_collections]
return labels
def create_comment(examples: QuerySet[ExportedExample], user=None) -> List[Comments]:
    """Build the comment collections for an export.

    Args:
        examples: Examples whose comments should be exported.
        user: Optional user forwarded to ``Comments`` to restrict the comments.

    Returns:
        A single-element list holding the ``Comments`` collection.
    """
    collection = Comments(examples=examples, user=user)
    return [collection]

7
backend/data_export/pipeline/formatters.py

@ -46,12 +46,15 @@ class FastTextCategoryFormatter(Formatter):
"""Format the label column to `__label__LabelA __label__LabelB` format.
Also, drop the columns except for `data` and `self.target_column`.
"""
dataset = dataset[[DATA, self.target_column]]
dataset = dataset[[DATA, self.target_column, "Comments"]]
dataset[self.target_column] = dataset[self.target_column].apply(
lambda labels: " ".join(sorted(f"__label__{label.to_string()}" for label in labels))
)
dataset[self.target_column] = dataset[self.target_column].fillna("")
dataset = dataset[self.target_column] + " " + dataset[DATA]
dataset["Comments"] = dataset["Comments"].apply(
lambda comments: " ".join(f"__comment__{comment.to_string()}" for comment in comments)
)
dataset = dataset[self.target_column] + " " + dataset[DATA] + " " + dataset["Comments"]
return dataset

8
backend/data_export/tests/test_dataset.py

@ -17,9 +17,13 @@ class TestDataset(unittest.TestCase):
label.find_by.return_value = {"labels": ["label"]}
self.labels = MagicMock()
self.labels.__iter__.return_value = [label]
comment = MagicMock()
comment.find_by.return_value = {"comments": ["comment"]}
self.comments = MagicMock()
self.comments.__iter__.return_value = [comment]
def test_to_dataframe(self):
dataset = Dataset(self.examples, self.labels)
dataset = Dataset(self.examples, self.labels, self.comments)
df = dataset.to_dataframe()
expected = pd.DataFrame([{"data": "example", "labels": ["label"]}])
expected = pd.DataFrame([{"data": "example", "labels": ["label"], "comments": ["comment"]}])
assert_frame_equal(df, expected)

13
backend/data_export/tests/test_formatters.py

@ -75,15 +75,20 @@ class TestTupledSpanFormatter(unittest.TestCase):
class TestFastTextFormatter(unittest.TestCase):
def setUp(self):
self.return_value = "Label"
self.return_value_label = "Label"
self.return_value_comment = "Comment"
label = MagicMock()
label.to_string.return_value = self.return_value
self.dataset = pd.DataFrame([{TARGET_COLUMN: [label], DATA: "example"}])
comment = MagicMock()
label.to_string.return_value = self.return_value_label
comment.to_string.return_value = self.return_value_comment
self.dataset = pd.DataFrame([{TARGET_COLUMN: [label], DATA: "example", "Comments": [comment]}])
def test_format(self):
formatter = FastTextCategoryFormatter(TARGET_COLUMN)
dataset = formatter.format(self.dataset)
expected_dataset = pd.DataFrame([f"__label__{self.return_value} example"])
expected_dataset = pd.DataFrame(
[f"__label__{self.return_value_label} example __comment__{self.return_value_comment}"]
)
self.assertEqual(dataset.to_csv(index=False, header=None), expected_dataset.to_csv(index=False, header=None))

224
backend/data_export/tests/test_task.py

@ -64,6 +64,8 @@ class TestExportCategory(TestExport):
self.example2 = mommy.make("ExportedExample", project=self.project.item, text="example2")
self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
self.data1 = self.data_to_text(self.example1)
self.data2 = self.data_to_text(self.example2)
@ -73,16 +75,16 @@ class TestExportCategory(TestExport):
datasets = self.export_dataset()
expected_datasets = {
self.project.admin.username: [
{**self.data1, "label": [self.category1.to_string()]},
{**self.data2, "label": []},
{**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]},
{**self.data2, "label": [], "Comments": []},
],
self.project.approver.username: [
{**self.data1, "label": []},
{**self.data2, "label": []},
{**self.data1, "label": [], "Comments": []},
{**self.data2, "label": [], "Comments": []},
],
self.project.annotator.username: [
{**self.data1, "label": [self.category2.to_string()]},
{**self.data2, "label": []},
{**self.data1, "label": [self.category2.to_string()], "Comments": [self.comment2.to_string()]},
{**self.data2, "label": [], "Comments": []},
],
}
for username, dataset in expected_datasets.items():
@ -95,15 +97,20 @@ class TestExportCategory(TestExport):
{
**self.data1,
"label": sorted([self.category1.to_string(), self.category2.to_string()]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
},
{**self.data2, "label": []},
{**self.data2, "label": [], "Comments": []},
]
self.assertEqual(dataset, expected_dataset)
def test_confirmed_and_non_collaborative(self):
self.prepare_data()
datasets = self.export_dataset(confirmed_only=True)
expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()]}]}
expected_datasets = {
self.project.admin.username: [
{**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]}
]
}
for username, dataset in expected_datasets.items():
self.assertEqual(datasets[username], dataset)
@ -114,6 +121,7 @@ class TestExportCategory(TestExport):
{
**self.data1,
"label": sorted([self.category1.to_string(), self.category2.to_string()]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
}
]
self.assertEqual(dataset, expected_dataset)
@ -126,6 +134,8 @@ class TestExportSeq2seq(TestExport):
self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
self.data1 = self.data_to_text(self.example1)
self.data2 = self.data_to_text(self.example2)
@ -135,16 +145,16 @@ class TestExportSeq2seq(TestExport):
datasets = self.export_dataset()
expected_datasets = {
self.project.admin.username: [
{**self.data1, "label": [self.text1.text]},
{**self.data2, "label": []},
{**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
{**self.data2, "label": [], "Comments": []},
],
self.project.approver.username: [
{**self.data1, "label": []},
{**self.data2, "label": []},
{**self.data1, "label": [], "Comments": []},
{**self.data2, "label": [], "Comments": []},
],
self.project.annotator.username: [
{**self.data1, "label": [self.text2.text]},
{**self.data2, "label": []},
{**self.data1, "label": [self.text2.text], "Comments": [self.comment2.to_string()]},
{**self.data2, "label": [], "Comments": []},
],
}
for username, dataset in expected_datasets.items():
@ -157,8 +167,9 @@ class TestExportSeq2seq(TestExport):
{
**self.data1,
"label": sorted([self.text1.text, self.text2.text]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
},
{**self.data2, "label": []},
{**self.data2, "label": [], "Comments": []},
]
self.assertEqual(dataset, expected_dataset)
@ -167,7 +178,7 @@ class TestExportSeq2seq(TestExport):
datasets = self.export_dataset(confirmed_only=True)
expected_datasets = {
self.project.admin.username: [
{**self.data1, "label": [self.text1.text]},
{**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
],
self.project.approver.username: [],
self.project.annotator.username: [],
@ -182,6 +193,7 @@ class TestExportSeq2seq(TestExport):
{
**self.data1,
"label": sorted([self.text1.text, self.text2.text]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
}
]
self.assertEqual(dataset, expected_dataset)
@ -194,6 +206,8 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
self.span = mommy.make(
"ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
)
@ -210,20 +224,22 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
**self.data1,
"entities": [list(self.span.to_tuple())],
"cats": [self.category1.to_string()],
"Comments": [self.comment1.to_string()],
},
{**self.data2, "entities": [], "cats": []},
{**self.data2, "entities": [], "cats": [], "Comments": []},
],
self.project.annotator.username: [
{
**self.data1,
"entities": [],
"cats": [self.category2.to_string()],
"Comments": [self.comment2.to_string()],
},
{**self.data2, "entities": [], "cats": []},
{**self.data2, "entities": [], "cats": [], "Comments": []},
],
self.project.approver.username: [
{**self.data1, "entities": [], "cats": []},
{**self.data2, "entities": [], "cats": []},
{**self.data1, "entities": [], "cats": [], "Comments": []},
{**self.data2, "entities": [], "cats": [], "Comments": []},
],
}
for username, dataset in expected_datasets.items():
@ -237,8 +253,9 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
**self.data1,
"entities": [list(self.span.to_tuple())],
"cats": sorted([self.category1.to_string(), self.category2.to_string()]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
},
{**self.data2, "entities": [], "cats": []},
{**self.data2, "entities": [], "cats": [], "Comments": []},
]
self.assertEqual(dataset, expected_dataset)
@ -251,6 +268,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
**self.data1,
"entities": [list(self.span.to_tuple())],
"cats": [self.category1.to_string()],
"Comments": [self.comment1.to_string()],
},
],
self.project.annotator.username: [],
@ -267,6 +285,7 @@ class TestExportIntentDetectionAndSlotFilling(TestExport):
**self.data1,
"entities": [list(self.span.to_tuple())],
"cats": sorted([self.category1.to_string(), self.category2.to_string()]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
},
]
self.assertEqual(dataset, expected_dataset)
@ -284,6 +303,8 @@ class TestExportSequenceLabeling(TestExport):
)
mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
self.data1 = self.data_to_text(self.example1)
self.data2 = self.data_to_text(self.example2)
@ -292,16 +313,16 @@ class TestExportSequenceLabeling(TestExport):
datasets = self.export_dataset()
expected_datasets = {
self.project.admin.username: [
{**self.data1, "label": [list(self.span1.to_tuple())]},
{**self.data2, "label": []},
{**self.data1, "label": [list(self.span1.to_tuple())], "Comments": [self.comment1.to_string()]},
{**self.data2, "label": [], "Comments": []},
],
self.project.annotator.username: [
{**self.data1, "label": [list(self.span2.to_tuple())]},
{**self.data2, "label": []},
{**self.data1, "label": [list(self.span2.to_tuple())], "Comments": [self.comment2.to_string()]},
{**self.data2, "label": [], "Comments": []},
],
self.project.approver.username: [
{**self.data1, "label": []},
{**self.data2, "label": []},
{**self.data1, "label": [], "Comments": []},
{**self.data2, "label": [], "Comments": []},
],
}
for username, dataset in expected_datasets.items():
@ -314,8 +335,9 @@ class TestExportSequenceLabeling(TestExport):
{
**self.data1,
"label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
},
{**self.data2, "label": []},
{**self.data2, "label": [], "Comments": []},
]
self.assertEqual(dataset, expected_dataset)
@ -324,7 +346,7 @@ class TestExportSequenceLabeling(TestExport):
datasets = self.export_dataset(confirmed_only=True)
expected_datasets = {
self.project.admin.username: [
{**self.data1, "label": [list(self.span1.to_tuple())]},
{**self.data1, "label": [list(self.span1.to_tuple())], "Comments": [self.comment1.to_string()]},
],
self.project.annotator.username: [],
self.project.approver.username: [],
@ -339,6 +361,7 @@ class TestExportSequenceLabeling(TestExport):
{
**self.data1,
"label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
},
]
self.assertEqual(dataset, expected_dataset)
@ -351,6 +374,8 @@ class TestExportSpeechToText(TestExport):
self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
self.data1 = self.data_to_filename(self.example1)
self.data2 = self.data_to_filename(self.example2)
@ -360,16 +385,16 @@ class TestExportSpeechToText(TestExport):
datasets = self.export_dataset()
expected_datasets = {
self.project.admin.username: [
{**self.data1, "label": [self.text1.text]},
{**self.data2, "label": []},
{**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
{**self.data2, "label": [], "Comments": []},
],
self.project.approver.username: [
{**self.data1, "label": []},
{**self.data2, "label": []},
{**self.data1, "label": [], "Comments": []},
{**self.data2, "label": [], "Comments": []},
],
self.project.annotator.username: [
{**self.data1, "label": [self.text2.text]},
{**self.data2, "label": []},
{**self.data1, "label": [self.text2.text], "Comments": [self.comment2.to_string()]},
{**self.data2, "label": [], "Comments": []},
],
}
for username, dataset in expected_datasets.items():
@ -382,8 +407,9 @@ class TestExportSpeechToText(TestExport):
{
**self.data1,
"label": sorted([self.text1.text, self.text2.text]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
},
{**self.data2, "label": []},
{**self.data2, "label": [], "Comments": []},
]
self.assertEqual(dataset, expected_dataset)
@ -392,7 +418,7 @@ class TestExportSpeechToText(TestExport):
datasets = self.export_dataset(confirmed_only=True)
expected_datasets = {
self.project.admin.username: [
{**self.data1, "label": [self.text1.text]},
{**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
],
self.project.annotator.username: [],
self.project.approver.username: [],
@ -407,6 +433,7 @@ class TestExportSpeechToText(TestExport):
{
**self.data1,
"label": sorted([self.text1.text, self.text2.text]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
}
]
self.assertEqual(dataset, expected_dataset)
@ -419,6 +446,8 @@ class TestExportImageClassification(TestExport):
self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
self.data1 = self.data_to_filename(self.example1)
self.data2 = self.data_to_filename(self.example2)
@ -428,22 +457,16 @@ class TestExportImageClassification(TestExport):
datasets = self.export_dataset()
expected_datasets = {
self.project.admin.username: [
{
**self.data1,
"label": [self.category1.to_string()],
},
{**self.data2, "label": []},
{**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]},
{**self.data2, "label": [], "Comments": []},
],
self.project.approver.username: [
{**self.data1, "label": []},
{**self.data2, "label": []},
{**self.data1, "label": [], "Comments": []},
{**self.data2, "label": [], "Comments": []},
],
self.project.annotator.username: [
{
**self.data1,
"label": [self.category2.to_string()],
},
{**self.data2, "label": []},
{**self.data1, "label": [self.category2.to_string()], "Comments": [self.comment2.to_string()]},
{**self.data2, "label": [], "Comments": []},
],
}
for username, dataset in expected_datasets.items():
@ -456,15 +479,20 @@ class TestExportImageClassification(TestExport):
{
**self.data1,
"label": sorted([self.category1.to_string(), self.category2.to_string()]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
},
{**self.data2, "label": []},
{**self.data2, "label": [], "Comments": []},
]
self.assertEqual(dataset, expected_dataset)
def test_confirmed_and_non_collaborative(self):
self.prepare_data()
datasets = self.export_dataset(confirmed_only=True)
expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()]}]}
expected_datasets = {
self.project.admin.username: [
{**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]}
]
}
for username, dataset in expected_datasets.items():
self.assertEqual(datasets[username], dataset)
@ -475,6 +503,7 @@ class TestExportImageClassification(TestExport):
{
**self.data1,
"label": sorted([self.category1.to_string(), self.category2.to_string()]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
}
]
self.assertEqual(dataset, expected_dataset)
@ -485,6 +514,8 @@ class TestExportBoundingBox(TestExport):
self.project = prepare_project(BOUNDING_BOX, collaborative_annotation=collaborative)
self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
self.bbox1 = mommy.make(
"ExportedBoundingBox", example=self.example1, user=self.project.admin, x=0, y=0, width=10, height=10
)
@ -503,19 +534,17 @@ class TestExportBoundingBox(TestExport):
{
**self.data1,
"bbox": [self.bbox1.to_dict()],
"Comments": [self.comment1.to_dict()],
},
{**self.data2, "bbox": []},
{**self.data2, "bbox": [], "Comments": []},
],
self.project.approver.username: [
{**self.data1, "bbox": []},
{**self.data2, "bbox": []},
{**self.data1, "bbox": [], "Comments": []},
{**self.data2, "bbox": [], "Comments": []},
],
self.project.annotator.username: [
{
**self.data1,
"bbox": [self.bbox2.to_dict()],
},
{**self.data2, "bbox": []},
{**self.data1, "bbox": [self.bbox2.to_dict()], "Comments": [self.comment2.to_dict()]},
{**self.data2, "bbox": [], "Comments": []},
],
}
for username, dataset in expected_datasets.items():
@ -528,15 +557,20 @@ class TestExportBoundingBox(TestExport):
{
**self.data1,
"bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
"Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
},
{**self.data2, "bbox": []},
{**self.data2, "bbox": [], "Comments": []},
]
self.assertEqual(dataset, expected_dataset)
def test_confirmed_and_non_collaborative(self):
self.prepare_data()
datasets = self.export_dataset(confirmed_only=True)
expected_datasets = {self.project.admin.username: [{**self.data1, "bbox": [self.bbox1.to_dict()]}]}
expected_datasets = {
self.project.admin.username: [
{**self.data1, "bbox": [self.bbox1.to_dict()], "Comments": [self.comment1.to_dict()]}
]
}
for username, dataset in expected_datasets.items():
self.assertEqual(datasets[username], dataset)
@ -547,6 +581,7 @@ class TestExportBoundingBox(TestExport):
{
**self.data1,
"bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
"Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
}
]
self.assertEqual(dataset, expected_dataset)
@ -557,6 +592,8 @@ class TestExportSegmentation(TestExport):
self.project = prepare_project(SEGMENTATION, collaborative_annotation=collaborative)
self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
self.seg1 = mommy.make("ExportedSegmentation", example=self.example1, user=self.project.admin, points=[0, 1])
self.seg2 = mommy.make(
"ExportedSegmentation", example=self.example1, user=self.project.annotator, points=[1, 2]
@ -571,22 +608,16 @@ class TestExportSegmentation(TestExport):
datasets = self.export_dataset()
expected_datasets = {
self.project.admin.username: [
{
**self.data1,
self.column: [self.seg1.to_dict()],
},
{**self.data2, self.column: []},
{**self.data1, self.column: [self.seg1.to_dict()], "Comments": [self.comment1.to_dict()]},
{**self.data2, self.column: [], "Comments": []},
],
self.project.approver.username: [
{**self.data1, self.column: []},
{**self.data2, self.column: []},
{**self.data1, self.column: [], "Comments": []},
{**self.data2, self.column: [], "Comments": []},
],
self.project.annotator.username: [
{
**self.data1,
self.column: [self.seg2.to_dict()],
},
{**self.data2, self.column: []},
{**self.data1, self.column: [self.seg2.to_dict()], "Comments": [self.comment2.to_dict()]},
{**self.data2, self.column: [], "Comments": []},
],
}
for username, dataset in expected_datasets.items():
@ -599,15 +630,20 @@ class TestExportSegmentation(TestExport):
{
**self.data1,
self.column: [self.seg1.to_dict(), self.seg2.to_dict()],
"Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
},
{**self.data2, self.column: []},
{**self.data2, self.column: [], "Comments": []},
]
self.assertEqual(dataset, expected_dataset)
def test_confirmed_and_non_collaborative(self):
self.prepare_data()
datasets = self.export_dataset(confirmed_only=True)
expected_datasets = {self.project.admin.username: [{**self.data1, self.column: [self.seg1.to_dict()]}]}
expected_datasets = {
self.project.admin.username: [
{**self.data1, self.column: [self.seg1.to_dict()], "Comments": [self.comment1.to_dict()]}
]
}
for username, dataset in expected_datasets.items():
self.assertEqual(datasets[username], dataset)
@ -618,6 +654,7 @@ class TestExportSegmentation(TestExport):
{
**self.data1,
self.column: [self.seg1.to_dict(), self.seg2.to_dict()],
"Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
}
]
self.assertEqual(dataset, expected_dataset)
@ -628,6 +665,8 @@ class TestExportImageCaptioning(TestExport):
self.project = prepare_project(IMAGE_CAPTIONING, collaborative_annotation=collaborative)
self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
@ -640,16 +679,16 @@ class TestExportImageCaptioning(TestExport):
datasets = self.export_dataset()
expected_datasets = {
self.project.admin.username: [
{**self.data1, self.column: [self.text1.text]},
{**self.data2, self.column: []},
{**self.data1, self.column: [self.text1.text], "Comments": [self.comment1.to_string()]},
{**self.data2, self.column: [], "Comments": []},
],
self.project.approver.username: [
{**self.data1, self.column: []},
{**self.data2, self.column: []},
{**self.data1, self.column: [], "Comments": []},
{**self.data2, self.column: [], "Comments": []},
],
self.project.annotator.username: [
{**self.data1, self.column: [self.text2.text]},
{**self.data2, self.column: []},
{**self.data1, self.column: [self.text2.text], "Comments": [self.comment2.to_string()]},
{**self.data2, self.column: [], "Comments": []},
],
}
for username, dataset in expected_datasets.items():
@ -662,8 +701,9 @@ class TestExportImageCaptioning(TestExport):
{
**self.data1,
self.column: sorted([self.text1.text, self.text2.text]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
},
{**self.data2, self.column: []},
{**self.data2, self.column: [], "Comments": []},
]
self.assertEqual(dataset, expected_dataset)
@ -672,7 +712,7 @@ class TestExportImageCaptioning(TestExport):
datasets = self.export_dataset(confirmed_only=True)
expected_datasets = {
self.project.admin.username: [
{**self.data1, self.column: [self.text1.text]},
{**self.data1, self.column: [self.text1.text], "Comments": [self.comment1.to_string()]},
],
self.project.approver.username: [],
self.project.annotator.username: [],
@ -687,6 +727,7 @@ class TestExportImageCaptioning(TestExport):
{
**self.data1,
self.column: sorted([self.text1.text, self.text2.text]),
"Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
}
]
self.assertEqual(dataset, expected_dataset)
@ -709,6 +750,8 @@ class TestExportRelation(TestExport):
self.relation = mommy.make(
"ExportedRelation", from_id=self.span1, to_id=self.span2, example=self.example1, user=self.project.admin
)
self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
self.data1 = self.data_to_text(self.example1)
self.data2 = self.data_to_text(self.example2)
@ -722,20 +765,22 @@ class TestExportRelation(TestExport):
**self.data1,
"entities": [self.span1.to_dict(), self.span2.to_dict()],
"relations": [self.relation.to_dict()],
"Comments": [self.comment1.to_dict()],
},
{**self.data2, "entities": [], "relations": []},
{**self.data2, "entities": [], "relations": [], "Comments": []},
],
self.project.annotator.username: [
{
**self.data1,
"entities": [self.span3.to_dict()],
"relations": [],
"Comments": [self.comment2.to_dict()],
},
{**self.data2, "entities": [], "relations": []},
{**self.data2, "entities": [], "relations": [], "Comments": []},
],
self.project.approver.username: [
{**self.data1, "entities": [], "relations": []},
{**self.data2, "entities": [], "relations": []},
{**self.data1, "entities": [], "relations": [], "Comments": []},
{**self.data2, "entities": [], "relations": [], "Comments": []},
],
}
for username, dataset in expected_datasets.items():
@ -749,8 +794,9 @@ class TestExportRelation(TestExport):
**self.data1,
"entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
"relations": [self.relation.to_dict()],
"Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
},
{**self.data2, "entities": [], "relations": []},
{**self.data2, "entities": [], "relations": [], "Comments": []},
]
self.assertEqual(dataset, expected_dataset)
@ -763,6 +809,7 @@ class TestExportRelation(TestExport):
**self.data1,
"entities": [self.span1.to_dict(), self.span2.to_dict()],
"relations": [self.relation.to_dict()],
"Comments": [self.comment1.to_dict()],
},
],
self.project.annotator.username: [],
@ -779,6 +826,7 @@ class TestExportRelation(TestExport):
**self.data1,
"entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
"relations": [self.relation.to_dict()],
"Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
}
]
self.assertEqual(dataset, expected_dataset)
Loading…
Cancel
Save