diff --git a/backend/data_export/migrations/0002_exportedboundingbox.py b/backend/data_export/migrations/0002_exportedboundingbox.py
new file mode 100644
index 00000000..b4da4347
--- /dev/null
+++ b/backend/data_export/migrations/0002_exportedboundingbox.py
@@ -0,0 +1,25 @@
+# Generated by Django 4.0.4 on 2022-06-30 02:44
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('labels', '0015_create_boundingbox_table'),
+        ('data_export', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='ExportedBoundingBox',
+            fields=[
+            ],
+            options={
+                'proxy': True,
+                'indexes': [],
+                'constraints': [],
+            },
+            bases=('labels.boundingbox',),
+        ),
+    ]
diff --git a/backend/data_export/models.py b/backend/data_export/models.py
index f7492dfc..ff8202ac 100644
--- a/backend/data_export/models.py
+++ b/backend/data_export/models.py
@@ -3,7 +3,7 @@ from typing import Any, Dict, Protocol, Tuple
 from django.db import models
 
 from examples.models import Example
-from labels.models import Category, Relation, Span, TextLabel
+from labels.models import BoundingBox, Category, Relation, Span, TextLabel
 from projects.models import Project
 
 DATA = "data"
@@ -79,3 +79,20 @@ class ExportedText(TextLabel):
 
     class Meta:
         proxy = True
+
+
+class ExportedBoundingBox(BoundingBox):
+    """Proxy model over ``BoundingBox`` adding serialization helpers for export."""
+
+    def to_dict(self):
+        """Return the box as a dict with ``uuid``, ``x``, ``y``, ``width`` and ``height`` keys."""
+        # NOTE(review): the bounding_box example.jsonl shows a "label" key per box and no "uuid";
+        # this dict emits "uuid" and no label -- confirm which export schema is intended.
+        return {"uuid": str(self.uuid), "x": self.x, "y": self.y, "width": self.width, "height": self.height}
+
+    def to_tuple(self):
+        """Return the box geometry as ``(x, y, width, height)``."""
+        return self.x, self.y, self.width, self.height
+
+    class Meta:
+        proxy = True
diff --git a/backend/data_export/pipeline/catalog.py b/backend/data_export/pipeline/catalog.py
index 74bbe831..cb4b5904 100644
--- a/backend/data_export/pipeline/catalog.py
+++ b/backend/data_export/pipeline/catalog.py
@@ -3,6 +3,7 @@ from pathlib import Path
 from typing import Dict, List, Type
 
 from projects.models import (
+    BOUNDING_BOX,
     DOCUMENT_CLASSIFICATION,
     IMAGE_CLASSIFICATION,
     INTENT_DETECTION_AND_SLOT_FILLING,
@@ -90,6 +91,10 @@ Options.register(INTENT_DETECTION_AND_SLOT_FILLING, JSONL, INTENT_DETECTION_DIR
 IMAGE_CLASSIFICATION_DIR = EXAMPLE_DIR / "image_classification"
 Options.register(IMAGE_CLASSIFICATION, JSONL, IMAGE_CLASSIFICATION_DIR / "example.jsonl")
 
+# Bounding Box
+BOUNDING_BOX_DIR = EXAMPLE_DIR / "bounding_box"
+Options.register(BOUNDING_BOX, JSONL, BOUNDING_BOX_DIR / "example.jsonl")
+
 # Speech to Text
 SPEECH2TEXT_DIR = EXAMPLE_DIR / "speech_to_text"
 Options.register(SPEECH2TEXT, JSONL, SPEECH2TEXT_DIR / "example.jsonl")
diff --git a/backend/data_export/pipeline/examples/bounding_box/example.jsonl b/backend/data_export/pipeline/examples/bounding_box/example.jsonl
new file mode 100644
index 00000000..b928f732
--- /dev/null
+++ b/backend/data_export/pipeline/examples/bounding_box/example.jsonl
@@ -0,0 +1,12 @@
+{
+    "filename": "cat.png",
+    "bbox": [
+        {
+            "x": 11,
+            "y": 12,
+            "width": 64,
+            "height": 66,
+            "label": "cat"
+        }
+    ]
+}
diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py
index 76f3bc7b..4b0dee53 100644
--- a/backend/data_export/pipeline/factories.py
+++ b/backend/data_export/pipeline/factories.py
@@ -13,9 +13,10 @@ from .formatters import (
     RenameFormatter,
     TupledSpanFormatter,
 )
-from .labels import Categories, Labels, Relations, Spans, Texts
+from .labels import BoundingBoxes, Categories, Labels, Relations, Spans, Texts
 from data_export.models import DATA, ExportedExample
 from projects.models import (
+    BOUNDING_BOX,
     DOCUMENT_CLASSIFICATION,
     IMAGE_CLASSIFICATION,
     INTENT_DETECTION_AND_SLOT_FILLING,
@@ -44,6 +45,7 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
     mapper_sequence_labeling = {DATA: "text", Spans.column: "label"}
     mapper_seq2seq = {DATA: "text", Texts.column: "label"}
     mapper_image_classification = {DATA: "filename", Categories.column: "label"}
+    mapper_bounding_box = {DATA: "filename", BoundingBoxes.column: "bbox"}
     mapper_speech2text = {DATA: "filename", Texts.column: "label"}
     mapper_intent_detection = {DATA: "text", Categories.column: "cats"}
     mapper_relation_extraction = {DATA: "text"}
@@ -93,6 +95,7 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
                 RenameFormatter(**mapper_intent_detection),
             ]
         },
+        BOUNDING_BOX: {JSONL.name: [DictFormatter(BoundingBoxes.column), RenameFormatter(**mapper_bounding_box)]},
     }
     return mapping[project.project_type][file_format]
 
@@ -106,6 +109,7 @@ def select_label_collection(project: Project) -> List[Type[Labels]]:
         IMAGE_CLASSIFICATION: [Categories],
         SPEECH2TEXT: [Texts],
         INTENT_DETECTION_AND_SLOT_FILLING: [Categories, Spans],
+        BOUNDING_BOX: [BoundingBoxes],
     }
     return mapping[project.project_type]
 
diff --git a/backend/data_export/pipeline/labels.py b/backend/data_export/pipeline/labels.py
index 156f0d62..976cc161 100644
--- a/backend/data_export/pipeline/labels.py
+++ b/backend/data_export/pipeline/labels.py
@@ -8,6 +8,7 @@ from typing import Dict, List, Tuple
 from django.db.models import QuerySet
 
 from data_export.models import (
+    ExportedBoundingBox,
     ExportedCategory,
     ExportedExample,
     ExportedLabel,
@@ -56,3 +57,10 @@ class Texts(Labels):
     label_class = ExportedText
     column = "labels"
     fields = ("example",)
+
+
+class BoundingBoxes(Labels):
+    """Label collection backed by ``ExportedBoundingBox``."""
+    label_class = ExportedBoundingBox
+    column = "labels"
+    fields = ("example", "label")
diff --git a/backend/data_export/tests/test_catalog.py b/backend/data_export/tests/test_catalog.py
index dfcf7a0d..83c0df63 100644
--- a/backend/data_export/tests/test_catalog.py
+++ b/backend/data_export/tests/test_catalog.py
@@ -2,6 +2,7 @@ import unittest
 
 from ..pipeline.catalog import Options
 from projects.models import (
+    BOUNDING_BOX,
     DOCUMENT_CLASSIFICATION,
     IMAGE_CLASSIFICATION,
     INTENT_DETECTION_AND_SLOT_FILLING,
@@ -14,6 +15,7 @@ from projects.models import (
 class TestOptions(unittest.TestCase):
     def test_return_at_least_one_option(self):
         tasks = [
+            BOUNDING_BOX,
             DOCUMENT_CLASSIFICATION,
             IMAGE_CLASSIFICATION,
             INTENT_DETECTION_AND_SLOT_FILLING,
diff --git a/backend/data_export/tests/test_task.py b/backend/data_export/tests/test_task.py
index d1d11976..225ed06f 100644
--- a/backend/data_export/tests/test_task.py
+++ b/backend/data_export/tests/test_task.py
@@ -8,6 +8,7 @@ from model_mommy import mommy
 from ..celery_tasks import export_dataset
 from data_export.models import DATA
 from projects.models import (
+    BOUNDING_BOX,
     DOCUMENT_CLASSIFICATION,
     IMAGE_CLASSIFICATION,
     INTENT_DETECTION_AND_SLOT_FILLING,
@@ -477,6 +478,78 @@ class TestExportImageClassification(TestExport):
         self.assertEqual(dataset, expected_dataset)
 
 
+class TestExportBoundingBox(TestExport):
+    def prepare_data(self, collaborative=False):
+        self.project = prepare_project(BOUNDING_BOX, collaborative_annotation=collaborative)
+        self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
+        self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
+        self.bbox1 = mommy.make(
+            "ExportedBoundingBox", example=self.example1, user=self.project.admin, x=0, y=0, width=10, height=10
+        )
+        self.bbox2 = mommy.make(
+            "ExportedBoundingBox", example=self.example1, user=self.project.annotator, x=10, y=10, width=20, height=20
+        )
+        mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
+        self.data1 = self.data_to_filename(self.example1)
+        self.data2 = self.data_to_filename(self.example2)
+
+    def test_unconfirmed_and_non_collaborative(self):
+        self.prepare_data()
+        datasets = self.export_dataset()
+        expected_datasets = {
+            self.project.admin.username: [
+                {
+                    **self.data1,
+                    "bbox": [self.bbox1.to_dict()],
+                },
+                {**self.data2, "bbox": []},
+            ],
+            self.project.approver.username: [
+                {**self.data1, "bbox": []},
+                {**self.data2, "bbox": []},
+            ],
+            self.project.annotator.username: [
+                {
+                    **self.data1,
+                    "bbox": [self.bbox2.to_dict()],
+                },
+                {**self.data2, "bbox": []},
+            ],
+        }
+        for username, dataset in expected_datasets.items():
+            self.assertEqual(datasets[username], dataset)
+
+    def test_unconfirmed_and_collaborative(self):
+        self.prepare_data(collaborative=True)
+        dataset = self.export_dataset()
+        expected_dataset = [
+            {
+                **self.data1,
+                "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
+            },
+            {**self.data2, "bbox": []},
+        ]
+        self.assertEqual(dataset, expected_dataset)
+
+    def test_confirmed_and_non_collaborative(self):
+        self.prepare_data()
+        datasets = self.export_dataset(confirmed_only=True)
+        expected_datasets = {self.project.admin.username: [{**self.data1, "bbox": [self.bbox1.to_dict()]}]}
+        for username, dataset in expected_datasets.items():
+            self.assertEqual(datasets[username], dataset)
+
+    def test_confirmed_and_collaborative(self):
+        self.prepare_data(collaborative=True)
+        dataset = self.export_dataset(confirmed_only=True)
+        expected_dataset = [
+            {
+                **self.data1,
+                "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
+            }
+        ]
+        self.assertEqual(dataset, expected_dataset)
+
+
 class TestExportRelation(TestExport):
     def prepare_data(self, collaborative=False):
         self.project = prepare_project(SEQUENCE_LABELING, use_relation=True, collaborative_annotation=collaborative)