Browse Source

Support data export for bounding box

pull/1899/head
Hironsan 2 years ago
parent
commit
84f0e774a2
8 changed files with 141 additions and 2 deletions
  1. 25
      backend/data_export/migrations/0002_exportedboundingbox.py
  2. 13
      backend/data_export/models.py
  3. 4
      backend/data_export/pipeline/catalog.py
  4. 13
      backend/data_export/pipeline/examples/bounding_box/example.jsonl
  5. 6
      backend/data_export/pipeline/factories.py
  6. 7
      backend/data_export/pipeline/labels.py
  7. 2
      backend/data_export/tests/test_catalog.py
  8. 73
      backend/data_export/tests/test_task.py

25
backend/data_export/migrations/0002_exportedboundingbox.py

@ -0,0 +1,25 @@
# Generated by Django 4.0.4 on 2022-06-30 02:44
from django.db import migrations
class Migration(migrations.Migration):
    """Register the ``ExportedBoundingBox`` proxy model.

    The model is declared as a proxy on top of ``labels.boundingbox`` and
    therefore carries no fields, indexes or constraints of its own.
    """

    # Both referenced migrations must be applied before this one.
    dependencies = [
        ('labels', '0015_create_boundingbox_table'),
        ('data_export', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='ExportedBoundingBox',
            fields=[],
            options={'proxy': True, 'indexes': [], 'constraints': []},
            bases=('labels.boundingbox',),
        ),
    ]

13
backend/data_export/models.py

@ -3,7 +3,7 @@ from typing import Any, Dict, Protocol, Tuple
from django.db import models
from examples.models import Example
from labels.models import Category, Relation, Span, TextLabel
from labels.models import BoundingBox, Category, Relation, Span, TextLabel
from projects.models import Project
DATA = "data"
@ -79,3 +79,14 @@ class ExportedText(TextLabel):
class Meta:
proxy = True
class ExportedBoundingBox(BoundingBox):
    """Proxy of ``BoundingBox`` that adds serialisation helpers for export."""

    class Meta:
        proxy = True

    def to_tuple(self):
        """Return the box geometry as ``(x, y, width, height)``."""
        return (self.x, self.y, self.width, self.height)

    def to_dict(self):
        """Return the box as a dict holding its uuid (as a string) and geometry.

        NOTE(review): the bundled bounding_box ``example.jsonl`` shows a
        ``"label"`` key per box, which this dict does not emit -- confirm
        whether that difference is intended.
        """
        payload = {"uuid": str(self.uuid)}
        # Geometry keys are added in a fixed order: x, y, width, height.
        for attr in ("x", "y", "width", "height"):
            payload[attr] = getattr(self, attr)
        return payload

4
backend/data_export/pipeline/catalog.py

@ -3,6 +3,7 @@ from pathlib import Path
from typing import Dict, List, Type
from projects.models import (
BOUNDING_BOX,
DOCUMENT_CLASSIFICATION,
IMAGE_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING,
@ -90,6 +91,9 @@ Options.register(INTENT_DETECTION_AND_SLOT_FILLING, JSONL, INTENT_DETECTION_DIR
IMAGE_CLASSIFICATION_DIR = EXAMPLE_DIR / "image_classification"
Options.register(IMAGE_CLASSIFICATION, JSONL, IMAGE_CLASSIFICATION_DIR / "example.jsonl")
# Bounding Box
BOUNDING_BOX_DIR = EXAMPLE_DIR / "bounding_box"
Options.register(BOUNDING_BOX, JSONL, BOUNDING_BOX_DIR / "example.jsonl")
# Speech to Text
SPEECH2TEXT_DIR = EXAMPLE_DIR / "speech_to_text"
Options.register(SPEECH2TEXT, JSONL, SPEECH2TEXT_DIR / "example.jsonl")

13
backend/data_export/pipeline/examples/bounding_box/example.jsonl

@ -0,0 +1,13 @@
{
"filename": "cat.png",
"bbox": [
{
"x": 11,
"y": 12,
"width": 64,
"height": 66,
"label": "cat"
}
]
}

6
backend/data_export/pipeline/factories.py

@ -13,9 +13,10 @@ from .formatters import (
RenameFormatter,
TupledSpanFormatter,
)
from .labels import Categories, Labels, Relations, Spans, Texts
from .labels import BoundingBoxes, Categories, Labels, Relations, Spans, Texts
from data_export.models import DATA, ExportedExample
from projects.models import (
BOUNDING_BOX,
DOCUMENT_CLASSIFICATION,
IMAGE_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING,
@ -44,6 +45,7 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
mapper_sequence_labeling = {DATA: "text", Spans.column: "label"}
mapper_seq2seq = {DATA: "text", Texts.column: "label"}
mapper_image_classification = {DATA: "filename", Categories.column: "label"}
mapper_bounding_box = {DATA: "filename", BoundingBoxes.column: "bbox"}
mapper_speech2text = {DATA: "filename", Texts.column: "label"}
mapper_intent_detection = {DATA: "text", Categories.column: "cats"}
mapper_relation_extraction = {DATA: "text"}
@ -93,6 +95,7 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
RenameFormatter(**mapper_intent_detection),
]
},
BOUNDING_BOX: {JSONL.name: [DictFormatter(BoundingBoxes.column), RenameFormatter(**mapper_bounding_box)]},
}
return mapping[project.project_type][file_format]
@ -106,6 +109,7 @@ def select_label_collection(project: Project) -> List[Type[Labels]]:
IMAGE_CLASSIFICATION: [Categories],
SPEECH2TEXT: [Texts],
INTENT_DETECTION_AND_SLOT_FILLING: [Categories, Spans],
BOUNDING_BOX: [BoundingBoxes],
}
return mapping[project.project_type]

7
backend/data_export/pipeline/labels.py

@ -8,6 +8,7 @@ from typing import Dict, List, Tuple
from django.db.models import QuerySet
from data_export.models import (
ExportedBoundingBox,
ExportedCategory,
ExportedExample,
ExportedLabel,
@ -56,3 +57,9 @@ class Texts(Labels):
label_class = ExportedText
column = "labels"
fields = ("example",)
class BoundingBoxes(Labels):
    """Label collection whose label class is ``ExportedBoundingBox``."""

    fields = ("example", "label")
    column = "labels"
    label_class = ExportedBoundingBox

2
backend/data_export/tests/test_catalog.py

@ -2,6 +2,7 @@ import unittest
from ..pipeline.catalog import Options
from projects.models import (
BOUNDING_BOX,
DOCUMENT_CLASSIFICATION,
IMAGE_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING,
@ -14,6 +15,7 @@ from projects.models import (
class TestOptions(unittest.TestCase):
def test_return_at_least_one_option(self):
tasks = [
BOUNDING_BOX,
DOCUMENT_CLASSIFICATION,
IMAGE_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING,

73
backend/data_export/tests/test_task.py

@ -8,6 +8,7 @@ from model_mommy import mommy
from ..celery_tasks import export_dataset
from data_export.models import DATA
from projects.models import (
BOUNDING_BOX,
DOCUMENT_CLASSIFICATION,
IMAGE_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING,
@ -477,6 +478,78 @@ class TestExportImageClassification(TestExport):
self.assertEqual(dataset, expected_dataset)
class TestExportBoundingBox(TestExport):
    """Export tests for ``BOUNDING_BOX`` projects.

    The fixture holds two examples. The first one has one box drawn by the
    admin and one drawn by the annotator, and is confirmed by the admin; the
    second one has no boxes and no confirmation.
    """

    @staticmethod
    def _row(data, *boxes):
        """Build one expected record: ``data`` plus a ``bbox`` list of box dicts."""
        return {**data, "bbox": [box.to_dict() for box in boxes]}

    def prepare_data(self, collaborative=False):
        """Create the project, its two examples, two boxes and one confirmation."""
        project = prepare_project(BOUNDING_BOX, collaborative_annotation=collaborative)
        self.project = project

        first = mommy.make("ExportedExample", project=project.item, text="confirmed")
        second = mommy.make("ExportedExample", project=project.item, text="unconfirmed")
        self.example1 = first
        self.example2 = second

        # Both boxes belong to the first example but to different users.
        self.bbox1 = mommy.make(
            "ExportedBoundingBox",
            example=first,
            user=project.admin,
            x=0,
            y=0,
            width=10,
            height=10,
        )
        self.bbox2 = mommy.make(
            "ExportedBoundingBox",
            example=first,
            user=project.annotator,
            x=10,
            y=10,
            width=20,
            height=20,
        )
        mommy.make("ExampleState", example=first, confirmed_by=project.admin)

        self.data1 = self.data_to_filename(first)
        self.data2 = self.data_to_filename(second)

    def test_unconfirmed_and_non_collaborative(self):
        self.prepare_data()
        datasets = self.export_dataset()

        empty_second = self._row(self.data2)
        expected_datasets = {
            self.project.admin.username: [self._row(self.data1, self.bbox1), empty_second],
            self.project.approver.username: [self._row(self.data1), empty_second],
            self.project.annotator.username: [self._row(self.data1, self.bbox2), empty_second],
        }
        for username in expected_datasets:
            self.assertEqual(datasets[username], expected_datasets[username])

    def test_unconfirmed_and_collaborative(self):
        self.prepare_data(collaborative=True)
        dataset = self.export_dataset()

        expected_dataset = [
            self._row(self.data1, self.bbox1, self.bbox2),
            self._row(self.data2),
        ]
        self.assertEqual(dataset, expected_dataset)

    def test_confirmed_and_non_collaborative(self):
        self.prepare_data()
        datasets = self.export_dataset(confirmed_only=True)

        expected_datasets = {
            self.project.admin.username: [self._row(self.data1, self.bbox1)],
        }
        for username in expected_datasets:
            self.assertEqual(datasets[username], expected_datasets[username])

    def test_confirmed_and_collaborative(self):
        self.prepare_data(collaborative=True)
        dataset = self.export_dataset(confirmed_only=True)

        expected_dataset = [self._row(self.data1, self.bbox1, self.bbox2)]
        self.assertEqual(dataset, expected_dataset)
class TestExportRelation(TestExport):
def prepare_data(self, collaborative=False):
self.project = prepare_project(SEQUENCE_LABELING, use_relation=True, collaborative_annotation=collaborative)

Loading…
Cancel
Save