From 668abd2b478a32950ac9030f5f6621612a42372c Mon Sep 17 00:00:00 2001
From: Hironsan
Date: Thu, 30 Jun 2022 16:13:41 +0900
Subject: [PATCH] Support data export for segmentation

---
Review notes (this section sits between the "---" marker and the first
"diff --git" line, so it is not part of the commit message):
 * factories.py: mapper_segmentation is keyed on Segments.column instead of
   BoundingBoxes.column, matching the DictFormatter(Segments.column) entry
   registered for SEGMENTATION in the same hunk set.
 * segmentation/example.jsonl: removed the trailing comma after the
   "segmentation" array so the sample is valid JSON.
 * The blob ids on the "index" lines of those two files predate these edits.
 * NOTE(review): ExportedSegmentation.to_dict() emits "uuid" and "points",
   while the sample file shows "points" and "label"; confirm which shape is
   intended against labels.models.Segmentation.
 * NOTE(review): the From: header carries no e-mail address in this copy;
   restore it before feeding the patch to a mail-based workflow.

 .../migrations/0003_exportedsegmentation.py   | 24 +++++++++++++++++++
 backend/data_export/models.py                 | 10 +++++++-
 backend/data_export/pipeline/catalog.py       |  4 ++++
 .../examples/segmentation/example.jsonl       |  9 +++++++
 backend/data_export/pipeline/factories.py     |  6 ++++-
 backend/data_export/pipeline/labels.py        |  7 ++++++
 6 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 backend/data_export/migrations/0003_exportedsegmentation.py
 create mode 100644 backend/data_export/pipeline/examples/segmentation/example.jsonl

diff --git a/backend/data_export/migrations/0003_exportedsegmentation.py b/backend/data_export/migrations/0003_exportedsegmentation.py
new file mode 100644
index 00000000..caeed6ef
--- /dev/null
+++ b/backend/data_export/migrations/0003_exportedsegmentation.py
@@ -0,0 +1,24 @@
+# Generated by Django 4.0.4 on 2022-06-30 06:57
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("labels", "0016_segmentation"),
+        ("data_export", "0002_exportedboundingbox"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="ExportedSegmentation",
+            fields=[],
+            options={
+                "proxy": True,
+                "indexes": [],
+                "constraints": [],
+            },
+            bases=("labels.segmentation",),
+        ),
+    ]
diff --git a/backend/data_export/models.py b/backend/data_export/models.py
index ff8202ac..0e5703eb 100644
--- a/backend/data_export/models.py
+++ b/backend/data_export/models.py
@@ -3,7 +3,7 @@ from typing import Any, Dict, Protocol, Tuple
 from django.db import models
 
 from examples.models import Example
-from labels.models import BoundingBox, Category, Relation, Span, TextLabel
+from labels.models import BoundingBox, Category, Relation, Segmentation, Span, TextLabel
 from projects.models import Project
 
 DATA = "data"
@@ -90,3 +90,11 @@ class ExportedBoundingBox(BoundingBox):
 
     class Meta:
         proxy = True
+
+
+class ExportedSegmentation(Segmentation):
+    def to_dict(self):
+        return {"uuid": str(self.uuid), "points": self.points}
+
+    class Meta:
+        proxy = True
diff --git a/backend/data_export/pipeline/catalog.py b/backend/data_export/pipeline/catalog.py
index cb4b5904..5017a5c7 100644
--- a/backend/data_export/pipeline/catalog.py
+++ b/backend/data_export/pipeline/catalog.py
@@ -7,6 +7,7 @@ from projects.models import (
     DOCUMENT_CLASSIFICATION,
     IMAGE_CLASSIFICATION,
     INTENT_DETECTION_AND_SLOT_FILLING,
+    SEGMENTATION,
     SEQ2SEQ,
     SEQUENCE_LABELING,
     SPEECH2TEXT,
@@ -94,6 +95,9 @@ Options.register(IMAGE_CLASSIFICATION, JSONL, IMAGE_CLASSIFICATION_DIR / "exampl
 BOUNDING_BOX_DIR = EXAMPLE_DIR / "bounding_box"
 Options.register(BOUNDING_BOX, JSONL, BOUNDING_BOX_DIR / "example.jsonl")
 
+SEGMENTATION_DIR = EXAMPLE_DIR / "segmentation"
+Options.register(SEGMENTATION, JSONL, SEGMENTATION_DIR / "example.jsonl")
+
 # Speech to Text
 SPEECH2TEXT_DIR = EXAMPLE_DIR / "speech_to_text"
 Options.register(SPEECH2TEXT, JSONL, SPEECH2TEXT_DIR / "example.jsonl")
diff --git a/backend/data_export/pipeline/examples/segmentation/example.jsonl b/backend/data_export/pipeline/examples/segmentation/example.jsonl
new file mode 100644
index 00000000..fb7d92c8
--- /dev/null
+++ b/backend/data_export/pipeline/examples/segmentation/example.jsonl
@@ -0,0 +1,9 @@
+{
+    "filename": "cat.png",
+    "segmentation": [
+        {
+            "points": [38, 81, 32, 78, 47, 82],
+            "label": "cat"
+        }
+    ]
+}
diff --git a/backend/data_export/pipeline/factories.py b/backend/data_export/pipeline/factories.py
index 4b0dee53..42a3efe7 100644
--- a/backend/data_export/pipeline/factories.py
+++ b/backend/data_export/pipeline/factories.py
@@ -13,13 +13,14 @@ from .formatters import (
     RenameFormatter,
     TupledSpanFormatter,
 )
-from .labels import BoundingBoxes, Categories, Labels, Relations, Spans, Texts
+from .labels import BoundingBoxes, Categories, Labels, Relations, Segments, Spans, Texts
 from data_export.models import DATA, ExportedExample
 from projects.models import (
     BOUNDING_BOX,
     DOCUMENT_CLASSIFICATION,
     IMAGE_CLASSIFICATION,
     INTENT_DETECTION_AND_SLOT_FILLING,
+    SEGMENTATION,
     SEQ2SEQ,
     SEQUENCE_LABELING,
     SPEECH2TEXT,
@@ -46,6 +47,7 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
     mapper_seq2seq = {DATA: "text", Texts.column: "label"}
     mapper_image_classification = {DATA: "filename", Categories.column: "label"}
     mapper_bounding_box = {DATA: "filename", BoundingBoxes.column: "bbox"}
+    mapper_segmentation = {DATA: "filename", Segments.column: "segmentation"}
     mapper_speech2text = {DATA: "filename", Texts.column: "label"}
     mapper_intent_detection = {DATA: "text", Categories.column: "cats"}
     mapper_relation_extraction = {DATA: "text"}
@@ -96,6 +98,7 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
             ]
         },
         BOUNDING_BOX: {JSONL.name: [DictFormatter(BoundingBoxes.column), RenameFormatter(**mapper_bounding_box)]},
+        SEGMENTATION: {JSONL.name: [DictFormatter(Segments.column), RenameFormatter(**mapper_segmentation)]},
     }
     return mapping[project.project_type][file_format]
 
@@ -110,6 +113,7 @@ def select_label_collection(project: Project) -> List[Type[Labels]]:
         SPEECH2TEXT: [Texts],
         INTENT_DETECTION_AND_SLOT_FILLING: [Categories, Spans],
         BOUNDING_BOX: [BoundingBoxes],
+        SEGMENTATION: [Segments],
     }
     return mapping[project.project_type]
 
diff --git a/backend/data_export/pipeline/labels.py b/backend/data_export/pipeline/labels.py
index 976cc161..af35ea13 100644
--- a/backend/data_export/pipeline/labels.py
+++ b/backend/data_export/pipeline/labels.py
@@ -13,6 +13,7 @@ from data_export.models import (
     ExportedExample,
     ExportedLabel,
     ExportedRelation,
+    ExportedSegmentation,
     ExportedSpan,
     ExportedText,
 )
@@ -63,3 +64,9 @@ class BoundingBoxes(Labels):
     label_class = ExportedBoundingBox
     column = "labels"
     fields = ("example", "label")
+
+
+class Segments(Labels):
+    label_class = ExportedSegmentation
+    column = "labels"
+    fields = ("example", "label")