Support data export for segmentation

2 years ago · 668abd2b47
6 changed files with 58 additions and 2 deletions
--- a/backend/data_export/migrations/0003_exportedsegmentation.py
+++ b/backend/data_export/migrations/0003_exportedsegmentation.py
@ -0,0 +1,24 @@
+# Generated by Django 4.0.4 on 2022-06-30 06:57
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    # Adds the ExportedSegmentation model state to the data_export app.
+
+    # Requires the labels migration named for segmentation and the previous
+    # data_export migration.
+    dependencies = [
+        ("labels", "0016_segmentation"),
+        ("data_export", "0002_exportedboundingbox"),
+    ]
+
+    operations = [
+        # Declared as a proxy ("proxy": True) of labels.segmentation with no
+        # fields, indexes, or constraints of its own.
+        migrations.CreateModel(
+            name="ExportedSegmentation",
+            fields=[],
+            options={
+                "proxy": True,
+                "indexes": [],
+                "constraints": [],
+            },
+            bases=("labels.segmentation",),
+        ),
+    ]
--- a/backend/data_export/models.py
+++ b/backend/data_export/models.py
@ -3,7 +3,7 @@ from typing import Any, Dict, Protocol, Tuple
 from django.db import models

 from examples.models import Example
-from labels.models import BoundingBox, Category, Relation, Span, TextLabel
+from labels.models import BoundingBox, Category, Relation, Segmentation, Span, TextLabel
 from projects.models import Project

 DATA = "data"
@ -90,3 +90,11 @@ class ExportedBoundingBox(BoundingBox):

    class Meta:
        proxy = True
+
+
+class ExportedSegmentation(Segmentation):
+    """Proxy of Segmentation that adds a dict serialization for export."""
+
+    def to_dict(self):
+        """Return this segmentation as a plain dict for the export pipeline.
+
+        The label is included next to the points so each exported region
+        carries the "points" and "label" keys shown in the segmentation
+        example.jsonl. "uuid" is kept so existing consumers are unaffected.
+        """
+        # NOTE(review): assumes the related label exposes `text` - confirm
+        # against labels.models before merging.
+        return {
+            "uuid": str(self.uuid),
+            "points": self.points,
+            "label": self.label.text,
+        }
+
+    class Meta:
+        proxy = True
--- a/backend/data_export/pipeline/catalog.py
+++ b/backend/data_export/pipeline/catalog.py
@ -7,6 +7,7 @@ from projects.models import (
    DOCUMENT_CLASSIFICATION,
    IMAGE_CLASSIFICATION,
    INTENT_DETECTION_AND_SLOT_FILLING,
+    SEGMENTATION,
    SEQ2SEQ,
    SEQUENCE_LABELING,
    SPEECH2TEXT,
@ -94,6 +95,9 @@ Options.register(IMAGE_CLASSIFICATION, JSONL, IMAGE_CLASSIFICATION_DIR / "exampl
 BOUNDING_BOX_DIR = EXAMPLE_DIR / "bounding_box"
 Options.register(BOUNDING_BOX, JSONL, BOUNDING_BOX_DIR / "example.jsonl")

+SEGMENTATION_DIR = EXAMPLE_DIR / "segmentation"
+Options.register(SEGMENTATION, JSONL, SEGMENTATION_DIR / "example.jsonl")
+
 # Speech to Text
 SPEECH2TEXT_DIR = EXAMPLE_DIR / "speech_to_text"
 Options.register(SPEECH2TEXT, JSONL, SPEECH2TEXT_DIR / "example.jsonl")
--- a/backend/data_export/pipeline/examples/segmentation/example.jsonl
+++ b/backend/data_export/pipeline/examples/segmentation/example.jsonl
@ -0,0 +1,9 @@
+{
+  "filename": "cat.png",
+  "segmentation": [
+    {
+      "points": [38, 81, 32, 78, 47, 82],
+      "label": "cat"
+    }
+  ]
+}
--- a/backend/data_export/pipeline/factories.py
+++ b/backend/data_export/pipeline/factories.py
@ -13,13 +13,14 @@ from .formatters import (
    RenameFormatter,
    TupledSpanFormatter,
 )
-from .labels import BoundingBoxes, Categories, Labels, Relations, Spans, Texts
+from .labels import BoundingBoxes, Categories, Labels, Relations, Segments, Spans, Texts
 from data_export.models import DATA, ExportedExample
 from projects.models import (
    BOUNDING_BOX,
    DOCUMENT_CLASSIFICATION,
    IMAGE_CLASSIFICATION,
    INTENT_DETECTION_AND_SLOT_FILLING,
+    SEGMENTATION,
    SEQ2SEQ,
    SEQUENCE_LABELING,
    SPEECH2TEXT,
@ -46,6 +47,7 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
    mapper_seq2seq = {DATA: "text", Texts.column: "label"}
    mapper_image_classification = {DATA: "filename", Categories.column: "label"}
    mapper_bounding_box = {DATA: "filename", BoundingBoxes.column: "bbox"}
+    # Keyed on Segments.column (not BoundingBoxes.column) so the rename follows
+    # the column that the SEGMENTATION formatter actually reads.
+    mapper_segmentation = {DATA: "filename", Segments.column: "segmentation"}
    mapper_speech2text = {DATA: "filename", Texts.column: "label"}
    mapper_intent_detection = {DATA: "text", Categories.column: "cats"}
    mapper_relation_extraction = {DATA: "text"}
@ -96,6 +98,7 @@ def create_formatter(project: Project, file_format: str) -> List[Formatter]:
            ]
        },
        BOUNDING_BOX: {JSONL.name: [DictFormatter(BoundingBoxes.column), RenameFormatter(**mapper_bounding_box)]},
+        SEGMENTATION: {JSONL.name: [DictFormatter(Segments.column), RenameFormatter(**mapper_segmentation)]},
    }
    return mapping[project.project_type][file_format]

@ -110,6 +113,7 @@ def select_label_collection(project: Project) -> List[Type[Labels]]:
        SPEECH2TEXT: [Texts],
        INTENT_DETECTION_AND_SLOT_FILLING: [Categories, Spans],
        BOUNDING_BOX: [BoundingBoxes],
+        SEGMENTATION: [Segments],
    }
    return mapping[project.project_type]

--- a/backend/data_export/pipeline/labels.py
+++ b/backend/data_export/pipeline/labels.py
@ -13,6 +13,7 @@ from data_export.models import (
    ExportedExample,
    ExportedLabel,
    ExportedRelation,
+    ExportedSegmentation,
    ExportedSpan,
    ExportedText,
 )
@ -63,3 +64,9 @@ class BoundingBoxes(Labels):
    label_class = ExportedBoundingBox
    column = "labels"
    fields = ("example", "label")
+
+
+class Segments(Labels):
+    # Label collection backed by the ExportedSegmentation proxy model.
+    label_class = ExportedSegmentation
+    # Same column name as BoundingBoxes.
+    column = "labels"
+    # NOTE(review): presumably the relations loaded with each label; confirm
+    # how the Labels base class uses `fields`.
+    fields = ("example", "label")