# doccano/backend/data_export/pipeline/catalog.py


								from collections import defaultdict

								from typing import Dict, List, Type


								from pydantic import BaseModel

								from typing_extensions import Literal


								from . import examples

								from projects.models import (

								    DOCUMENT_CLASSIFICATION,

								    IMAGE_CLASSIFICATION,

								    INTENT_DETECTION_AND_SLOT_FILLING,

								    SEQ2SEQ,

								    SEQUENCE_LABELING,

								    SPEECH2TEXT,

								)


								class Format:
								    """Base descriptor for an export file format.

								    Subclasses override ``name`` with the label of the concrete format.
								    """

								    name = ""

								    @classmethod
								    def dict(cls):
								        """Return the format metadata as a dictionary with a single ``name`` key."""
								        description = {"name": cls.name}
								        return description


								class CSV(Format):
								    """Export format labelled ``CSV`` with the ``csv`` extension."""

								    extension = "csv"
								    name = "CSV"


								class FastText(Format):
								    """Export format labelled ``fastText`` with the ``txt`` extension."""

								    extension = "txt"
								    name = "fastText"


								class JSON(Format):
								    """Export format labelled ``JSON`` with the ``json`` extension."""

								    extension = "json"
								    name = "JSON"


								class JSONL(Format):
								    """Export format labelled ``JSONL`` with the ``jsonl`` extension."""

								    extension = "jsonl"
								    name = "JSONL"


								class IntentAndSlot(Format):
								    """Export format labelled ``JSONL(intent and slot)`` with the ``jsonl`` extension."""

								    extension = "jsonl"
								    name = "JSONL(intent and slot)"


								class JSONLRelation(Format):
								    """Export format labelled ``JSONL(relation)`` with the ``jsonl`` extension."""

								    extension = "jsonl"
								    name = "JSONL(relation)"


								# Option model exposing a single ``delimiter`` setting; this file registers
								# it alongside the CSV format.
								# NOTE(review): documented with ``#`` comments on purpose. The model's
								# ``schema()`` is merged into the dictionaries built by
								# ``Options.filter_by_task``, and pydantic presumably copies a class
								# docstring into that schema as ``description`` -- confirm before
								# converting these comments into a docstring.
								class OptionDelimiter(BaseModel):

								    # Accepted separators: comma, tab, semicolon, pipe or a single space.
								    # Defaults to a comma.
								    delimiter: Literal[",", "\t", ";", "|", " "] = ","


								# Option model that declares no fields; this file registers it for the
								# formats that are not paired with ``OptionDelimiter``.
								# NOTE(review): kept as ``#`` comments rather than a docstring because the
								# model's ``schema()`` output is returned by ``Options.filter_by_task`` and
								# pydantic presumably includes a class docstring in it -- confirm.
								class OptionNone(BaseModel):

								    # Intentionally empty: there is nothing to configure.
								    pass


								class Options:
								    """Registry of the export options available for each task.

								    Entries live on the class itself, so every caller shares one registry.
								    """

								    # Maps a task name to the (file format, option model, example) tuples
								    # registered for it, in registration order.
								    options: Dict[str, List] = defaultdict(list)

								    @classmethod
								    def filter_by_task(cls, task_name: str):
								        """Return the export options registered for ``task_name``.

								        Args:
								            task_name: Task identifier that was passed to ``register``.

								        Returns:
								            A list with one dictionary per registered entry, built by
								            merging the format's ``dict()``, the option model's
								            ``schema()`` and the example stored under ``"example"``.
								            The list is empty when nothing is registered for the task.
								        """
								        # Use ``get`` rather than indexing: indexing a defaultdict inserts an
								        # empty list for every unknown key, so a plain lookup would silently
								        # grow the shared registry.
								        registered = cls.options.get(task_name, ())
								        return [
								            {**file_format.dict(), **option.schema(), "example": example}
								            for file_format, option, example in registered
								        ]

								    @classmethod
								    def register(cls, task: str, file_format: Type[Format], option: Type[BaseModel], example: str):
								        """Add an export option for ``task`` to the shared registry.

								        Args:
								            task: Task identifier the option belongs to.
								            file_format: Format class describing the exported file.
								            option: Option model class whose schema is exposed to callers.
								            example: Example attached to the entry.
								        """
								        cls.options[task].append((file_format, option, example))


								# Export options offered per task. Entries are registered top to bottom,
								# which is also the order ``Options.filter_by_task`` returns them in.
								for _task, _file_format, _option, _example in (
								    # Text Classification
								    (DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, examples.Category_CSV),
								    (DOCUMENT_CLASSIFICATION, FastText, OptionNone, examples.Category_fastText),
								    (DOCUMENT_CLASSIFICATION, JSON, OptionNone, examples.Category_JSON),
								    (DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.Category_JSONL),
								    # Sequence Labeling
								    (SEQUENCE_LABELING, JSONL, OptionNone, examples.Offset_JSONL),
								    (SEQUENCE_LABELING, JSONLRelation, OptionNone, examples.ENTITY_AND_RELATION_JSONL),
								    # Sequence to sequence
								    (SEQ2SEQ, CSV, OptionDelimiter, examples.Text_CSV),
								    (SEQ2SEQ, JSON, OptionNone, examples.Text_JSON),
								    (SEQ2SEQ, JSONL, OptionNone, examples.Text_JSONL),
								    # Intent detection and slot filling
								    (INTENT_DETECTION_AND_SLOT_FILLING, IntentAndSlot, OptionNone, examples.INTENT_JSONL),
								    # Image Classification
								    (IMAGE_CLASSIFICATION, JSONL, OptionNone, examples.CategoryImageClassification),
								    # Speech to Text
								    (SPEECH2TEXT, JSONL, OptionNone, examples.Speech2Text),
								):
								    Options.register(_task, _file_format, _option, _example)
								# Drop the loop variables so the module namespace stays unchanged.
								del _task, _file_format, _option, _example