doccano/backend/data_export/pipeline/catalog.py


								from collections import defaultdict

								from typing import Dict, List, Type


								from pydantic import BaseModel

								from typing_extensions import Literal


								from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,

								                        INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,

								                        SEQUENCE_LABELING, SPEECH2TEXT)

								from . import examples


								class Format:
								    """Base descriptor for an export file format.

								    Subclasses override ``name`` to identify themselves; the base class
								    leaves it empty.
								    """

								    name = ''

								    @classmethod
								    def dict(cls):
								        """Return the format's description as a plain dictionary.

								        Returns:
								            A dict with a single ``'name'`` key holding ``cls.name``.
								        """
								        description = {}
								        description['name'] = cls.name
								        return description


								class CSV(Format):
								    """Format entry named ``'CSV'`` that uses the ``'csv'`` extension."""

								    extension = 'csv'
								    name = 'CSV'


								class FastText(Format):
								    """Format entry named ``'fastText'`` that uses the ``'txt'`` extension."""

								    extension = 'txt'
								    name = 'fastText'


								class JSON(Format):
								    """Format entry named ``'JSON'`` that uses the ``'json'`` extension."""

								    extension = 'json'
								    name = 'JSON'


								class JSONL(Format):
								    """Format entry named ``'JSONL'`` that uses the ``'jsonl'`` extension."""

								    extension = 'jsonl'
								    name = 'JSONL'


								class IntentAndSlot(Format):
								    """Format entry named ``'JSONL(intent and slot)'``.

								    Shares the ``'jsonl'`` extension with ``JSONL`` but is listed under
								    its own name.
								    """

								    extension = 'jsonl'
								    name = 'JSONL(intent and slot)'


								class OptionDelimiter(BaseModel):

								    # Option model exposing a single `delimiter` setting. In this module it
								    # is registered alongside the CSV format entries.
								    #
								    # NOTE(review): documented with `#` comments rather than a docstring on
								    # purpose -- pydantic is understood to copy a model's docstring into
								    # `schema()` as "description", which would change what
								    # `Options.filter_by_task` returns. Confirm before converting.
								    #
								    # `delimiter` accepts exactly one of: comma, tab, semicolon, pipe or
								    # space. It defaults to a comma.
								    delimiter: Literal[',', '\t', ';', '|', ' '] = ','


								class OptionNone(BaseModel):

								    # Option model that declares no fields. In this module it is registered
								    # with every format entry that is not paired with `OptionDelimiter`.
								    #
								    # NOTE(review): kept docstring-free on purpose -- pydantic is understood
								    # to copy a model's docstring into `schema()` as "description", which
								    # would change what `Options.filter_by_task` returns. Confirm before
								    # converting these comments to a docstring.
								    pass


								class Options:
								    """Registry of the export formats offered for each task type.

								    The registry lives on the class itself, so every caller shares the
								    same entries; nothing here is per-instance.
								    """

								    # Maps a task name to its registered (format, option, example) tuples,
								    # kept in registration order.
								    options: Dict[str, List] = defaultdict(list)

								    @classmethod
								    def filter_by_task(cls, task_name: str):
								        """Describe every export format registered for ``task_name``.

								        Args:
								            task_name: Task identifier that was used with ``register``.

								        Returns:
								            A list with one dict per registered entry, in registration
								            order. Each dict merges ``format.dict()`` and
								            ``option.schema()`` and adds an ``'example'`` key; on a key
								            collision the later source wins. The list is empty when
								            nothing is registered for the task.
								        """
								        # Look the task up with .get() rather than indexing: indexing a
								        # defaultdict inserts an empty list for every unknown key, so a
								        # mere read would silently grow the shared registry.
								        registered = cls.options.get(task_name, [])
								        return [
								            {
								                **fmt.dict(),
								                **option.schema(),
								                'example': example,
								            }
								            for fmt, option, example in registered
								        ]

								    @classmethod
								    def register(cls,
								                 task: str,
								                 format: Type[Format],
								                 option: Type[BaseModel],
								                 example: str):
								        """Add an export format entry for ``task``.

								        Args:
								            task: Task identifier the entry belongs to.
								            format: ``Format`` subclass describing the file format.
								            option: Model class whose ``schema()`` describes the
								                options available for this format.
								            example: Example value returned under the ``'example'`` key
								                by ``filter_by_task``.
								        """
								        # Indexing is intentional here: the defaultdict creates the
								        # per-task list on first registration.
								        cls.options[task].append((format, option, example))


								# Default export catalog. Each row is (task, format, option model, example)
								# and rows are registered from top to bottom, which is also the order in
								# which Options.filter_by_task lists them for a task.
								for _task, _format, _option, _example in (
								    # Text Classification
								    (DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, examples.Category_CSV),
								    (DOCUMENT_CLASSIFICATION, FastText, OptionNone, examples.Category_fastText),
								    (DOCUMENT_CLASSIFICATION, JSON, OptionNone, examples.Category_JSON),
								    (DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.Category_JSONL),
								    # Sequence Labeling
								    (SEQUENCE_LABELING, JSONL, OptionNone, examples.Offset_JSONL),
								    # Sequence to sequence
								    (SEQ2SEQ, CSV, OptionDelimiter, examples.Text_CSV),
								    (SEQ2SEQ, JSON, OptionNone, examples.Text_JSON),
								    (SEQ2SEQ, JSONL, OptionNone, examples.Text_JSONL),
								    # Intent detection and slot filling
								    (INTENT_DETECTION_AND_SLOT_FILLING, IntentAndSlot, OptionNone, examples.INTENT_JSONL),
								    # Image Classification
								    (IMAGE_CLASSIFICATION, JSONL, OptionNone, examples.CategoryImageClassification),
								    # Speech to Text
								    (SPEECH2TEXT, JSONL, OptionNone, examples.Speech2Text),
								):
								    Options.register(_task, _format, _option, _example)

								# Drop the loop variables so the module namespace gains no extra names.
								del _task, _format, _option, _example