|
|
from collections import defaultdict from typing import Dict, List, Type
from pydantic import BaseModel from typing_extensions import Literal
from . import examples from projects.models import ( DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, SEQUENCE_LABELING, SPEECH2TEXT, )
class Format:
    """Base descriptor for a data format offered by this module.

    Subclasses override ``name`` (and, in this file, also add an
    ``extension`` attribute) to describe a concrete format.
    """

    # Display name of the format; the base class leaves it empty.
    name = ""

    @classmethod
    def dict(cls):
        """Return a mapping holding this format's ``name`` under "name".

        Only ``name`` is included; other class attributes are not part of
        the returned mapping.
        """
        summary = {"name": cls.name}
        return summary
class CSV(Format):
    """Format descriptor named "CSV" with the file extension "csv"."""

    # File extension associated with this format (not part of Format.dict()).
    extension = "csv"
    # Name reported by Format.dict().
    name = "CSV"
class FastText(Format):
    """Format descriptor named "fastText" with the file extension "txt"."""

    # File extension associated with this format (not part of Format.dict()).
    extension = "txt"
    # Name reported by Format.dict().
    name = "fastText"
class JSON(Format):
    """Format descriptor named "JSON" with the file extension "json"."""

    # File extension associated with this format (not part of Format.dict()).
    extension = "json"
    # Name reported by Format.dict().
    name = "JSON"
class JSONL(Format):
    """Format descriptor named "JSONL" with the file extension "jsonl"."""

    # File extension associated with this format (not part of Format.dict()).
    extension = "jsonl"
    # Name reported by Format.dict().
    name = "JSONL"
class IntentAndSlot(Format):
    """Format descriptor named "JSONL(intent and slot)".

    Shares the "jsonl" file extension with ``JSONL`` but reports a
    distinct name.
    """

    # File extension associated with this format (not part of Format.dict()).
    extension = "jsonl"
    # Name reported by Format.dict().
    name = "JSONL(intent and slot)"
# Option model exposing a single ``delimiter`` choice.
# NOTE: documented with comments rather than a docstring on purpose --
# pydantic copies a model's docstring into its schema() output as
# "description", and Options.filter_by_task merges that schema into its
# result, so a docstring here would change what callers receive.
class OptionDelimiter(BaseModel):
    # Allowed delimiters: comma, tab, semicolon, pipe, or a single space.
    # Defaults to a comma.
    delimiter: Literal[",", "\t", ";", "|", " "] = ","
# Option model with no fields, registered for formats that take no options.
# NOTE: kept docstring-free on purpose -- pydantic would emit a docstring
# as "description" in schema(), which Options.filter_by_task merges into
# its result.
class OptionNone(BaseModel):
    pass
class Options:
    """Class-level registry of the formats available for each task.

    Every entry is a ``(format, option, example)`` tuple stored under the
    task name it was registered for, in registration order.
    """

    # Shared across all users of the class: task name -> list of
    # (format class, option model class, example string) tuples.
    options: Dict[str, List] = defaultdict(list)

    @classmethod
    def filter_by_task(cls, task_name: str) -> List[Dict]:
        """Return one merged description per entry registered for a task.

        Args:
            task_name: Task whose registered entries are requested.

        Returns:
            A list of dicts, one per registered entry, built by merging
            ``format.dict()``, ``option.schema()`` and an ``"example"``
            key, in that order (later keys win on collision). The list is
            empty when nothing is registered for ``task_name``.
        """
        # Use .get() rather than indexing: indexing a defaultdict inserts
        # the missing key, so merely querying an unknown task would add an
        # empty entry to the shared registry.
        registered = cls.options.get(task_name, [])
        return [
            {**fmt.dict(), **option.schema(), "example": example}
            for fmt, option, example in registered
        ]

    @classmethod
    def register(cls, task: str, format: Type[Format], option: Type[BaseModel], example: str) -> None:
        """Append a ``(format, option, example)`` entry for ``task``.

        Args:
            task: Task name the entry is registered under.
            format: Format class; its ``dict()`` is used by
                ``filter_by_task``.
            option: Option model class; its ``schema()`` is used by
                ``filter_by_task``.
            example: Example value returned under the ``"example"`` key.
        """
        # Indexing is intentional here: the defaultdict creates the list
        # for a task on its first registration.
        cls.options[task].append((format, option, example))
# Default registrations, applied at import time. Each row is
# (task, format, option model, example); rows are registered top to bottom,
# so the per-task order below is the order filter_by_task returns them in.
for _registration in (
    # Text Classification
    (DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, examples.Category_CSV),
    (DOCUMENT_CLASSIFICATION, FastText, OptionNone, examples.Category_fastText),
    (DOCUMENT_CLASSIFICATION, JSON, OptionNone, examples.Category_JSON),
    (DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.Category_JSONL),
    # Sequence Labeling
    (SEQUENCE_LABELING, JSONL, OptionNone, examples.Offset_JSONL),
    # Sequence to sequence
    (SEQ2SEQ, CSV, OptionDelimiter, examples.Text_CSV),
    (SEQ2SEQ, JSON, OptionNone, examples.Text_JSON),
    (SEQ2SEQ, JSONL, OptionNone, examples.Text_JSONL),
    # Intent detection and slot filling
    (INTENT_DETECTION_AND_SLOT_FILLING, IntentAndSlot, OptionNone, examples.INTENT_JSONL),
    # Image Classification
    (IMAGE_CLASSIFICATION, JSONL, OptionNone, examples.CategoryImageClassification),
    # Speech to Text
    (SPEECH2TEXT, JSONL, OptionNone, examples.Speech2Text),
):
    Options.register(*_registration)
# Drop the loop variable so the module namespace matches the original.
del _registration
|