You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

88 lines
2.2 KiB

from collections import defaultdict
from typing import Dict, List, Type
from pydantic import BaseModel
from typing_extensions import Literal
from . import examples
class Format:
    """Base class for the file formats registered in this catalog.

    Subclasses override ``name`` with the label of the format they describe.
    """

    # Display label of the format; empty on the base class.
    name = ''

    # NOTE(review): the reviewed text had lost this method's decorator and the
    # entries of the returned mapping. ``cls`` as the first parameter implies
    # a classmethod, and ``name`` is the only attribute this class defines, so
    # only that key is restored here -- confirm against the original file.
    @classmethod
    def dict(cls) -> Dict[str, str]:
        """Return the format's metadata as a plain mapping."""
        return {
            'name': cls.name,
        }
class CSV(Format):
    """CSV format; files carry the ``csv`` extension."""

    name = 'CSV'
    extension = 'csv'
class FastText(Format):
    """fastText format; files carry the ``txt`` extension."""

    name = 'fastText'
    extension = 'txt'
class JSON(Format):
    """JSON format; files carry the ``json`` extension."""

    name = 'JSON'
    extension = 'json'
class JSONL(Format):
    """JSONL format; files carry the ``jsonl`` extension."""

    name = 'JSONL'
    extension = 'jsonl'
# Option model for formats that let the caller pick a field delimiter.
# Documented with comments rather than a docstring on purpose: pydantic
# copies a model's docstring into its generated schema as a description.
class OptionDelimiter(BaseModel):
    # Allowed separators: comma (the default), tab, semicolon, pipe, or space.
    delimiter: Literal[',', '\t', ';', '|', ' '] = ','
# Option model for formats that take no options.
# NOTE(review): the class body was missing from the reviewed text; an empty
# model is restored here -- confirm that the original declared no fields.
# Documented with comments rather than a docstring on purpose: pydantic
# copies a model's docstring into its generated schema as a description.
class OptionNone(BaseModel):
    pass
class Options:
    """Registry of the (format, option, example) choices available per task."""

    # Maps a task name to its registered (format, option, example) tuples, in
    # registration order. Defined on the class, so every caller shares it.
    options: Dict[str, List] = defaultdict(list)

    # NOTE(review): the reviewed text had lost this method's decorator, the
    # opening brace and closing bracket of the comprehension, and whatever
    # entries preceded 'example'. ``format`` and ``option`` are unpacked but
    # were otherwise unused, so their contributions are restored below as
    # ``format.dict()`` and pydantic's ``option.schema()`` -- confirm both
    # against the original file.
    @classmethod
    def filter_by_task(cls, task_name: str) -> List[Dict]:
        """Return one mapping per choice registered for ``task_name``.

        Each mapping merges the format's metadata, the option model's schema,
        and the example under the ``'example'`` key; later keys win on clash.
        Looking up a task that was never registered yields an empty list (and,
        because ``options`` is a defaultdict, creates an empty entry for it).
        """
        registered = cls.options[task_name]
        return [
            {
                **format.dict(),
                **option.schema(),
                'example': example,
            }
            for format, option, example in registered
        ]

    # The decorator was missing from the reviewed text; it is required because
    # the module calls ``Options.register(...)`` on the class with exactly the
    # four arguments that follow ``cls``.
    @classmethod
    def register(cls,
                 task: str,
                 format: Type[Format],
                 option: Type[BaseModel],
                 example: str) -> None:
        """Append a (format, option, example) choice to the list for ``task``.

        Args:
            task: Name of the task the choice belongs to.
            format: ``Format`` subclass describing the file format.
            option: pydantic model class describing the format's options.
            example: Example text stored alongside the choice.
        """
        cls.options[task].append((format, option, example))
# Catalog contents: every choice is registered once, at import time, and the
# order within a task is the order in which the choices are listed.
# NOTE(review): the task-name constants used below are not imported anywhere
# in the reviewed text -- confirm the import is present in the real file.

# Text Classification
Options.register(
    task=DOCUMENT_CLASSIFICATION,
    format=CSV,
    option=OptionDelimiter,
    example=examples.Category_CSV,
)
Options.register(
    task=DOCUMENT_CLASSIFICATION,
    format=FastText,
    option=OptionNone,
    example=examples.Category_fastText,
)
Options.register(
    task=DOCUMENT_CLASSIFICATION,
    format=JSON,
    option=OptionNone,
    example=examples.Category_JSON,
)
Options.register(
    task=DOCUMENT_CLASSIFICATION,
    format=JSONL,
    option=OptionNone,
    example=examples.Category_JSONL,
)

# Sequence Labeling
Options.register(
    task=SEQUENCE_LABELING,
    format=JSONL,
    option=OptionNone,
    example=examples.Offset_JSONL,
)

# Sequence to sequence
Options.register(
    task=SEQ2SEQ,
    format=CSV,
    option=OptionDelimiter,
    example=examples.Text_CSV,
)
Options.register(
    task=SEQ2SEQ,
    format=JSON,
    option=OptionNone,
    example=examples.Text_JSON,
)
Options.register(
    task=SEQ2SEQ,
    format=JSONL,
    option=OptionNone,
    example=examples.Text_JSONL,
)

# Image Classification
Options.register(
    task=IMAGE_CLASSIFICATION,
    format=JSONL,
    option=OptionNone,
    example=examples.CategoryImageClassification,
)