Browse Source

Add dataset catalog

pull/1310/head
Hironsan 3 years ago
parent
commit
ded677d038
1 changed file with 56 additions and 0 deletions
  1. 56
      app/api/views/upload/catalog.py

56
app/api/views/upload/catalog.py

@@ -0,0 +1,56 @@
from collections import defaultdict
from typing import Dict, List, Type
from pydantic import BaseModel
from ...models import DOCUMENT_CLASSIFICATION, SEQUENCE_LABELING, SEQ2SEQ
# Names of the file formats that can be registered in the catalog below.
# Each value is the exact string passed to Options.register() as the format
# name, so the spelling and casing here are what callers receive.

# Tabular / delimited formats.
CSV = 'CSV'
EXCEL = 'Excel'

# JSON-based formats.
JSON = 'JSON'
JSONL = 'JSONL'

# Task-specific plain-text formats.
FastText = 'fastText'
CoNLL = 'CoNLL'
# Options for formats whose records expose named columns/fields.
#
# NOTE: documented with comments rather than a docstring on purpose.
# pydantic copies a model's docstring into schema() as "description", and
# Options.filter_by_task() returns that schema to its callers.
class OptionColumn(BaseModel):
    # Presumably the name of the column holding the example text -- confirm
    # against the code that consumes these options.
    column_data: str = 'text'
    # Presumably the name of the column holding the label -- confirm against
    # the code that consumes these options.
    column_label: str = 'label'
# Column options plus a delimiter setting; registered below for the CSV format.
#
# NOTE: documented with comments rather than a docstring on purpose.
# pydantic copies a model's docstring into schema() as "description", and
# Options.filter_by_task() returns that schema to its callers.
class OptionDelimiter(OptionColumn):
    # Defaults to a comma; presumably the field separator of the uploaded
    # file -- confirm against the code that consumes these options.
    delimiter: str = ','
# Empty option model for formats that declare no fields (registered below for
# fastText and CoNLL).
#
# NOTE: documented with comments rather than a docstring on purpose.
# pydantic copies a model's docstring into schema() as "description", and
# Options.filter_by_task() returns that schema to its callers.
class OptionNone(BaseModel):
    pass
class Options:
    """Registry of the file formats, and their option models, per task.

    The registry lives on the class itself, so every user of this class
    shares the same entries.
    """

    # Maps a task name to its (format name, option model) pairs, kept in
    # registration order.
    options: Dict[str, List] = defaultdict(list)

    @classmethod
    def filter_by_task(cls, task_name: str) -> List[dict]:
        """Return the formats registered for ``task_name``.

        Args:
            task_name: The task whose formats are requested.

        Returns:
            One dict per registered format, in registration order. Each dict
            is the option model's ``schema()`` with the format name added
            under ``'name'``; a ``'name'`` key produced by ``schema()``
            itself takes precedence. An unknown task yields an empty list.
        """
        # Use .get() instead of indexing: indexing the defaultdict would
        # insert an empty list for every unknown task name that is queried,
        # silently growing the shared registry on what should be a read.
        registered = cls.options.get(task_name, [])
        return [
            {'name': name, **option.schema()}
            for name, option in registered
        ]

    @classmethod
    def register(cls, task: str, name: str, option: Type[BaseModel]) -> None:
        """Add a format to the registry.

        Args:
            task: The task the format is available for.
            name: The format name reported by ``filter_by_task``.
            option: The model class whose ``schema()`` describes the
                format's options.
        """
        # The defaultdict creates the per-task list on first registration.
        cls.options[task].append((name, option))
# Catalog of supported formats: for each task, the (format name, option model)
# pairs in the order in which they are registered.
for _task, _formats in (
    # Text Classification
    (DOCUMENT_CLASSIFICATION, (
        (CSV, OptionDelimiter),
        (FastText, OptionNone),
        (JSON, OptionColumn),
        (JSONL, OptionColumn),
        (EXCEL, OptionColumn),
    )),
    # Sequence Labeling
    (SEQUENCE_LABELING, (
        (JSONL, OptionColumn),
        (CoNLL, OptionNone),
    )),
    # Sequence to sequence
    (SEQ2SEQ, (
        (CSV, OptionDelimiter),
        (JSON, OptionColumn),
        (JSONL, OptionColumn),
        (EXCEL, OptionColumn),
    )),
):
    for _name, _option in _formats:
        Options.register(_task, _name, _option)

# Keep the loop temporaries out of the module namespace.
del _task, _formats, _name, _option
Loading…
Cancel
Save