diff --git a/app/api/views/upload/catalog.py b/app/api/views/upload/catalog.py index 795bebf6..33735441 100644 --- a/app/api/views/upload/catalog.py +++ b/app/api/views/upload/catalog.py @@ -6,12 +6,47 @@ from typing_extensions import Literal from ...models import DOCUMENT_CLASSIFICATION, SEQ2SEQ, SEQUENCE_LABELING -CSV = 'CSV' -FastText = 'fastText' -JSON = 'JSON' -JSONL = 'JSONL' -EXCEL = 'Excel' -CoNLL = 'CoNLL' + +class Format: + name = '' + accept_types = '' + + @classmethod + def dict(cls): + return { + 'name': cls.name, + 'accept_types': cls.accept_types + } + + +class CSV(Format): + name = 'CSV' + accept_types = 'text/csv' + + +class FastText(Format): + name = 'fastText' + accept_types = 'text/plain' + + +class JSON(Format): + name = 'JSON' + accept_types = 'application/json' + + +class JSONL(Format): + name = 'JSONL' + accept_types = 'application/jsonl' + + +class Excel(Format): + name = 'Excel' + accept_types = 'application/vnd.ms-excel, application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + + +class CoNLL(Format): + name = 'CoNLL' + accept_types = 'text/*' class OptionColumn(BaseModel): @@ -27,17 +62,21 @@ class OptionNone(BaseModel): pass +class OptionCoNLL(BaseModel): + scheme: Literal['IOB2', 'IOE2', 'IOBES', 'BILOU'] = 'IOB2' + + class Options: options: Dict[str, List] = defaultdict(list) @classmethod def filter_by_task(cls, task_name: str): options = cls.options[task_name] - return [{'name': name, **option.schema()} for name, option in options] + return [{**format.dict(), **option.schema()} for format, option in options] @classmethod - def register(cls, task: str, name: str, option: Type[BaseModel]): - cls.options[task].append((name, option)) + def register(cls, task: str, format: Type[Format], option: Type[BaseModel]): + cls.options[task].append((format, option)) # Text Classification @@ -45,7 +84,7 @@ Options.register(DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter) Options.register(DOCUMENT_CLASSIFICATION, FastText, OptionNone) 
Options.register(DOCUMENT_CLASSIFICATION, JSON, OptionColumn) Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionColumn) -Options.register(DOCUMENT_CLASSIFICATION, EXCEL, OptionColumn) +Options.register(DOCUMENT_CLASSIFICATION, Excel, OptionColumn) # Sequence Labeling Options.register(SEQUENCE_LABELING, JSONL, OptionColumn) @@ -55,4 +94,4 @@ Options.register(SEQUENCE_LABELING, CoNLL, OptionNone) Options.register(SEQ2SEQ, CSV, OptionDelimiter) Options.register(SEQ2SEQ, JSON, OptionColumn) Options.register(SEQ2SEQ, JSONL, OptionColumn) -Options.register(SEQ2SEQ, EXCEL, OptionColumn) +Options.register(SEQ2SEQ, Excel, OptionColumn) diff --git a/app/api/views/upload/factory.py b/app/api/views/upload/factory.py index 7b5bfd42..557b10f8 100644 --- a/app/api/views/upload/factory.py +++ b/app/api/views/upload/factory.py @@ -11,26 +11,24 @@ def get_data_class(project_type: str): def get_dataset_class(format: str): - if format == catalog.CSV: - return dataset.CsvDataset - elif format == catalog.JSONL: - return dataset.JSONLDataset - elif format == catalog.JSONL: - return dataset.JSONDataset - elif format == catalog.FastText: - return dataset.FastTextDataset - elif format == catalog.EXCEL: - return dataset.ExcelDataset - else: + mapping = { + catalog.CSV.name: dataset.CsvDataset, + catalog.JSONL.name: dataset.JSONLDataset, + catalog.JSON.name: dataset.JSONDataset, + catalog.FastText.name: dataset.FastTextDataset, + catalog.Excel.name: dataset.ExcelDataset + } + if format not in mapping: - ValueError(f'Invalid format: {format}') + raise ValueError(f'Invalid format: {format}') + return mapping[format] def get_label_class(project_type: str): - if project_type == DOCUMENT_CLASSIFICATION: - return label.CategoryLabel - elif project_type == SEQUENCE_LABELING: - return label.OffsetLabel - elif project_type == SEQ2SEQ: - return label.TextLabel - else: + mapping = { + DOCUMENT_CLASSIFICATION: label.CategoryLabel, + SEQUENCE_LABELING: label.OffsetLabel, + SEQ2SEQ: label.TextLabel + } + if project_type not in mapping: 
- ValueError(f'Invalid project type: {project_type}') + raise ValueError(f'Invalid project type: {project_type}') + return mapping[project_type] diff --git a/frontend/domain/models/upload/catalog.ts b/frontend/domain/models/upload/catalog.ts index 076e9a02..f7f43fe0 100644 --- a/frontend/domain/models/upload/catalog.ts +++ b/frontend/domain/models/upload/catalog.ts @@ -1,13 +1,14 @@ export class Catalog { constructor( public name: string, + public accept_types: string, public properties: object ) {} static valueOf( - { name, properties }: - { name: string, properties: object } + { name, accept_types, properties }: + { name: string, accept_types: string, properties: object } ): Catalog { - return new Catalog(name, properties) + return new Catalog(name, accept_types, properties) } } diff --git a/frontend/package.json b/frontend/package.json index a6048abb..2e5a7f9d 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -24,6 +24,7 @@ "chart.js": "^2.9.3", "codemirror": "^5.55.0", "filepond": "^4.26.1", + "filepond-plugin-file-validate-type": "^1.2.6", "js-cookie": "^2.2.1", "lodash": "^4.17.21", "nuxt": "^2.11.0", diff --git a/frontend/services/application/upload/catalogData.ts b/frontend/services/application/upload/catalogData.ts index f3ba06a6..0a8360ab 100644 --- a/frontend/services/application/upload/catalogData.ts +++ b/frontend/services/application/upload/catalogData.ts @@ -3,10 +3,12 @@ import { Catalog } from '~/domain/models/upload/catalog' export class CatalogDTO { name: string + acceptTypes: string properties: object constructor(item: Catalog) { this.name = item.name + this.acceptTypes = item.accept_types this.properties = item.properties } } diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 5935a7bb..718093ea 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -5731,6 +5731,11 @@ file-uri-to-path@1.0.0: resolved "https://registry.yarnpkg.com/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz#553a7b8446ff6f684359c445f1e37a05dacc33dd" integrity 
sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw== +filepond-plugin-file-validate-type@^1.2.6: + version "1.2.6" + resolved "https://registry.yarnpkg.com/filepond-plugin-file-validate-type/-/filepond-plugin-file-validate-type-1.2.6.tgz#5a03e04fb8f70f0df37ff9a285d3d2343cdc55f2" + integrity sha512-3JFxpb4o5D4BssP/Drjn2GPpejg/QXzU4e3/Pq8IvIJuokHz95b6+RPYstQ+6ITKREYup0N8F/CO/OQfhVkUgg== + filepond@^4.26.1: version "4.26.1" resolved "https://registry.yarnpkg.com/filepond/-/filepond-4.26.1.tgz#c0ab300fcf367d395129bf491290693d48142fe8"