From d418d6df5a44d40d1dc9bc4100cf6a735bb1420b Mon Sep 17 00:00:00 2001
From: Hironsan
Date: Fri, 9 Apr 2021 07:49:21 +0900
Subject: [PATCH] Add text file dataset to catalog

---
 app/api/views/upload/catalog.py | 8 ++++++++
 app/api/views/upload/factory.py | 1 +
 2 files changed, 9 insertions(+)

diff --git a/app/api/views/upload/catalog.py b/app/api/views/upload/catalog.py
index 33735441..aa093a36 100644
--- a/app/api/views/upload/catalog.py
+++ b/app/api/views/upload/catalog.py
@@ -44,6 +44,11 @@ class Excel(Format):
     accept_types = 'application/vnd.ms-excel, application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
 
 
+class TextFile(Format):
+    name = 'TextFile'
+    accept_types = 'text/*'
+
+
 class CoNLL(Format):
     name = 'CoNLL'
     accept_types = 'text/*'
@@ -80,6 +85,7 @@ class Options:
 
 
 # Text Classification
+Options.register(DOCUMENT_CLASSIFICATION, TextFile, OptionNone)
 Options.register(DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter)
 Options.register(DOCUMENT_CLASSIFICATION, FastText, OptionNone)
 Options.register(DOCUMENT_CLASSIFICATION, JSON, OptionColumn)
@@ -87,10 +93,12 @@ Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionColumn)
 Options.register(DOCUMENT_CLASSIFICATION, Excel, OptionColumn)
 
 # Sequence Labeling
+Options.register(SEQUENCE_LABELING, TextFile, OptionNone)
 Options.register(SEQUENCE_LABELING, JSONL, OptionColumn)
 Options.register(SEQUENCE_LABELING, CoNLL, OptionNone)
 
 # Sequence to sequence
+Options.register(SEQ2SEQ, TextFile, OptionNone)
 Options.register(SEQ2SEQ, CSV, OptionDelimiter)
 Options.register(SEQ2SEQ, JSON, OptionColumn)
 Options.register(SEQ2SEQ, JSONL, OptionColumn)
diff --git a/app/api/views/upload/factory.py b/app/api/views/upload/factory.py
index 557b10f8..1ddd3170 100644
--- a/app/api/views/upload/factory.py
+++ b/app/api/views/upload/factory.py
@@ -12,6 +12,7 @@ def get_data_class(project_type: str):
 
 def get_dataset_class(format: str):
     mapping = {
+        catalog.TextFile.name: dataset.TextFileDataset,
         catalog.CSV.name: dataset.CsvDataset,
         catalog.JSONL.name: dataset.JSONLDataset,
         catalog.JSON.name: dataset.JSONDataset,