From 370c123ff4880a93ebbaa1aaa69084ea87fe1a11 Mon Sep 17 00:00:00 2001 From: Hironsan Date: Thu, 19 May 2022 08:04:20 +0900 Subject: [PATCH] Update frontend to handle relation dataset import --- backend/data_import/celery_tasks.py | 4 +- backend/data_import/datasets.py | 17 +- backend/data_import/pipeline/catalog.py | 263 ++++++++++++++---- backend/data_import/tests/test_tasks.py | 53 ++-- backend/data_import/views.py | 9 +- frontend/domain/models/upload/catalog.ts | 6 + .../domain/models/upload/parseRepository.ts | 2 +- .../pages/projects/_id/dataset/import.vue | 14 +- .../repositories/upload/apiParseRepository.ts | 3 +- .../application/upload/catalogData.ts | 4 + .../upload/parseApplicationService.ts | 4 +- 11 files changed, 281 insertions(+), 98 deletions(-) diff --git a/backend/data_import/celery_tasks.py b/backend/data_import/celery_tasks.py index 2f9d08b0..7f3c4249 100644 --- a/backend/data_import/celery_tasks.py +++ b/backend/data_import/celery_tasks.py @@ -53,7 +53,7 @@ def check_uploaded_files(upload_ids: List[str], file_format: str): @shared_task -def import_dataset(user_id, project_id, file_format: str, upload_ids: List[str], **kwargs): +def import_dataset(user_id, project_id, file_format: str, upload_ids: List[str], task: str, **kwargs): project = get_object_or_404(Project, pk=project_id) user = get_object_or_404(get_user_model(), pk=user_id) @@ -64,7 +64,7 @@ def import_dataset(user_id, project_id, file_format: str, upload_ids: List[str], for tu in temporary_uploads ] - dataset = load_dataset(file_format, filenames, project, **kwargs) + dataset = load_dataset(task, file_format, filenames, project, **kwargs) dataset.save(user, batch_size=settings.IMPORT_BATCH_SIZE) upload_to_store(temporary_uploads) errors.extend(dataset.errors) diff --git a/backend/data_import/datasets.py b/backend/data_import/datasets.py index d424bc14..d8876a67 100644 --- a/backend/data_import/datasets.py +++ b/backend/data_import/datasets.py @@ -4,6 +4,7 @@ from typing import List, Type from django.contrib.auth.models import User from .models import DummyLabelType +from .pipeline.catalog import RELATION_EXTRACTION from .pipeline.data import BaseData, BinaryData, TextData from .pipeline.exceptions import FileParseException from .pipeline.factories import create_parser @@ -194,23 +195,23 @@ class CategoryAndSpanDataset(Dataset): return self.reader.errors + self.example_maker.errors + self.category_maker.errors + self.span_maker.errors -def select_dataset(project: Project) -> Type[Dataset]: - use_relation = getattr(project, "use_relation", False) +def select_dataset(task: str, project: Project) -> Type[Dataset]: mapping = { DOCUMENT_CLASSIFICATION: TextClassificationDataset, - SEQUENCE_LABELING: RelationExtractionDataset if use_relation else SequenceLabelingDataset, + SEQUENCE_LABELING: SequenceLabelingDataset, + RELATION_EXTRACTION: RelationExtractionDataset, SEQ2SEQ: Seq2seqDataset, INTENT_DETECTION_AND_SLOT_FILLING: CategoryAndSpanDataset, IMAGE_CLASSIFICATION: BinaryDataset, SPEECH2TEXT: BinaryDataset, } - if project.project_type not in mapping: - ValueError(f"Invalid project type: {project.project_type}") - return mapping[project.project_type] + if task not in mapping: + task = project.project_type + return mapping[task] -def load_dataset(file_format: str, data_files: List[FileName], project: Project, **kwargs) -> Dataset: +def load_dataset(task: str, file_format: str, data_files: List[FileName], project: Project, **kwargs) -> Dataset: parser = create_parser(file_format, **kwargs) reader = 
Reader(data_files, parser) - dataset_class = select_dataset(project) + dataset_class = select_dataset(task, project) return dataset_class(reader, project, **kwargs) diff --git a/backend/data_import/pipeline/catalog.py b/backend/data_import/pipeline/catalog.py index 131e800b..ba1330ef 100644 --- a/backend/data_import/pipeline/catalog.py +++ b/backend/data_import/pipeline/catalog.py @@ -1,4 +1,5 @@ from collections import defaultdict +from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Type @@ -14,7 +15,20 @@ from projects.models import ( SPEECH2TEXT, ) +# Define the example directories EXAMPLE_DIR = Path(__file__).parent.resolve() / "examples" +TASK_AGNOSTIC_DIR = EXAMPLE_DIR / "task_agnostic" +TEXT_CLASSIFICATION_DIR = EXAMPLE_DIR / "text_classification" +SEQUENCE_LABELING_DIR = EXAMPLE_DIR / "sequence_labeling" +RELATION_EXTRACTION_DIR = EXAMPLE_DIR / "relation_extraction" +SEQ2SEQ_DIR = EXAMPLE_DIR / "sequence_to_sequence" +INTENT_DETECTION_DIR = EXAMPLE_DIR / "intent_detection" +IMAGE_CLASSIFICATION_DIR = EXAMPLE_DIR / "image_classification" +SPEECH_TO_TEXT_DIR = EXAMPLE_DIR / "speech_to_text" + +# Define the task identifiers +RELATION_EXTRACTION = "RelationExtraction" + encodings = Literal[ "Auto", "ascii", @@ -177,85 +191,236 @@ class AudioFile(Format): accept_types = "audio/ogg, audio/aac, audio/mpeg, audio/wav" -class OptionColumn(BaseModel): +class ArgColumn(BaseModel): encoding: encodings = "utf_8" column_data: str = "text" column_label: str = "label" -class OptionDelimiter(OptionColumn): +class ArgDelimiter(ArgColumn): encoding: encodings = "utf_8" delimiter: Literal[",", "\t", ";", "|", " "] = "," -class OptionEncoding(BaseModel): +class ArgEncoding(BaseModel): encoding: encodings = "utf_8" -class OptionCoNLL(BaseModel): +class ArgCoNLL(BaseModel): encoding: encodings = "utf_8" scheme: Literal["IOB2", "IOE2", "IOBES", "BILOU"] = "IOB2" delimiter: Literal[" ", ""] = " " -class OptionNone(BaseModel): +class ArgNone(BaseModel): pass +@dataclass +class Option: + display_name: str + task_id: str + file_format: Type[Format] + arg: Type[BaseModel] + file: Path + + @property + def example(self) -> str: + with open(self.file, "r", encoding="utf-8") as f: + return f.read() + + def dict(self) -> Dict: + return { + **self.file_format.dict(), + **self.arg.schema(), + "example": self.example, + "task_id": self.task_id, + "display_name": self.display_name, + } + + class Options: options: Dict[str, List] = defaultdict(list) @classmethod - def filter_by_task(cls, task_name: str): + def filter_by_task(cls, task_name: str, use_relation: bool = False): options = cls.options[task_name] - return [ - {**file_format.dict(), **option.schema(), "example": example} for file_format, option, example in options - ] + if use_relation: + options = cls.options[task_name] + cls.options[RELATION_EXTRACTION] + return [option.dict() for option in options] @classmethod - def register(cls, task: str, file_format: Type[Format], option: Type[BaseModel], file: Path): - example = cls.load_example(file) - cls.options[task].append((file_format, option, example)) - - @staticmethod - def load_example(file) -> str: - with open(file, encoding="utf-8") as f: - return f.read() - + def register(cls, option: Option): + cls.options[option.task_id].append(option) + + +# Text tasks +text_tasks = [DOCUMENT_CLASSIFICATION, SEQUENCE_LABELING, SEQ2SEQ, INTENT_DETECTION_AND_SLOT_FILLING] +for task_id in text_tasks: + Options.register( + Option( + display_name=TextFile.name, + task_id=task_id, + 
file_format=TextFile, + arg=ArgEncoding, + file=TASK_AGNOSTIC_DIR / "text_files.txt", + ) + ) + Options.register( + Option( + display_name=TextLine.name, + task_id=task_id, + file_format=TextLine, + arg=ArgEncoding, + file=TASK_AGNOSTIC_DIR / "text_lines.txt", + ) + ) + +# Text Classification +Options.register( + Option( + display_name=CSV.name, + task_id=DOCUMENT_CLASSIFICATION, + file_format=CSV, + arg=ArgDelimiter, + file=TEXT_CLASSIFICATION_DIR / "example.csv", + ) +) +Options.register( + Option( + display_name=FastText.name, + task_id=DOCUMENT_CLASSIFICATION, + file_format=FastText, + arg=ArgEncoding, + file=TEXT_CLASSIFICATION_DIR / "example.txt", + ) +) +Options.register( + Option( + display_name=JSON.name, + task_id=DOCUMENT_CLASSIFICATION, + file_format=JSON, + arg=ArgColumn, + file=TEXT_CLASSIFICATION_DIR / "example.json", + ) +) +Options.register( + Option( + display_name=JSONL.name, + task_id=DOCUMENT_CLASSIFICATION, + file_format=JSONL, + arg=ArgColumn, + file=TEXT_CLASSIFICATION_DIR / "example.jsonl", + ) +) +Options.register( + Option( + display_name=Excel.name, + task_id=DOCUMENT_CLASSIFICATION, + file_format=Excel, + arg=ArgColumn, + file=TEXT_CLASSIFICATION_DIR / "example.csv", + ) +) -TASK_AGNOSTIC_DIR = EXAMPLE_DIR / "task_agnostic" -TEXT_CLASSIFICATION_DIR = EXAMPLE_DIR / "text_classification" -Options.register(DOCUMENT_CLASSIFICATION, TextFile, OptionEncoding, TASK_AGNOSTIC_DIR / "text_files.txt") -Options.register(DOCUMENT_CLASSIFICATION, TextLine, OptionEncoding, TASK_AGNOSTIC_DIR / "text_lines.txt") -Options.register(DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, TEXT_CLASSIFICATION_DIR / "example.csv") -Options.register(DOCUMENT_CLASSIFICATION, FastText, OptionEncoding, TEXT_CLASSIFICATION_DIR / "example.txt") -Options.register(DOCUMENT_CLASSIFICATION, JSON, OptionColumn, TEXT_CLASSIFICATION_DIR / "example.json") -Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionColumn, TEXT_CLASSIFICATION_DIR / "example.jsonl") -Options.register(DOCUMENT_CLASSIFICATION, Excel, OptionColumn, TEXT_CLASSIFICATION_DIR / "example.csv") +# Sequence Labelling +Options.register( + Option( + display_name=JSONL.name, + task_id=SEQUENCE_LABELING, + file_format=JSONL, + arg=ArgColumn, + file=SEQUENCE_LABELING_DIR / "example.jsonl", + ) +) +Options.register( + Option( + display_name=CoNLL.name, + task_id=SEQUENCE_LABELING, + file_format=CoNLL, + arg=ArgCoNLL, + file=SEQUENCE_LABELING_DIR / "example.txt", + ) +) -SEQUENCE_LABELING_DIR = EXAMPLE_DIR / "sequence_labeling" -RELATION_EXTRACTION_DIR = EXAMPLE_DIR / "relation_extraction" -Options.register(SEQUENCE_LABELING, TextFile, OptionEncoding, TASK_AGNOSTIC_DIR / "text_files.txt") -Options.register(SEQUENCE_LABELING, TextLine, OptionEncoding, TASK_AGNOSTIC_DIR / "text_lines.txt") -Options.register(SEQUENCE_LABELING, JSONL, OptionColumn, SEQUENCE_LABELING_DIR / "example.jsonl") -Options.register(SEQUENCE_LABELING, CoNLL, OptionCoNLL, SEQUENCE_LABELING_DIR / "example.txt") -Options.register(SEQUENCE_LABELING, JSONL, OptionNone, RELATION_EXTRACTION_DIR / "example.jsonl") +# Relation Extraction +Options.register( + Option( + display_name="JSONL(Relation)", + task_id=RELATION_EXTRACTION, + file_format=JSONL, + arg=ArgNone, + file=RELATION_EXTRACTION_DIR / "example.jsonl", + ) +) -SEQ2SEQ_DIR = EXAMPLE_DIR / "sequence_to_sequence" -Options.register(SEQ2SEQ, TextFile, OptionEncoding, TASK_AGNOSTIC_DIR / "text_files.txt") -Options.register(SEQ2SEQ, TextLine, OptionEncoding, TASK_AGNOSTIC_DIR / "text_lines.txt") -Options.register(SEQ2SEQ, CSV, 
OptionDelimiter, SEQ2SEQ_DIR / "example.csv") -Options.register(SEQ2SEQ, JSON, OptionColumn, SEQ2SEQ_DIR / "example.json") -Options.register(SEQ2SEQ, JSONL, OptionColumn, SEQ2SEQ_DIR / "example.jsonl") -Options.register(SEQ2SEQ, Excel, OptionColumn, SEQ2SEQ_DIR / "example.csv") +# Seq2seq +Options.register( + Option( + display_name=CSV.name, + task_id=SEQ2SEQ, + file_format=CSV, + arg=ArgDelimiter, + file=SEQ2SEQ_DIR / "example.csv", + ) +) +Options.register( + Option( + display_name=JSON.name, + task_id=SEQ2SEQ, + file_format=JSON, + arg=ArgColumn, + file=SEQ2SEQ_DIR / "example.json", + ) +) +Options.register( + Option( + display_name=JSONL.name, + task_id=SEQ2SEQ, + file_format=JSONL, + arg=ArgColumn, + file=SEQ2SEQ_DIR / "example.jsonl", + ) +) +Options.register( + Option( + display_name=Excel.name, + task_id=SEQ2SEQ, + file_format=Excel, + arg=ArgColumn, + file=SEQ2SEQ_DIR / "example.csv", + ) +) -INTENT_DETECTION_DIR = EXAMPLE_DIR / "intent_detection" -Options.register(INTENT_DETECTION_AND_SLOT_FILLING, TextFile, OptionEncoding, TASK_AGNOSTIC_DIR / "text_files.txt") -Options.register(INTENT_DETECTION_AND_SLOT_FILLING, TextLine, OptionEncoding, TASK_AGNOSTIC_DIR / "text_lines.txt") -Options.register(INTENT_DETECTION_AND_SLOT_FILLING, JSONL, OptionNone, INTENT_DETECTION_DIR / "example.jsonl") +# Intent detection +Options.register( + Option( + display_name=JSONL.name, + task_id=INTENT_DETECTION_AND_SLOT_FILLING, + file_format=JSONL, + arg=ArgNone, + file=INTENT_DETECTION_DIR / "example.jsonl", + ) +) -IMAGE_CLASSIFICATION_DIR = EXAMPLE_DIR / "image_classification" -Options.register(IMAGE_CLASSIFICATION, ImageFile, OptionNone, IMAGE_CLASSIFICATION_DIR / "image_files.txt") +# Image Classification +Options.register( + Option( + display_name=ImageFile.name, + task_id=IMAGE_CLASSIFICATION, + file_format=ImageFile, + arg=ArgNone, + file=IMAGE_CLASSIFICATION_DIR / "image_files.txt", + ) +) -SPEECH_TO_TEXT_DIR = EXAMPLE_DIR / "speech_to_text" -Options.register(SPEECH2TEXT, AudioFile, OptionNone, SPEECH_TO_TEXT_DIR / "audio_files.txt") +# Speech to Text +Options.register( + Option( + display_name=AudioFile.name, + task_id=SPEECH2TEXT, + file_format=AudioFile, + arg=ArgNone, + file=SPEECH_TO_TEXT_DIR / "audio_files.txt", + ) +) diff --git a/backend/data_import/tests/test_tasks.py b/backend/data_import/tests/test_tasks.py index f5f6038e..54c71434 100644 --- a/backend/data_import/tests/test_tasks.py +++ b/backend/data_import/tests/test_tasks.py @@ -8,6 +8,7 @@ from django_drf_filepond.models import StoredUpload, TemporaryUpload from django_drf_filepond.utils import _get_file_id from data_import.celery_tasks import import_dataset +from data_import.pipeline.catalog import RELATION_EXTRACTION from examples.models import Example from label_types.models import SpanType from labels.models import Category, Span @@ -40,7 +41,7 @@ class TestImportData(TestCase): except StoredUpload.DoesNotExist: pass - def import_dataset(self, filename, file_format, kwargs=None): + def import_dataset(self, filename, file_format, task, kwargs=None): file_path = str(self.data_path / filename) TemporaryUpload.objects.create( upload_id=self.upload_id, @@ -51,7 +52,7 @@ class TestImportData(TestCase): ) upload_ids = [self.upload_id] kwargs = kwargs or {} - return import_dataset(self.user.id, self.project.item.id, file_format, upload_ids, **kwargs) + return import_dataset(self.user.id, self.project.item.id, file_format, upload_ids, task, **kwargs) @override_settings(MAX_UPLOAD_SIZE=0) @@ -62,7 +63,7 @@ class 
TestMaxFileSize(TestImportData): filename = "text_classification/example.jsonl" file_format = "JSONL" kwargs = {"column_label": "labels"} - response = self.import_dataset(filename, file_format, kwargs) + response = self.import_dataset(filename, file_format, self.task, kwargs) self.assertEqual(len(response["error"]), 1) self.assertIn("maximum file size", response["error"][0]["message"]) @@ -89,80 +90,80 @@ class TestImportClassificationData(TestImportData): file_format = "JSONL" kwargs = {"column_label": "labels"} dataset = [("exampleA", ["positive"]), ("exampleB", ["positive", "negative"]), ("exampleC", [])] - self.import_dataset(filename, file_format, kwargs) + self.import_dataset(filename, file_format, self.task, kwargs) self.assert_examples(dataset) def test_csv(self): filename = "text_classification/example.csv" file_format = "CSV" dataset = [("exampleA", ["positive"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_csv_out_of_order_columns(self): filename = "text_classification/example_out_of_order_columns.csv" file_format = "CSV" dataset = [("exampleA", ["positive"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_fasttext(self): filename = "text_classification/example_fasttext.txt" file_format = "fastText" dataset = [("exampleA", ["positive"]), ("exampleB", ["positive", "negative"]), ("exampleC", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_excel(self): filename = "text_classification/example.xlsx" file_format = "Excel" dataset = [("exampleA", ["positive"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_json(self): filename = "text_classification/example.json" file_format = "JSON" dataset = [("exampleA", ["positive"]), ("exampleB", ["positive", "negative"]), ("exampleC", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_textfile(self): filename = "example.txt" file_format = "TextFile" dataset = [("exampleA\nexampleB\n\nexampleC\n", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_textline(self): filename = "example.txt" file_format = "TextLine" dataset = [("exampleA", []), ("exampleB", []), ("exampleC", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_wrong_jsonl(self): filename = "text_classification/example.json" file_format = "JSONL" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assert_parse_error(response) def test_wrong_json(self): filename = "text_classification/example.jsonl" file_format = "JSON" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assert_parse_error(response) def test_wrong_excel(self): filename = "text_classification/example.jsonl" file_format = "Excel" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) 
self.assert_parse_error(response) def test_wrong_csv(self): filename = "text_classification/example.jsonl" file_format = "CSV" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assert_parse_error(response) @@ -186,26 +187,26 @@ class TestImportSequenceLabelingData(TestImportData): filename = "sequence_labeling/example.jsonl" file_format = "JSONL" dataset = [("exampleA", [[0, 1, "LOC"]]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_conll(self): filename = "sequence_labeling/example.conll" file_format = "CoNLL" dataset = [("JAPAN GET", [[0, 5, "LOC"]]), ("Nadim Ladki", [[0, 11, "PER"]])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_wrong_conll(self): filename = "sequence_labeling/example.jsonl" file_format = "CoNLL" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assert_parse_error(response) def test_jsonl_with_overlapping(self): filename = "sequence_labeling/example_overlapping.jsonl" file_format = "JSONL" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assertEqual(len(response["error"]), 0) @@ -241,7 +242,7 @@ class TestImportRelationExtractionData(TestImportData): [[0, 6, "ORG"], [22, 39, "DATE"], [44, 54, "PERSON"], [59, 70, "PERSON"]], ), ] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, RELATION_EXTRACTION) self.assert_examples(dataset) @@ -259,21 +260,21 @@ class TestImportSeq2seqData(TestImportData): filename = "seq2seq/example.jsonl" file_format = "JSONL" dataset = [("exampleA", ["label1"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_json(self): filename = "seq2seq/example.json" file_format = "JSON" dataset = [("exampleA", ["label1"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_csv(self): filename = "seq2seq/example.csv" file_format = "CSV" dataset = [("exampleA", ["label1"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) @@ -298,7 +299,7 @@ class TestImportIntentDetectionAndSlotFillingData(TestImportData): ("exampleC", {"cats": [], "entities": [(0, 1, "LOC")]}), ("exampleD", {"cats": [], "entities": []}), ] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) @@ -308,7 +309,7 @@ class TestImportImageClassificationData(TestImportData): def test_example(self): filename = "images/1500x500.jpeg" file_format = "ImageFile" - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assertEqual(Example.objects.count(), 1) @@ -319,6 +320,6 @@ class TestFileTypeChecking(TestImportData): def test_example(self): filename = "images/example.ico" file_format = "ImageFile" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assertEqual(len(response["error"]), 1) self.assertIn("unexpected", 
response["error"][0]["message"]) diff --git a/backend/data_import/views.py b/backend/data_import/views.py index bb3cfdf6..25639753 100644 --- a/backend/data_import/views.py +++ b/backend/data_import/views.py @@ -16,7 +16,8 @@ class DatasetCatalog(APIView): def get(self, request, *args, **kwargs): project_id = kwargs["project_id"] project = get_object_or_404(Project, pk=project_id) - options = Options.filter_by_task(project.project_type) + use_relation = getattr(project, "use_relation", False) + options = Options.filter_by_task(project.project_type, use_relation) return Response(data=options, status=status.HTTP_200_OK) @@ -26,11 +27,13 @@ class DatasetImportAPI(APIView): def post(self, request, *args, **kwargs): upload_ids = request.data.pop("uploadIds") file_format = request.data.pop("format") - task = import_dataset.delay( + task = request.data.pop("task") + celery_task = import_dataset.delay( user_id=request.user.id, project_id=self.kwargs["project_id"], file_format=file_format, upload_ids=upload_ids, + task=task, **request.data, ) - return Response({"task_id": task.task_id}) + return Response({"task_id": celery_task.task_id}) diff --git a/frontend/domain/models/upload/catalog.ts b/frontend/domain/models/upload/catalog.ts index 0f0c7718..f6497bfa 100644 --- a/frontend/domain/models/upload/catalog.ts +++ b/frontend/domain/models/upload/catalog.ts @@ -5,6 +5,12 @@ export class Catalog { example: string; properties: object; + @Expose({ name: 'task_id' }) + taskId: string; + + @Expose({ name: 'display_name' }) + displayName: string; + @Expose({ name: 'accept_types' }) acceptTypes: string; } diff --git a/frontend/domain/models/upload/parseRepository.ts b/frontend/domain/models/upload/parseRepository.ts index 703caf56..fcd5d94a 100644 --- a/frontend/domain/models/upload/parseRepository.ts +++ b/frontend/domain/models/upload/parseRepository.ts @@ -1,5 +1,5 @@ export interface ParseRepository { - analyze(projectId: string, format: string, uploadIds: number[], option: object): Promise + analyze(projectId: string, format: string, task: string, uploadIds: number[], option: object): Promise revert(serverId: string): void } diff --git a/frontend/pages/projects/_id/dataset/import.vue b/frontend/pages/projects/_id/dataset/import.vue index d48aa37e..adf388c7 100644 --- a/frontend/pages/projects/_id/dataset/import.vue +++ b/frontend/pages/projects/_id/dataset/import.vue @@ -13,7 +13,7 @@ @@ -156,7 +156,7 @@ export default { return this.uploadedFiles.length === 0 || this.taskId !== null || !this.valid }, properties() { - const item = this.catalog.find(item => item.name === this.selected) + const item = this.catalog.find(item => item.displayName === this.selected) if (item) { return item.properties } else { @@ -174,7 +174,7 @@ export default { return Object.fromEntries(textFields) }, acceptedFileTypes() { - const item = this.catalog.find(item => item.name === this.selected) + const item = this.catalog.find(item => item.displayName === this.selected) if (item) { return item.acceptTypes } else { @@ -182,7 +182,7 @@ export default { } }, example() { - const item = this.catalog.find(item => item.name === this.selected) + const item = this.catalog.find(item => item.displayName === this.selected) if (item) { const column_data = 'column_data' const column_label = 'column_label' @@ -201,7 +201,7 @@ export default { watch: { selected() { - const item = this.catalog.find(item => item.name === this.selected) + const item = this.catalog.find(item => item.displayName === this.selected) for (const [key, value] of 
Object.entries(item.properties)) { this.option[key] = value.default } @@ -239,9 +239,11 @@ export default { }, async importDataset() { this.isImporting = true + const item = this.catalog.find(item => item.displayName === this.selected) this.taskId = await this.$services.parse.analyze( this.$route.params.id, - this.selected, + item.name, + item.taskId, this.uploadedFiles.map(item => item.serverId), this.option ) diff --git a/frontend/repositories/upload/apiParseRepository.ts b/frontend/repositories/upload/apiParseRepository.ts index b7ba25c8..d60ddcad 100644 --- a/frontend/repositories/upload/apiParseRepository.ts +++ b/frontend/repositories/upload/apiParseRepository.ts @@ -6,10 +6,11 @@ export class APIParseRepository implements ParseRepository { private readonly request = ApiService ) {} - async analyze(projectId: string, format: string, uploadIds: number[], option: object): Promise { + async analyze(projectId: string, format: string, task: string, uploadIds: number[], option: object): Promise { const url = `/projects/${projectId}/upload` const data = { format, + task, uploadIds, ...option } diff --git a/frontend/services/application/upload/catalogData.ts b/frontend/services/application/upload/catalogData.ts index 6d7580b4..781bbc95 100644 --- a/frontend/services/application/upload/catalogData.ts +++ b/frontend/services/application/upload/catalogData.ts @@ -6,11 +6,15 @@ export class CatalogDTO { example: string acceptTypes: string properties: object + taskId: string + displayName: string constructor(item: Catalog) { this.name = item.name this.example = item.example this.acceptTypes = item.acceptTypes this.properties = item.properties + this.displayName = item.displayName + this.taskId = item.taskId } } diff --git a/frontend/services/application/upload/parseApplicationService.ts b/frontend/services/application/upload/parseApplicationService.ts index 1998dbb1..888f012d 100644 --- a/frontend/services/application/upload/parseApplicationService.ts +++ b/frontend/services/application/upload/parseApplicationService.ts @@ -5,8 +5,8 @@ export class ParseApplicationService { private readonly repository: ParseRepository ) {} - public async analyze(projectId: string, format: string, uploadIds: number[], option: object): Promise { - const item = await this.repository.analyze(projectId, format, uploadIds, option) + public async analyze(projectId: string, format: string, task: string, uploadIds: number[], option: object): Promise { + const item = await this.repository.analyze(projectId, format, task, uploadIds, option) return item }
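Taken together, the catalog changes above make option lookup task-aware. A minimal sketch of the new lookup, assuming the modules import as defined in this patch; the assertion only restates what filter_by_task now does:

from data_import.pipeline.catalog import RELATION_EXTRACTION, Options
from projects.models import SEQUENCE_LABELING

# Without use_relation, only the options registered under SEQUENCE_LABELING
# are returned (TextFile, TextLine, JSONL, CoNLL).
plain = Options.filter_by_task(SEQUENCE_LABELING)

# With use_relation=True, the RELATION_EXTRACTION options ("JSONL(Relation)")
# are appended as well, which is how DatasetCatalog.get serves projects that
# have use_relation enabled.
extended = Options.filter_by_task(SEQUENCE_LABELING, use_relation=True)
assert len(extended) == len(plain) + len(Options.options[RELATION_EXTRACTION])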
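On the dataset side, select_dataset now resolves the dataset class from the task string first and only falls back to the project type for unknown values. A sketch under the assumption that project is a hypothetical sequence-labeling project loaded from the ORM:

from data_import.datasets import (
    RelationExtractionDataset,
    SequenceLabelingDataset,
    select_dataset,
)
from data_import.pipeline.catalog import RELATION_EXTRACTION

# An explicit relation-extraction task picks RelationExtractionDataset,
# even though the project itself is a sequence-labeling project.
assert select_dataset(RELATION_EXTRACTION, project) is RelationExtractionDataset

# An unrecognized (or empty) task string falls back to project.project_type,
# so callers that do not send a meaningful task keep their previous behavior.
assert select_dataset("", project) is SequenceLabelingDataset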
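End to end, the frontend now looks up the selected catalog entry and sends its name and taskId instead of the raw display name. A hedged sketch of the resulting POST body for /projects/<project_id>/upload; the upload id and option values are illustrative:

payload = {
    "format": "JSONL",                    # item.name from the catalog entry
    "task": "RelationExtraction",         # item.taskId, i.e. RELATION_EXTRACTION
    "uploadIds": ["filepond-upload-id"],  # illustrative temporary upload ids
    "column_data": "text",                # remaining keys come from the
    "column_label": "label",              # per-format option form
}

# DatasetImportAPI pops format, task and uploadIds, then forwards the rest:
# import_dataset.delay(user_id=..., project_id=..., file_format=file_format,
#                      upload_ids=upload_ids, task=task, **request.data)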
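The patch registers RELATION_EXTRACTION_DIR / "example.jsonl" without showing its contents. As an assumption based on doccano's documented relation format and the span offsets asserted in TestImportRelationExtractionData, one line of that file would look roughly like the record below; the field names are not taken from this patch:

# Assumed shape of one relation-extraction JSONL record; the example file in
# the repository is authoritative, this is only an illustration.
record = {
    "text": "Google was founded on September 4, 1998, by Larry Page and Sergey Brin.",
    "entities": [
        {"id": 0, "start_offset": 0, "end_offset": 6, "label": "ORG"},
        {"id": 1, "start_offset": 22, "end_offset": 39, "label": "DATE"},
        {"id": 2, "start_offset": 44, "end_offset": 54, "label": "PERSON"},
        {"id": 3, "start_offset": 59, "end_offset": 70, "label": "PERSON"},
    ],
    "relations": [{"id": 0, "from_id": 0, "to_id": 1, "type": "founded_in"}],
}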