From 370c123ff4880a93ebbaa1aaa69084ea87fe1a11 Mon Sep 17 00:00:00 2001 From: Hironsan Date: Thu, 19 May 2022 08:04:20 +0900 Subject: [PATCH] Update frontend to handle relation dataset import --- backend/data_import/celery_tasks.py | 4 +- backend/data_import/datasets.py | 17 +- backend/data_import/pipeline/catalog.py | 263 ++++++++++++++---- backend/data_import/tests/test_tasks.py | 53 ++-- backend/data_import/views.py | 9 +- frontend/domain/models/upload/catalog.ts | 6 + .../domain/models/upload/parseRepository.ts | 2 +- .../pages/projects/_id/dataset/import.vue | 14 +- .../repositories/upload/apiParseRepository.ts | 3 +- .../application/upload/catalogData.ts | 4 + .../upload/parseApplicationService.ts | 4 +- 11 files changed, 281 insertions(+), 98 deletions(-) diff --git a/backend/data_import/celery_tasks.py b/backend/data_import/celery_tasks.py index 2f9d08b0..7f3c4249 100644 --- a/backend/data_import/celery_tasks.py +++ b/backend/data_import/celery_tasks.py @@ -53,7 +53,7 @@ def check_uploaded_files(upload_ids: List[str], file_format: str): @shared_task -def import_dataset(user_id, project_id, file_format: str, upload_ids: List[str], **kwargs): +def import_dataset(user_id, project_id, file_format: str, upload_ids: List[str], task: str, **kwargs): project = get_object_or_404(Project, pk=project_id) user = get_object_or_404(get_user_model(), pk=user_id) @@ -64,7 +64,7 @@ def import_dataset(user_id, project_id, file_format: str, upload_ids: List[str], for tu in temporary_uploads ] - dataset = load_dataset(file_format, filenames, project, **kwargs) + dataset = load_dataset(task, file_format, filenames, project, **kwargs) dataset.save(user, batch_size=settings.IMPORT_BATCH_SIZE) upload_to_store(temporary_uploads) errors.extend(dataset.errors) diff --git a/backend/data_import/datasets.py b/backend/data_import/datasets.py index d424bc14..d8876a67 100644 --- a/backend/data_import/datasets.py +++ b/backend/data_import/datasets.py @@ -4,6 +4,7 @@ from typing import List, Type from django.contrib.auth.models import User from .models import DummyLabelType +from .pipeline.catalog import RELATION_EXTRACTION from .pipeline.data import BaseData, BinaryData, TextData from .pipeline.exceptions import FileParseException from .pipeline.factories import create_parser @@ -194,23 +195,23 @@ class CategoryAndSpanDataset(Dataset): return self.reader.errors + self.example_maker.errors + self.category_maker.errors + self.span_maker.errors -def select_dataset(project: Project) -> Type[Dataset]: - use_relation = getattr(project, "use_relation", False) +def select_dataset(task: str, project: Project) -> Type[Dataset]: mapping = { DOCUMENT_CLASSIFICATION: TextClassificationDataset, - SEQUENCE_LABELING: RelationExtractionDataset if use_relation else SequenceLabelingDataset, + SEQUENCE_LABELING: SequenceLabelingDataset, + RELATION_EXTRACTION: RelationExtractionDataset, SEQ2SEQ: Seq2seqDataset, INTENT_DETECTION_AND_SLOT_FILLING: CategoryAndSpanDataset, IMAGE_CLASSIFICATION: BinaryDataset, SPEECH2TEXT: BinaryDataset, } - if project.project_type not in mapping: - ValueError(f"Invalid project type: {project.project_type}") - return mapping[project.project_type] + if task not in mapping: + task = project.project_type + return mapping[task] -def load_dataset(file_format: str, data_files: List[FileName], project: Project, **kwargs) -> Dataset: +def load_dataset(task: str, file_format: str, data_files: List[FileName], project: Project, **kwargs) -> Dataset: parser = create_parser(file_format, **kwargs) reader = 
Reader(data_files, parser) - dataset_class = select_dataset(project) + dataset_class = select_dataset(task, project) return dataset_class(reader, project, **kwargs) diff --git a/backend/data_import/pipeline/catalog.py b/backend/data_import/pipeline/catalog.py index 131e800b..ba1330ef 100644 --- a/backend/data_import/pipeline/catalog.py +++ b/backend/data_import/pipeline/catalog.py @@ -1,4 +1,5 @@ from collections import defaultdict +from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Type @@ -14,7 +15,20 @@ from projects.models import ( SPEECH2TEXT, ) +# Define the example directories EXAMPLE_DIR = Path(__file__).parent.resolve() / "examples" +TASK_AGNOSTIC_DIR = EXAMPLE_DIR / "task_agnostic" +TEXT_CLASSIFICATION_DIR = EXAMPLE_DIR / "text_classification" +SEQUENCE_LABELING_DIR = EXAMPLE_DIR / "sequence_labeling" +RELATION_EXTRACTION_DIR = EXAMPLE_DIR / "relation_extraction" +SEQ2SEQ_DIR = EXAMPLE_DIR / "sequence_to_sequence" +INTENT_DETECTION_DIR = EXAMPLE_DIR / "intent_detection" +IMAGE_CLASSIFICATION_DIR = EXAMPLE_DIR / "image_classification" +SPEECH_TO_TEXT_DIR = EXAMPLE_DIR / "speech_to_text" + +# Define the task identifiers +RELATION_EXTRACTION = "RelationExtraction" + encodings = Literal[ "Auto", "ascii", @@ -177,85 +191,236 @@ class AudioFile(Format): accept_types = "audio/ogg, audio/aac, audio/mpeg, audio/wav" -class OptionColumn(BaseModel): +class ArgColumn(BaseModel): encoding: encodings = "utf_8" column_data: str = "text" column_label: str = "label" -class OptionDelimiter(OptionColumn): +class ArgDelimiter(ArgColumn): encoding: encodings = "utf_8" delimiter: Literal[",", "\t", ";", "|", " "] = "," -class OptionEncoding(BaseModel): +class ArgEncoding(BaseModel): encoding: encodings = "utf_8" -class OptionCoNLL(BaseModel): +class ArgCoNLL(BaseModel): encoding: encodings = "utf_8" scheme: Literal["IOB2", "IOE2", "IOBES", "BILOU"] = "IOB2" delimiter: Literal[" ", ""] = " " -class OptionNone(BaseModel): +class ArgNone(BaseModel): pass +@dataclass +class Option: + display_name: str + task_id: str + file_format: Type[Format] + arg: Type[BaseModel] + file: Path + + @property + def example(self) -> str: + with open(self.file, "r", encoding="utf-8") as f: + return f.read() + + def dict(self) -> Dict: + return { + **self.file_format.dict(), + **self.arg.schema(), + "example": self.example, + "task_id": self.task_id, + "display_name": self.display_name, + } + + class Options: options: Dict[str, List] = defaultdict(list) @classmethod - def filter_by_task(cls, task_name: str): + def filter_by_task(cls, task_name: str, use_relation: bool = False): options = cls.options[task_name] - return [ - {**file_format.dict(), **option.schema(), "example": example} for file_format, option, example in options - ] + if use_relation: + options = cls.options[task_name] + cls.options[RELATION_EXTRACTION] + return [option.dict() for option in options] @classmethod - def register(cls, task: str, file_format: Type[Format], option: Type[BaseModel], file: Path): - example = cls.load_example(file) - cls.options[task].append((file_format, option, example)) - - @staticmethod - def load_example(file) -> str: - with open(file, encoding="utf-8") as f: - return f.read() - + def register(cls, option: Option): + cls.options[option.task_id].append(option) + + +# Text tasks +text_tasks = [DOCUMENT_CLASSIFICATION, SEQUENCE_LABELING, SEQ2SEQ, INTENT_DETECTION_AND_SLOT_FILLING] +for task_id in text_tasks: + Options.register( + Option( + display_name=TextFile.name, + task_id=task_id, + 
file_format=TextFile, + arg=ArgEncoding, + file=TASK_AGNOSTIC_DIR / "text_files.txt", + ) + ) + Options.register( + Option( + display_name=TextLine.name, + task_id=task_id, + file_format=TextLine, + arg=ArgEncoding, + file=TASK_AGNOSTIC_DIR / "text_lines.txt", + ) + ) + +# Text Classification +Options.register( + Option( + display_name=CSV.name, + task_id=DOCUMENT_CLASSIFICATION, + file_format=CSV, + arg=ArgDelimiter, + file=TEXT_CLASSIFICATION_DIR / "example.csv", + ) +) +Options.register( + Option( + display_name=FastText.name, + task_id=DOCUMENT_CLASSIFICATION, + file_format=FastText, + arg=ArgEncoding, + file=TEXT_CLASSIFICATION_DIR / "example.txt", + ) +) +Options.register( + Option( + display_name=JSON.name, + task_id=DOCUMENT_CLASSIFICATION, + file_format=JSON, + arg=ArgColumn, + file=TEXT_CLASSIFICATION_DIR / "example.json", + ) +) +Options.register( + Option( + display_name=JSONL.name, + task_id=DOCUMENT_CLASSIFICATION, + file_format=JSONL, + arg=ArgColumn, + file=TEXT_CLASSIFICATION_DIR / "example.jsonl", + ) +) +Options.register( + Option( + display_name=Excel.name, + task_id=DOCUMENT_CLASSIFICATION, + file_format=Excel, + arg=ArgColumn, + file=TEXT_CLASSIFICATION_DIR / "example.csv", + ) +) -TASK_AGNOSTIC_DIR = EXAMPLE_DIR / "task_agnostic" -TEXT_CLASSIFICATION_DIR = EXAMPLE_DIR / "text_classification" -Options.register(DOCUMENT_CLASSIFICATION, TextFile, OptionEncoding, TASK_AGNOSTIC_DIR / "text_files.txt") -Options.register(DOCUMENT_CLASSIFICATION, TextLine, OptionEncoding, TASK_AGNOSTIC_DIR / "text_lines.txt") -Options.register(DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, TEXT_CLASSIFICATION_DIR / "example.csv") -Options.register(DOCUMENT_CLASSIFICATION, FastText, OptionEncoding, TEXT_CLASSIFICATION_DIR / "example.txt") -Options.register(DOCUMENT_CLASSIFICATION, JSON, OptionColumn, TEXT_CLASSIFICATION_DIR / "example.json") -Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionColumn, TEXT_CLASSIFICATION_DIR / "example.jsonl") -Options.register(DOCUMENT_CLASSIFICATION, Excel, OptionColumn, TEXT_CLASSIFICATION_DIR / "example.csv") +# Sequence Labelling +Options.register( + Option( + display_name=JSONL.name, + task_id=SEQUENCE_LABELING, + file_format=JSONL, + arg=ArgColumn, + file=SEQUENCE_LABELING_DIR / "example.jsonl", + ) +) +Options.register( + Option( + display_name=CoNLL.name, + task_id=SEQUENCE_LABELING, + file_format=CoNLL, + arg=ArgCoNLL, + file=SEQUENCE_LABELING_DIR / "example.txt", + ) +) -SEQUENCE_LABELING_DIR = EXAMPLE_DIR / "sequence_labeling" -RELATION_EXTRACTION_DIR = EXAMPLE_DIR / "relation_extraction" -Options.register(SEQUENCE_LABELING, TextFile, OptionEncoding, TASK_AGNOSTIC_DIR / "text_files.txt") -Options.register(SEQUENCE_LABELING, TextLine, OptionEncoding, TASK_AGNOSTIC_DIR / "text_lines.txt") -Options.register(SEQUENCE_LABELING, JSONL, OptionColumn, SEQUENCE_LABELING_DIR / "example.jsonl") -Options.register(SEQUENCE_LABELING, CoNLL, OptionCoNLL, SEQUENCE_LABELING_DIR / "example.txt") -Options.register(SEQUENCE_LABELING, JSONL, OptionNone, RELATION_EXTRACTION_DIR / "example.jsonl") +# Relation Extraction +Options.register( + Option( + display_name="JSONL(Relation)", + task_id=RELATION_EXTRACTION, + file_format=JSONL, + arg=ArgNone, + file=RELATION_EXTRACTION_DIR / "example.jsonl", + ) +) -SEQ2SEQ_DIR = EXAMPLE_DIR / "sequence_to_sequence" -Options.register(SEQ2SEQ, TextFile, OptionEncoding, TASK_AGNOSTIC_DIR / "text_files.txt") -Options.register(SEQ2SEQ, TextLine, OptionEncoding, TASK_AGNOSTIC_DIR / "text_lines.txt") -Options.register(SEQ2SEQ, CSV, 
OptionDelimiter, SEQ2SEQ_DIR / "example.csv") -Options.register(SEQ2SEQ, JSON, OptionColumn, SEQ2SEQ_DIR / "example.json") -Options.register(SEQ2SEQ, JSONL, OptionColumn, SEQ2SEQ_DIR / "example.jsonl") -Options.register(SEQ2SEQ, Excel, OptionColumn, SEQ2SEQ_DIR / "example.csv") +# Seq2seq +Options.register( + Option( + display_name=CSV.name, + task_id=SEQ2SEQ, + file_format=CSV, + arg=ArgDelimiter, + file=SEQ2SEQ_DIR / "example.csv", + ) +) +Options.register( + Option( + display_name=JSON.name, + task_id=SEQ2SEQ, + file_format=JSON, + arg=ArgColumn, + file=SEQ2SEQ_DIR / "example.json", + ) +) +Options.register( + Option( + display_name=JSONL.name, + task_id=SEQ2SEQ, + file_format=JSONL, + arg=ArgColumn, + file=SEQ2SEQ_DIR / "example.jsonl", + ) +) +Options.register( + Option( + display_name=Excel.name, + task_id=SEQ2SEQ, + file_format=Excel, + arg=ArgColumn, + file=SEQ2SEQ_DIR / "example.csv", + ) +) -INTENT_DETECTION_DIR = EXAMPLE_DIR / "intent_detection" -Options.register(INTENT_DETECTION_AND_SLOT_FILLING, TextFile, OptionEncoding, TASK_AGNOSTIC_DIR / "text_files.txt") -Options.register(INTENT_DETECTION_AND_SLOT_FILLING, TextLine, OptionEncoding, TASK_AGNOSTIC_DIR / "text_lines.txt") -Options.register(INTENT_DETECTION_AND_SLOT_FILLING, JSONL, OptionNone, INTENT_DETECTION_DIR / "example.jsonl") +# Intent detection +Options.register( + Option( + display_name=JSONL.name, + task_id=INTENT_DETECTION_AND_SLOT_FILLING, + file_format=JSONL, + arg=ArgNone, + file=INTENT_DETECTION_DIR / "example.jsonl", + ) +) -IMAGE_CLASSIFICATION_DIR = EXAMPLE_DIR / "image_classification" -Options.register(IMAGE_CLASSIFICATION, ImageFile, OptionNone, IMAGE_CLASSIFICATION_DIR / "image_files.txt") +# Image Classification +Options.register( + Option( + display_name=ImageFile.name, + task_id=IMAGE_CLASSIFICATION, + file_format=ImageFile, + arg=ArgNone, + file=IMAGE_CLASSIFICATION_DIR / "image_files.txt", + ) +) -SPEECH_TO_TEXT_DIR = EXAMPLE_DIR / "speech_to_text" -Options.register(SPEECH2TEXT, AudioFile, OptionNone, SPEECH_TO_TEXT_DIR / "audio_files.txt") +# Speech to Text +Options.register( + Option( + display_name=AudioFile.name, + task_id=SPEECH2TEXT, + file_format=AudioFile, + arg=ArgNone, + file=SPEECH_TO_TEXT_DIR / "audio_files.txt", + ) +) diff --git a/backend/data_import/tests/test_tasks.py b/backend/data_import/tests/test_tasks.py index f5f6038e..54c71434 100644 --- a/backend/data_import/tests/test_tasks.py +++ b/backend/data_import/tests/test_tasks.py @@ -8,6 +8,7 @@ from django_drf_filepond.models import StoredUpload, TemporaryUpload from django_drf_filepond.utils import _get_file_id from data_import.celery_tasks import import_dataset +from data_import.pipeline.catalog import RELATION_EXTRACTION from examples.models import Example from label_types.models import SpanType from labels.models import Category, Span @@ -40,7 +41,7 @@ class TestImportData(TestCase): except StoredUpload.DoesNotExist: pass - def import_dataset(self, filename, file_format, kwargs=None): + def import_dataset(self, filename, file_format, task, kwargs=None): file_path = str(self.data_path / filename) TemporaryUpload.objects.create( upload_id=self.upload_id, @@ -51,7 +52,7 @@ class TestImportData(TestCase): ) upload_ids = [self.upload_id] kwargs = kwargs or {} - return import_dataset(self.user.id, self.project.item.id, file_format, upload_ids, **kwargs) + return import_dataset(self.user.id, self.project.item.id, file_format, upload_ids, task, **kwargs) @override_settings(MAX_UPLOAD_SIZE=0) @@ -62,7 +63,7 @@ class 
TestMaxFileSize(TestImportData): filename = "text_classification/example.jsonl" file_format = "JSONL" kwargs = {"column_label": "labels"} - response = self.import_dataset(filename, file_format, kwargs) + response = self.import_dataset(filename, file_format, self.task, kwargs) self.assertEqual(len(response["error"]), 1) self.assertIn("maximum file size", response["error"][0]["message"]) @@ -89,80 +90,80 @@ class TestImportClassificationData(TestImportData): file_format = "JSONL" kwargs = {"column_label": "labels"} dataset = [("exampleA", ["positive"]), ("exampleB", ["positive", "negative"]), ("exampleC", [])] - self.import_dataset(filename, file_format, kwargs) + self.import_dataset(filename, file_format, self.task, kwargs) self.assert_examples(dataset) def test_csv(self): filename = "text_classification/example.csv" file_format = "CSV" dataset = [("exampleA", ["positive"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_csv_out_of_order_columns(self): filename = "text_classification/example_out_of_order_columns.csv" file_format = "CSV" dataset = [("exampleA", ["positive"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_fasttext(self): filename = "text_classification/example_fasttext.txt" file_format = "fastText" dataset = [("exampleA", ["positive"]), ("exampleB", ["positive", "negative"]), ("exampleC", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_excel(self): filename = "text_classification/example.xlsx" file_format = "Excel" dataset = [("exampleA", ["positive"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_json(self): filename = "text_classification/example.json" file_format = "JSON" dataset = [("exampleA", ["positive"]), ("exampleB", ["positive", "negative"]), ("exampleC", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_textfile(self): filename = "example.txt" file_format = "TextFile" dataset = [("exampleA\nexampleB\n\nexampleC\n", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_textline(self): filename = "example.txt" file_format = "TextLine" dataset = [("exampleA", []), ("exampleB", []), ("exampleC", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_wrong_jsonl(self): filename = "text_classification/example.json" file_format = "JSONL" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assert_parse_error(response) def test_wrong_json(self): filename = "text_classification/example.jsonl" file_format = "JSON" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assert_parse_error(response) def test_wrong_excel(self): filename = "text_classification/example.jsonl" file_format = "Excel" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) 
self.assert_parse_error(response) def test_wrong_csv(self): filename = "text_classification/example.jsonl" file_format = "CSV" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assert_parse_error(response) @@ -186,26 +187,26 @@ class TestImportSequenceLabelingData(TestImportData): filename = "sequence_labeling/example.jsonl" file_format = "JSONL" dataset = [("exampleA", [[0, 1, "LOC"]]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_conll(self): filename = "sequence_labeling/example.conll" file_format = "CoNLL" dataset = [("JAPAN GET", [[0, 5, "LOC"]]), ("Nadim Ladki", [[0, 11, "PER"]])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_wrong_conll(self): filename = "sequence_labeling/example.jsonl" file_format = "CoNLL" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assert_parse_error(response) def test_jsonl_with_overlapping(self): filename = "sequence_labeling/example_overlapping.jsonl" file_format = "JSONL" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assertEqual(len(response["error"]), 0) @@ -241,7 +242,7 @@ class TestImportRelationExtractionData(TestImportData): [[0, 6, "ORG"], [22, 39, "DATE"], [44, 54, "PERSON"], [59, 70, "PERSON"]], ), ] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, RELATION_EXTRACTION) self.assert_examples(dataset) @@ -259,21 +260,21 @@ class TestImportSeq2seqData(TestImportData): filename = "seq2seq/example.jsonl" file_format = "JSONL" dataset = [("exampleA", ["label1"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_json(self): filename = "seq2seq/example.json" file_format = "JSON" dataset = [("exampleA", ["label1"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) def test_csv(self): filename = "seq2seq/example.csv" file_format = "CSV" dataset = [("exampleA", ["label1"]), ("exampleB", [])] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) @@ -298,7 +299,7 @@ class TestImportIntentDetectionAndSlotFillingData(TestImportData): ("exampleC", {"cats": [], "entities": [(0, 1, "LOC")]}), ("exampleD", {"cats": [], "entities": []}), ] - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assert_examples(dataset) @@ -308,7 +309,7 @@ class TestImportImageClassificationData(TestImportData): def test_example(self): filename = "images/1500x500.jpeg" file_format = "ImageFile" - self.import_dataset(filename, file_format) + self.import_dataset(filename, file_format, self.task) self.assertEqual(Example.objects.count(), 1) @@ -319,6 +320,6 @@ class TestFileTypeChecking(TestImportData): def test_example(self): filename = "images/example.ico" file_format = "ImageFile" - response = self.import_dataset(filename, file_format) + response = self.import_dataset(filename, file_format, self.task) self.assertEqual(len(response["error"]), 1) self.assertIn("unexpected", 
response["error"][0]["message"]) diff --git a/backend/data_import/views.py b/backend/data_import/views.py index bb3cfdf6..25639753 100644 --- a/backend/data_import/views.py +++ b/backend/data_import/views.py @@ -16,7 +16,8 @@ class DatasetCatalog(APIView): def get(self, request, *args, **kwargs): project_id = kwargs["project_id"] project = get_object_or_404(Project, pk=project_id) - options = Options.filter_by_task(project.project_type) + use_relation = getattr(project, "use_relation", False) + options = Options.filter_by_task(project.project_type, use_relation) return Response(data=options, status=status.HTTP_200_OK) @@ -26,11 +27,13 @@ class DatasetImportAPI(APIView): def post(self, request, *args, **kwargs): upload_ids = request.data.pop("uploadIds") file_format = request.data.pop("format") - task = import_dataset.delay( + task = request.data.pop("task") + celery_task = import_dataset.delay( user_id=request.user.id, project_id=self.kwargs["project_id"], file_format=file_format, upload_ids=upload_ids, + task=task, **request.data, ) - return Response({"task_id": task.task_id}) + return Response({"task_id": celery_task.task_id}) diff --git a/frontend/domain/models/upload/catalog.ts b/frontend/domain/models/upload/catalog.ts index 0f0c7718..f6497bfa 100644 --- a/frontend/domain/models/upload/catalog.ts +++ b/frontend/domain/models/upload/catalog.ts @@ -5,6 +5,12 @@ export class Catalog { example: string; properties: object; + @Expose({ name: 'task_id' }) + taskId: string; + + @Expose({ name: 'display_name' }) + displayName: string; + @Expose({ name: 'accept_types' }) acceptTypes: string; } diff --git a/frontend/domain/models/upload/parseRepository.ts b/frontend/domain/models/upload/parseRepository.ts index 703caf56..fcd5d94a 100644 --- a/frontend/domain/models/upload/parseRepository.ts +++ b/frontend/domain/models/upload/parseRepository.ts @@ -1,5 +1,5 @@ export interface ParseRepository { - analyze(projectId: string, format: string, uploadIds: number[], option: object): Promise + analyze(projectId: string, format: string, task: string, uploadIds: number[], option: object): Promise revert(serverId: string): void } diff --git a/frontend/pages/projects/_id/dataset/import.vue b/frontend/pages/projects/_id/dataset/import.vue index d48aa37e..adf388c7 100644 --- a/frontend/pages/projects/_id/dataset/import.vue +++ b/frontend/pages/projects/_id/dataset/import.vue @@ -13,7 +13,7 @@ @@ -156,7 +156,7 @@ export default { return this.uploadedFiles.length === 0 || this.taskId !== null || !this.valid }, properties() { - const item = this.catalog.find(item => item.name === this.selected) + const item = this.catalog.find(item => item.displayName === this.selected) if (item) { return item.properties } else { @@ -174,7 +174,7 @@ export default { return Object.fromEntries(textFields) }, acceptedFileTypes() { - const item = this.catalog.find(item => item.name === this.selected) + const item = this.catalog.find(item => item.displayName === this.selected) if (item) { return item.acceptTypes } else { @@ -182,7 +182,7 @@ export default { } }, example() { - const item = this.catalog.find(item => item.name === this.selected) + const item = this.catalog.find(item => item.displayName === this.selected) if (item) { const column_data = 'column_data' const column_label = 'column_label' @@ -201,7 +201,7 @@ export default { watch: { selected() { - const item = this.catalog.find(item => item.name === this.selected) + const item = this.catalog.find(item => item.displayName === this.selected) for (const [key, value] of 
Object.entries(item.properties)) { this.option[key] = value.default } @@ -239,9 +239,11 @@ export default { }, async importDataset() { this.isImporting = true + const item = this.catalog.find(item => item.displayName === this.selected) this.taskId = await this.$services.parse.analyze( this.$route.params.id, - this.selected, + item.name, + item.taskId, this.uploadedFiles.map(item => item.serverId), this.option ) diff --git a/frontend/repositories/upload/apiParseRepository.ts b/frontend/repositories/upload/apiParseRepository.ts index b7ba25c8..d60ddcad 100644 --- a/frontend/repositories/upload/apiParseRepository.ts +++ b/frontend/repositories/upload/apiParseRepository.ts @@ -6,10 +6,11 @@ export class APIParseRepository implements ParseRepository { private readonly request = ApiService ) {} - async analyze(projectId: string, format: string, uploadIds: number[], option: object): Promise { + async analyze(projectId: string, format: string, task: string, uploadIds: number[], option: object): Promise { const url = `/projects/${projectId}/upload` const data = { format, + task, uploadIds, ...option } diff --git a/frontend/services/application/upload/catalogData.ts b/frontend/services/application/upload/catalogData.ts index 6d7580b4..781bbc95 100644 --- a/frontend/services/application/upload/catalogData.ts +++ b/frontend/services/application/upload/catalogData.ts @@ -6,11 +6,15 @@ export class CatalogDTO { example: string acceptTypes: string properties: object + taskId: string + displayName: string constructor(item: Catalog) { this.name = item.name this.example = item.example this.acceptTypes = item.acceptTypes this.properties = item.properties + this.displayName = item.displayName + this.taskId = item.taskId } } diff --git a/frontend/services/application/upload/parseApplicationService.ts b/frontend/services/application/upload/parseApplicationService.ts index 1998dbb1..888f012d 100644 --- a/frontend/services/application/upload/parseApplicationService.ts +++ b/frontend/services/application/upload/parseApplicationService.ts @@ -5,8 +5,8 @@ export class ParseApplicationService { private readonly repository: ParseRepository ) {} - public async analyze(projectId: string, format: string, uploadIds: number[], option: object): Promise { - const item = await this.repository.analyze(projectId, format, uploadIds, option) + public async analyze(projectId: string, format: string, task: string, uploadIds: number[], option: object): Promise { + const item = await this.repository.analyze(projectId, format, task, uploadIds, option) return item }
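Taken together, the catalog changes above make option lookup task-aware. A minimal sketch of the new lookup, assuming the modules import as defined in this patch; the assertion only restates what filter_by_task now does:

from data_import.pipeline.catalog import RELATION_EXTRACTION, Options
from projects.models import SEQUENCE_LABELING

# Without use_relation, only the options registered under SEQUENCE_LABELING
# are returned (TextFile, TextLine, JSONL, CoNLL).
plain = Options.filter_by_task(SEQUENCE_LABELING)

# With use_relation=True, the RELATION_EXTRACTION options ("JSONL(Relation)")
# are appended as well, which is how DatasetCatalog.get serves projects that
# have use_relation enabled.
extended = Options.filter_by_task(SEQUENCE_LABELING, use_relation=True)
assert len(extended) == len(plain) + len(Options.options[RELATION_EXTRACTION])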
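On the dataset side, select_dataset now resolves the dataset class from the task string first and only falls back to the project type for unknown values. A sketch under the assumption that project is a hypothetical sequence-labeling project loaded from the ORM:

from data_import.datasets import (
    RelationExtractionDataset,
    SequenceLabelingDataset,
    select_dataset,
)
from data_import.pipeline.catalog import RELATION_EXTRACTION

# An explicit relation-extraction task picks RelationExtractionDataset,
# even though the project itself is a sequence-labeling project.
assert select_dataset(RELATION_EXTRACTION, project) is RelationExtractionDataset

# An unrecognized (or empty) task string falls back to project.project_type,
# so callers that do not send a meaningful task keep their previous behavior.
assert select_dataset("", project) is SequenceLabelingDataset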
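End to end, the frontend now looks up the selected catalog entry and sends its name and taskId instead of the raw display name. A hedged sketch of the resulting POST body for /projects/<project_id>/upload; the upload id and option values are illustrative:

payload = {
    "format": "JSONL",                    # item.name from the catalog entry
    "task": "RelationExtraction",         # item.taskId, i.e. RELATION_EXTRACTION
    "uploadIds": ["filepond-upload-id"],  # illustrative temporary upload ids
    "column_data": "text",                # remaining keys come from the
    "column_label": "label",              # per-format option form
}

# DatasetImportAPI pops format, task and uploadIds, then forwards the rest:
# import_dataset.delay(user_id=..., project_id=..., file_format=file_format,
#                      upload_ids=upload_ids, task=task, **request.data)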
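The patch registers RELATION_EXTRACTION_DIR / "example.jsonl" without showing its contents. As an assumption based on doccano's documented relation format and the span offsets asserted in TestImportRelationExtractionData, one line of that file would look roughly like the record below; the field names are not taken from this patch:

# Assumed shape of one relation-extraction JSONL record; the example file in
# the repository is authoritative, this is only an illustration.
record = {
    "text": "Google was founded on September 4, 1998, by Larry Page and Sergey Brin.",
    "entities": [
        {"id": 0, "start_offset": 0, "end_offset": 6, "label": "ORG"},
        {"id": 1, "start_offset": 22, "end_offset": 39, "label": "DATE"},
        {"id": 2, "start_offset": 44, "end_offset": 54, "label": "PERSON"},
        {"id": 3, "start_offset": 59, "end_offset": 70, "label": "PERSON"},
    ],
    "relations": [{"id": 0, "from_id": 0, "to_id": 1, "type": "founded_in"}],
}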