From fd7bc43947b6763974d17029c9a6113eabcd9f3f Mon Sep 17 00:00:00 2001
From: Hironsan
Date: Tue, 18 Jan 2022 10:00:28 +0900
Subject: [PATCH 1/6] Move data import's code and data to data_import app

---
 Pipfile | 2 +-
 backend/api/celery_tasks.py | 19 ---------------
 backend/api/tests/api/utils.py | 2 +-
 backend/api/urls.py | 15 ++----------
 backend/app/settings.py | 1 +
 backend/app/urls.py | 1 +
 .../tests/upload => data_import}/__init__.py | 0
 .../__init__.py => data_import/admin.py} | 0
 backend/data_import/apps.py | 6 +++++
 backend/data_import/celery_tasks.py | 23 ++++++++++++++++++
 backend/data_import/migrations/__init__.py | 0
 backend/data_import/models.py | 0
 backend/data_import/pipeline/__init__.py | 0
 .../pipeline}/builders.py | 4 +--
 .../pipeline}/catalog.py | 6 ++---
 .../pipeline}/cleaners.py | 4 +--
 .../upload => data_import/pipeline}/data.py | 2 +-
 .../pipeline}/examples.py | 0
 .../pipeline/exceptions.py} | 0
 .../pipeline}/factories.py | 22 ++++++++---------
 .../pipeline/labels.py} | 8 +++---
 .../pipeline}/parsers.py | 2 +-
 .../pipeline}/readers.py | 4 +--
 .../pipeline}/writers.py | 4 +--
 backend/data_import/tests/__init__.py | 0
 .../tests/data/example.txt | 0
 .../tests/data/example.utf16.csv | Bin
 .../tests/data/intent/example.jsonl | 0
 .../tests/data/label/invalid_labels.json | 0
 .../tests/data/label/valid_labels.json | 0
 .../tests/data/seq2seq/example.csv | 0
 .../tests/data/seq2seq/example.json | 0
 .../tests/data/seq2seq/example.jsonl | 0
 .../data/sequence_labeling/example.conll | 0
 .../data/sequence_labeling/example.jsonl | 0
 .../example_overlapping.jsonl | 0
 .../sequence_labeling/labeling.invalid.conll | 0
 .../sequence_labeling/labeling.trailing.conll | 0
 .../data/text_classification/example.csv | 0
 .../text_classification/example.invalid.2.csv | 0
 .../example.invalid.2.xlsx | Bin
 .../data/text_classification/example.json | 0
 .../data/text_classification/example.jsonl | 0
 .../data/text_classification/example.xlsx | Bin
 .../text_classification/example_fasttext.txt | 0
 .../example_one_column_no_header.xlsx | Bin
 .../example_out_of_order_columns.csv | 0
 .../tests}/test_builder.py | 8 +++---
 .../tests}/test_parser.py | 2 +-
 .../{api => data_import}/tests/test_tasks.py | 12 ++++-----
 .../tests/test_views.py} | 4 +--
 backend/data_import/urls.py | 17 +++++++++++++
 .../views.py} | 8 +++---
 53 files changed, 97 insertions(+), 79 deletions(-)
 rename backend/{api/tests/upload => data_import}/__init__.py (100%)
 rename backend/{api/views/upload/__init__.py => data_import/admin.py} (100%)
 create mode 100644 backend/data_import/apps.py
 create mode 100644 backend/data_import/celery_tasks.py
 create mode 100644 backend/data_import/migrations/__init__.py
 create mode 100644 backend/data_import/models.py
 create mode 100644 backend/data_import/pipeline/__init__.py
 rename backend/{api/views/upload => data_import/pipeline}/builders.py (97%)
 rename backend/{api/views/upload => data_import/pipeline}/catalog.py (96%)
 rename backend/{api/views/upload => data_import/pipeline}/cleaners.py (94%)
 rename backend/{api/views/upload => data_import/pipeline}/data.py (96%)
 rename backend/{api/views/upload => data_import/pipeline}/examples.py (100%)
 rename backend/{api/views/upload/exception.py => data_import/pipeline/exceptions.py} (100%)
 rename backend/{api/views/upload => data_import/pipeline}/factories.py (81%)
 rename backend/{api/views/upload/label.py => data_import/pipeline/labels.py} (94%)
 rename backend/{api/views/upload => data_import/pipeline}/parsers.py (99%)
 rename backend/{api/views/upload => data_import/pipeline}/readers.py (98%)
 rename backend/{api/views/upload => data_import/pipeline}/writers.py (97%)
 create mode 100644 backend/data_import/tests/__init__.py
 rename backend/{api => data_import}/tests/data/example.txt (100%)
 rename backend/{api => data_import}/tests/data/example.utf16.csv (100%)
 rename backend/{api => data_import}/tests/data/intent/example.jsonl (100%)
 rename backend/{api => data_import}/tests/data/label/invalid_labels.json (100%)
 rename backend/{api => data_import}/tests/data/label/valid_labels.json (100%)
 rename backend/{api => data_import}/tests/data/seq2seq/example.csv (100%)
 rename backend/{api => data_import}/tests/data/seq2seq/example.json (100%)
 rename backend/{api => data_import}/tests/data/seq2seq/example.jsonl (100%)
 rename backend/{api => data_import}/tests/data/sequence_labeling/example.conll (100%)
 rename backend/{api => data_import}/tests/data/sequence_labeling/example.jsonl (100%)
 rename backend/{api => data_import}/tests/data/sequence_labeling/example_overlapping.jsonl (100%)
 rename backend/{api => data_import}/tests/data/sequence_labeling/labeling.invalid.conll (100%)
 rename backend/{api => data_import}/tests/data/sequence_labeling/labeling.trailing.conll (100%)
 rename backend/{api => data_import}/tests/data/text_classification/example.csv (100%)
 rename backend/{api => data_import}/tests/data/text_classification/example.invalid.2.csv (100%)
 rename backend/{api => data_import}/tests/data/text_classification/example.invalid.2.xlsx (100%)
 rename backend/{api => data_import}/tests/data/text_classification/example.json (100%)
 rename backend/{api => data_import}/tests/data/text_classification/example.jsonl (100%)
 rename backend/{api => data_import}/tests/data/text_classification/example.xlsx (100%)
 rename backend/{api => data_import}/tests/data/text_classification/example_fasttext.txt (100%)
 rename backend/{api => data_import}/tests/data/text_classification/example_one_column_no_header.xlsx (100%)
 rename backend/{api => data_import}/tests/data/text_classification/example_out_of_order_columns.csv (100%)
 rename backend/{api/tests/upload => data_import/tests}/test_builder.py (94%)
 rename backend/{api/tests/upload => data_import/tests}/test_parser.py (98%)
 rename backend/{api => data_import}/tests/test_tasks.py (96%)
 rename backend/{api/tests/api/test_upload.py => data_import/tests/test_views.py} (89%)
 create mode 100644 backend/data_import/urls.py
 rename backend/{api/views/import_dataset.py => data_import/views.py} (93%)

diff --git a/Pipfile b/Pipfile
index e69f61d3..6bbecc10 100644
--- a/Pipfile
+++ b/Pipfile
@@ -60,6 +60,6 @@ python_version = "3.8"
 isort = "isort api -c --skip migrations"
 flake8 = "flake8 --filename \"*.py\" --extend-exclude \"server,api/migrations,api/views/__init__.py,authentification,api/apps.py\""
 wait_for_db = "python manage.py wait_for_db"
-test = "python manage.py test api.tests roles.tests members.tests metrics.tests users.tests"
+test = "python manage.py test api.tests roles.tests members.tests metrics.tests users.tests data_import.tests"
 migrate = "python manage.py migrate"
 collectstatic = "python manage.py collectstatic --noinput"
diff --git a/backend/api/celery_tasks.py b/backend/api/celery_tasks.py
index 9be04894..ce6bb997 100644
--- a/backend/api/celery_tasks.py
+++ b/backend/api/celery_tasks.py
@@ -1,34 +1,15 @@
 from celery import shared_task
 from celery.utils.log import get_task_logger
 from django.conf import settings
-from django.contrib.auth import get_user_model
 from django.shortcuts import get_object_or_404
 
 from .models import Project
 from .views.download.factory import create_repository, create_writer
 from .views.download.service import ExportApplicationService
-from .views.upload.factories import (create_bulder, create_cleaner,
-                                     create_parser)
-from .views.upload.readers import Reader
-from .views.upload.writers import BulkWriter
 
 logger = get_task_logger(__name__)
 
 
-@shared_task
-def ingest_data(user_id, project_id, filenames, format: str, **kwargs):
-    project = get_object_or_404(Project, pk=project_id)
-    user = get_object_or_404(get_user_model(), pk=user_id)
-
-    parser = create_parser(format, **kwargs)
-    builder = create_bulder(project, **kwargs)
-    reader = Reader(filenames=filenames, parser=parser, builder=builder)
-    cleaner = create_cleaner(project)
-    writer = BulkWriter(batch_size=settings.IMPORT_BATCH_SIZE)
-    writer.save(reader, project, user, cleaner)
-    return {'error': writer.errors}
-
-
 @shared_task
 def export_dataset(project_id, format: str, export_approved=False):
     project = get_object_or_404(Project, pk=project_id)
diff --git a/backend/api/tests/api/utils.py b/backend/api/tests/api/utils.py
index 4ecf5bf3..01138075 100644
--- a/backend/api/tests/api/utils.py
+++ b/backend/api/tests/api/utils.py
@@ -15,7 +15,7 @@ from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
                        INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
                        SEQUENCE_LABELING, SPEECH2TEXT)
 
-DATA_DIR = os.path.join(os.path.dirname(__file__), '../data')
+DATA_DIR = os.path.join(os.path.dirname(__file__), '../../../data_import/tests/data')
 
 ProjectData = namedtuple('ProjectData', ['item', 'users'])
diff --git a/backend/api/urls.py b/backend/api/urls.py
index 3e5b4799..2520249f 100644
--- a/backend/api/urls.py
+++ b/backend/api/urls.py
@@ -1,21 +1,11 @@
 from django.urls import include, path
 
 from .views import (annotation, auto_labeling, comment, example, example_state,
-                    export_dataset, health, import_dataset, import_export,
-                    label, project, tag, task)
+                    export_dataset, health, import_export, label, project, tag,
+                    task)
 from .views.tasks import category, relation, span, text
 
 urlpatterns_project = [
-    path(
-        route='upload',
-        view=import_dataset.UploadAPI.as_view(),
-        name='upload'
-    ),
-    path(
-        route='catalog',
-        view=import_dataset.DatasetCatalog.as_view(),
-        name='catalog'
-    ),
     path(
         route='download-format',
         view=export_dataset.DownloadDatasetCatalog.as_view(),
@@ -214,7 +204,6 @@ urlpatterns = [
         view=health.Health.as_view(),
         name='health'
     ),
-    path('fp/', include('django_drf_filepond.urls')),
     path(
         route='features',
         view=import_export.Features.as_view(),
diff --git a/backend/app/settings.py b/backend/app/settings.py
index 3856c681..ba9a643e 100644
--- a/backend/app/settings.py
+++ b/backend/app/settings.py
@@ -56,6 +56,7 @@ INSTALLED_APPS = [
     'members.apps.MembersConfig',
     'metrics.apps.MetricsConfig',
     'users.apps.UsersConfig',
+    'data_import.apps.DataImportConfig',
     'rest_framework',
     'rest_framework.authtoken',
     'django_filters',
diff --git a/backend/app/urls.py b/backend/app/urls.py
index 6a9a2961..8195845e 100644
--- a/backend/app/urls.py
+++ b/backend/app/urls.py
@@ -43,6 +43,7 @@ urlpatterns += [
     path('v1/', include('api.urls')),
     path('v1/', include('roles.urls')),
    path('v1/', include('users.urls')),
+    path('v1/', include('data_import.urls')),
     path('v1/projects/<int:project_id>/', include('members.urls')),
     path('v1/projects/<int:project_id>/metrics/', include('metrics.urls')),
     path('swagger/', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
diff --git a/backend/api/tests/upload/__init__.py b/backend/data_import/__init__.py
similarity index 100%
rename from backend/api/tests/upload/__init__.py
rename to backend/data_import/__init__.py
diff --git a/backend/api/views/upload/__init__.py b/backend/data_import/admin.py
similarity index 100%
rename from backend/api/views/upload/__init__.py
rename to backend/data_import/admin.py
diff --git a/backend/data_import/apps.py b/backend/data_import/apps.py
new file mode 100644
index 00000000..23260a7e
--- /dev/null
+++ b/backend/data_import/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class DataImportConfig(AppConfig):
+    default_auto_field = 'django.db.models.BigAutoField'
+    name = 'data_import'
diff --git a/backend/data_import/celery_tasks.py b/backend/data_import/celery_tasks.py
new file mode 100644
index 00000000..a2d9cfc0
--- /dev/null
+++ b/backend/data_import/celery_tasks.py
@@ -0,0 +1,23 @@
+from celery import shared_task
+from django.conf import settings
+from django.contrib.auth import get_user_model
+from django.shortcuts import get_object_or_404
+
+from api.models import Project
+from .pipeline.factories import create_parser, create_bulder, create_cleaner
+from .pipeline.readers import Reader
+from .pipeline.writers import BulkWriter
+
+
+@shared_task
+def ingest_data(user_id, project_id, filenames, format: str, **kwargs):
+    project = get_object_or_404(Project, pk=project_id)
+    user = get_object_or_404(get_user_model(), pk=user_id)
+
+    parser = create_parser(format, **kwargs)
+    builder = create_bulder(project, **kwargs)
+    reader = Reader(filenames=filenames, parser=parser, builder=builder)
+    cleaner = create_cleaner(project)
+    writer = BulkWriter(batch_size=settings.IMPORT_BATCH_SIZE)
+    writer.save(reader, project, user, cleaner)
+    return {'error': writer.errors}
diff --git a/backend/data_import/migrations/__init__.py b/backend/data_import/migrations/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/data_import/models.py b/backend/data_import/models.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/data_import/pipeline/__init__.py b/backend/data_import/pipeline/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/views/upload/builders.py b/backend/data_import/pipeline/builders.py
similarity index 97%
rename from backend/api/views/upload/builders.py
rename to backend/data_import/pipeline/builders.py
index c8829b15..dad1de98 100644
--- a/backend/api/views/upload/builders.py
+++ b/backend/data_import/pipeline/builders.py
@@ -5,8 +5,8 @@ from typing import Any, Dict, List, Optional, Type, TypeVar
 from pydantic import ValidationError
 
 from .data import BaseData
-from .exception import FileParseException
-from .label import Label
+from .exceptions import FileParseException
+from .labels import Label
 from .readers import Builder, Record
 
 logger = getLogger(__name__)
diff --git a/backend/api/views/upload/catalog.py b/backend/data_import/pipeline/catalog.py
similarity index 96%
rename from backend/api/views/upload/catalog.py
rename to backend/data_import/pipeline/catalog.py
index 99f23b22..acc69c59 100644
--- a/backend/api/views/upload/catalog.py
+++ b/backend/data_import/pipeline/catalog.py
@@ -4,9 +4,9 @@ from typing import Dict, List, Type
 from pydantic import BaseModel
 from typing_extensions import Literal
 
-from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
-                       INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
-                       SEQUENCE_LABELING, SPEECH2TEXT)
+from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
+                        INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
+                        SEQUENCE_LABELING, SPEECH2TEXT)
 from . import examples
 
 encodings = Literal[
diff --git a/backend/api/views/upload/cleaners.py b/backend/data_import/pipeline/cleaners.py
similarity index 94%
rename from backend/api/views/upload/cleaners.py
rename to backend/data_import/pipeline/cleaners.py
index 093a5e8a..73d6a6bb 100644
--- a/backend/api/views/upload/cleaners.py
+++ b/backend/data_import/pipeline/cleaners.py
@@ -1,7 +1,7 @@
 from typing import List
 
-from ...models import Project
-from .label import CategoryLabel, Label, SpanLabel
+from api.models import Project
+from .labels import CategoryLabel, Label, SpanLabel
 
 
 class Cleaner:
diff --git a/backend/api/views/upload/data.py b/backend/data_import/pipeline/data.py
similarity index 96%
rename from backend/api/views/upload/data.py
rename to backend/data_import/pipeline/data.py
index 5a069950..98a36ac7 100644
--- a/backend/api/views/upload/data.py
+++ b/backend/data_import/pipeline/data.py
@@ -4,7 +4,7 @@ from typing import Any, Dict
 
 from pydantic import BaseModel, validator
 
-from ...models import Example, Project
+from api.models import Example, Project
 
 
 class BaseData(BaseModel, abc.ABC):
diff --git a/backend/api/views/upload/examples.py b/backend/data_import/pipeline/examples.py
similarity index 100%
rename from backend/api/views/upload/examples.py
rename to backend/data_import/pipeline/examples.py
diff --git a/backend/api/views/upload/exception.py b/backend/data_import/pipeline/exceptions.py
similarity index 100%
rename from backend/api/views/upload/exception.py
rename to backend/data_import/pipeline/exceptions.py
diff --git a/backend/api/views/upload/factories.py b/backend/data_import/pipeline/factories.py
similarity index 81%
rename from backend/api/views/upload/factories.py
rename to backend/data_import/pipeline/factories.py
index a022f624..ab61eae2 100644
--- a/backend/api/views/upload/factories.py
+++ b/backend/data_import/pipeline/factories.py
@@ -1,7 +1,7 @@
-from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
-                       INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
-                       SEQUENCE_LABELING, SPEECH2TEXT)
-from . import builders, catalog, cleaners, data, label, parsers, readers
+from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
+                        INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
+                        SEQUENCE_LABELING, SPEECH2TEXT)
+from . import builders, catalog, cleaners, data, labels, parsers, readers
 
 
 def get_data_class(project_type: str):
@@ -37,11 +37,11 @@ def create_parser(file_format: str, **kwargs):
 
 def get_label_class(project_type: str):
     mapping = {
-        DOCUMENT_CLASSIFICATION: label.CategoryLabel,
-        SEQUENCE_LABELING: label.SpanLabel,
-        SEQ2SEQ: label.TextLabel,
-        IMAGE_CLASSIFICATION: label.CategoryLabel,
-        SPEECH2TEXT: label.TextLabel,
+        DOCUMENT_CLASSIFICATION: labels.CategoryLabel,
+        SEQUENCE_LABELING: labels.SpanLabel,
+        SEQ2SEQ: labels.TextLabel,
+        IMAGE_CLASSIFICATION: labels.CategoryLabel,
+        SPEECH2TEXT: labels.TextLabel,
     }
     if project_type not in mapping:
         ValueError(f'Invalid project type: {project_type}')
@@ -71,11 +71,11 @@ def create_bulder(project, **kwargs):
         label_columns = [
             builders.LabelColumn(
                 name='cats',
-                value_class=label.CategoryLabel
+                value_class=labels.CategoryLabel
             ),
             builders.LabelColumn(
                 name='entities',
-                value_class=label.SpanLabel
+                value_class=labels.SpanLabel
             )
         ]
     else:
diff --git a/backend/api/views/upload/label.py b/backend/data_import/pipeline/labels.py
similarity index 94%
rename from backend/api/views/upload/label.py
rename to backend/data_import/pipeline/labels.py
index f6541920..667f537e 100644
--- a/backend/api/views/upload/label.py
+++ b/backend/data_import/pipeline/labels.py
@@ -3,10 +3,10 @@ from typing import Any, Dict, Optional, Union
 
 from pydantic import BaseModel, validator
 
-from ...models import Category, CategoryType
-from ...models import Label as LabelModel
-from ...models import Project, Span, SpanType
-from ...models import TextLabel as TL
+from api.models import Category, CategoryType
+from api.models import Label as LabelModel
+from api.models import Project, Span, SpanType
+from api.models import TextLabel as TL
 
 
 class Label(BaseModel, abc.ABC):
diff --git a/backend/api/views/upload/parsers.py b/backend/data_import/pipeline/parsers.py
similarity index 99%
rename from backend/api/views/upload/parsers.py
rename to backend/data_import/pipeline/parsers.py
index d1cd84b8..f2d43bf4 100644
--- a/backend/api/views/upload/parsers.py
+++ b/backend/data_import/pipeline/parsers.py
@@ -10,7 +10,7 @@ import pyexcel.exceptions
 from chardet import UniversalDetector
 from seqeval.scheme import BILOU, IOB2, IOBES, IOE2, Tokens
 
-from .exception import FileParseException
+from .exceptions import FileParseException
 from .readers import DEFAULT_LABEL_COLUMN, DEFAULT_TEXT_COLUMN, Parser
 
 DEFAULT_ENCODING = 'Auto'
diff --git a/backend/api/views/upload/readers.py b/backend/data_import/pipeline/readers.py
similarity index 98%
rename from backend/api/views/upload/readers.py
rename to backend/data_import/pipeline/readers.py
index bd87ee15..a1338442 100644
--- a/backend/api/views/upload/readers.py
+++ b/backend/data_import/pipeline/readers.py
@@ -4,8 +4,8 @@ from typing import Any, Dict, Iterator, List, Type
 
 from .cleaners import Cleaner
 from .data import BaseData
-from .exception import FileParseException
-from .label import Label
+from .exceptions import FileParseException
+from .labels import Label
 
 DEFAULT_TEXT_COLUMN = 'text'
 DEFAULT_LABEL_COLUMN = 'label'
diff --git a/backend/api/views/upload/writers.py b/backend/data_import/pipeline/writers.py
similarity index 97%
rename from backend/api/views/upload/writers.py
rename to backend/data_import/pipeline/writers.py
index e3157bea..a79bd442 100644
--- a/backend/api/views/upload/writers.py
+++ b/backend/data_import/pipeline/writers.py
@@ -5,8 +5,8 @@ from typing import Any, Dict, List
 
 from django.conf import settings
 
-from ...models import CategoryType, Example, Project, SpanType
-from .exception import FileParseException
+from api.models import CategoryType, Example, Project, SpanType
+from .exceptions import FileParseException
 from .readers import BaseReader
diff --git a/backend/data_import/tests/__init__.py b/backend/data_import/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/api/tests/data/example.txt b/backend/data_import/tests/data/example.txt
similarity index 100%
rename from backend/api/tests/data/example.txt
rename to backend/data_import/tests/data/example.txt
diff --git a/backend/api/tests/data/example.utf16.csv b/backend/data_import/tests/data/example.utf16.csv
similarity index 100%
rename from backend/api/tests/data/example.utf16.csv
rename to backend/data_import/tests/data/example.utf16.csv
diff --git a/backend/api/tests/data/intent/example.jsonl b/backend/data_import/tests/data/intent/example.jsonl
similarity index 100%
rename from backend/api/tests/data/intent/example.jsonl
rename to backend/data_import/tests/data/intent/example.jsonl
diff --git a/backend/api/tests/data/label/invalid_labels.json b/backend/data_import/tests/data/label/invalid_labels.json
similarity index 100%
rename from backend/api/tests/data/label/invalid_labels.json
rename to backend/data_import/tests/data/label/invalid_labels.json
diff --git a/backend/api/tests/data/label/valid_labels.json b/backend/data_import/tests/data/label/valid_labels.json
similarity index 100%
rename from backend/api/tests/data/label/valid_labels.json
rename to backend/data_import/tests/data/label/valid_labels.json
diff --git a/backend/api/tests/data/seq2seq/example.csv b/backend/data_import/tests/data/seq2seq/example.csv
similarity index 100%
rename from backend/api/tests/data/seq2seq/example.csv
rename to backend/data_import/tests/data/seq2seq/example.csv
diff --git a/backend/api/tests/data/seq2seq/example.json b/backend/data_import/tests/data/seq2seq/example.json
similarity index 100%
rename from backend/api/tests/data/seq2seq/example.json
rename to backend/data_import/tests/data/seq2seq/example.json
diff --git a/backend/api/tests/data/seq2seq/example.jsonl b/backend/data_import/tests/data/seq2seq/example.jsonl
similarity index 100%
rename from backend/api/tests/data/seq2seq/example.jsonl
rename to backend/data_import/tests/data/seq2seq/example.jsonl
diff --git a/backend/api/tests/data/sequence_labeling/example.conll b/backend/data_import/tests/data/sequence_labeling/example.conll
similarity index 100%
rename from backend/api/tests/data/sequence_labeling/example.conll
rename to backend/data_import/tests/data/sequence_labeling/example.conll
diff --git a/backend/api/tests/data/sequence_labeling/example.jsonl b/backend/data_import/tests/data/sequence_labeling/example.jsonl
similarity index 100%
rename from backend/api/tests/data/sequence_labeling/example.jsonl
rename to backend/data_import/tests/data/sequence_labeling/example.jsonl
diff --git a/backend/api/tests/data/sequence_labeling/example_overlapping.jsonl b/backend/data_import/tests/data/sequence_labeling/example_overlapping.jsonl
similarity index 100%
rename from backend/api/tests/data/sequence_labeling/example_overlapping.jsonl
rename to backend/data_import/tests/data/sequence_labeling/example_overlapping.jsonl
diff --git a/backend/api/tests/data/sequence_labeling/labeling.invalid.conll b/backend/data_import/tests/data/sequence_labeling/labeling.invalid.conll
similarity index 100%
rename from backend/api/tests/data/sequence_labeling/labeling.invalid.conll
rename to backend/data_import/tests/data/sequence_labeling/labeling.invalid.conll
diff --git a/backend/api/tests/data/sequence_labeling/labeling.trailing.conll b/backend/data_import/tests/data/sequence_labeling/labeling.trailing.conll
similarity index 100%
rename from backend/api/tests/data/sequence_labeling/labeling.trailing.conll
rename to backend/data_import/tests/data/sequence_labeling/labeling.trailing.conll
diff --git a/backend/api/tests/data/text_classification/example.csv b/backend/data_import/tests/data/text_classification/example.csv
similarity index 100%
rename from backend/api/tests/data/text_classification/example.csv
rename to backend/data_import/tests/data/text_classification/example.csv
diff --git a/backend/api/tests/data/text_classification/example.invalid.2.csv b/backend/data_import/tests/data/text_classification/example.invalid.2.csv
similarity index 100%
rename from backend/api/tests/data/text_classification/example.invalid.2.csv
rename to backend/data_import/tests/data/text_classification/example.invalid.2.csv
diff --git a/backend/api/tests/data/text_classification/example.invalid.2.xlsx b/backend/data_import/tests/data/text_classification/example.invalid.2.xlsx
similarity index 100%
rename from backend/api/tests/data/text_classification/example.invalid.2.xlsx
rename to backend/data_import/tests/data/text_classification/example.invalid.2.xlsx
diff --git a/backend/api/tests/data/text_classification/example.json b/backend/data_import/tests/data/text_classification/example.json
similarity index 100%
rename from backend/api/tests/data/text_classification/example.json
rename to backend/data_import/tests/data/text_classification/example.json
diff --git a/backend/api/tests/data/text_classification/example.jsonl b/backend/data_import/tests/data/text_classification/example.jsonl
similarity index 100%
rename from backend/api/tests/data/text_classification/example.jsonl
rename to backend/data_import/tests/data/text_classification/example.jsonl
diff --git a/backend/api/tests/data/text_classification/example.xlsx b/backend/data_import/tests/data/text_classification/example.xlsx
similarity index 100%
rename from backend/api/tests/data/text_classification/example.xlsx
rename to backend/data_import/tests/data/text_classification/example.xlsx
diff --git a/backend/api/tests/data/text_classification/example_fasttext.txt b/backend/data_import/tests/data/text_classification/example_fasttext.txt
similarity index 100%
rename from backend/api/tests/data/text_classification/example_fasttext.txt
rename to backend/data_import/tests/data/text_classification/example_fasttext.txt
diff --git a/backend/api/tests/data/text_classification/example_one_column_no_header.xlsx b/backend/data_import/tests/data/text_classification/example_one_column_no_header.xlsx
similarity index 100%
rename from backend/api/tests/data/text_classification/example_one_column_no_header.xlsx
rename to backend/data_import/tests/data/text_classification/example_one_column_no_header.xlsx
diff --git a/backend/api/tests/data/text_classification/example_out_of_order_columns.csv b/backend/data_import/tests/data/text_classification/example_out_of_order_columns.csv
similarity index 100%
rename from backend/api/tests/data/text_classification/example_out_of_order_columns.csv
rename to backend/data_import/tests/data/text_classification/example_out_of_order_columns.csv
diff --git a/backend/api/tests/upload/test_builder.py b/backend/data_import/tests/test_builder.py
similarity index 94%
rename from backend/api/tests/upload/test_builder.py
rename to backend/data_import/tests/test_builder.py
index fe44ff74..b444cff5 100644
--- a/backend/api/tests/upload/test_builder.py
+++ b/backend/data_import/tests/test_builder.py
@@ -1,10 +1,10 @@
 import unittest
 from typing import List
 
-from ...views.upload import builders
-from ...views.upload.data import TextData
-from ...views.upload.exception import FileParseException
-from ...views.upload.label import CategoryLabel, SpanLabel
+from data_import.pipeline import builders
+from data_import.pipeline.data import TextData
+from data_import.pipeline.exceptions import FileParseException
+from data_import.pipeline.labels import CategoryLabel, SpanLabel
 
 
 class TestColumnBuilder(unittest.TestCase):
diff --git a/backend/api/tests/upload/test_parser.py b/backend/data_import/tests/test_parser.py
similarity index 98%
rename from backend/api/tests/upload/test_parser.py
rename to backend/data_import/tests/test_parser.py
index c73dc239..3ce1009b 100644
--- a/backend/api/tests/upload/test_parser.py
+++ b/backend/data_import/tests/test_parser.py
@@ -4,7 +4,7 @@ import shutil
 import tempfile
 import unittest
 
-from ...views.upload import parsers
+from data_import.pipeline import parsers
 
 
 class TestParser(unittest.TestCase):
diff --git a/backend/api/tests/test_tasks.py b/backend/data_import/tests/test_tasks.py
similarity index 96%
rename from backend/api/tests/test_tasks.py
rename to backend/data_import/tests/test_tasks.py
index 864fcbf2..7d65883d 100644
--- a/backend/api/tests/test_tasks.py
+++ b/backend/data_import/tests/test_tasks.py
@@ -2,12 +2,12 @@ import pathlib
 
 from django.test import TestCase
 
-from ..celery_tasks import ingest_data
-from ..models import (DOCUMENT_CLASSIFICATION,
-                      INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
-                      SEQUENCE_LABELING, Category, CategoryType, Example, Span,
-                      SpanType)
-from .api.utils import prepare_project
+from data_import.celery_tasks import ingest_data
+from api.models import (DOCUMENT_CLASSIFICATION,
+                        INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
+                        SEQUENCE_LABELING, Category, CategoryType, Example, Span,
+                        SpanType)
+from api.tests.api.utils import prepare_project
 
 
 class TestIngestData(TestCase):
diff --git a/backend/api/tests/api/test_upload.py b/backend/data_import/tests/test_views.py
similarity index 89%
rename from backend/api/tests/api/test_upload.py
rename to backend/data_import/tests/test_views.py
index 3873bd70..6bc1b6d2 100644
--- a/backend/api/tests/api/test_upload.py
+++ b/backend/data_import/tests/test_views.py
@@ -2,8 +2,8 @@ from django.test import override_settings
 from rest_framework import status
 from rest_framework.reverse import reverse
 
-from ...models import DOCUMENT_CLASSIFICATION
-from .utils import CRUDMixin, create_default_roles, make_user, prepare_project
+from api.models import DOCUMENT_CLASSIFICATION
+from api.tests.api.utils import CRUDMixin, create_default_roles, make_user, prepare_project
 
 
 class TestFeatures(CRUDMixin):
diff --git a/backend/data_import/urls.py b/backend/data_import/urls.py
new file mode 100644
index 00000000..e6dc3fe5
--- /dev/null
+++ b/backend/data_import/urls.py
@@ -0,0 +1,17 @@
+from django.urls import include, path
+
+from .views import UploadAPI, DatasetCatalog
+
+urlpatterns = [
+    path('fp/', include('django_drf_filepond.urls')),
+    path(
+        route='projects/<int:project_id>/upload',
+        view=UploadAPI.as_view(),
+        name='upload'
+    ),
+    path(
+        route='projects/<int:project_id>/catalog',
+        view=DatasetCatalog.as_view(),
+        name='catalog'
+    ),
+]
diff --git a/backend/api/views/import_dataset.py b/backend/data_import/views.py
similarity index 93%
-# -# return Response(status=status.HTTP_201_CREATED) -# -# @classmethod -# def get_cloud_object_as_io(cls, container_name, object_name): -# provider = settings.CLOUD_BROWSER_APACHE_LIBCLOUD_PROVIDER.lower() -# account = settings.CLOUD_BROWSER_APACHE_LIBCLOUD_ACCOUNT -# key = settings.CLOUD_BROWSER_APACHE_LIBCLOUD_SECRET_KEY -# -# driver = get_driver(DriverType.STORAGE, provider) -# client = driver(account, key) -# -# cloud_container = client.get_container(container_name) -# cloud_object = cloud_container.get_object(object_name) -# -# return iterable_to_io(cloud_object.as_stream()) diff --git a/backend/data_import/tests/test_views.py b/backend/data_import/tests/test_views.py index 6bc1b6d2..11a72650 100644 --- a/backend/data_import/tests/test_views.py +++ b/backend/data_import/tests/test_views.py @@ -1,23 +1,8 @@ -from django.test import override_settings from rest_framework import status from rest_framework.reverse import reverse from api.models import DOCUMENT_CLASSIFICATION -from api.tests.api.utils import CRUDMixin, create_default_roles, make_user, prepare_project - - -class TestFeatures(CRUDMixin): - - @classmethod - def setUpTestData(cls): - create_default_roles() - cls.user = make_user() - cls.url = reverse('features') - - @override_settings(CLOUD_BROWSER_APACHE_LIBCLOUD_PROVIDER=None) - def test_no_cloud_upload(self): - response = self.assert_fetch(self.user, status.HTTP_200_OK) - self.assertFalse(response.json().get('cloud_upload')) +from api.tests.api.utils import CRUDMixin, prepare_project class TestImportCatalog(CRUDMixin): From 5565283ffdf88a2e37e8949e20c8ae1f9111e627 Mon Sep 17 00:00:00 2001 From: Hironsan Date: Tue, 18 Jan 2022 10:14:02 +0900 Subject: [PATCH 3/6] Change words like upload, ingest to import --- frontend/i18n/en/projects/dataset.js | 4 ++-- frontend/pages/projects/_id/upload/index.vue | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/frontend/i18n/en/projects/dataset.js b/frontend/i18n/en/projects/dataset.js index fb5b0b38..3d02569a 100644 --- a/frontend/i18n/en/projects/dataset.js +++ b/frontend/i18n/en/projects/dataset.js @@ -7,11 +7,11 @@ export default { metadata: 'Metadata', action: 'Action', annotate: 'Annotate', - importDataTitle: 'Upload Data', + importDataTitle: 'Import Dataset', importDataMessage1: 'Select a file format', importDataMessage2: 'Select file(s)', importDataPlaceholder: 'File input', - exportDataTitle: 'Export Data', + exportDataTitle: 'Export Dataset', exportDataMessage: 'Select a file format', exportDataMessage2: 'Select a file name', deleteDocumentsTitle: 'Delete Document', diff --git a/frontend/pages/projects/_id/upload/index.vue b/frontend/pages/projects/_id/upload/index.vue index f98da156..cfcb053e 100644 --- a/frontend/pages/projects/_id/upload/index.vue +++ b/frontend/pages/projects/_id/upload/index.vue @@ -85,9 +85,9 @@ - Ingest + Import @@ -232,7 +232,7 @@ export default { this.$nextTick() } }, - async injest() { + async importDataset() { this.taskId = await this.$services.parse.analyze( this.$route.params.id, this.selected, From a090e94d0fda22eabe8f213a9b67b8275a93c95e Mon Sep 17 00:00:00 2001 From: Hironsan Date: Tue, 18 Jan 2022 10:18:44 +0900 Subject: [PATCH 4/6] Change a parameter name from format to file_format --- backend/data_import/celery_tasks.py | 4 ++-- backend/data_import/views.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/data_import/celery_tasks.py b/backend/data_import/celery_tasks.py index a2d9cfc0..52a8b226 100644 --- 
a/backend/data_import/celery_tasks.py +++ b/backend/data_import/celery_tasks.py @@ -10,11 +10,11 @@ from .pipeline.writers import BulkWriter @shared_task -def ingest_data(user_id, project_id, filenames, format: str, **kwargs): +def ingest_data(user_id, project_id, filenames, file_format: str, **kwargs): project = get_object_or_404(Project, pk=project_id) user = get_object_or_404(get_user_model(), pk=user_id) - parser = create_parser(format, **kwargs) + parser = create_parser(file_format, **kwargs) builder = create_bulder(project, **kwargs) reader = Reader(filenames=filenames, parser=parser, builder=builder) cleaner = create_cleaner(project) diff --git a/backend/data_import/views.py b/backend/data_import/views.py index 04be66e1..ade5ecc2 100644 --- a/backend/data_import/views.py +++ b/backend/data_import/views.py @@ -31,7 +31,7 @@ class UploadAPI(APIView): def post(self, request, *args, **kwargs): project_id = self.kwargs['project_id'] upload_ids = request.data.pop('uploadIds') - format = request.data.pop('format') + file_format = request.data.pop('format') tus = [TemporaryUpload.objects.get(upload_id=upload_id) for upload_id in upload_ids] sus = [ @@ -46,7 +46,7 @@ class UploadAPI(APIView): user_id=request.user.id, project_id=project_id, filenames=filenames, - format=format, + file_format=file_format, **request.data ) return Response({'task_id': task.task_id}) From f1fc54a316e3a1d54ff576f9c5f735754abd0017 Mon Sep 17 00:00:00 2001 From: Hironsan Date: Tue, 18 Jan 2022 10:27:45 +0900 Subject: [PATCH 5/6] Rename UploadAPI to DatasetImportAPI --- backend/data_import/celery_tasks.py | 2 +- backend/data_import/tests/test_tasks.py | 56 ++++++++++++------------- backend/data_import/urls.py | 4 +- backend/data_import/views.py | 6 +-- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/backend/data_import/celery_tasks.py b/backend/data_import/celery_tasks.py index 52a8b226..18cc42b5 100644 --- a/backend/data_import/celery_tasks.py +++ b/backend/data_import/celery_tasks.py @@ -10,7 +10,7 @@ from .pipeline.writers import BulkWriter @shared_task -def ingest_data(user_id, project_id, filenames, file_format: str, **kwargs): +def import_dataset(user_id, project_id, filenames, file_format: str, **kwargs): project = get_object_or_404(Project, pk=project_id) user = get_object_or_404(get_user_model(), pk=user_id) diff --git a/backend/data_import/tests/test_tasks.py b/backend/data_import/tests/test_tasks.py index 7d65883d..566b0315 100644 --- a/backend/data_import/tests/test_tasks.py +++ b/backend/data_import/tests/test_tasks.py @@ -2,7 +2,7 @@ import pathlib from django.test import TestCase -from data_import.celery_tasks import ingest_data +from data_import.celery_tasks import import_dataset from api.models import (DOCUMENT_CLASSIFICATION, INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, SEQUENCE_LABELING, Category, CategoryType, Example, Span, @@ -10,7 +10,7 @@ from api.models import (DOCUMENT_CLASSIFICATION, from api.tests.api.utils import prepare_project -class TestIngestData(TestCase): +class TestImportData(TestCase): task = 'Any' annotation_class = Category @@ -19,13 +19,13 @@ class TestIngestData(TestCase): self.user = self.project.users[0] self.data_path = pathlib.Path(__file__).parent / 'data' - def ingest_data(self, filename, file_format, kwargs=None): + def import_dataset(self, filename, file_format, kwargs=None): filenames = [str(self.data_path / filename)] kwargs = kwargs or {} - return ingest_data(self.user.id, self.project.item.id, filenames, file_format, **kwargs) + return 
+        return import_dataset(self.user.id, self.project.item.id, filenames, file_format, **kwargs)
 
 
-class TestIngestClassificationData(TestIngestData):
+class TestImportClassificationData(TestImportData):
     task = DOCUMENT_CLASSIFICATION
 
     def assert_examples(self, dataset):
@@ -50,7 +50,7 @@
             ('exampleB', ['positive', 'negative']),
             ('exampleC', [])
         ]
-        self.ingest_data(filename, file_format, kwargs)
+        self.import_dataset(filename, file_format, kwargs)
         self.assert_examples(dataset)
 
     def test_csv(self):
@@ -60,7 +60,7 @@
             ('exampleA', ['positive']),
             ('exampleB', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_csv_out_of_order_columns(self):
@@ -70,7 +70,7 @@
             ('exampleA', ['positive']),
             ('exampleB', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_fasttext(self):
@@ -81,7 +81,7 @@
             ('exampleB', ['positive', 'negative']),
             ('exampleC', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_excel(self):
@@ -91,7 +91,7 @@
             ('exampleA', ['positive']),
             ('exampleB', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_json(self):
@@ -102,7 +102,7 @@
             ('exampleB', ['positive', 'negative']),
             ('exampleC', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_textfile(self):
@@ -111,7 +111,7 @@
         dataset = [
             ('exampleA\nexampleB\n\nexampleC\n', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_textline(self):
@@ -122,35 +122,35 @@
             ('exampleB', []),
             ('exampleC', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_wrong_jsonl(self):
         filename = 'text_classification/example.json'
         file_format = 'JSONL'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
         self.assert_parse_error(response)
 
     def test_wrong_json(self):
         filename = 'text_classification/example.jsonl'
         file_format = 'JSON'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
         self.assert_parse_error(response)
 
     def test_wrong_excel(self):
         filename = 'text_classification/example.jsonl'
         file_format = 'Excel'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
         self.assert_parse_error(response)
 
     def test_wrong_csv(self):
         filename = 'text_classification/example.jsonl'
         file_format = 'CSV'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
         self.assert_parse_error(response)
 
 
-class TestIngestSequenceLabelingData(TestIngestData):
+class TestImportSequenceLabelingData(TestImportData):
     task = SEQUENCE_LABELING
 
     def assert_examples(self, dataset):
@@ -173,7 +173,7 @@ class TestIngestSequenceLabelingData(TestIngestData):
             ('exampleA', [[0, 1, 'LOC']]),
             ('exampleB', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_conll(self):
@@ -183,23 +183,23 @@
             ('JAPAN GET', [[0, 5, 'LOC']]),
             ('Nadim Ladki', [[0, 11, 'PER']])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_wrong_conll(self):
         filename = 'sequence_labeling/example.jsonl'
         file_format = 'CoNLL'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
         self.assert_parse_error(response)
 
     def test_jsonl_with_overlapping(self):
         filename = 'sequence_labeling/example_overlapping.jsonl'
         file_format = 'JSONL'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
         self.assertEqual(len(response['error']), 1)
 
 
-class TestIngestSeq2seqData(TestIngestData):
+class TestImportSeq2seqData(TestImportData):
     task = SEQ2SEQ
 
     def assert_examples(self, dataset):
@@ -216,7 +216,7 @@
             ('exampleA', ['label1']),
             ('exampleB', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_json(self):
@@ -226,7 +226,7 @@
             ('exampleA', ['label1']),
             ('exampleB', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
     def test_csv(self):
@@ -236,11 +236,11 @@
             ('exampleA', ['label1']),
             ('exampleB', [])
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
 
 
-class TextIngestIntentDetectionAndSlotFillingData(TestIngestData):
+class TextImportIntentDetectionAndSlotFillingData(TestImportData):
     task = INTENT_DETECTION_AND_SLOT_FILLING
 
     def assert_examples(self, dataset):
@@ -261,5 +261,5 @@
             ('exampleC', {'cats': [], 'entities': [(0, 1, 'LOC')]}),
             ('exampleD', {'cats': [], 'entities': []}),
         ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
         self.assert_examples(dataset)
diff --git a/backend/data_import/urls.py b/backend/data_import/urls.py
index e6dc3fe5..b8dd039f 100644
--- a/backend/data_import/urls.py
+++ b/backend/data_import/urls.py
@@ -1,12 +1,12 @@
 from django.urls import include, path
 
-from .views import UploadAPI, DatasetCatalog
+from .views import DatasetImportAPI, DatasetCatalog
 
 urlpatterns = [
     path('fp/', include('django_drf_filepond.urls')),
     path(
         route='projects/<int:project_id>/upload',
-        view=UploadAPI.as_view(),
+        view=DatasetImportAPI.as_view(),
         name='upload'
     ),
     path(
diff --git a/backend/data_import/views.py b/backend/data_import/views.py
index ade5ecc2..7d04690b 100644
--- a/backend/data_import/views.py
+++ b/backend/data_import/views.py
@@ -11,7 +11,7 @@ from rest_framework.views import APIView
 
 from api.models import Project
 from members.permissions import IsProjectAdmin
-from .celery_tasks import ingest_data
+from .celery_tasks import import_dataset
 from .pipeline.catalog import Options
 
 
@@ -25,7 +25,7 @@ class DatasetCatalog(APIView):
         return Response(data=options, status=status.HTTP_200_OK)
 
 
-class UploadAPI(APIView):
+class DatasetImportAPI(APIView):
     permission_classes = [IsAuthenticated & IsProjectAdmin]
 
     def post(self, request, *args, **kwargs):
@@ -42,7 +42,7 @@ class UploadAPI(APIView):
             for tu in tus
         ]
         filenames = [su.file.path for su in sus]
-        task = ingest_data.delay(
+        task = import_dataset.delay(
             user_id=request.user.id,
             project_id=project_id,
             filenames=filenames,

From 0cfdfc2e47ff995abf26ae8eb4bdf85d8295de5b Mon Sep 17 00:00:00 2001
From: Hironsan
Date: Tue, 18 Jan 2022 10:34:05 +0900
Subject: [PATCH 6/6] Add isImporting flag

---
 frontend/pages/projects/_id/upload/index.vue | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/frontend/pages/projects/_id/upload/index.vue b/frontend/pages/projects/_id/upload/index.vue
index cfcb053e..ca9b1c7f 100644
--- a/frontend/pages/projects/_id/upload/index.vue
+++ b/frontend/pages/projects/_id/upload/index.vue
@@ -4,7 +4,7 @@
       {{ $t('dataset.importDataTitle') }}
-
+