Merge pull request #1640 from doccano/enhancement/createDataImport

[Enhancement]Separate data import app
3 years ago · 6c2ffdcb56
56 changed files with 136 additions and 200 deletions
--- a/2
+++ b/2
@@ -60,6 +60,6 @@ python_version = "3.8"
 isort = "isort api -c --skip migrations"
 flake8 = "flake8 --filename \"*.py\" --extend-exclude \"server,api/migrations,api/views/__init__.py,authentification,api/apps.py\""
 wait_for_db = "python manage.py wait_for_db"
-test = "python manage.py test api.tests roles.tests members.tests metrics.tests users.tests"
+test = "python manage.py test api.tests roles.tests members.tests metrics.tests users.tests data_import.tests"
 migrate = "python manage.py migrate"
 collectstatic = "python manage.py collectstatic --noinput"
--- a/backend/api/celery_tasks.py
+++ b/backend/api/celery_tasks.py
@@ -1,34 +1,15 @@
 from celery import shared_task
 from celery.utils.log import get_task_logger
 from django.conf import settings
-from django.contrib.auth import get_user_model
 from django.shortcuts import get_object_or_404

 from .models import Project
 from .views.download.factory import create_repository, create_writer
 from .views.download.service import ExportApplicationService
-from .views.upload.factories import (create_bulder, create_cleaner,
-                                     create_parser)
-from .views.upload.readers import Reader
-from .views.upload.writers import BulkWriter

 logger = get_task_logger(__name__)


-@shared_task
-def ingest_data(user_id, project_id, filenames, format: str, **kwargs):
-    project = get_object_or_404(Project, pk=project_id)
-    user = get_object_or_404(get_user_model(), pk=user_id)
-
-    parser = create_parser(format, **kwargs)
-    builder = create_bulder(project, **kwargs)
-    reader = Reader(filenames=filenames, parser=parser, builder=builder)
-    cleaner = create_cleaner(project)
-    writer = BulkWriter(batch_size=settings.IMPORT_BATCH_SIZE)
-    writer.save(reader, project, user, cleaner)
-    return {'error': writer.errors}
-
-
@shared_task
 def export_dataset(project_id, format: str, export_approved=False):
    project = get_object_or_404(Project, pk=project_id)
--- a/backend/api/tests/api/utils.py
+++ b/backend/api/tests/api/utils.py
@@ -15,7 +15,7 @@ from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
                       INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
                       SEQUENCE_LABELING, SPEECH2TEXT)

-DATA_DIR = os.path.join(os.path.dirname(__file__), '../data')
+DATA_DIR = os.path.join(os.path.dirname(__file__), '../../../data_import/tests/data')


 ProjectData = namedtuple('ProjectData', ['item', 'users'])
--- a/backend/api/urls.py
+++ b/backend/api/urls.py
@@ -1,21 +1,10 @@
 from django.urls import include, path

 from .views import (annotation, auto_labeling, comment, example, example_state,
-                    export_dataset, health, import_dataset, import_export,
-                    label, project, tag, task)
+                    export_dataset, health, label, project, tag, task)
 from .views.tasks import category, relation, span, text

 urlpatterns_project = [
-    path(
-        route='upload',
-        view=import_dataset.UploadAPI.as_view(),
-        name='upload'
-    ),
-    path(
-        route='catalog',
-        view=import_dataset.DatasetCatalog.as_view(),
-        name='catalog'
-    ),
    path(
        route='download-format',
        view=export_dataset.DownloadDatasetCatalog.as_view(),
@@ -214,12 +203,6 @@ urlpatterns = [
        view=health.Health.as_view(),
        name='health'
    ),
-    path('fp/', include('django_drf_filepond.urls')),
-    path(
-        route='features',
-        view=import_export.Features.as_view(),
-        name='features'
-    ),
    path(
        route='projects',
        view=project.ProjectList.as_view(),
--- a/backend/api/views/import_export.py
+++ b/backend/api/views/import_export.py
@@ -1,64 +0,0 @@
-from django.conf import settings
-from rest_framework.permissions import IsAuthenticated
-from rest_framework.response import Response
-from rest_framework.views import APIView
-
-
-class Features(APIView):
-    permission_classes = (IsAuthenticated,)
-
-    def get(self, request, *args, **kwargs):
-        return Response({
-            'cloud_upload': bool(settings.CLOUD_BROWSER_APACHE_LIBCLOUD_PROVIDER),
-        })
-
-
-# class CloudUploadAPI(APIView):
-#     permission_classes = TextUploadAPI.permission_classes
-#
-#     def get(self, request, *args, **kwargs):
-#         try:
-#             project_id = request.query_params['project_id']
-#             file_format = request.query_params['upload_format']
-#             cloud_container = request.query_params['container']
-#             cloud_object = request.query_params['object']
-#         except KeyError as ex:
-#             raise ValidationError('query parameter {} is missing'.format(ex))
-#
-#         try:
-#             cloud_file = self.get_cloud_object_as_io(cloud_container, cloud_object)
-#         except ContainerDoesNotExistError:
-#             raise ValidationError('cloud container {} does not exist'.format(cloud_container))
-#         except ObjectDoesNotExistError:
-#             raise ValidationError('cloud object {} does not exist'.format(cloud_object))
-#
-#         TextUploadAPI.save_file(
-#             user=request.user,
-#             file=cloud_file,
-#             file_format=file_format,
-#             project_id=project_id,
-#         )
-#
-#         next_url = request.query_params.get('next')
-#
-#         if next_url == 'about:blank':
-#             return Response(data='', content_type='text/plain', status=status.HTTP_201_CREATED)
-#
-#         if next_url:
-#             return redirect(next_url)
-#
-#         return Response(status=status.HTTP_201_CREATED)
-#
-#     @classmethod
-#     def get_cloud_object_as_io(cls, container_name, object_name):
-#         provider = settings.CLOUD_BROWSER_APACHE_LIBCLOUD_PROVIDER.lower()
-#         account = settings.CLOUD_BROWSER_APACHE_LIBCLOUD_ACCOUNT
-#         key = settings.CLOUD_BROWSER_APACHE_LIBCLOUD_SECRET_KEY
-#
-#         driver = get_driver(DriverType.STORAGE, provider)
-#         client = driver(account, key)
-#
-#         cloud_container = client.get_container(container_name)
-#         cloud_object = cloud_container.get_object(object_name)
-#
-#         return iterable_to_io(cloud_object.as_stream())
--- a/backend/app/settings.py
+++ b/backend/app/settings.py
@@ -56,6 +56,7 @@ INSTALLED_APPS = [
    'members.apps.MembersConfig',
    'metrics.apps.MetricsConfig',
    'users.apps.UsersConfig',
+    'data_import.apps.DataImportConfig',
    'rest_framework',
    'rest_framework.authtoken',
    'django_filters',
--- a/backend/app/urls.py
+++ b/backend/app/urls.py
@@ -43,6 +43,7 @@ urlpatterns += [
    path('v1/', include('api.urls')),
    path('v1/', include('roles.urls')),
    path('v1/', include('users.urls')),
+    path('v1/', include('data_import.urls')),
    path('v1/projects/<int:project_id>/', include('members.urls')),
    path('v1/projects/<int:project_id>/metrics/', include('metrics.urls')),
    path('swagger/', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
--- a/backend/api/tests/upload/__init__.py
+++ b/backend/api/tests/upload/__init__.py
--- a/backend/api/views/upload/__init__.py
+++ b/backend/api/views/upload/__init__.py
--- a/backend/data_import/apps.py
+++ b/backend/data_import/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class DataImportConfig(AppConfig):
+    default_auto_field = 'django.db.models.BigAutoField'
+    name = 'data_import'
--- a/backend/data_import/celery_tasks.py
+++ b/backend/data_import/celery_tasks.py
@@ -0,0 +1,23 @@
+from celery import shared_task
+from django.conf import settings
+from django.contrib.auth import get_user_model
+from django.shortcuts import get_object_or_404
+
+from api.models import Project
+from .pipeline.factories import create_parser, create_bulder, create_cleaner
+from .pipeline.readers import Reader
+from .pipeline.writers import BulkWriter
+
+
+@shared_task
+def import_dataset(user_id, project_id, filenames, file_format: str, **kwargs):
+    project = get_object_or_404(Project, pk=project_id)
+    user = get_object_or_404(get_user_model(), pk=user_id)
+
+    parser = create_parser(file_format, **kwargs)
+    builder = create_bulder(project, **kwargs)
+    reader = Reader(filenames=filenames, parser=parser, builder=builder)
+    cleaner = create_cleaner(project)
+    writer = BulkWriter(batch_size=settings.IMPORT_BATCH_SIZE)
+    writer.save(reader, project, user, cleaner)
+    return {'error': writer.errors}
--- a/backend/data_import/migrations/__init__.py
+++ b/backend/data_import/migrations/__init__.py
--- a/backend/data_import/models.py
+++ b/backend/data_import/models.py
--- a/backend/data_import/pipeline/__init__.py
+++ b/backend/data_import/pipeline/__init__.py
--- a/backend/data_import/pipeline/builders.py
+++ b/backend/data_import/pipeline/builders.py
@@ -5,8 +5,8 @@ from typing import Any, Dict, List, Optional, Type, TypeVar
 from pydantic import ValidationError

 from .data import BaseData
-from .exception import FileParseException
-from .label import Label
+from .exceptions import FileParseException
+from .labels import Label
 from .readers import Builder, Record

 logger = getLogger(__name__)
--- a/backend/data_import/pipeline/catalog.py
+++ b/backend/data_import/pipeline/catalog.py
@@ -4,9 +4,9 @@ from typing import Dict, List, Type
 from pydantic import BaseModel
 from typing_extensions import Literal

-from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
-                       INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
-                       SEQUENCE_LABELING, SPEECH2TEXT)
+from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
+                        INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
+                        SEQUENCE_LABELING, SPEECH2TEXT)
 from . import examples

 encodings = Literal[
--- a/backend/data_import/pipeline/cleaners.py
+++ b/backend/data_import/pipeline/cleaners.py
@@ -1,7 +1,7 @@
 from typing import List

-from ...models import Project
-from .label import CategoryLabel, Label, SpanLabel
+from api.models import Project
+from .labels import CategoryLabel, Label, SpanLabel


 class Cleaner:
--- a/backend/data_import/pipeline/data.py
+++ b/backend/data_import/pipeline/data.py
@@ -4,7 +4,7 @@ from typing import Any, Dict

 from pydantic import BaseModel, validator

-from ...models import Example, Project
+from api.models import Example, Project


 class BaseData(BaseModel, abc.ABC):
--- a/backend/data_import/pipeline/examples.py
+++ b/backend/data_import/pipeline/examples.py
--- a/backend/data_import/pipeline/exceptions.py
+++ b/backend/data_import/pipeline/exceptions.py
--- a/backend/data_import/pipeline/factories.py
+++ b/backend/data_import/pipeline/factories.py
@@ -1,7 +1,7 @@
-from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
-                       INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
-                       SEQUENCE_LABELING, SPEECH2TEXT)
-from . import builders, catalog, cleaners, data, label, parsers, readers
+from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
+                        INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
+                        SEQUENCE_LABELING, SPEECH2TEXT)
+from . import builders, catalog, cleaners, data, labels, parsers, readers


 def get_data_class(project_type: str):
@@ -37,11 +37,11 @@ def create_parser(file_format: str, **kwargs):

 def get_label_class(project_type: str):
    mapping = {
-        DOCUMENT_CLASSIFICATION: label.CategoryLabel,
-        SEQUENCE_LABELING: label.SpanLabel,
-        SEQ2SEQ: label.TextLabel,
-        IMAGE_CLASSIFICATION: label.CategoryLabel,
-        SPEECH2TEXT: label.TextLabel,
+        DOCUMENT_CLASSIFICATION: labels.CategoryLabel,
+        SEQUENCE_LABELING: labels.SpanLabel,
+        SEQ2SEQ: labels.TextLabel,
+        IMAGE_CLASSIFICATION: labels.CategoryLabel,
+        SPEECH2TEXT: labels.TextLabel,
    }
    if project_type not in mapping:
        ValueError(f'Invalid project type: {project_type}')
@@ -71,11 +71,11 @@ def create_bulder(project, **kwargs):
        label_columns = [
            builders.LabelColumn(
                name='cats',
-                value_class=label.CategoryLabel
+                value_class=labels.CategoryLabel
            ),
            builders.LabelColumn(
                name='entities',
-                value_class=label.SpanLabel
+                value_class=labels.SpanLabel
            )
        ]
    else:
--- a/backend/data_import/pipeline/labels.py
+++ b/backend/data_import/pipeline/labels.py
@@ -3,10 +3,10 @@ from typing import Any, Dict, Optional, Union

 from pydantic import BaseModel, validator

-from ...models import Category, CategoryType
-from ...models import Label as LabelModel
-from ...models import Project, Span, SpanType
-from ...models import TextLabel as TL
+from api.models import Category, CategoryType
+from api.models import Label as LabelModel
+from api.models import Project, Span, SpanType
+from api.models import TextLabel as TL


 class Label(BaseModel, abc.ABC):
--- a/backend/data_import/pipeline/parsers.py
+++ b/backend/data_import/pipeline/parsers.py
@@ -10,7 +10,7 @@ import pyexcel.exceptions
 from chardet import UniversalDetector
 from seqeval.scheme import BILOU, IOB2, IOBES, IOE2, Tokens

-from .exception import FileParseException
+from .exceptions import FileParseException
 from .readers import DEFAULT_LABEL_COLUMN, DEFAULT_TEXT_COLUMN, Parser

 DEFAULT_ENCODING = 'Auto'
--- a/backend/data_import/pipeline/readers.py
+++ b/backend/data_import/pipeline/readers.py
@@ -4,8 +4,8 @@ from typing import Any, Dict, Iterator, List, Type

 from .cleaners import Cleaner
 from .data import BaseData
-from .exception import FileParseException
-from .label import Label
+from .exceptions import FileParseException
+from .labels import Label

 DEFAULT_TEXT_COLUMN = 'text'
 DEFAULT_LABEL_COLUMN = 'label'
--- a/backend/data_import/pipeline/writers.py
+++ b/backend/data_import/pipeline/writers.py
@@ -5,8 +5,8 @@ from typing import Any, Dict, List

 from django.conf import settings

-from ...models import CategoryType, Example, Project, SpanType
-from .exception import FileParseException
+from api.models import CategoryType, Example, Project, SpanType
+from .exceptions import FileParseException
 from .readers import BaseReader


--- a/backend/data_import/tests/__init__.py
+++ b/backend/data_import/tests/__init__.py
--- a/backend/data_import/tests/data/example.txt
+++ b/backend/data_import/tests/data/example.txt
--- a/backend/data_import/tests/data/example.utf16.csv
+++ b/backend/data_import/tests/data/example.utf16.csv
--- a/backend/data_import/tests/data/intent/example.jsonl
+++ b/backend/data_import/tests/data/intent/example.jsonl
--- a/backend/data_import/tests/data/label/invalid_labels.json
+++ b/backend/data_import/tests/data/label/invalid_labels.json
--- a/backend/data_import/tests/data/label/valid_labels.json
+++ b/backend/data_import/tests/data/label/valid_labels.json
--- a/backend/data_import/tests/data/seq2seq/example.csv
+++ b/backend/data_import/tests/data/seq2seq/example.csv
--- a/backend/data_import/tests/data/seq2seq/example.json
+++ b/backend/data_import/tests/data/seq2seq/example.json
--- a/backend/data_import/tests/data/seq2seq/example.jsonl
+++ b/backend/data_import/tests/data/seq2seq/example.jsonl
--- a/backend/data_import/tests/data/sequence_labeling/example.conll
+++ b/backend/data_import/tests/data/sequence_labeling/example.conll
--- a/backend/data_import/tests/data/sequence_labeling/example.jsonl
+++ b/backend/data_import/tests/data/sequence_labeling/example.jsonl
--- a/backend/data_import/tests/data/sequence_labeling/example_overlapping.jsonl
+++ b/backend/data_import/tests/data/sequence_labeling/example_overlapping.jsonl
--- a/backend/data_import/tests/data/sequence_labeling/labeling.invalid.conll
+++ b/backend/data_import/tests/data/sequence_labeling/labeling.invalid.conll
--- a/backend/data_import/tests/data/sequence_labeling/labeling.trailing.conll
+++ b/backend/data_import/tests/data/sequence_labeling/labeling.trailing.conll
--- a/backend/data_import/tests/data/text_classification/example.csv
+++ b/backend/data_import/tests/data/text_classification/example.csv
--- a/backend/data_import/tests/data/text_classification/example.invalid.2.csv
+++ b/backend/data_import/tests/data/text_classification/example.invalid.2.csv
--- a/backend/data_import/tests/data/text_classification/example.invalid.2.xlsx
+++ b/backend/data_import/tests/data/text_classification/example.invalid.2.xlsx
--- a/backend/data_import/tests/data/text_classification/example.json
+++ b/backend/data_import/tests/data/text_classification/example.json
--- a/backend/data_import/tests/data/text_classification/example.jsonl
+++ b/backend/data_import/tests/data/text_classification/example.jsonl
--- a/backend/data_import/tests/data/text_classification/example.xlsx
+++ b/backend/data_import/tests/data/text_classification/example.xlsx
--- a/backend/data_import/tests/data/text_classification/example_fasttext.txt
+++ b/backend/data_import/tests/data/text_classification/example_fasttext.txt
--- a/backend/data_import/tests/data/text_classification/example_one_column_no_header.xlsx
+++ b/backend/data_import/tests/data/text_classification/example_one_column_no_header.xlsx
--- a/backend/data_import/tests/data/text_classification/example_out_of_order_columns.csv
+++ b/backend/data_import/tests/data/text_classification/example_out_of_order_columns.csv
--- a/backend/data_import/tests/test_builder.py
+++ b/backend/data_import/tests/test_builder.py
@@ -1,10 +1,10 @@
 import unittest
 from typing import List

-from ...views.upload import builders
-from ...views.upload.data import TextData
-from ...views.upload.exception import FileParseException
-from ...views.upload.label import CategoryLabel, SpanLabel
+from data_import.pipeline import builders
+from data_import.pipeline.data import TextData
+from data_import.pipeline.exceptions import FileParseException
+from data_import.pipeline.labels import CategoryLabel, SpanLabel


 class TestColumnBuilder(unittest.TestCase):
--- a/backend/data_import/tests/test_parser.py
+++ b/backend/data_import/tests/test_parser.py
@@ -4,7 +4,7 @@ import shutil
 import tempfile
 import unittest

-from ...views.upload import parsers
+from data_import.pipeline import parsers


 class TestParser(unittest.TestCase):
--- a/backend/data_import/tests/test_tasks.py
+++ b/backend/data_import/tests/test_tasks.py
@@ -2,15 +2,15 @@ import pathlib

 from django.test import TestCase

-from ..celery_tasks import ingest_data
-from ..models import (DOCUMENT_CLASSIFICATION,
-                      INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
-                      SEQUENCE_LABELING, Category, CategoryType, Example, Span,
-                      SpanType)
-from .api.utils import prepare_project
+from data_import.celery_tasks import import_dataset
+from api.models import (DOCUMENT_CLASSIFICATION,
+                        INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
+                        SEQUENCE_LABELING, Category, CategoryType, Example, Span,
+                        SpanType)
+from api.tests.api.utils import prepare_project


-class TestIngestData(TestCase):
+class TestImportData(TestCase):
    task = 'Any'
    annotation_class = Category

@@ -19,13 +19,13 @@ class TestIngestData(TestCase):
        self.user = self.project.users[0]
        self.data_path = pathlib.Path(__file__).parent / 'data'

-    def ingest_data(self, filename, file_format, kwargs=None):
+    def import_dataset(self, filename, file_format, kwargs=None):
        filenames = [str(self.data_path / filename)]
        kwargs = kwargs or {}
-        return ingest_data(self.user.id, self.project.item.id, filenames, file_format, **kwargs)
+        return import_dataset(self.user.id, self.project.item.id, filenames, file_format, **kwargs)


-class TestIngestClassificationData(TestIngestData):
+class TestImportClassificationData(TestImportData):
    task = DOCUMENT_CLASSIFICATION

    def assert_examples(self, dataset):
@@ -50,7 +50,7 @@ class TestIngestClassificationData(TestIngestData):
            ('exampleB', ['positive', 'negative']),
            ('exampleC', [])
        ]
-        self.ingest_data(filename, file_format, kwargs)
+        self.import_dataset(filename, file_format, kwargs)
        self.assert_examples(dataset)

    def test_csv(self):
@@ -60,7 +60,7 @@ class TestIngestClassificationData(TestIngestData):
            ('exampleA', ['positive']),
            ('exampleB', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_csv_out_of_order_columns(self):
@@ -70,7 +70,7 @@ class TestIngestClassificationData(TestIngestData):
            ('exampleA', ['positive']),
            ('exampleB', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_fasttext(self):
@@ -81,7 +81,7 @@ class TestIngestClassificationData(TestIngestData):
            ('exampleB', ['positive', 'negative']),
            ('exampleC', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_excel(self):
@@ -91,7 +91,7 @@ class TestIngestClassificationData(TestIngestData):
            ('exampleA', ['positive']),
            ('exampleB', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_json(self):
@@ -102,7 +102,7 @@ class TestIngestClassificationData(TestIngestData):
            ('exampleB', ['positive', 'negative']),
            ('exampleC', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_textfile(self):
@@ -111,7 +111,7 @@ class TestIngestClassificationData(TestIngestData):
        dataset = [
            ('exampleA\nexampleB\n\nexampleC\n', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_textline(self):
@@ -122,35 +122,35 @@ class TestIngestClassificationData(TestIngestData):
            ('exampleB', []),
            ('exampleC', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_wrong_jsonl(self):
        filename = 'text_classification/example.json'
        file_format = 'JSONL'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
        self.assert_parse_error(response)

    def test_wrong_json(self):
        filename = 'text_classification/example.jsonl'
        file_format = 'JSON'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
        self.assert_parse_error(response)

    def test_wrong_excel(self):
        filename = 'text_classification/example.jsonl'
        file_format = 'Excel'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
        self.assert_parse_error(response)

    def test_wrong_csv(self):
        filename = 'text_classification/example.jsonl'
        file_format = 'CSV'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
        self.assert_parse_error(response)


-class TestIngestSequenceLabelingData(TestIngestData):
+class TestImportSequenceLabelingData(TestImportData):
    task = SEQUENCE_LABELING

    def assert_examples(self, dataset):
@@ -173,7 +173,7 @@ class TestIngestSequenceLabelingData(TestIngestData):
            ('exampleA', [[0, 1, 'LOC']]),
            ('exampleB', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_conll(self):
@@ -183,23 +183,23 @@ class TestIngestSequenceLabelingData(TestIngestData):
            ('JAPAN GET', [[0, 5, 'LOC']]),
            ('Nadim Ladki', [[0, 11, 'PER']])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_wrong_conll(self):
        filename = 'sequence_labeling/example.jsonl'
        file_format = 'CoNLL'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
        self.assert_parse_error(response)

    def test_jsonl_with_overlapping(self):
        filename = 'sequence_labeling/example_overlapping.jsonl'
        file_format = 'JSONL'
-        response = self.ingest_data(filename, file_format)
+        response = self.import_dataset(filename, file_format)
        self.assertEqual(len(response['error']), 1)


-class TestIngestSeq2seqData(TestIngestData):
+class TestImportSeq2seqData(TestImportData):
    task = SEQ2SEQ

    def assert_examples(self, dataset):
@@ -216,7 +216,7 @@ class TestIngestSeq2seqData(TestIngestData):
            ('exampleA', ['label1']),
            ('exampleB', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_json(self):
@@ -226,7 +226,7 @@ class TestIngestSeq2seqData(TestIngestData):
            ('exampleA', ['label1']),
            ('exampleB', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)

    def test_csv(self):
@@ -236,11 +236,11 @@ class TestIngestSeq2seqData(TestIngestData):
            ('exampleA', ['label1']),
            ('exampleB', [])
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)


-class TextIngestIntentDetectionAndSlotFillingData(TestIngestData):
+class TextImportIntentDetectionAndSlotFillingData(TestImportData):
    task = INTENT_DETECTION_AND_SLOT_FILLING

    def assert_examples(self, dataset):
@@ -261,5 +261,5 @@ class TextIngestIntentDetectionAndSlotFillingData(TestIngestData):
            ('exampleC', {'cats': [], 'entities': [(0, 1, 'LOC')]}),
            ('exampleD', {'cats': [], 'entities': []}),
        ]
-        self.ingest_data(filename, file_format)
+        self.import_dataset(filename, file_format)
        self.assert_examples(dataset)
--- a/backend/data_import/tests/test_views.py
+++ b/backend/data_import/tests/test_views.py
@@ -1,23 +1,8 @@
-from django.test import override_settings
 from rest_framework import status
 from rest_framework.reverse import reverse

-from ...models import DOCUMENT_CLASSIFICATION
-from .utils import CRUDMixin, create_default_roles, make_user, prepare_project
-
-
-class TestFeatures(CRUDMixin):
-
-    @classmethod
-    def setUpTestData(cls):
-        create_default_roles()
-        cls.user = make_user()
-        cls.url = reverse('features')
-
-    @override_settings(CLOUD_BROWSER_APACHE_LIBCLOUD_PROVIDER=None)
-    def test_no_cloud_upload(self):
-        response = self.assert_fetch(self.user, status.HTTP_200_OK)
-        self.assertFalse(response.json().get('cloud_upload'))
+from api.models import DOCUMENT_CLASSIFICATION
+from api.tests.api.utils import CRUDMixin, prepare_project


 class TestImportCatalog(CRUDMixin):
--- a/backend/data_import/urls.py
+++ b/backend/data_import/urls.py
@@ -0,0 +1,17 @@
+from django.urls import include, path
+
+from .views import DatasetImportAPI, DatasetCatalog
+
+urlpatterns = [
+    path('fp/', include('django_drf_filepond.urls')),
+    path(
+        route='projects/<int:project_id>/upload',
+        view=DatasetImportAPI.as_view(),
+        name='upload'
+    ),
+    path(
+        route='projects/<int:project_id>/catalog',
+        view=DatasetCatalog.as_view(),
+        name='catalog'
+    ),
+]
--- a/backend/api/views/import_dataset.py
+++ b/backend/api/views/import_dataset.py
@@ -8,11 +8,11 @@ from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 from rest_framework.views import APIView

-from members.permissions import IsProjectAdmin

-from ..celery_tasks import ingest_data
-from ..models import Project
-from .upload.catalog import Options
+from api.models import Project
+from members.permissions import IsProjectAdmin
+from .celery_tasks import import_dataset
+from .pipeline.catalog import Options


 class DatasetCatalog(APIView):
@@ -25,13 +25,13 @@ class DatasetCatalog(APIView):
        return Response(data=options, status=status.HTTP_200_OK)


-class UploadAPI(APIView):
+class DatasetImportAPI(APIView):
    permission_classes = [IsAuthenticated & IsProjectAdmin]

    def post(self, request, *args, **kwargs):
        project_id = self.kwargs['project_id']
        upload_ids = request.data.pop('uploadIds')
-        format = request.data.pop('format')
+        file_format = request.data.pop('format')

        tus = [TemporaryUpload.objects.get(upload_id=upload_id) for upload_id in upload_ids]
        sus = [
@@ -42,11 +42,11 @@ class UploadAPI(APIView):
            for tu in tus
        ]
        filenames = [su.file.path for su in sus]
-        task = ingest_data.delay(
+        task = import_dataset.delay(
            user_id=request.user.id,
            project_id=project_id,
            filenames=filenames,
-            format=format,
+            file_format=file_format,
            **request.data
        )
        return Response({'task_id': task.task_id})
--- a/frontend/i18n/en/projects/dataset.js
+++ b/frontend/i18n/en/projects/dataset.js
@@ -7,11 +7,11 @@ export default {
  metadata: 'Metadata',
  action: 'Action',
  annotate: 'Annotate',
-  importDataTitle: 'Upload Data',
+  importDataTitle: 'Import Dataset',
  importDataMessage1: 'Select a file format',
  importDataMessage2: 'Select file(s)',
  importDataPlaceholder: 'File input',
-  exportDataTitle: 'Export Data',
+  exportDataTitle: 'Export Dataset',
  exportDataMessage: 'Select a file format',
  exportDataMessage2: 'Select a file name',
  deleteDocumentsTitle: 'Delete Document',
--- a/frontend/pages/projects/_id/upload/index.vue
+++ b/frontend/pages/projects/_id/upload/index.vue
@@ -4,7 +4,7 @@
      {{ $t('dataset.importDataTitle') }}
    </v-card-title>
    <v-card-text>
-      <v-overlay :value="taskId">
+      <v-overlay :value="isImporting">
        <v-progress-circular
          indeterminate
          size="64"
@@ -85,9 +85,9 @@
      <v-btn
        class='text-capitalize me-2 primary'
        :disabled="isDisabled"
-        @click="injest"
+        @click="importDataset"
      >
-        Ingest
+        Import
      </v-btn>
    </v-card-actions>
  </v-card>
@@ -143,6 +143,7 @@ export default {
      },
      uploadedFiles: [],
      valid: false,
+      isImporting: false,
    }
  },

@@ -232,7 +233,8 @@ export default {
          this.$nextTick()
      }
    },
-    async injest() {
+    async importDataset() {
+      this.isImporting = true
      this.taskId = await this.$services.parse.analyze(
        this.$route.params.id,
        this.selected,
@@ -249,6 +251,7 @@ export default {
            this.errors = res.result.error
            this.myFiles = []
            this.uploadedFiles = []
+            this.isImporting = false
          }
        }
  		}, 3000)