From fd7bc43947b6763974d17029c9a6113eabcd9f3f Mon Sep 17 00:00:00 2001 From: Hironsan Date: Tue, 18 Jan 2022 10:00:28 +0900 Subject: [PATCH] Move data import's code and data to data_import app --- Pipfile | 2 +- backend/api/celery_tasks.py | 19 --------------- backend/api/tests/api/utils.py | 2 +- backend/api/urls.py | 15 ++---------- backend/app/settings.py | 1 + backend/app/urls.py | 1 + .../tests/upload => data_import}/__init__.py | 0 .../__init__.py => data_import/admin.py} | 0 backend/data_import/apps.py | 6 +++++ backend/data_import/celery_tasks.py | 23 ++++++++++++++++++ backend/data_import/migrations/__init__.py | 0 backend/data_import/models.py | 0 backend/data_import/pipeline/__init__.py | 0 .../pipeline}/builders.py | 4 +-- .../pipeline}/catalog.py | 6 ++--- .../pipeline}/cleaners.py | 4 +-- .../upload => data_import/pipeline}/data.py | 2 +- .../pipeline}/examples.py | 0 .../pipeline/exceptions.py} | 0 .../pipeline}/factories.py | 22 ++++++++--------- .../pipeline/labels.py} | 8 +++--- .../pipeline}/parsers.py | 2 +- .../pipeline}/readers.py | 4 +-- .../pipeline}/writers.py | 4 +-- backend/data_import/tests/__init__.py | 0 .../tests/data/example.txt | 0 .../tests/data/example.utf16.csv | Bin .../tests/data/intent/example.jsonl | 0 .../tests/data/label/invalid_labels.json | 0 .../tests/data/label/valid_labels.json | 0 .../tests/data/seq2seq/example.csv | 0 .../tests/data/seq2seq/example.json | 0 .../tests/data/seq2seq/example.jsonl | 0 .../data/sequence_labeling/example.conll | 0 .../data/sequence_labeling/example.jsonl | 0 .../example_overlapping.jsonl | 0 .../sequence_labeling/labeling.invalid.conll | 0 .../sequence_labeling/labeling.trailing.conll | 0 .../data/text_classification/example.csv | 0 .../text_classification/example.invalid.2.csv | 0 .../example.invalid.2.xlsx | Bin .../data/text_classification/example.json | 0 .../data/text_classification/example.jsonl | 0 .../data/text_classification/example.xlsx | Bin .../text_classification/example_fasttext.txt | 0 .../example_one_column_no_header.xlsx | Bin .../example_out_of_order_columns.csv | 0 .../tests}/test_builder.py | 8 +++--- .../tests}/test_parser.py | 2 +- .../{api => data_import}/tests/test_tasks.py | 12 ++++----- .../tests/test_views.py} | 4 +-- backend/data_import/urls.py | 17 +++++++++++++ .../views.py} | 8 +++--- 53 files changed, 97 insertions(+), 79 deletions(-) rename backend/{api/tests/upload => data_import}/__init__.py (100%) rename backend/{api/views/upload/__init__.py => data_import/admin.py} (100%) create mode 100644 backend/data_import/apps.py create mode 100644 backend/data_import/celery_tasks.py create mode 100644 backend/data_import/migrations/__init__.py create mode 100644 backend/data_import/models.py create mode 100644 backend/data_import/pipeline/__init__.py rename backend/{api/views/upload => data_import/pipeline}/builders.py (97%) rename backend/{api/views/upload => data_import/pipeline}/catalog.py (96%) rename backend/{api/views/upload => data_import/pipeline}/cleaners.py (94%) rename backend/{api/views/upload => data_import/pipeline}/data.py (96%) rename backend/{api/views/upload => data_import/pipeline}/examples.py (100%) rename backend/{api/views/upload/exception.py => data_import/pipeline/exceptions.py} (100%) rename backend/{api/views/upload => data_import/pipeline}/factories.py (81%) rename backend/{api/views/upload/label.py => data_import/pipeline/labels.py} (94%) rename backend/{api/views/upload => data_import/pipeline}/parsers.py (99%) rename backend/{api/views/upload => data_import/pipeline}/readers.py (98%) rename backend/{api/views/upload => data_import/pipeline}/writers.py (97%) create mode 100644 backend/data_import/tests/__init__.py rename backend/{api => data_import}/tests/data/example.txt (100%) rename backend/{api => data_import}/tests/data/example.utf16.csv (100%) rename backend/{api => data_import}/tests/data/intent/example.jsonl (100%) rename backend/{api => data_import}/tests/data/label/invalid_labels.json (100%) rename backend/{api => data_import}/tests/data/label/valid_labels.json (100%) rename backend/{api => data_import}/tests/data/seq2seq/example.csv (100%) rename backend/{api => data_import}/tests/data/seq2seq/example.json (100%) rename backend/{api => data_import}/tests/data/seq2seq/example.jsonl (100%) rename backend/{api => data_import}/tests/data/sequence_labeling/example.conll (100%) rename backend/{api => data_import}/tests/data/sequence_labeling/example.jsonl (100%) rename backend/{api => data_import}/tests/data/sequence_labeling/example_overlapping.jsonl (100%) rename backend/{api => data_import}/tests/data/sequence_labeling/labeling.invalid.conll (100%) rename backend/{api => data_import}/tests/data/sequence_labeling/labeling.trailing.conll (100%) rename backend/{api => data_import}/tests/data/text_classification/example.csv (100%) rename backend/{api => data_import}/tests/data/text_classification/example.invalid.2.csv (100%) rename backend/{api => data_import}/tests/data/text_classification/example.invalid.2.xlsx (100%) rename backend/{api => data_import}/tests/data/text_classification/example.json (100%) rename backend/{api => data_import}/tests/data/text_classification/example.jsonl (100%) rename backend/{api => data_import}/tests/data/text_classification/example.xlsx (100%) rename backend/{api => data_import}/tests/data/text_classification/example_fasttext.txt (100%) rename backend/{api => data_import}/tests/data/text_classification/example_one_column_no_header.xlsx (100%) rename backend/{api => data_import}/tests/data/text_classification/example_out_of_order_columns.csv (100%) rename backend/{api/tests/upload => data_import/tests}/test_builder.py (94%) rename backend/{api/tests/upload => data_import/tests}/test_parser.py (98%) rename backend/{api => data_import}/tests/test_tasks.py (96%) rename backend/{api/tests/api/test_upload.py => data_import/tests/test_views.py} (89%) create mode 100644 backend/data_import/urls.py rename backend/{api/views/import_dataset.py => data_import/views.py} (93%) diff --git a/Pipfile b/Pipfile index e69f61d3..6bbecc10 100644 --- a/Pipfile +++ b/Pipfile @@ -60,6 +60,6 @@ python_version = "3.8" isort = "isort api -c --skip migrations" flake8 = "flake8 --filename \"*.py\" --extend-exclude \"server,api/migrations,api/views/__init__.py,authentification,api/apps.py\"" wait_for_db = "python manage.py wait_for_db" -test = "python manage.py test api.tests roles.tests members.tests metrics.tests users.tests" +test = "python manage.py test api.tests roles.tests members.tests metrics.tests users.tests data_import.tests" migrate = "python manage.py migrate" collectstatic = "python manage.py collectstatic --noinput" diff --git a/backend/api/celery_tasks.py b/backend/api/celery_tasks.py index 9be04894..ce6bb997 100644 --- a/backend/api/celery_tasks.py +++ b/backend/api/celery_tasks.py @@ -1,34 +1,15 @@ from celery import shared_task from celery.utils.log import get_task_logger from django.conf import settings -from django.contrib.auth import get_user_model from django.shortcuts import get_object_or_404 from .models import Project from .views.download.factory import create_repository, create_writer from .views.download.service import ExportApplicationService -from .views.upload.factories import (create_bulder, create_cleaner, - create_parser) -from .views.upload.readers import Reader -from .views.upload.writers import BulkWriter logger = get_task_logger(__name__) -@shared_task -def ingest_data(user_id, project_id, filenames, format: str, **kwargs): - project = get_object_or_404(Project, pk=project_id) - user = get_object_or_404(get_user_model(), pk=user_id) - - parser = create_parser(format, **kwargs) - builder = create_bulder(project, **kwargs) - reader = Reader(filenames=filenames, parser=parser, builder=builder) - cleaner = create_cleaner(project) - writer = BulkWriter(batch_size=settings.IMPORT_BATCH_SIZE) - writer.save(reader, project, user, cleaner) - return {'error': writer.errors} - - @shared_task def export_dataset(project_id, format: str, export_approved=False): project = get_object_or_404(Project, pk=project_id) diff --git a/backend/api/tests/api/utils.py b/backend/api/tests/api/utils.py index 4ecf5bf3..01138075 100644 --- a/backend/api/tests/api/utils.py +++ b/backend/api/tests/api/utils.py @@ -15,7 +15,7 @@ from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, SEQUENCE_LABELING, SPEECH2TEXT) -DATA_DIR = os.path.join(os.path.dirname(__file__), '../data') +DATA_DIR = os.path.join(os.path.dirname(__file__), '../../../data_import/tests/data') ProjectData = namedtuple('ProjectData', ['item', 'users']) diff --git a/backend/api/urls.py b/backend/api/urls.py index 3e5b4799..2520249f 100644 --- a/backend/api/urls.py +++ b/backend/api/urls.py @@ -1,21 +1,11 @@ from django.urls import include, path from .views import (annotation, auto_labeling, comment, example, example_state, - export_dataset, health, import_dataset, import_export, - label, project, tag, task) + export_dataset, health, import_export, label, project, tag, + task) from .views.tasks import category, relation, span, text urlpatterns_project = [ - path( - route='upload', - view=import_dataset.UploadAPI.as_view(), - name='upload' - ), - path( - route='catalog', - view=import_dataset.DatasetCatalog.as_view(), - name='catalog' - ), path( route='download-format', view=export_dataset.DownloadDatasetCatalog.as_view(), @@ -214,7 +204,6 @@ urlpatterns = [ view=health.Health.as_view(), name='health' ), - path('fp/', include('django_drf_filepond.urls')), path( route='features', view=import_export.Features.as_view(), diff --git a/backend/app/settings.py b/backend/app/settings.py index 3856c681..ba9a643e 100644 --- a/backend/app/settings.py +++ b/backend/app/settings.py @@ -56,6 +56,7 @@ INSTALLED_APPS = [ 'members.apps.MembersConfig', 'metrics.apps.MetricsConfig', 'users.apps.UsersConfig', + 'data_import.apps.DataImportConfig', 'rest_framework', 'rest_framework.authtoken', 'django_filters', diff --git a/backend/app/urls.py b/backend/app/urls.py index 6a9a2961..8195845e 100644 --- a/backend/app/urls.py +++ b/backend/app/urls.py @@ -43,6 +43,7 @@ urlpatterns += [ path('v1/', include('api.urls')), path('v1/', include('roles.urls')), path('v1/', include('users.urls')), + path('v1/', include('data_import.urls')), path('v1/projects//', include('members.urls')), path('v1/projects//metrics/', include('metrics.urls')), path('swagger/', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'), diff --git a/backend/api/tests/upload/__init__.py b/backend/data_import/__init__.py similarity index 100% rename from backend/api/tests/upload/__init__.py rename to backend/data_import/__init__.py diff --git a/backend/api/views/upload/__init__.py b/backend/data_import/admin.py similarity index 100% rename from backend/api/views/upload/__init__.py rename to backend/data_import/admin.py diff --git a/backend/data_import/apps.py b/backend/data_import/apps.py new file mode 100644 index 00000000..23260a7e --- /dev/null +++ b/backend/data_import/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class DataImportConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'data_import' diff --git a/backend/data_import/celery_tasks.py b/backend/data_import/celery_tasks.py new file mode 100644 index 00000000..a2d9cfc0 --- /dev/null +++ b/backend/data_import/celery_tasks.py @@ -0,0 +1,23 @@ +from celery import shared_task +from django.conf import settings +from django.contrib.auth import get_user_model +from django.shortcuts import get_object_or_404 + +from api.models import Project +from .pipeline.factories import create_parser, create_bulder, create_cleaner +from .pipeline.readers import Reader +from .pipeline.writers import BulkWriter + + +@shared_task +def ingest_data(user_id, project_id, filenames, format: str, **kwargs): + project = get_object_or_404(Project, pk=project_id) + user = get_object_or_404(get_user_model(), pk=user_id) + + parser = create_parser(format, **kwargs) + builder = create_bulder(project, **kwargs) + reader = Reader(filenames=filenames, parser=parser, builder=builder) + cleaner = create_cleaner(project) + writer = BulkWriter(batch_size=settings.IMPORT_BATCH_SIZE) + writer.save(reader, project, user, cleaner) + return {'error': writer.errors} diff --git a/backend/data_import/migrations/__init__.py b/backend/data_import/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/data_import/models.py b/backend/data_import/models.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/data_import/pipeline/__init__.py b/backend/data_import/pipeline/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/api/views/upload/builders.py b/backend/data_import/pipeline/builders.py similarity index 97% rename from backend/api/views/upload/builders.py rename to backend/data_import/pipeline/builders.py index c8829b15..dad1de98 100644 --- a/backend/api/views/upload/builders.py +++ b/backend/data_import/pipeline/builders.py @@ -5,8 +5,8 @@ from typing import Any, Dict, List, Optional, Type, TypeVar from pydantic import ValidationError from .data import BaseData -from .exception import FileParseException -from .label import Label +from .exceptions import FileParseException +from .labels import Label from .readers import Builder, Record logger = getLogger(__name__) diff --git a/backend/api/views/upload/catalog.py b/backend/data_import/pipeline/catalog.py similarity index 96% rename from backend/api/views/upload/catalog.py rename to backend/data_import/pipeline/catalog.py index 99f23b22..acc69c59 100644 --- a/backend/api/views/upload/catalog.py +++ b/backend/data_import/pipeline/catalog.py @@ -4,9 +4,9 @@ from typing import Dict, List, Type from pydantic import BaseModel from typing_extensions import Literal -from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, - INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, - SEQUENCE_LABELING, SPEECH2TEXT) +from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, + INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, + SEQUENCE_LABELING, SPEECH2TEXT) from . import examples encodings = Literal[ diff --git a/backend/api/views/upload/cleaners.py b/backend/data_import/pipeline/cleaners.py similarity index 94% rename from backend/api/views/upload/cleaners.py rename to backend/data_import/pipeline/cleaners.py index 093a5e8a..73d6a6bb 100644 --- a/backend/api/views/upload/cleaners.py +++ b/backend/data_import/pipeline/cleaners.py @@ -1,7 +1,7 @@ from typing import List -from ...models import Project -from .label import CategoryLabel, Label, SpanLabel +from api.models import Project +from .labels import CategoryLabel, Label, SpanLabel class Cleaner: diff --git a/backend/api/views/upload/data.py b/backend/data_import/pipeline/data.py similarity index 96% rename from backend/api/views/upload/data.py rename to backend/data_import/pipeline/data.py index 5a069950..98a36ac7 100644 --- a/backend/api/views/upload/data.py +++ b/backend/data_import/pipeline/data.py @@ -4,7 +4,7 @@ from typing import Any, Dict from pydantic import BaseModel, validator -from ...models import Example, Project +from api.models import Example, Project class BaseData(BaseModel, abc.ABC): diff --git a/backend/api/views/upload/examples.py b/backend/data_import/pipeline/examples.py similarity index 100% rename from backend/api/views/upload/examples.py rename to backend/data_import/pipeline/examples.py diff --git a/backend/api/views/upload/exception.py b/backend/data_import/pipeline/exceptions.py similarity index 100% rename from backend/api/views/upload/exception.py rename to backend/data_import/pipeline/exceptions.py diff --git a/backend/api/views/upload/factories.py b/backend/data_import/pipeline/factories.py similarity index 81% rename from backend/api/views/upload/factories.py rename to backend/data_import/pipeline/factories.py index a022f624..ab61eae2 100644 --- a/backend/api/views/upload/factories.py +++ b/backend/data_import/pipeline/factories.py @@ -1,7 +1,7 @@ -from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, - INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, - SEQUENCE_LABELING, SPEECH2TEXT) -from . import builders, catalog, cleaners, data, label, parsers, readers +from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, + INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, + SEQUENCE_LABELING, SPEECH2TEXT) +from . import builders, catalog, cleaners, data, labels, parsers, readers def get_data_class(project_type: str): @@ -37,11 +37,11 @@ def create_parser(file_format: str, **kwargs): def get_label_class(project_type: str): mapping = { - DOCUMENT_CLASSIFICATION: label.CategoryLabel, - SEQUENCE_LABELING: label.SpanLabel, - SEQ2SEQ: label.TextLabel, - IMAGE_CLASSIFICATION: label.CategoryLabel, - SPEECH2TEXT: label.TextLabel, + DOCUMENT_CLASSIFICATION: labels.CategoryLabel, + SEQUENCE_LABELING: labels.SpanLabel, + SEQ2SEQ: labels.TextLabel, + IMAGE_CLASSIFICATION: labels.CategoryLabel, + SPEECH2TEXT: labels.TextLabel, } if project_type not in mapping: ValueError(f'Invalid project type: {project_type}') @@ -71,11 +71,11 @@ def create_bulder(project, **kwargs): label_columns = [ builders.LabelColumn( name='cats', - value_class=label.CategoryLabel + value_class=labels.CategoryLabel ), builders.LabelColumn( name='entities', - value_class=label.SpanLabel + value_class=labels.SpanLabel ) ] else: diff --git a/backend/api/views/upload/label.py b/backend/data_import/pipeline/labels.py similarity index 94% rename from backend/api/views/upload/label.py rename to backend/data_import/pipeline/labels.py index f6541920..667f537e 100644 --- a/backend/api/views/upload/label.py +++ b/backend/data_import/pipeline/labels.py @@ -3,10 +3,10 @@ from typing import Any, Dict, Optional, Union from pydantic import BaseModel, validator -from ...models import Category, CategoryType -from ...models import Label as LabelModel -from ...models import Project, Span, SpanType -from ...models import TextLabel as TL +from api.models import Category, CategoryType +from api.models import Label as LabelModel +from api.models import Project, Span, SpanType +from api.models import TextLabel as TL class Label(BaseModel, abc.ABC): diff --git a/backend/api/views/upload/parsers.py b/backend/data_import/pipeline/parsers.py similarity index 99% rename from backend/api/views/upload/parsers.py rename to backend/data_import/pipeline/parsers.py index d1cd84b8..f2d43bf4 100644 --- a/backend/api/views/upload/parsers.py +++ b/backend/data_import/pipeline/parsers.py @@ -10,7 +10,7 @@ import pyexcel.exceptions from chardet import UniversalDetector from seqeval.scheme import BILOU, IOB2, IOBES, IOE2, Tokens -from .exception import FileParseException +from .exceptions import FileParseException from .readers import DEFAULT_LABEL_COLUMN, DEFAULT_TEXT_COLUMN, Parser DEFAULT_ENCODING = 'Auto' diff --git a/backend/api/views/upload/readers.py b/backend/data_import/pipeline/readers.py similarity index 98% rename from backend/api/views/upload/readers.py rename to backend/data_import/pipeline/readers.py index bd87ee15..a1338442 100644 --- a/backend/api/views/upload/readers.py +++ b/backend/data_import/pipeline/readers.py @@ -4,8 +4,8 @@ from typing import Any, Dict, Iterator, List, Type from .cleaners import Cleaner from .data import BaseData -from .exception import FileParseException -from .label import Label +from .exceptions import FileParseException +from .labels import Label DEFAULT_TEXT_COLUMN = 'text' DEFAULT_LABEL_COLUMN = 'label' diff --git a/backend/api/views/upload/writers.py b/backend/data_import/pipeline/writers.py similarity index 97% rename from backend/api/views/upload/writers.py rename to backend/data_import/pipeline/writers.py index e3157bea..a79bd442 100644 --- a/backend/api/views/upload/writers.py +++ b/backend/data_import/pipeline/writers.py @@ -5,8 +5,8 @@ from typing import Any, Dict, List from django.conf import settings -from ...models import CategoryType, Example, Project, SpanType -from .exception import FileParseException +from api.models import CategoryType, Example, Project, SpanType +from .exceptions import FileParseException from .readers import BaseReader diff --git a/backend/data_import/tests/__init__.py b/backend/data_import/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/api/tests/data/example.txt b/backend/data_import/tests/data/example.txt similarity index 100% rename from backend/api/tests/data/example.txt rename to backend/data_import/tests/data/example.txt diff --git a/backend/api/tests/data/example.utf16.csv b/backend/data_import/tests/data/example.utf16.csv similarity index 100% rename from backend/api/tests/data/example.utf16.csv rename to backend/data_import/tests/data/example.utf16.csv diff --git a/backend/api/tests/data/intent/example.jsonl b/backend/data_import/tests/data/intent/example.jsonl similarity index 100% rename from backend/api/tests/data/intent/example.jsonl rename to backend/data_import/tests/data/intent/example.jsonl diff --git a/backend/api/tests/data/label/invalid_labels.json b/backend/data_import/tests/data/label/invalid_labels.json similarity index 100% rename from backend/api/tests/data/label/invalid_labels.json rename to backend/data_import/tests/data/label/invalid_labels.json diff --git a/backend/api/tests/data/label/valid_labels.json b/backend/data_import/tests/data/label/valid_labels.json similarity index 100% rename from backend/api/tests/data/label/valid_labels.json rename to backend/data_import/tests/data/label/valid_labels.json diff --git a/backend/api/tests/data/seq2seq/example.csv b/backend/data_import/tests/data/seq2seq/example.csv similarity index 100% rename from backend/api/tests/data/seq2seq/example.csv rename to backend/data_import/tests/data/seq2seq/example.csv diff --git a/backend/api/tests/data/seq2seq/example.json b/backend/data_import/tests/data/seq2seq/example.json similarity index 100% rename from backend/api/tests/data/seq2seq/example.json rename to backend/data_import/tests/data/seq2seq/example.json diff --git a/backend/api/tests/data/seq2seq/example.jsonl b/backend/data_import/tests/data/seq2seq/example.jsonl similarity index 100% rename from backend/api/tests/data/seq2seq/example.jsonl rename to backend/data_import/tests/data/seq2seq/example.jsonl diff --git a/backend/api/tests/data/sequence_labeling/example.conll b/backend/data_import/tests/data/sequence_labeling/example.conll similarity index 100% rename from backend/api/tests/data/sequence_labeling/example.conll rename to backend/data_import/tests/data/sequence_labeling/example.conll diff --git a/backend/api/tests/data/sequence_labeling/example.jsonl b/backend/data_import/tests/data/sequence_labeling/example.jsonl similarity index 100% rename from backend/api/tests/data/sequence_labeling/example.jsonl rename to backend/data_import/tests/data/sequence_labeling/example.jsonl diff --git a/backend/api/tests/data/sequence_labeling/example_overlapping.jsonl b/backend/data_import/tests/data/sequence_labeling/example_overlapping.jsonl similarity index 100% rename from backend/api/tests/data/sequence_labeling/example_overlapping.jsonl rename to backend/data_import/tests/data/sequence_labeling/example_overlapping.jsonl diff --git a/backend/api/tests/data/sequence_labeling/labeling.invalid.conll b/backend/data_import/tests/data/sequence_labeling/labeling.invalid.conll similarity index 100% rename from backend/api/tests/data/sequence_labeling/labeling.invalid.conll rename to backend/data_import/tests/data/sequence_labeling/labeling.invalid.conll diff --git a/backend/api/tests/data/sequence_labeling/labeling.trailing.conll b/backend/data_import/tests/data/sequence_labeling/labeling.trailing.conll similarity index 100% rename from backend/api/tests/data/sequence_labeling/labeling.trailing.conll rename to backend/data_import/tests/data/sequence_labeling/labeling.trailing.conll diff --git a/backend/api/tests/data/text_classification/example.csv b/backend/data_import/tests/data/text_classification/example.csv similarity index 100% rename from backend/api/tests/data/text_classification/example.csv rename to backend/data_import/tests/data/text_classification/example.csv diff --git a/backend/api/tests/data/text_classification/example.invalid.2.csv b/backend/data_import/tests/data/text_classification/example.invalid.2.csv similarity index 100% rename from backend/api/tests/data/text_classification/example.invalid.2.csv rename to backend/data_import/tests/data/text_classification/example.invalid.2.csv diff --git a/backend/api/tests/data/text_classification/example.invalid.2.xlsx b/backend/data_import/tests/data/text_classification/example.invalid.2.xlsx similarity index 100% rename from backend/api/tests/data/text_classification/example.invalid.2.xlsx rename to backend/data_import/tests/data/text_classification/example.invalid.2.xlsx diff --git a/backend/api/tests/data/text_classification/example.json b/backend/data_import/tests/data/text_classification/example.json similarity index 100% rename from backend/api/tests/data/text_classification/example.json rename to backend/data_import/tests/data/text_classification/example.json diff --git a/backend/api/tests/data/text_classification/example.jsonl b/backend/data_import/tests/data/text_classification/example.jsonl similarity index 100% rename from backend/api/tests/data/text_classification/example.jsonl rename to backend/data_import/tests/data/text_classification/example.jsonl diff --git a/backend/api/tests/data/text_classification/example.xlsx b/backend/data_import/tests/data/text_classification/example.xlsx similarity index 100% rename from backend/api/tests/data/text_classification/example.xlsx rename to backend/data_import/tests/data/text_classification/example.xlsx diff --git a/backend/api/tests/data/text_classification/example_fasttext.txt b/backend/data_import/tests/data/text_classification/example_fasttext.txt similarity index 100% rename from backend/api/tests/data/text_classification/example_fasttext.txt rename to backend/data_import/tests/data/text_classification/example_fasttext.txt diff --git a/backend/api/tests/data/text_classification/example_one_column_no_header.xlsx b/backend/data_import/tests/data/text_classification/example_one_column_no_header.xlsx similarity index 100% rename from backend/api/tests/data/text_classification/example_one_column_no_header.xlsx rename to backend/data_import/tests/data/text_classification/example_one_column_no_header.xlsx diff --git a/backend/api/tests/data/text_classification/example_out_of_order_columns.csv b/backend/data_import/tests/data/text_classification/example_out_of_order_columns.csv similarity index 100% rename from backend/api/tests/data/text_classification/example_out_of_order_columns.csv rename to backend/data_import/tests/data/text_classification/example_out_of_order_columns.csv diff --git a/backend/api/tests/upload/test_builder.py b/backend/data_import/tests/test_builder.py similarity index 94% rename from backend/api/tests/upload/test_builder.py rename to backend/data_import/tests/test_builder.py index fe44ff74..b444cff5 100644 --- a/backend/api/tests/upload/test_builder.py +++ b/backend/data_import/tests/test_builder.py @@ -1,10 +1,10 @@ import unittest from typing import List -from ...views.upload import builders -from ...views.upload.data import TextData -from ...views.upload.exception import FileParseException -from ...views.upload.label import CategoryLabel, SpanLabel +from data_import.pipeline import builders +from data_import.pipeline.data import TextData +from data_import.pipeline.exceptions import FileParseException +from data_import.pipeline.labels import CategoryLabel, SpanLabel class TestColumnBuilder(unittest.TestCase): diff --git a/backend/api/tests/upload/test_parser.py b/backend/data_import/tests/test_parser.py similarity index 98% rename from backend/api/tests/upload/test_parser.py rename to backend/data_import/tests/test_parser.py index c73dc239..3ce1009b 100644 --- a/backend/api/tests/upload/test_parser.py +++ b/backend/data_import/tests/test_parser.py @@ -4,7 +4,7 @@ import shutil import tempfile import unittest -from ...views.upload import parsers +from data_import.pipeline import parsers class TestParser(unittest.TestCase): diff --git a/backend/api/tests/test_tasks.py b/backend/data_import/tests/test_tasks.py similarity index 96% rename from backend/api/tests/test_tasks.py rename to backend/data_import/tests/test_tasks.py index 864fcbf2..7d65883d 100644 --- a/backend/api/tests/test_tasks.py +++ b/backend/data_import/tests/test_tasks.py @@ -2,12 +2,12 @@ import pathlib from django.test import TestCase -from ..celery_tasks import ingest_data -from ..models import (DOCUMENT_CLASSIFICATION, - INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, - SEQUENCE_LABELING, Category, CategoryType, Example, Span, - SpanType) -from .api.utils import prepare_project +from data_import.celery_tasks import ingest_data +from api.models import (DOCUMENT_CLASSIFICATION, + INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, + SEQUENCE_LABELING, Category, CategoryType, Example, Span, + SpanType) +from api.tests.api.utils import prepare_project class TestIngestData(TestCase): diff --git a/backend/api/tests/api/test_upload.py b/backend/data_import/tests/test_views.py similarity index 89% rename from backend/api/tests/api/test_upload.py rename to backend/data_import/tests/test_views.py index 3873bd70..6bc1b6d2 100644 --- a/backend/api/tests/api/test_upload.py +++ b/backend/data_import/tests/test_views.py @@ -2,8 +2,8 @@ from django.test import override_settings from rest_framework import status from rest_framework.reverse import reverse -from ...models import DOCUMENT_CLASSIFICATION -from .utils import CRUDMixin, create_default_roles, make_user, prepare_project +from api.models import DOCUMENT_CLASSIFICATION +from api.tests.api.utils import CRUDMixin, create_default_roles, make_user, prepare_project class TestFeatures(CRUDMixin): diff --git a/backend/data_import/urls.py b/backend/data_import/urls.py new file mode 100644 index 00000000..e6dc3fe5 --- /dev/null +++ b/backend/data_import/urls.py @@ -0,0 +1,17 @@ +from django.urls import include, path + +from .views import UploadAPI, DatasetCatalog + +urlpatterns = [ + path('fp/', include('django_drf_filepond.urls')), + path( + route='projects//upload', + view=UploadAPI.as_view(), + name='upload' + ), + path( + route='projects//catalog', + view=DatasetCatalog.as_view(), + name='catalog' + ), +] diff --git a/backend/api/views/import_dataset.py b/backend/data_import/views.py similarity index 93% rename from backend/api/views/import_dataset.py rename to backend/data_import/views.py index 2aa441f0..04be66e1 100644 --- a/backend/api/views/import_dataset.py +++ b/backend/data_import/views.py @@ -8,11 +8,11 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from rest_framework.views import APIView -from members.permissions import IsProjectAdmin -from ..celery_tasks import ingest_data -from ..models import Project -from .upload.catalog import Options +from api.models import Project +from members.permissions import IsProjectAdmin +from .celery_tasks import ingest_data +from .pipeline.catalog import Options class DatasetCatalog(APIView):