From be5faaf304f4a9324cc87edba2ce61ec534e2a55 Mon Sep 17 00:00:00 2001 From: Hironsan Date: Tue, 18 Jan 2022 11:45:55 +0900 Subject: [PATCH] Separate data export app --- Pipfile | 2 +- backend/api/tests/test_serializers.py | 19 -------------- backend/api/urls.py | 12 +-------- backend/app/settings.py | 1 + backend/app/urls.py | 1 + .../download => data_export}/__init__.py | 0 .../__init__.py => data_export/admin.py} | 0 backend/data_export/apps.py | 6 +++++ backend/{api => data_export}/celery_tasks.py | 10 +++---- backend/data_export/migrations/__init__.py | 0 backend/data_export/models.py | 0 backend/data_export/pipeline/__init__.py | 0 .../pipeline}/catalog.py | 6 ++--- .../download => data_export/pipeline}/data.py | 0 .../pipeline}/examples.py | 0 .../pipeline/factories.py} | 26 +++++++++---------- .../pipeline}/repositories.py | 2 +- .../pipeline/services.py} | 2 +- .../pipeline/writers.py} | 0 backend/data_export/tests/__init__.py | 0 .../tests}/test_repositories.py | 6 ++--- .../tests/test_views.py} | 4 +-- .../tests}/test_writer.py | 4 +-- backend/data_export/urls.py | 16 ++++++++++++ .../views.py} | 15 +++++------ 25 files changed, 63 insertions(+), 69 deletions(-) delete mode 100644 backend/api/tests/test_serializers.py rename backend/{api/tests/download => data_export}/__init__.py (100%) rename backend/{api/views/download/__init__.py => data_export/admin.py} (100%) create mode 100644 backend/data_export/apps.py rename backend/{api => data_export}/celery_tasks.py (60%) create mode 100644 backend/data_export/migrations/__init__.py create mode 100644 backend/data_export/models.py create mode 100644 backend/data_export/pipeline/__init__.py rename backend/{api/views/download => data_export/pipeline}/catalog.py (92%) rename backend/{api/views/download => data_export/pipeline}/data.py (100%) rename backend/{api/views/download => data_export/pipeline}/examples.py (100%) rename backend/{api/views/download/factory.py => data_export/pipeline/factories.py} (51%) 
rename backend/{api/views/download => data_export/pipeline}/repositories.py (99%) rename backend/{api/views/download/service.py => data_export/pipeline/services.py} (92%) rename backend/{api/views/download/writer.py => data_export/pipeline/writers.py} (100%) create mode 100644 backend/data_export/tests/__init__.py rename backend/{api/tests/download => data_export/tests}/test_repositories.py (87%) rename backend/{api/tests/api/test_download.py => data_export/tests/test_views.py} (86%) rename backend/{api/tests/download => data_export/tests}/test_writer.py (96%) create mode 100644 backend/data_export/urls.py rename backend/{api/views/export_dataset.py => data_export/views.py} (84%) diff --git a/Pipfile b/Pipfile index 6bbecc10..83370f15 100644 --- a/Pipfile +++ b/Pipfile @@ -60,6 +60,6 @@ python_version = "3.8" isort = "isort api -c --skip migrations" flake8 = "flake8 --filename \"*.py\" --extend-exclude \"server,api/migrations,api/views/__init__.py,authentification,api/apps.py\"" wait_for_db = "python manage.py wait_for_db" -test = "python manage.py test api.tests roles.tests members.tests metrics.tests users.tests data_import.tests" +test = "python manage.py test api.tests roles.tests members.tests metrics.tests users.tests data_import.tests data_export.tests" migrate = "python manage.py migrate" collectstatic = "python manage.py collectstatic --noinput" diff --git a/backend/api/tests/test_serializers.py b/backend/api/tests/test_serializers.py deleted file mode 100644 index d50db8d1..00000000 --- a/backend/api/tests/test_serializers.py +++ /dev/null @@ -1,19 +0,0 @@ -# from django.test import TestCase -# -# from ..models import SEQUENCE_LABELING, Label -# from ..serializers import LabelSerializer -# from .api.utils import prepare_project - -# class TestLabelSerializer(TestCase): -# -# def test_create_label(self): -# project = prepare_project(SEQUENCE_LABELING) -# data = { -# 'text': 'example', -# 'task_type': 'Span' -# } -# serializer = LabelSerializer(data=data) 
-# serializer.is_valid() -# label = serializer.save(project=project.item) -# created = Label.objects.get(pk=label.id) -# self.assertEqual(label, created) diff --git a/backend/api/urls.py b/backend/api/urls.py index 9bc80085..6068adea 100644 --- a/backend/api/urls.py +++ b/backend/api/urls.py @@ -1,20 +1,10 @@ from django.urls import include, path from .views import (annotation, auto_labeling, comment, example, example_state, - export_dataset, health, label, project, tag, task) + health, label, project, tag, task) from .views.tasks import category, relation, span, text urlpatterns_project = [ - path( - route='download-format', - view=export_dataset.DownloadDatasetCatalog.as_view(), - name='download-format' - ), - path( - route='download', - view=export_dataset.DownloadAPI.as_view(), - name='download-dataset' - ), path( route='category-types', view=label.CategoryTypeList.as_view(), diff --git a/backend/app/settings.py b/backend/app/settings.py index ba9a643e..666557f2 100644 --- a/backend/app/settings.py +++ b/backend/app/settings.py @@ -57,6 +57,7 @@ INSTALLED_APPS = [ 'metrics.apps.MetricsConfig', 'users.apps.UsersConfig', 'data_import.apps.DataImportConfig', + 'data_export.apps.DataExportConfig', 'rest_framework', 'rest_framework.authtoken', 'django_filters', diff --git a/backend/app/urls.py b/backend/app/urls.py index 8195845e..01ebc8b4 100644 --- a/backend/app/urls.py +++ b/backend/app/urls.py @@ -44,6 +44,7 @@ urlpatterns += [ path('v1/', include('roles.urls')), path('v1/', include('users.urls')), path('v1/', include('data_import.urls')), + path('v1/', include('data_export.urls')), path('v1/projects//', include('members.urls')), path('v1/projects//metrics/', include('metrics.urls')), path('swagger/', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'), diff --git a/backend/api/tests/download/__init__.py b/backend/data_export/__init__.py similarity index 100% rename from backend/api/tests/download/__init__.py rename to 
backend/data_export/__init__.py diff --git a/backend/api/views/download/__init__.py b/backend/data_export/admin.py similarity index 100% rename from backend/api/views/download/__init__.py rename to backend/data_export/admin.py diff --git a/backend/data_export/apps.py b/backend/data_export/apps.py new file mode 100644 index 00000000..d1d8bce5 --- /dev/null +++ b/backend/data_export/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class DataExportConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'data_export' diff --git a/backend/api/celery_tasks.py b/backend/data_export/celery_tasks.py similarity index 60% rename from backend/api/celery_tasks.py rename to backend/data_export/celery_tasks.py index ce6bb997..ab01786f 100644 --- a/backend/api/celery_tasks.py +++ b/backend/data_export/celery_tasks.py @@ -3,18 +3,18 @@ from celery.utils.log import get_task_logger from django.conf import settings from django.shortcuts import get_object_or_404 -from .models import Project -from .views.download.factory import create_repository, create_writer -from .views.download.service import ExportApplicationService +from api.models import Project +from .pipeline.factories import create_repository, create_writer +from .pipeline.services import ExportApplicationService logger = get_task_logger(__name__) @shared_task -def export_dataset(project_id, format: str, export_approved=False): +def export_dataset(project_id, file_format: str, export_approved=False): project = get_object_or_404(Project, pk=project_id) repository = create_repository(project) - writer = create_writer(format)(settings.MEDIA_ROOT) + writer = create_writer(file_format)(settings.MEDIA_ROOT) service = ExportApplicationService(repository, writer) filepath = service.export(export_approved) return filepath diff --git a/backend/data_export/migrations/__init__.py b/backend/data_export/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/backend/data_export/models.py b/backend/data_export/models.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/data_export/pipeline/__init__.py b/backend/data_export/pipeline/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/api/views/download/catalog.py b/backend/data_export/pipeline/catalog.py similarity index 92% rename from backend/api/views/download/catalog.py rename to backend/data_export/pipeline/catalog.py index cc6eae66..04e59e93 100644 --- a/backend/api/views/download/catalog.py +++ b/backend/data_export/pipeline/catalog.py @@ -4,9 +4,9 @@ from typing import Dict, List, Type from pydantic import BaseModel from typing_extensions import Literal -from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, - INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, - SEQUENCE_LABELING, SPEECH2TEXT) +from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, + INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, + SEQUENCE_LABELING, SPEECH2TEXT) from . import examples diff --git a/backend/api/views/download/data.py b/backend/data_export/pipeline/data.py similarity index 100% rename from backend/api/views/download/data.py rename to backend/data_export/pipeline/data.py diff --git a/backend/api/views/download/examples.py b/backend/data_export/pipeline/examples.py similarity index 100% rename from backend/api/views/download/examples.py rename to backend/data_export/pipeline/examples.py diff --git a/backend/api/views/download/factory.py b/backend/data_export/pipeline/factories.py similarity index 51% rename from backend/api/views/download/factory.py rename to backend/data_export/pipeline/factories.py index 24689298..1bf432e1 100644 --- a/backend/api/views/download/factory.py +++ b/backend/data_export/pipeline/factories.py @@ -1,9 +1,9 @@ from typing import Type -from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, - INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, - SEQUENCE_LABELING, SPEECH2TEXT) -from . 
import catalog, repositories, writer +from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, + INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ, + SEQUENCE_LABELING, SPEECH2TEXT) +from . import catalog, repositories, writers def create_repository(project) -> repositories.BaseRepository: @@ -21,14 +21,14 @@ def create_repository(project) -> repositories.BaseRepository: return repository -def create_writer(format: str) -> Type[writer.BaseWriter]: +def create_writer(file_format: str) -> Type[writers.BaseWriter]: mapping = { - catalog.CSV.name: writer.CsvWriter, - catalog.JSON.name: writer.JSONWriter, - catalog.JSONL.name: writer.JSONLWriter, - catalog.FastText.name: writer.FastTextWriter, - catalog.IntentAndSlot.name: writer.IntentAndSlotWriter + catalog.CSV.name: writers.CsvWriter, + catalog.JSON.name: writers.JSONWriter, + catalog.JSONL.name: writers.JSONLWriter, + catalog.FastText.name: writers.FastTextWriter, + catalog.IntentAndSlot.name: writers.IntentAndSlotWriter } - if format not in mapping: - ValueError(f'Invalid format: {format}') - return mapping[format] + if file_format not in mapping: + raise ValueError(f'Invalid format: {file_format}') + return mapping[file_format] diff --git a/backend/api/views/download/repositories.py b/backend/data_export/pipeline/repositories.py similarity index 99% rename from backend/api/views/download/repositories.py rename to backend/data_export/pipeline/repositories.py index 7222d414..acb0a069 100644 --- a/backend/api/views/download/repositories.py +++ b/backend/data_export/pipeline/repositories.py @@ -3,7 +3,7 @@ import itertools from collections import defaultdict from typing import Dict, Iterator, List -from ...models import Example, Project +from api.models import Example, Project from .data import Record diff --git a/backend/api/views/download/service.py b/backend/data_export/pipeline/services.py similarity index 92% rename from backend/api/views/download/service.py rename to backend/data_export/pipeline/services.py 
index 84c1189e..d1b8b62f 100644 --- a/backend/api/views/download/service.py +++ b/backend/data_export/pipeline/services.py @@ -1,5 +1,5 @@ from .repositories import BaseRepository -from .writer import BaseWriter +from .writers import BaseWriter class ExportApplicationService: diff --git a/backend/api/views/download/writer.py b/backend/data_export/pipeline/writers.py similarity index 100% rename from backend/api/views/download/writer.py rename to backend/data_export/pipeline/writers.py diff --git a/backend/data_export/tests/__init__.py b/backend/data_export/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/api/tests/download/test_repositories.py b/backend/data_export/tests/test_repositories.py similarity index 87% rename from backend/api/tests/download/test_repositories.py rename to backend/data_export/tests/test_repositories.py index dd713801..b9bd35ae 100644 --- a/backend/api/tests/download/test_repositories.py +++ b/backend/data_export/tests/test_repositories.py @@ -2,9 +2,9 @@ import unittest from model_mommy import mommy -from ...models import INTENT_DETECTION_AND_SLOT_FILLING -from ...views.download.repositories import IntentDetectionSlotFillingRepository -from ..api.utils import prepare_project +from api.models import INTENT_DETECTION_AND_SLOT_FILLING +from api.tests.api.utils import prepare_project +from ..pipeline.repositories import IntentDetectionSlotFillingRepository class TestCSVWriter(unittest.TestCase): diff --git a/backend/api/tests/api/test_download.py b/backend/data_export/tests/test_views.py similarity index 86% rename from backend/api/tests/api/test_download.py rename to backend/data_export/tests/test_views.py index dcafd66d..96d8312d 100644 --- a/backend/api/tests/api/test_download.py +++ b/backend/data_export/tests/test_views.py @@ -1,8 +1,8 @@ from rest_framework import status from rest_framework.reverse import reverse -from ...models import DOCUMENT_CLASSIFICATION -from .utils import CRUDMixin, 
prepare_project +from api.models import DOCUMENT_CLASSIFICATION +from api.tests.api.utils import CRUDMixin, prepare_project class TestDownloadCatalog(CRUDMixin): diff --git a/backend/api/tests/download/test_writer.py b/backend/data_export/tests/test_writer.py similarity index 96% rename from backend/api/tests/download/test_writer.py rename to backend/data_export/tests/test_writer.py index 39e2c0b9..c88a03ea 100644 --- a/backend/api/tests/download/test_writer.py +++ b/backend/data_export/tests/test_writer.py @@ -2,8 +2,8 @@ import json import unittest from unittest.mock import call, patch -from ...views.download.data import Record -from ...views.download.writer import CsvWriter, IntentAndSlotWriter +from ..pipeline.data import Record +from ..pipeline.writers import CsvWriter, IntentAndSlotWriter class TestCSVWriter(unittest.TestCase): diff --git a/backend/data_export/urls.py b/backend/data_export/urls.py new file mode 100644 index 00000000..d0f50261 --- /dev/null +++ b/backend/data_export/urls.py @@ -0,0 +1,16 @@ +from django.urls import path + +from .views import DatasetCatalog, DatasetExportAPI + +urlpatterns = [ + path( + route='projects/<int:project_id>/download-format', + view=DatasetCatalog.as_view(), + name='download-format' + ), + path( + route='projects/<int:project_id>/download', + view=DatasetExportAPI.as_view(), + name='download-dataset' + ), +] diff --git a/backend/api/views/export_dataset.py b/backend/data_export/views.py similarity index 84% rename from backend/api/views/export_dataset.py rename to backend/data_export/views.py index e3572ddc..f10d1bca 100644 --- a/backend/api/views/export_dataset.py +++ b/backend/data_export/views.py @@ -6,14 +6,13 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from rest_framework.views import APIView +from api.models import Project from members.permissions import IsProjectAdmin +from .celery_tasks import export_dataset +from .pipeline.catalog import Options -from ..celery_tasks import 
export_dataset -from ..models import Project -from .download.catalog import Options - -class DownloadDatasetCatalog(APIView): +class DatasetCatalog(APIView): permission_classes = [IsAuthenticated & IsProjectAdmin] def get(self, request, *args, **kwargs): @@ -23,7 +22,7 @@ class DownloadDatasetCatalog(APIView): return Response(data=options, status=status.HTTP_200_OK) -class DownloadAPI(APIView): +class DatasetExportAPI(APIView): permission_classes = [IsAuthenticated & IsProjectAdmin] def get(self, request, *args, **kwargs): @@ -37,11 +36,11 @@ class DownloadAPI(APIView): def post(self, request, *args, **kwargs): project_id = self.kwargs['project_id'] - format = request.data.pop('format') + file_format = request.data.pop('format') export_approved = request.data.pop('exportApproved', False) task = export_dataset.delay( project_id=project_id, - format=format, + file_format=file_format, export_approved=export_approved, **request.data )