Browse Source

Separate data export app

pull/1641/head
Hironsan 2 years ago
parent
commit
be5faaf304
25 changed files with 63 additions and 69 deletions
  1. 2
      Pipfile
  2. 19
      backend/api/tests/test_serializers.py
  3. 12
      backend/api/urls.py
  4. 1
      backend/app/settings.py
  5. 1
      backend/app/urls.py
  6. 0
      backend/data_export/__init__.py
  7. 0
      backend/data_export/admin.py
  8. 6
      backend/data_export/apps.py
  9. 10
      backend/data_export/celery_tasks.py
  10. 0
      backend/data_export/migrations/__init__.py
  11. 0
      backend/data_export/models.py
  12. 0
      backend/data_export/pipeline/__init__.py
  13. 6
      backend/data_export/pipeline/catalog.py
  14. 0
      backend/data_export/pipeline/data.py
  15. 0
      backend/data_export/pipeline/examples.py
  16. 26
      backend/data_export/pipeline/factories.py
  17. 2
      backend/data_export/pipeline/repositories.py
  18. 2
      backend/data_export/pipeline/services.py
  19. 0
      backend/data_export/pipeline/writers.py
  20. 0
      backend/data_export/tests/__init__.py
  21. 6
      backend/data_export/tests/test_repositories.py
  22. 4
      backend/data_export/tests/test_views.py
  23. 4
      backend/data_export/tests/test_writer.py
  24. 16
      backend/data_export/urls.py
  25. 15
      backend/data_export/views.py

2
Pipfile

@ -60,6 +60,6 @@ python_version = "3.8"
isort = "isort api -c --skip migrations"
flake8 = "flake8 --filename \"*.py\" --extend-exclude \"server,api/migrations,api/views/__init__.py,authentification,api/apps.py\""
wait_for_db = "python manage.py wait_for_db"
test = "python manage.py test api.tests roles.tests members.tests metrics.tests users.tests data_import.tests"
test = "python manage.py test api.tests roles.tests members.tests metrics.tests users.tests data_import.tests data_export.tests"
migrate = "python manage.py migrate"
collectstatic = "python manage.py collectstatic --noinput"

19
backend/api/tests/test_serializers.py

@ -1,19 +0,0 @@
# from django.test import TestCase
#
# from ..models import SEQUENCE_LABELING, Label
# from ..serializers import LabelSerializer
# from .api.utils import prepare_project
# class TestLabelSerializer(TestCase):
#
# def test_create_label(self):
# project = prepare_project(SEQUENCE_LABELING)
# data = {
# 'text': 'example',
# 'task_type': 'Span'
# }
# serializer = LabelSerializer(data=data)
# serializer.is_valid()
# label = serializer.save(project=project.item)
# created = Label.objects.get(pk=label.id)
# self.assertEqual(label, created)

12
backend/api/urls.py

@ -1,20 +1,10 @@
from django.urls import include, path
from .views import (annotation, auto_labeling, comment, example, example_state,
export_dataset, health, label, project, tag, task)
health, label, project, tag, task)
from .views.tasks import category, relation, span, text
urlpatterns_project = [
path(
route='download-format',
view=export_dataset.DownloadDatasetCatalog.as_view(),
name='download-format'
),
path(
route='download',
view=export_dataset.DownloadAPI.as_view(),
name='download-dataset'
),
path(
route='category-types',
view=label.CategoryTypeList.as_view(),

1
backend/app/settings.py

@ -57,6 +57,7 @@ INSTALLED_APPS = [
'metrics.apps.MetricsConfig',
'users.apps.UsersConfig',
'data_import.apps.DataImportConfig',
'data_export.apps.DataExportConfig',
'rest_framework',
'rest_framework.authtoken',
'django_filters',

1
backend/app/urls.py

@ -44,6 +44,7 @@ urlpatterns += [
path('v1/', include('roles.urls')),
path('v1/', include('users.urls')),
path('v1/', include('data_import.urls')),
path('v1/', include('data_export.urls')),
path('v1/projects/<int:project_id>/', include('members.urls')),
path('v1/projects/<int:project_id>/metrics/', include('metrics.urls')),
path('swagger/', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),

backend/api/tests/download/__init__.py → backend/data_export/__init__.py

backend/api/views/download/__init__.py → backend/data_export/admin.py

6
backend/data_export/apps.py

@ -0,0 +1,6 @@
from django.apps import AppConfig
class DataExportConfig(AppConfig):
    """Django application configuration for the ``data_export`` app."""

    # Module path of the application this configuration applies to.
    name = 'data_export'
    # Field class Django uses for implicit primary keys in this app's models.
    default_auto_field = 'django.db.models.BigAutoField'

backend/api/celery_tasks.py → backend/data_export/celery_tasks.py

@ -3,18 +3,18 @@ from celery.utils.log import get_task_logger
from django.conf import settings
from django.shortcuts import get_object_or_404
from .models import Project
from .views.download.factory import create_repository, create_writer
from .views.download.service import ExportApplicationService
from api.models import Project
from .pipeline.factories import create_repository, create_writer
from .pipeline.services import ExportApplicationService
logger = get_task_logger(__name__)
@shared_task
def export_dataset(project_id, file_format: str, export_approved=False):
    """Export a project's dataset and return the path produced by the service.

    Args:
        project_id: Primary key used to look up the ``Project``; the lookup
            goes through ``get_object_or_404``.
        file_format: Format name handed to ``create_writer`` to pick the
            writer class.
        export_approved: Passed through unchanged to
            ``ExportApplicationService.export``.

    Returns:
        The value returned by ``ExportApplicationService.export``.
    """
    target_project = get_object_or_404(Project, pk=project_id)
    # Build the repository first, then the writer, matching the original order.
    source = create_repository(target_project)
    sink = create_writer(file_format)(settings.MEDIA_ROOT)
    return ExportApplicationService(source, sink).export(export_approved)

0
backend/data_export/migrations/__init__.py

0
backend/data_export/models.py

0
backend/data_export/pipeline/__init__.py

backend/api/views/download/catalog.py → backend/data_export/pipeline/catalog.py

@ -4,9 +4,9 @@ from typing import Dict, List, Type
from pydantic import BaseModel
from typing_extensions import Literal
from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
SEQUENCE_LABELING, SPEECH2TEXT)
from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
SEQUENCE_LABELING, SPEECH2TEXT)
from . import examples

backend/api/views/download/data.py → backend/data_export/pipeline/data.py

backend/api/views/download/examples.py → backend/data_export/pipeline/examples.py

backend/api/views/download/factory.py → backend/data_export/pipeline/factories.py

@ -1,9 +1,9 @@
from typing import Type
from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
SEQUENCE_LABELING, SPEECH2TEXT)
from . import catalog, repositories, writer
from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
SEQUENCE_LABELING, SPEECH2TEXT)
from . import catalog, repositories, writers
def create_repository(project) -> repositories.BaseRepository:
@ -21,14 +21,14 @@ def create_repository(project) -> repositories.BaseRepository:
return repository
def create_writer(file_format: str) -> Type[writers.BaseWriter]:
    """Return the writer class registered for ``file_format``.

    Args:
        file_format: Export format name; it must equal the ``name`` of one of
            the catalog formats listed in the mapping below.

    Returns:
        The writer class (not an instance) associated with that format.

    Raises:
        ValueError: If ``file_format`` is not one of the supported names.
    """
    mapping = {
        catalog.CSV.name: writers.CsvWriter,
        catalog.JSON.name: writers.JSONWriter,
        catalog.JSONL.name: writers.JSONLWriter,
        catalog.FastText.name: writers.FastTextWriter,
        catalog.IntentAndSlot.name: writers.IntentAndSlotWriter,
    }
    if file_format not in mapping:
        # The exception must be raised explicitly: merely constructing it
        # discards the object and lets the lookup below fail with a bare
        # KeyError instead of this descriptive error.
        raise ValueError(f'Invalid format: {file_format}')
    return mapping[file_format]

backend/api/views/download/repositories.py → backend/data_export/pipeline/repositories.py

@ -3,7 +3,7 @@ import itertools
from collections import defaultdict
from typing import Dict, Iterator, List
from ...models import Example, Project
from api.models import Example, Project
from .data import Record

backend/api/views/download/service.py → backend/data_export/pipeline/services.py

@ -1,5 +1,5 @@
from .repositories import BaseRepository
from .writer import BaseWriter
from .writers import BaseWriter
class ExportApplicationService:

backend/api/views/download/writer.py → backend/data_export/pipeline/writers.py

0
backend/data_export/tests/__init__.py

backend/api/tests/download/test_repositories.py → backend/data_export/tests/test_repositories.py

@ -2,9 +2,9 @@ import unittest
from model_mommy import mommy
from ...models import INTENT_DETECTION_AND_SLOT_FILLING
from ...views.download.repositories import IntentDetectionSlotFillingRepository
from ..api.utils import prepare_project
from api.models import INTENT_DETECTION_AND_SLOT_FILLING
from api.tests.api.utils import prepare_project
from ..pipeline.repositories import IntentDetectionSlotFillingRepository
class TestCSVWriter(unittest.TestCase):

backend/api/tests/api/test_download.py → backend/data_export/tests/test_views.py

@ -1,8 +1,8 @@
from rest_framework import status
from rest_framework.reverse import reverse
from ...models import DOCUMENT_CLASSIFICATION
from .utils import CRUDMixin, prepare_project
from api.models import DOCUMENT_CLASSIFICATION
from api.tests.api.utils import CRUDMixin, prepare_project
class TestDownloadCatalog(CRUDMixin):

backend/api/tests/download/test_writer.py → backend/data_export/tests/test_writer.py

@ -2,8 +2,8 @@ import json
import unittest
from unittest.mock import call, patch
from ...views.download.data import Record
from ...views.download.writer import CsvWriter, IntentAndSlotWriter
from ..pipeline.data import Record
from ..pipeline.writers import CsvWriter, IntentAndSlotWriter
class TestCSVWriter(unittest.TestCase):

16
backend/data_export/urls.py

@ -0,0 +1,16 @@
from django.urls import path
from .views import DatasetCatalog, DatasetExportAPI
# Export endpoints; both routes capture ``project_id`` as an integer.
urlpatterns = [
    # Served by DatasetCatalog.
    path(
        'projects/<int:project_id>/download-format',
        DatasetCatalog.as_view(),
        name='download-format',
    ),
    # Served by DatasetExportAPI.
    path(
        'projects/<int:project_id>/download',
        DatasetExportAPI.as_view(),
        name='download-dataset',
    ),
]

backend/api/views/export_dataset.py → backend/data_export/views.py

@ -6,14 +6,13 @@ from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView
from api.models import Project
from members.permissions import IsProjectAdmin
from .celery_tasks import export_dataset
from .pipeline.catalog import Options
from ..celery_tasks import export_dataset
from ..models import Project
from .download.catalog import Options
class DownloadDatasetCatalog(APIView):
class DatasetCatalog(APIView):
permission_classes = [IsAuthenticated & IsProjectAdmin]
def get(self, request, *args, **kwargs):
@ -23,7 +22,7 @@ class DownloadDatasetCatalog(APIView):
return Response(data=options, status=status.HTTP_200_OK)
class DownloadAPI(APIView):
class DatasetExportAPI(APIView):
permission_classes = [IsAuthenticated & IsProjectAdmin]
def get(self, request, *args, **kwargs):
@ -37,11 +36,11 @@ class DownloadAPI(APIView):
def post(self, request, *args, **kwargs):
project_id = self.kwargs['project_id']
format = request.data.pop('format')
file_format = request.data.pop('format')
export_approved = request.data.pop('exportApproved', False)
task = export_dataset.delay(
project_id=project_id,
format=format,
file_format=file_format,
export_approved=export_approved,
**request.data
)
Loading…
Cancel
Save