Browse Source

Rename UploadAPI to DatasetImportAPI

pull/1640/head
Hironsan 3 years ago
parent
commit
f1fc54a316
4 changed files with 34 additions and 34 deletions
  1. backend/data_import/celery_tasks.py (2 changes)
  2. backend/data_import/tests/test_tasks.py (56 changes)
  3. backend/data_import/urls.py (4 changes)
  4. backend/data_import/views.py (6 changes)

2
backend/data_import/celery_tasks.py

@@ -10,7 +10,7 @@ from .pipeline.writers import BulkWriter
@shared_task
-def ingest_data(user_id, project_id, filenames, file_format: str, **kwargs):
+def import_dataset(user_id, project_id, filenames, file_format: str, **kwargs):
project = get_object_or_404(Project, pk=project_id)
user = get_object_or_404(get_user_model(), pk=user_id)

56
backend/data_import/tests/test_tasks.py

@@ -2,7 +2,7 @@ import pathlib
from django.test import TestCase
-from data_import.celery_tasks import ingest_data
+from data_import.celery_tasks import import_dataset
from api.models import (DOCUMENT_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
SEQUENCE_LABELING, Category, CategoryType, Example, Span,
@@ -10,7 +10,7 @@ from api.models import (DOCUMENT_CLASSIFICATION,
from api.tests.api.utils import prepare_project
-class TestIngestData(TestCase):
+class TestImportData(TestCase):
task = 'Any'
annotation_class = Category
@@ -19,13 +19,13 @@ class TestIngestData(TestCase):
self.user = self.project.users[0]
self.data_path = pathlib.Path(__file__).parent / 'data'
-def ingest_data(self, filename, file_format, kwargs=None):
+def import_dataset(self, filename, file_format, kwargs=None):
filenames = [str(self.data_path / filename)]
kwargs = kwargs or {}
-return ingest_data(self.user.id, self.project.item.id, filenames, file_format, **kwargs)
+return import_dataset(self.user.id, self.project.item.id, filenames, file_format, **kwargs)
-class TestIngestClassificationData(TestIngestData):
+class TestImportClassificationData(TestImportData):
task = DOCUMENT_CLASSIFICATION
def assert_examples(self, dataset):
@@ -50,7 +50,7 @@ class TestIngestClassificationData(TestIngestData):
('exampleB', ['positive', 'negative']),
('exampleC', [])
]
-self.ingest_data(filename, file_format, kwargs)
+self.import_dataset(filename, file_format, kwargs)
self.assert_examples(dataset)
def test_csv(self):
@@ -60,7 +60,7 @@ class TestIngestClassificationData(TestIngestData):
('exampleA', ['positive']),
('exampleB', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_csv_out_of_order_columns(self):
@@ -70,7 +70,7 @@ class TestIngestClassificationData(TestIngestData):
('exampleA', ['positive']),
('exampleB', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_fasttext(self):
@@ -81,7 +81,7 @@ class TestIngestClassificationData(TestIngestData):
('exampleB', ['positive', 'negative']),
('exampleC', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_excel(self):
@@ -91,7 +91,7 @@ class TestIngestClassificationData(TestIngestData):
('exampleA', ['positive']),
('exampleB', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_json(self):
@@ -102,7 +102,7 @@ class TestIngestClassificationData(TestIngestData):
('exampleB', ['positive', 'negative']),
('exampleC', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_textfile(self):
@@ -111,7 +111,7 @@ class TestIngestClassificationData(TestIngestData):
dataset = [
('exampleA\nexampleB\n\nexampleC\n', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_textline(self):
@@ -122,35 +122,35 @@ class TestIngestClassificationData(TestIngestData):
('exampleB', []),
('exampleC', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_wrong_jsonl(self):
filename = 'text_classification/example.json'
file_format = 'JSONL'
-response = self.ingest_data(filename, file_format)
+response = self.import_dataset(filename, file_format)
self.assert_parse_error(response)
def test_wrong_json(self):
filename = 'text_classification/example.jsonl'
file_format = 'JSON'
-response = self.ingest_data(filename, file_format)
+response = self.import_dataset(filename, file_format)
self.assert_parse_error(response)
def test_wrong_excel(self):
filename = 'text_classification/example.jsonl'
file_format = 'Excel'
-response = self.ingest_data(filename, file_format)
+response = self.import_dataset(filename, file_format)
self.assert_parse_error(response)
def test_wrong_csv(self):
filename = 'text_classification/example.jsonl'
file_format = 'CSV'
-response = self.ingest_data(filename, file_format)
+response = self.import_dataset(filename, file_format)
self.assert_parse_error(response)
-class TestIngestSequenceLabelingData(TestIngestData):
+class TestImportSequenceLabelingData(TestImportData):
task = SEQUENCE_LABELING
def assert_examples(self, dataset):
@@ -173,7 +173,7 @@ class TestIngestSequenceLabelingData(TestIngestData):
('exampleA', [[0, 1, 'LOC']]),
('exampleB', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_conll(self):
@@ -183,23 +183,23 @@ class TestIngestSequenceLabelingData(TestIngestData):
('JAPAN GET', [[0, 5, 'LOC']]),
('Nadim Ladki', [[0, 11, 'PER']])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_wrong_conll(self):
filename = 'sequence_labeling/example.jsonl'
file_format = 'CoNLL'
-response = self.ingest_data(filename, file_format)
+response = self.import_dataset(filename, file_format)
self.assert_parse_error(response)
def test_jsonl_with_overlapping(self):
filename = 'sequence_labeling/example_overlapping.jsonl'
file_format = 'JSONL'
-response = self.ingest_data(filename, file_format)
+response = self.import_dataset(filename, file_format)
self.assertEqual(len(response['error']), 1)
-class TestIngestSeq2seqData(TestIngestData):
+class TestImportSeq2seqData(TestImportData):
task = SEQ2SEQ
def assert_examples(self, dataset):
@@ -216,7 +216,7 @@ class TestIngestSeq2seqData(TestIngestData):
('exampleA', ['label1']),
('exampleB', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_json(self):
@@ -226,7 +226,7 @@ class TestIngestSeq2seqData(TestIngestData):
('exampleA', ['label1']),
('exampleB', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
def test_csv(self):
@@ -236,11 +236,11 @@ class TestIngestSeq2seqData(TestIngestData):
('exampleA', ['label1']),
('exampleB', [])
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)
-class TextIngestIntentDetectionAndSlotFillingData(TestIngestData):
+class TextImportIntentDetectionAndSlotFillingData(TestImportData):
task = INTENT_DETECTION_AND_SLOT_FILLING
def assert_examples(self, dataset):
@@ -261,5 +261,5 @@ class TextIngestIntentDetectionAndSlotFillingData(TestIngestData):
('exampleC', {'cats': [], 'entities': [(0, 1, 'LOC')]}),
('exampleD', {'cats': [], 'entities': []}),
]
-self.ingest_data(filename, file_format)
+self.import_dataset(filename, file_format)
self.assert_examples(dataset)

4
backend/data_import/urls.py

@@ -1,12 +1,12 @@
from django.urls import include, path
-from .views import UploadAPI, DatasetCatalog
+from .views import DatasetImportAPI, DatasetCatalog
urlpatterns = [
path('fp/', include('django_drf_filepond.urls')),
path(
route='projects/<int:project_id>/upload',
-view=UploadAPI.as_view(),
+view=DatasetImportAPI.as_view(),
name='upload'
),
path(

6
backend/data_import/views.py

@@ -11,7 +11,7 @@ from rest_framework.views import APIView
from api.models import Project
from members.permissions import IsProjectAdmin
-from .celery_tasks import ingest_data
+from .celery_tasks import import_dataset
from .pipeline.catalog import Options
@@ -25,7 +25,7 @@ class DatasetCatalog(APIView):
return Response(data=options, status=status.HTTP_200_OK)
-class UploadAPI(APIView):
+class DatasetImportAPI(APIView):
permission_classes = [IsAuthenticated & IsProjectAdmin]
def post(self, request, *args, **kwargs):
@@ -42,7 +42,7 @@ class UploadAPI(APIView):
for tu in tus
]
filenames = [su.file.path for su in sus]
-task = ingest_data.delay(
+task = import_dataset.delay(
user_id=request.user.id,
project_id=project_id,
filenames=filenames,

Loading…
Cancel
Save