diff --git a/backend/api/tests/data/example.json b/backend/api/tests/data/example.json
new file mode 100644
index 00000000..7143f59a
--- /dev/null
+++ b/backend/api/tests/data/example.json
@@ -0,0 +1,6 @@
+[
+    {"text": "example", "label": ["positive"]},
+    {"text": "example", "label": ["positive", "negative"]},
+    {"text": "example", "label": ["negative"]},
+    {"text": "example", "label": ["neutral"]}
+]
diff --git a/backend/api/tests/test_tasks.py b/backend/api/tests/test_tasks.py
new file mode 100644
index 00000000..622ee32a
--- /dev/null
+++ b/backend/api/tests/test_tasks.py
@@ -0,0 +1,74 @@
+import pathlib
+
+from django.test import TestCase
+
+from ..models import Category, Example, Label
+from ..tasks import injest_data
+from .api.utils import prepare_project
+
+
+class TestIngestClassificationData(TestCase):
+    """Feed fixture files to ``injest_data`` and check the resulting row counts."""
+
+    def setUp(self):
+        """Call ``prepare_project`` for a DocumentClassification task and locate the fixtures."""
+        self.project = prepare_project(task='DocumentClassification')
+        self.user = self.project.users[0]
+        self.data_path = pathlib.Path(__file__).parent.joinpath('data')
+
+    def assert_count(self, filename, file_format, kwargs=None,
+                     expected_example=0, expected_label=0, expected_annotation=0):
+        """Ingest one fixture file, then compare table sizes with the expectations.
+
+        ``filename`` is resolved against ``self.data_path``; ``kwargs`` (optional dict)
+        is expanded into extra keyword arguments for ``injest_data``. The three
+        ``expected_*`` values are compared with the total number of ``Example``,
+        ``Label`` and ``Category`` rows respectively.
+        """
+        fixture = str(self.data_path / filename)
+        options = kwargs if kwargs else {}
+        injest_data(self.user.id, self.project.item.id, [fixture], file_format, **options)
+        expectations = (
+            (Example, expected_example),
+            (Label, expected_label),
+            (Category, expected_annotation),
+        )
+        # Checked in this order; the first mismatch fails the test.
+        for model, expected in expectations:
+            self.assertEqual(model.objects.count(), expected)
+
+    def test_jsonl(self):
+        # JSONL format; column_label='labels' is forwarded to injest_data.
+        # NOTE(review): fixture is 'classification.jsonl', unlike the 'example*' names below - confirm it exists.
+        self.assert_count('classification.jsonl', 'JSONL', {'column_label': 'labels'},
+                          expected_example=4, expected_label=3, expected_annotation=5)
+
+    def test_csv(self):
+        # CSV format, default ingestion options.
+        self.assert_count('example.csv', 'CSV',
+                          expected_example=4, expected_label=2, expected_annotation=2)
+
+    def test_fasttext(self):
+        # fastText format, default ingestion options.
+        self.assert_count('example_fasttext.txt', 'fastText',
+                          expected_example=4, expected_label=5, expected_annotation=5)
+
+    def test_excel(self):
+        # Excel format, default ingestion options.
+        self.assert_count('example.xlsx', 'Excel',
+                          expected_example=3, expected_label=2, expected_annotation=3)
+
+    def test_json(self):
+        # data/example.json holds 4 records, 3 distinct label names and 5 label assignments.
+        self.assert_count('example.json', 'JSON',
+                          expected_example=4, expected_label=3, expected_annotation=5)
+
+    def test_textfile(self):
+        # TextFile format: example.txt is expected to yield a single example and no labels.
+        self.assert_count('example.txt', 'TextFile',
+                          expected_example=1, expected_label=0, expected_annotation=0)
+
+    def test_textline(self):
+        # TextLine format: the same example.txt is expected to yield three examples and no labels.
+        self.assert_count('example.txt', 'TextLine',
+                          expected_example=3, expected_label=0, expected_annotation=0)