# doccano/backend/api/tests/test_tasks.py


								import pathlib


								from django.test import TestCase


								from ..models import Category, Example, Label, Span

								from ..tasks import injest_data

								from .api.utils import prepare_project


								class TestIngestData(TestCase):
								    """Common fixture and count assertion for the ``injest_data`` tests.

								    Subclasses override ``task`` (the value handed to ``prepare_project``)
								    and ``annotation_class`` (the model whose rows are counted as
								    annotations in ``assert_count``).
								    """

								    task = 'Any'
								    annotation_class = Category

								    def setUp(self):
								        # Build the project for this task, remember its first user, and
								        # locate the ``data`` directory that sits next to this module.
								        project = prepare_project(self.task)
								        self.project = project
								        self.user = project.users[0]
								        self.data_path = pathlib.Path(__file__).parent.joinpath('data')

								    def assert_count(self, filename, file_format, kwargs=None,
								                     expected_example=0, expected_label=0, expected_annotation=0):
								        """Run ``injest_data`` on one sample file and check the row counts.

								        Args:
								            filename: Sample file path, relative to ``self.data_path``.
								            file_format: Format name forwarded to ``injest_data``.
								            kwargs: Optional dict of extra keyword arguments for
								                ``injest_data``; ``None`` (or any falsy value) means none.
								            expected_example: Expected ``Example.objects.count()``.
								            expected_label: Expected ``Label.objects.count()``.
								            expected_annotation: Expected row count of
								                ``self.annotation_class``.
								        """
								        sources = [str(self.data_path / filename)]
								        extra = kwargs if kwargs else {}
								        injest_data(self.user.id, self.project.item.id, sources, file_format, **extra)

								        # Counts are taken over the whole table, not filtered by project.
								        n_examples = Example.objects.count()
								        self.assertEqual(n_examples, expected_example)
								        n_labels = Label.objects.count()
								        self.assertEqual(n_labels, expected_label)
								        n_annotations = self.annotation_class.objects.count()
								        self.assertEqual(n_annotations, expected_annotation)


								class TestIngestClassificationData(TestIngestData):
								    """``injest_data`` checks for a ``'DocumentClassification'`` project.

								    Each test feeds one sample file to ``assert_count`` and pins the
								    resulting example, label and ``Category`` row counts.
								    """

								    task = 'DocumentClassification'
								    annotation_class = Category

								    def test_jsonl(self):
								        # The only case here that forwards an extra importer option.
								        self.assert_count(
								            'text_classification/example.jsonl',
								            'JSONL',
								            {'column_label': 'labels'},
								            expected_example=4,
								            expected_label=3,
								            expected_annotation=5,
								        )

								    def test_csv(self):
								        self.assert_count(
								            'text_classification/example.csv',
								            'CSV',
								            expected_example=4,
								            expected_label=2,
								            expected_annotation=2,
								        )

								    def test_fasttext(self):
								        self.assert_count(
								            'text_classification/example_fasttext.txt',
								            'fastText',
								            expected_example=4,
								            expected_label=5,
								            expected_annotation=5,
								        )

								    def test_excel(self):
								        self.assert_count(
								            'text_classification/example.xlsx',
								            'Excel',
								            expected_example=3,
								            expected_label=2,
								            expected_annotation=3,
								        )

								    def test_json(self):
								        self.assert_count(
								            'text_classification/example.json',
								            'JSON',
								            expected_example=4,
								            expected_label=3,
								            expected_annotation=5,
								        )

								    def test_textfile(self):
								        # Same input file as test_textline; only the format name differs.
								        self.assert_count(
								            'example.txt',
								            'TextFile',
								            expected_example=1,
								            expected_label=0,
								            expected_annotation=0,
								        )

								    def test_textline(self):
								        # Same input file as test_textfile; only the format name differs.
								        self.assert_count(
								            'example.txt',
								            'TextLine',
								            expected_example=3,
								            expected_label=0,
								            expected_annotation=0,
								        )


								class TestIngestSequenceLabelingData(TestIngestData):
								    """``injest_data`` checks for a ``'SequenceLabeling'`` project.

								    Annotations are counted on the ``Span`` model.
								    """

								    task = 'SequenceLabeling'
								    annotation_class = Span

								    def test_jsonl(self):
								        self.assert_count(
								            'sequence_labeling/example.jsonl',
								            'JSONL',
								            expected_example=3,
								            expected_label=3,
								            expected_annotation=4,
								        )

								    def test_conll(self):
								        self.assert_count(
								            'sequence_labeling/example.conll',
								            'CoNLL',
								            expected_example=3,
								            expected_label=2,
								            expected_annotation=5,
								        )