Browse Source

Add test cases for ingesting sequence labeling data

pull/1544/head
Hironsan 3 years ago
parent
commit
26669d20ae
4 changed files with 36 additions and 4 deletions
  1. 3
      backend/api/tests/data/labeling.jsonl
  2. 0
      backend/api/tests/data/sequence_labeling/example.conll
  3. 3
      backend/api/tests/data/sequence_labeling/example.jsonl
  4. 34
      backend/api/tests/test_tasks.py

3
backend/api/tests/data/labeling.jsonl

@@ -1,3 +0,0 @@
{"text": "example", "labels": [[0, 1, "LOC"], [0, 2, "ORG"]], "meta": {"wikiPageID": 1}}
{"text": "example", "labels": [[0, 1, "LOC"]], "meta": {"wikiPageID": 2}}
{"text": "example", "labels": [[0, 1, "PER"]], "meta": {"wikiPageID": 3}}

backend/api/tests/data/labeling.conll → backend/api/tests/data/sequence_labeling/example.conll

3
backend/api/tests/data/sequence_labeling/example.jsonl

@@ -0,0 +1,3 @@
{"text": "example", "label": [[0, 1, "LOC"], [0, 2, "ORG"]], "meta": {"wikiPageID": 1}}
{"text": "example", "label": [[0, 1, "LOC"]], "meta": {"wikiPageID": 2}}
{"text": "example", "label": [[0, 1, "PER"]], "meta": {"wikiPageID": 3}}

34
backend/api/tests/test_tasks.py

@@ -2,7 +2,7 @@ import pathlib
from django.test import TestCase from django.test import TestCase
from ..models import Category, Example, Label
from ..models import Category, Example, Label, Span
from ..tasks import injest_data from ..tasks import injest_data
from .api.utils import prepare_project from .api.utils import prepare_project
@@ -63,3 +63,35 @@ class TestIngestClassificationData(TestCase):
filename = 'example.txt' filename = 'example.txt'
file_format = 'TextLine' file_format = 'TextLine'
self.assert_count(filename, file_format, expected_example=3, expected_label=0, expected_annotation=0) self.assert_count(filename, file_format, expected_example=3, expected_label=0, expected_annotation=0)
class TestIngestSequenceLabelingData(TestCase):
    """Ingest sequence labeling fixture files and check the stored row counts.

    Each test passes one fixture file through ``injest_data`` and then
    compares the number of ``Example``, ``Label`` and ``Span`` rows with
    the expected totals.
    """

    def setUp(self):
        """Create a SequenceLabeling project and locate the fixture directory."""
        self.project = prepare_project(task='SequenceLabeling')
        self.user = self.project.users[0]
        # Fixture files live in the ``data`` directory next to this module.
        self.data_path = pathlib.Path(__file__).parent / 'data'

    def assert_count(self,
                     filename,
                     file_format,
                     kwargs=None,
                     expected_example=0,
                     expected_label=0,
                     expected_annotation=0):
        """Ingest one fixture file and assert the resulting object counts.

        Args:
            filename: Fixture path relative to ``self.data_path``.
            file_format: Format name forwarded to ``injest_data``.
            kwargs: Optional dict of extra keyword arguments for
                ``injest_data``; a falsy value means no extras.
            expected_example: Expected ``Example.objects.count()``.
            expected_label: Expected ``Label.objects.count()``.
            expected_annotation: Expected ``Span.objects.count()``.
        """
        extra_options = {} if not kwargs else kwargs
        paths = [str(self.data_path / filename)]
        injest_data(self.user.id, self.project.item.id, paths, file_format, **extra_options)

        # Checked in a fixed order; each count is queried right before its
        # own assertion, so the first mismatch stops the remaining checks.
        expectations = (
            (Example, expected_example),
            (Label, expected_label),
            (Span, expected_annotation),
        )
        for model, expected in expectations:
            self.assertEqual(model.objects.count(), expected)

    def test_jsonl(self):
        """The JSONL fixture yields 3 examples, 3 labels and 4 spans."""
        self.assert_count(
            'sequence_labeling/example.jsonl',
            'JSONL',
            expected_example=3,
            expected_label=3,
            expected_annotation=4,
        )

    def test_conll(self):
        """The CoNLL fixture yields 3 examples, 2 labels and 5 spans."""
        self.assert_count(
            'sequence_labeling/example.conll',
            'CoNLL',
            expected_example=3,
            expected_label=2,
            expected_annotation=5,
        )
Loading…
Cancel
Save