mirror of https://github.com/doccano/doccano.git
pythondatasetnatural-language-processingdata-labelingmachine-learningannotation-tooldatasetsactive-learningtext-annotation
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
532 lines
22 KiB
532 lines
22 KiB
import unittest
|
|
|
|
from model_mommy import mommy
|
|
|
|
from ..pipeline.repositories import (
|
|
FileRepository,
|
|
IntentDetectionSlotFillingRepository,
|
|
RelationExtractionRepository,
|
|
Seq2seqRepository,
|
|
SequenceLabelingRepository,
|
|
Speech2TextRepository,
|
|
TextClassificationRepository,
|
|
)
|
|
from projects.models import (
|
|
DOCUMENT_CLASSIFICATION,
|
|
IMAGE_CLASSIFICATION,
|
|
INTENT_DETECTION_AND_SLOT_FILLING,
|
|
SEQ2SEQ,
|
|
SEQUENCE_LABELING,
|
|
SPEECH2TEXT,
|
|
)
|
|
from projects.tests.utils import prepare_project
|
|
|
|
|
|
class TestRepository(unittest.TestCase):
|
|
def assert_records(self, repository, expected, confirmed_only=False):
|
|
records = list(repository.list(export_approved=confirmed_only))
|
|
self.assertEqual(len(records), len(expected))
|
|
for record, expect in zip(records, expected):
|
|
self.assertEqual(record.data, expect["data"])
|
|
self.assertEqual(record.label, expect["label"])
|
|
self.assertEqual(record.user, expect["user"])
|
|
|
|
|
|
class TestTextClassificationRepository(TestRepository):
|
|
def prepare_data(self, project):
|
|
self.confirmed_example = mommy.make("Example", project=project.item, text="confirmed")
|
|
self.category1 = mommy.make("Category", example=self.confirmed_example, user=project.admin)
|
|
self.category2 = mommy.make("Category", example=self.confirmed_example, user=project.annotator)
|
|
mommy.make("ExampleState", example=self.confirmed_example, confirmed_by=project.admin)
|
|
self.unconfirmed_example = mommy.make("Example", project=project.item, text="unconfirmed")
|
|
|
|
def test_list(self):
|
|
project = prepare_project(DOCUMENT_CLASSIFICATION)
|
|
repository = TextClassificationRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [self.category1.label.text],
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [self.category2.label.text],
|
|
"user": project.annotator.username,
|
|
},
|
|
{"data": self.unconfirmed_example.text, "label": [], "user": "unknown"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_on_collaborative_annotation(self):
|
|
project = prepare_project(DOCUMENT_CLASSIFICATION, collaborative_annotation=True)
|
|
repository = TextClassificationRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [self.category1.label.text, self.category2.label.text],
|
|
"user": "all",
|
|
},
|
|
{
|
|
"data": self.unconfirmed_example.text,
|
|
"label": [],
|
|
"user": "all",
|
|
},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_confirmed_example_only(self):
|
|
project = prepare_project(DOCUMENT_CLASSIFICATION)
|
|
repository = TextClassificationRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [self.category1.label.text],
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [self.category2.label.text],
|
|
"user": project.annotator.username,
|
|
},
|
|
]
|
|
self.assert_records(repository, expected, confirmed_only=True)
|
|
|
|
|
|
class TestSeq2seqRepository(TestRepository):
|
|
def prepare_data(self, project):
|
|
self.confirmed_example = mommy.make("Example", project=project.item, text="confirmed")
|
|
self.text1 = mommy.make("TextLabel", example=self.confirmed_example, user=project.admin)
|
|
self.text2 = mommy.make("TextLabel", example=self.confirmed_example, user=project.annotator)
|
|
mommy.make("ExampleState", example=self.confirmed_example, confirmed_by=project.admin)
|
|
self.unconfirmed_example = mommy.make("Example", project=project.item, text="unconfirmed")
|
|
|
|
def test_list(self):
|
|
project = prepare_project(SEQ2SEQ)
|
|
repository = Seq2seqRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [self.text1.text],
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [self.text2.text],
|
|
"user": project.annotator.username,
|
|
},
|
|
{"data": self.unconfirmed_example.text, "label": [], "user": "unknown"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_on_collaborative_annotation(self):
|
|
project = prepare_project(SEQ2SEQ, collaborative_annotation=True)
|
|
repository = Seq2seqRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [self.text1.text, self.text2.text],
|
|
"user": "all",
|
|
},
|
|
{
|
|
"data": self.unconfirmed_example.text,
|
|
"label": [],
|
|
"user": "all",
|
|
},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_confirmed_example_only(self):
|
|
project = prepare_project(SEQ2SEQ)
|
|
repository = Seq2seqRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [self.text1.text],
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [self.text2.text],
|
|
"user": project.annotator.username,
|
|
},
|
|
]
|
|
self.assert_records(repository, expected, confirmed_only=True)
|
|
|
|
|
|
class TestIntentDetectionSlotFillingRepository(TestRepository):
|
|
def prepare_data(self, project):
|
|
self.confirmed_example = mommy.make("Example", project=project.item, text="confirmed")
|
|
self.category1 = mommy.make("Category", example=self.confirmed_example, user=project.admin)
|
|
self.category2 = mommy.make("Category", example=self.confirmed_example, user=project.annotator)
|
|
self.span = mommy.make("Span", example=self.confirmed_example, user=project.admin, start_offset=0, end_offset=1)
|
|
mommy.make("ExampleState", example=self.confirmed_example, confirmed_by=project.admin)
|
|
self.unconfirmed_example = mommy.make("Example", project=project.item, text="unconfirmed")
|
|
|
|
def test_list(self):
|
|
project = prepare_project(INTENT_DETECTION_AND_SLOT_FILLING)
|
|
repository = IntentDetectionSlotFillingRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": {
|
|
"cats": [self.category1.label.text],
|
|
"entities": [(self.span.start_offset, self.span.end_offset, self.span.label.text)],
|
|
},
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": {
|
|
"cats": [self.category2.label.text],
|
|
"entities": [],
|
|
},
|
|
"user": project.annotator.username,
|
|
},
|
|
{"data": self.unconfirmed_example.text, "label": {"cats": [], "entities": []}, "user": "unknown"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_on_collaborative_annotation(self):
|
|
project = prepare_project(INTENT_DETECTION_AND_SLOT_FILLING, collaborative_annotation=True)
|
|
repository = IntentDetectionSlotFillingRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": {
|
|
"cats": [self.category1.label.text, self.category2.label.text],
|
|
"entities": [(self.span.start_offset, self.span.end_offset, self.span.label.text)],
|
|
},
|
|
"user": "all",
|
|
},
|
|
{"data": self.unconfirmed_example.text, "label": {"cats": [], "entities": []}, "user": "all"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_confirmed_example_only(self):
|
|
project = prepare_project(INTENT_DETECTION_AND_SLOT_FILLING)
|
|
repository = IntentDetectionSlotFillingRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": {
|
|
"cats": [self.category1.label.text],
|
|
"entities": [(self.span.start_offset, self.span.end_offset, self.span.label.text)],
|
|
},
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": {
|
|
"cats": [self.category2.label.text],
|
|
"entities": [],
|
|
},
|
|
"user": project.annotator.username,
|
|
},
|
|
]
|
|
self.assert_records(repository, expected, confirmed_only=True)
|
|
|
|
|
|
class TestSequenceLabelingRepository(TestRepository):
|
|
def prepare_data(self, project):
|
|
self.confirmed_example = mommy.make("Example", project=project.item, text="confirmed")
|
|
self.span1 = mommy.make(
|
|
"Span", example=self.confirmed_example, user=project.admin, start_offset=0, end_offset=1
|
|
)
|
|
self.span2 = mommy.make(
|
|
"Span", example=self.confirmed_example, user=project.annotator, start_offset=1, end_offset=2
|
|
)
|
|
mommy.make("ExampleState", example=self.confirmed_example, confirmed_by=project.admin)
|
|
self.unconfirmed_example = mommy.make("Example", project=project.item, text="unconfirmed")
|
|
|
|
def test_list(self):
|
|
project = prepare_project(SEQUENCE_LABELING)
|
|
repository = SequenceLabelingRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [(self.span1.start_offset, self.span1.end_offset, self.span1.label.text)],
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [(self.span2.start_offset, self.span2.end_offset, self.span2.label.text)],
|
|
"user": project.annotator.username,
|
|
},
|
|
{"data": self.unconfirmed_example.text, "label": [], "user": "unknown"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_on_collaborative_annotation(self):
|
|
project = prepare_project(SEQUENCE_LABELING, collaborative_annotation=True)
|
|
repository = SequenceLabelingRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [
|
|
(self.span1.start_offset, self.span1.end_offset, self.span1.label.text),
|
|
(self.span2.start_offset, self.span2.end_offset, self.span2.label.text),
|
|
],
|
|
"user": "all",
|
|
},
|
|
{"data": self.unconfirmed_example.text, "label": [], "user": "all"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_confirmed_example_only(self):
|
|
project = prepare_project(SEQUENCE_LABELING)
|
|
repository = SequenceLabelingRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [(self.span1.start_offset, self.span1.end_offset, self.span1.label.text)],
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.text,
|
|
"label": [(self.span2.start_offset, self.span2.end_offset, self.span2.label.text)],
|
|
"user": project.annotator.username,
|
|
},
|
|
]
|
|
self.assert_records(repository, expected, confirmed_only=True)
|
|
|
|
|
|
class TestRelationExtractionRepository(TestRepository):
|
|
def test_list(self):
|
|
project = prepare_project(SEQUENCE_LABELING, use_relation=True)
|
|
confirmed_example = mommy.make("Example", project=project.item, text="example")
|
|
span1 = mommy.make("Span", example=confirmed_example, user=project.admin, start_offset=0, end_offset=1)
|
|
span2 = mommy.make("Span", example=confirmed_example, user=project.admin, start_offset=1, end_offset=2)
|
|
relation = mommy.make("Relation", from_id=span1, to_id=span2, example=confirmed_example, user=project.admin)
|
|
mommy.make("ExampleState", example=confirmed_example, confirmed_by=project.admin)
|
|
unconfirmed_example = mommy.make("Example", project=project.item, text="unconfirmed")
|
|
repository = RelationExtractionRepository(project.item)
|
|
expected = [
|
|
{
|
|
"data": confirmed_example.text,
|
|
"label": {
|
|
"entities": [
|
|
{
|
|
"id": span1.id,
|
|
"start_offset": span1.start_offset,
|
|
"end_offset": span1.end_offset,
|
|
"label": span1.label.text,
|
|
},
|
|
{
|
|
"id": span2.id,
|
|
"start_offset": span2.start_offset,
|
|
"end_offset": span2.end_offset,
|
|
"label": span2.label.text,
|
|
},
|
|
],
|
|
"relations": [
|
|
{"id": relation.id, "from_id": span1.id, "to_id": span2.id, "type": relation.type.text}
|
|
],
|
|
},
|
|
"user": project.admin.username,
|
|
},
|
|
{"data": unconfirmed_example.text, "label": {"entities": [], "relations": []}, "user": "unknown"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_on_collaborative_annotation(self):
|
|
project = prepare_project(SEQUENCE_LABELING, collaborative_annotation=True, use_relation=True)
|
|
example = mommy.make("Example", project=project.item, text="example")
|
|
span1 = mommy.make("Span", example=example, user=project.admin, start_offset=0, end_offset=1)
|
|
span2 = mommy.make("Span", example=example, user=project.annotator, start_offset=1, end_offset=2)
|
|
relation = mommy.make("Relation", from_id=span1, to_id=span2, example=example, user=project.admin)
|
|
repository = RelationExtractionRepository(project.item)
|
|
expected = [
|
|
{
|
|
"data": example.text,
|
|
"label": {
|
|
"entities": [
|
|
{
|
|
"id": span1.id,
|
|
"start_offset": span1.start_offset,
|
|
"end_offset": span1.end_offset,
|
|
"label": span1.label.text,
|
|
},
|
|
{
|
|
"id": span2.id,
|
|
"start_offset": span2.start_offset,
|
|
"end_offset": span2.end_offset,
|
|
"label": span2.label.text,
|
|
},
|
|
],
|
|
"relations": [
|
|
{"id": relation.id, "from_id": span1.id, "to_id": span2.id, "type": relation.type.text}
|
|
],
|
|
},
|
|
"user": "all",
|
|
}
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_confirmed_example_only(self):
|
|
project = prepare_project(SEQUENCE_LABELING, use_relation=True)
|
|
confirmed_example = mommy.make("Example", project=project.item, text="example")
|
|
span1 = mommy.make("Span", example=confirmed_example, user=project.admin, start_offset=0, end_offset=1)
|
|
span2 = mommy.make("Span", example=confirmed_example, user=project.admin, start_offset=1, end_offset=2)
|
|
relation = mommy.make("Relation", from_id=span1, to_id=span2, example=confirmed_example, user=project.admin)
|
|
mommy.make("ExampleState", example=confirmed_example, confirmed_by=project.admin)
|
|
mommy.make("Example", project=project.item, text="unconfirmed")
|
|
repository = RelationExtractionRepository(project.item)
|
|
expected = [
|
|
{
|
|
"data": confirmed_example.text,
|
|
"label": {
|
|
"entities": [
|
|
{
|
|
"id": span1.id,
|
|
"start_offset": span1.start_offset,
|
|
"end_offset": span1.end_offset,
|
|
"label": span1.label.text,
|
|
},
|
|
{
|
|
"id": span2.id,
|
|
"start_offset": span2.start_offset,
|
|
"end_offset": span2.end_offset,
|
|
"label": span2.label.text,
|
|
},
|
|
],
|
|
"relations": [
|
|
{"id": relation.id, "from_id": span1.id, "to_id": span2.id, "type": relation.type.text}
|
|
],
|
|
},
|
|
"user": project.admin.username,
|
|
},
|
|
]
|
|
self.assert_records(repository, expected, confirmed_only=True)
|
|
|
|
|
|
class TestSpeech2TextRepository(TestRepository):
|
|
def prepare_data(self, project):
|
|
self.confirmed_example = mommy.make("Example", project=project.item)
|
|
self.text1 = mommy.make("TextLabel", example=self.confirmed_example, user=project.admin)
|
|
self.text2 = mommy.make("TextLabel", example=self.confirmed_example, user=project.annotator)
|
|
mommy.make("ExampleState", example=self.confirmed_example, confirmed_by=project.admin)
|
|
self.unconfirmed_example = mommy.make("Example", project=project.item, text="unconfirmed")
|
|
|
|
def test_list(self):
|
|
project = prepare_project(SPEECH2TEXT)
|
|
repository = Speech2TextRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.upload_name,
|
|
"label": [self.text1.text],
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.upload_name,
|
|
"label": [self.text2.text],
|
|
"user": project.annotator.username,
|
|
},
|
|
{"data": self.unconfirmed_example.upload_name, "label": [], "user": "unknown"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_on_collaborative_annotation(self):
|
|
project = prepare_project(SPEECH2TEXT, collaborative_annotation=True)
|
|
repository = Speech2TextRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.upload_name,
|
|
"label": [self.text1.text, self.text2.text],
|
|
"user": "all",
|
|
},
|
|
{"data": self.unconfirmed_example.upload_name, "label": [], "user": "all"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_confirmed_example_only(self):
|
|
project = prepare_project(SPEECH2TEXT)
|
|
repository = Speech2TextRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.upload_name,
|
|
"label": [self.text1.text],
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.upload_name,
|
|
"label": [self.text2.text],
|
|
"user": project.annotator.username,
|
|
},
|
|
]
|
|
self.assert_records(repository, expected, confirmed_only=True)
|
|
|
|
|
|
class TestFileRepository(TestRepository):
|
|
def prepare_data(self, project):
|
|
self.confirmed_example = mommy.make("Example", project=project.item, text="example")
|
|
self.category1 = mommy.make("Category", example=self.confirmed_example, user=project.admin)
|
|
self.category2 = mommy.make("Category", example=self.confirmed_example, user=project.annotator)
|
|
mommy.make("ExampleState", example=self.confirmed_example, confirmed_by=project.admin)
|
|
self.unconfirmed_example = mommy.make("Example", project=project.item, text="unconfirmed")
|
|
|
|
def test_list(self):
|
|
project = prepare_project(IMAGE_CLASSIFICATION)
|
|
repository = FileRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.upload_name,
|
|
"label": [self.category1.label.text],
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.upload_name,
|
|
"label": [self.category2.label.text],
|
|
"user": project.annotator.username,
|
|
},
|
|
{"data": self.unconfirmed_example.upload_name, "label": [], "user": "unknown"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_on_collaborative_annotation(self):
|
|
project = prepare_project(IMAGE_CLASSIFICATION, collaborative_annotation=True)
|
|
repository = FileRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.upload_name,
|
|
"label": [self.category1.label.text, self.category2.label.text],
|
|
"user": "all",
|
|
},
|
|
{"data": self.unconfirmed_example.upload_name, "label": [], "user": "all"},
|
|
]
|
|
self.assert_records(repository, expected)
|
|
|
|
def test_list_confirmed_example_only(self):
|
|
project = prepare_project(IMAGE_CLASSIFICATION)
|
|
repository = FileRepository(project.item)
|
|
self.prepare_data(project)
|
|
expected = [
|
|
{
|
|
"data": self.confirmed_example.upload_name,
|
|
"label": [self.category1.label.text],
|
|
"user": project.admin.username,
|
|
},
|
|
{
|
|
"data": self.confirmed_example.upload_name,
|
|
"label": [self.category2.label.text],
|
|
"user": project.annotator.username,
|
|
},
|
|
]
|
|
self.assert_records(repository, expected, confirmed_only=True)
|