Browse Source

Merge pull request #1652 from doccano/enhancement/separateLabelApp

[Enhancement] Separate label app
pull/1653/head
Hiroki Nakayama 2 years ago
committed by GitHub
parent
commit
1bc9d73523
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
40 changed files with 1037 additions and 832 deletions
  1. 2
      Pipfile
  2. 24
      backend/api/admin.py
  3. 5
      backend/api/exceptions.py
  4. 75
      backend/api/managers.py
  5. 88
      backend/api/migrations/0031_auto_20220127_0032.py
  6. 116
      backend/api/models.py
  7. 14
      backend/api/permissions.py
  8. 73
      backend/api/serializers.py
  9. 125
      backend/api/tests/test_models.py
  10. 142
      backend/api/tests/test_span.py
  11. 46
      backend/api/urls.py
  12. 63
      backend/api/views/tasks/base.py
  13. 18
      backend/api/views/tasks/category.py
  14. 63
      backend/api/views/tasks/relation.py
  15. 13
      backend/api/views/tasks/span.py
  16. 13
      backend/api/views/tasks/text.py
  17. 1
      backend/app/settings.py
  18. 1
      backend/app/urls.py
  19. 7
      backend/auto_labeling/pipeline/labels.py
  20. 2
      backend/auto_labeling/tests/test_views.py
  21. 6
      backend/data_import/pipeline/labels.py
  22. 4
      backend/data_import/tests/test_tasks.py
  23. 0
      backend/labels/__init__.py
  24. 23
      backend/labels/admin.py
  25. 6
      backend/labels/apps.py
  26. 76
      backend/labels/managers.py
  27. 118
      backend/labels/migrations/0001_initial.py
  28. 20
      backend/labels/migrations/0002_rename_annotationrelations_relation.py
  29. 0
      backend/labels/migrations/__init__.py
  30. 119
      backend/labels/models.py
  31. 15
      backend/labels/permissions.py
  32. 69
      backend/labels/serializers.py
  33. 0
      backend/labels/tests/__init__.py
  34. 25
      backend/labels/tests/test_category.py
  35. 246
      backend/labels/tests/test_span.py
  36. 20
      backend/labels/tests/test_text_label.py
  37. 49
      backend/labels/tests/test_views.py
  38. 50
      backend/labels/urls.py
  39. 125
      backend/labels/views.py
  40. 7
      backend/metrics/views.py

2
Pipfile

@ -58,7 +58,7 @@ python_version = "3.8"
[scripts]
isort = "isort api -c --skip migrations"
flake8 = "flake8 --filename \"*.py\" --extend-exclude \"api/migrations\""
flake8 = "flake8 --filename \"*.py\" --extend-exclude \"*/migrations\""
wait_for_db = "python manage.py wait_for_db"
test = "python manage.py test --pattern=\"test*.py\""
migrate = "python manage.py migrate"

24
backend/api/admin.py

@ -1,8 +1,8 @@
from django.contrib import admin
from .models import (Category, CategoryType, Comment, Example, Project,
Seq2seqProject, SequenceLabelingProject, Span, SpanType,
Tag, TextClassificationProject, TextLabel)
from .models import (CategoryType, Comment, Example, Project, Seq2seqProject,
SequenceLabelingProject, SpanType, Tag,
TextClassificationProject)
class LabelAdmin(admin.ModelAdmin):
@ -31,21 +31,6 @@ class ProjectAdmin(admin.ModelAdmin):
search_fields = ('name',)
class SpanAdmin(admin.ModelAdmin):
    """Admin changelist options for span annotations."""

    # Default sort order of the changelist.
    ordering = ('example',)
    # Columns rendered in the changelist table.
    list_display = (
        'example',
        'label',
        'start_offset',
        'user',
    )
class CategoryAdmin(admin.ModelAdmin):
    """Admin changelist options for category annotations."""

    # Default sort order of the changelist.
    ordering = ('example',)
    # Columns rendered in the changelist table.
    list_display = (
        'example',
        'label',
        'user',
    )
class TextLabelAdmin(admin.ModelAdmin):
    """Admin changelist options for text-label annotations."""

    # Default sort order of the changelist.
    ordering = ('example',)
    # Columns rendered in the changelist table.
    list_display = (
        'example',
        'text',
        'user',
    )
class TagAdmin(admin.ModelAdmin):
list_display = ('project', 'text', )
ordering = ('project', 'text', )
@ -58,9 +43,6 @@ class CommentAdmin(admin.ModelAdmin):
search_fields = ('user',)
admin.site.register(Category, CategoryAdmin)
admin.site.register(Span, SpanAdmin)
admin.site.register(TextLabel, TextLabelAdmin)
admin.site.register(CategoryType, CategoryTypeAdmin)
admin.site.register(SpanType, SpanTypeAdmin)
admin.site.register(Example, ExampleAdmin)

5
backend/api/exceptions.py

@ -5,8 +5,3 @@ from rest_framework.exceptions import APIException
class LabelValidationError(APIException):
    """API exception reported as HTTP 400 for a duplicate label name or shortcut key."""

    default_detail = 'You cannot create a label with same name or shortcut key.'
    status_code = status.HTTP_400_BAD_REQUEST
class AnnotationRelationValidationError(APIException):
    """API exception reported as HTTP 400 for an invalid annotation relation."""

    default_detail = 'You cannot create an annotation relation between the same annotation.'
    status_code = status.HTTP_400_BAD_REQUEST

75
backend/api/managers.py

@ -1,81 +1,6 @@
from django.db.models import Count, Manager
class AnnotationManager(Manager):
    """Base manager shared by the annotation models.

    Subclasses must implement ``can_annotate``.
    """

    def calc_label_distribution(self, examples, members, labels):
        """Calculate label distribution.

        Args:
            examples: example queryset.
            members: user queryset.
            labels: label queryset.

        Returns:
            label distribution per user. Every member maps to a dict holding
            a count (0 by default) for every label text.

        Examples:
            >>> self.calc_label_distribution(examples, members, labels)
            {'admin': {'positive': 10, 'negative': 5}}
        """
        label_texts = [label.text for label in labels]
        distribution = {member.username: dict.fromkeys(label_texts, 0) for member in members}
        items = (
            self.filter(example_id__in=examples)
            .values('user__username', 'label__text')
            .annotate(count=Count('label__text'))
        )
        for item in items:
            # The query is filtered by example only, so it can return rows for
            # users that are not in ``members`` (or labels not in ``labels``).
            # Skip those instead of failing with a KeyError.
            per_user = distribution.get(item['user__username'])
            label_text = item['label__text']
            if per_user is None or label_text not in per_user:
                continue
            per_user[label_text] = item['count']
        return distribution

    def get_labels(self, label, project):
        """Return the annotations on ``label.example`` that compete with ``label``.

        In a collaborative project every user's annotations are returned;
        otherwise only those of ``label.user``.
        """
        if project.collaborative_annotation:
            return self.filter(example=label.example)
        return self.filter(example=label.example, user=label.user)

    def can_annotate(self, label, project) -> bool:
        """Tell whether ``label`` may be added to ``project``; subclass hook."""
        raise NotImplementedError('Please implement this method in the subclass')

    def filter_annotatable_labels(self, labels, project):
        """Return the items of ``labels`` for which ``can_annotate`` is true."""
        return [label for label in labels if self.can_annotate(label, project)]
class CategoryManager(AnnotationManager):
    """Manager deciding whether a category may be attached to an example."""

    def can_annotate(self, label, project) -> bool:
        """Return True when no conflicting category exists yet.

        For single-class projects any existing category conflicts; otherwise
        only a category with the same label type does.
        """
        exclusive = project.single_class_classification
        existing = self.get_labels(label, project)
        if not exclusive:
            existing = existing.filter(label=label.label)
        return not existing.exists()
class SpanManager(AnnotationManager):
    """Manager deciding whether a span may be attached to an example."""

    def can_annotate(self, label, project) -> bool:
        """Return True when overlapping is allowed or no existing span overlaps ``label``."""
        allow_overlap = getattr(project, 'allow_overlapping', False)
        existing_spans = self.get_labels(label, project)
        if allow_overlap:
            return True
        return not any(existing.is_overlapping(label) for existing in existing_spans)
class TextLabelManager(AnnotationManager):
    """Manager deciding whether a text label may be attached to an example."""

    def can_annotate(self, label, project) -> bool:
        """Return True unless an existing text label has the same text as ``label``."""
        existing_texts = self.get_labels(label, project)
        return not any(existing.is_same_text(label) for existing in existing_texts)
class ExampleManager(Manager):
def bulk_create(self, objs, batch_size=None, ignore_conflicts=False):

88
backend/api/migrations/0031_auto_20220127_0032.py

@ -0,0 +1,88 @@
# Generated by Django 3.2.11 on 2022-01-27 00:32
from django.db import migrations
class Migration(migrations.Migration):
    """Drop the annotation models from the ``api`` app state and rename their tables.

    The state side removes Category, Span, TextLabel and AnnotationRelations
    from this app's model state. The database side only changes the table
    names to ``labels_*``.
    """

    dependencies = [('api', '0030_delete_autolabelingconfig')]

    operations = [
        migrations.SeparateDatabaseAndState(
            state_operations=[
                # Category
                migrations.AlterUniqueTogether(name='category', unique_together=None),
                migrations.RemoveField(model_name='category', name='example'),
                migrations.RemoveField(model_name='category', name='label'),
                migrations.RemoveField(model_name='category', name='user'),
                # Span
                migrations.RemoveField(model_name='span', name='example'),
                migrations.RemoveField(model_name='span', name='label'),
                migrations.RemoveField(model_name='span', name='user'),
                # TextLabel
                migrations.AlterUniqueTogether(name='textlabel', unique_together=None),
                migrations.RemoveField(model_name='textlabel', name='example'),
                migrations.RemoveField(model_name='textlabel', name='user'),
                # Finally forget the models themselves.
                migrations.DeleteModel(name='AnnotationRelations'),
                migrations.DeleteModel(name='Category'),
                migrations.DeleteModel(name='Span'),
                migrations.DeleteModel(name='TextLabel'),
            ],
            database_operations=[
                migrations.AlterModelTable(name='Span', table='labels_span'),
                migrations.AlterModelTable(name='Category', table='labels_category'),
                migrations.AlterModelTable(name='TextLabel', table='labels_textlabel'),
                migrations.AlterModelTable(name='AnnotationRelations', table='labels_annotationrelations'),
            ],
        )
    ]

116
backend/api/models.py

@ -8,8 +8,7 @@ from django.core.exceptions import ValidationError
from django.db import models
from polymorphic.models import PolymorphicModel
from .managers import (AnnotationManager, CategoryManager, ExampleManager,
ExampleStateManager, SpanManager, TextLabelManager)
from .managers import ExampleManager, ExampleStateManager
DOCUMENT_CLASSIFICATION = 'DocumentClassification'
SEQUENCE_LABELING = 'SequenceLabeling'
@ -321,104 +320,6 @@ class Tag(models.Model):
return self.text
class Annotation(models.Model):
    """Abstract base holding the columns common to every annotation model."""

    objects = AnnotationManager()

    prob = models.FloatField(default=0.0)
    manual = models.BooleanField(default=False)
    user = models.ForeignKey(to=User, on_delete=models.CASCADE)
    created_at = models.DateTimeField(auto_now_add=True)  # set once on insert
    updated_at = models.DateTimeField(auto_now=True)  # refreshed on every save

    class Meta:
        abstract = True
class Category(Annotation):
    """A category label attached to an example by a user."""

    objects = CategoryManager()

    example = models.ForeignKey(to=Example, on_delete=models.CASCADE, related_name='categories')
    label = models.ForeignKey(to=CategoryType, on_delete=models.CASCADE)

    class Meta:
        # A user can attach a given category type to an example only once.
        unique_together = ('example', 'user', 'label')
class Span(Annotation):
    """A labelled character range ``[start_offset, end_offset)`` on an example."""

    objects = SpanManager()

    example = models.ForeignKey(to=Example, on_delete=models.CASCADE, related_name='spans')
    label = models.ForeignKey(to=SpanType, on_delete=models.CASCADE)
    start_offset = models.IntegerField()
    end_offset = models.IntegerField()

    def validate_unique(self, exclude=None):
        """Reject the span if it overlaps another one and the project forbids that.

        Raises:
            ValidationError: an overlapping span exists on the same example
                (from any user in collaborative projects, otherwise only from
                this span's user).
        """
        project = self.example.project
        allow_overlapping = getattr(project, 'allow_overlapping', False)
        is_collaborative = project.collaborative_annotation
        if allow_overlapping:
            super().validate_unique(exclude=exclude)
            return

        # The three ways another span can intersect this one.
        starts_inside = models.Q(start_offset__gte=self.start_offset, start_offset__lt=self.end_offset)
        ends_inside = models.Q(end_offset__gt=self.start_offset, end_offset__lte=self.end_offset)
        covers = models.Q(start_offset__lte=self.start_offset, end_offset__gte=self.end_offset)
        clashing = (
            Span.objects
            .exclude(id=self.id)  # a span never clashes with itself on update
            .filter(example=self.example)
            .filter(starts_inside | ends_inside | covers)
        )
        if not is_collaborative:
            clashing = clashing.filter(user=self.user)
        if clashing.exists():
            raise ValidationError('This overlapping is not allowed in this project.')

    def save(self, force_insert=False, force_update=False, using=None,
             update_fields=None):
        """Run full model validation, then save."""
        self.full_clean()
        super().save(
            force_insert=force_insert,
            force_update=force_update,
            using=using,
            update_fields=update_fields,
        )

    def is_overlapping(self, other: 'Span'):
        """Return True when this span and ``other`` share at least one position."""
        starts_in_other = other.start_offset <= self.start_offset < other.end_offset
        ends_in_other = other.start_offset < self.end_offset <= other.end_offset
        encloses_other = self.start_offset < other.start_offset and other.end_offset < self.end_offset
        return starts_in_other or ends_in_other or encloses_other

    class Meta:
        constraints = [
            models.CheckConstraint(
                check=models.Q(start_offset__gte=0),
                name='startOffset >= 0',
            ),
            models.CheckConstraint(
                check=models.Q(end_offset__gte=0),
                name='endOffset >= 0',
            ),
            models.CheckConstraint(
                check=models.Q(start_offset__lt=models.F('end_offset')),
                name='start < end',
            ),
        ]
class TextLabel(Annotation):
    """A free-text label attached to an example by a user."""

    objects = TextLabelManager()

    example = models.ForeignKey(to=Example, on_delete=models.CASCADE, related_name='texts')
    text = models.TextField()

    def is_same_text(self, other: 'TextLabel'):
        """Return True when both labels carry exactly the same text."""
        return other.text == self.text

    class Meta:
        # A user can attach a given text to an example only once.
        unique_together = ('example', 'user', 'text')
class RelationTypes(models.Model):
color = models.TextField()
name = models.TextField()
@ -429,18 +330,3 @@ class RelationTypes(models.Model):
class Meta:
unique_together = ('color', 'name')
class AnnotationRelations(models.Model):
    """A typed relation between two annotations, identified by their integer ids."""

    annotation_id_1 = models.IntegerField()
    annotation_id_2 = models.IntegerField()
    type = models.ForeignKey(
        RelationTypes,
        related_name='annotation_relations',
        on_delete=models.CASCADE,
    )
    timestamp = models.DateTimeField()
    user = models.ForeignKey(
        User,
        related_name='annotation_relations',
        on_delete=models.CASCADE,
    )
    project = models.ForeignKey(
        Project,
        related_name='annotation_relations',
        on_delete=models.CASCADE,
    )

    def __str__(self):
        # Shows the raw instance attribute dict.
        return str(self.__dict__)

    class Meta:
        unique_together = ('annotation_id_1', 'annotation_id_2', 'type', 'project')

14
backend/api/permissions.py

@ -1,20 +1,6 @@
from rest_framework.permissions import BasePermission
class CanEditAnnotation(BasePermission):
    """Allow access to superusers and to the user who owns the addressed annotation."""

    def __init__(self, queryset):
        """Remember the queryset in which the annotation is looked up."""
        super().__init__()
        self.queryset = queryset

    def has_permission(self, request, view):
        """Return True for superusers or when the ``annotation_id`` row belongs to the requester."""
        requester = request.user
        if not requester.is_superuser:
            target_id = view.kwargs.get('annotation_id')
            owned = self.queryset.filter(id=target_id, user=requester)
            return owned.exists()
        return True
class IsOwnComment(BasePermission):
@classmethod
def has_object_permission(cls, request, view, obj):

73
backend/api/serializers.py

@ -2,12 +2,12 @@ from rest_framework import serializers
from rest_framework.exceptions import ValidationError
from rest_polymorphic.serializers import PolymorphicSerializer
from .models import (AnnotationRelations, Category, CategoryType, Comment,
Example, ExampleState, ImageClassificationProject,
from .models import (CategoryType, Comment, Example, ExampleState,
ImageClassificationProject,
IntentDetectionAndSlotFillingProject, Label, Project,
RelationTypes, Seq2seqProject, SequenceLabelingProject,
Span, SpanType, Speech2textProject, Tag,
TextClassificationProject, TextLabel)
SpanType, Speech2textProject, Tag,
TextClassificationProject)
class LabelSerializer(serializers.ModelSerializer):
@ -226,61 +226,6 @@ class ProjectPolymorphicSerializer(PolymorphicSerializer):
}
class CategorySerializer(serializers.ModelSerializer):
    """Serializer for category annotations; ``user`` is read-only."""

    label = serializers.PrimaryKeyRelatedField(queryset=CategoryType.objects.all())
    example = serializers.PrimaryKeyRelatedField(queryset=Example.objects.all())

    class Meta:
        model = Category
        read_only_fields = ('user',)
        fields = ('id', 'prob', 'user', 'example', 'created_at', 'updated_at', 'label')
class SpanSerializer(serializers.ModelSerializer):
    """Serializer for span annotations; ``user`` is read-only."""

    label = serializers.PrimaryKeyRelatedField(queryset=SpanType.objects.all())
    example = serializers.PrimaryKeyRelatedField(queryset=Example.objects.all())

    class Meta:
        model = Span
        read_only_fields = ('user',)
        fields = (
            'id', 'prob', 'user', 'example', 'created_at', 'updated_at',
            'label', 'start_offset', 'end_offset',
        )
class TextLabelSerializer(serializers.ModelSerializer):
    """Serializer for text-label annotations; ``user`` is read-only."""

    example = serializers.PrimaryKeyRelatedField(queryset=Example.objects.all())

    class Meta:
        model = TextLabel
        read_only_fields = ('user',)
        fields = ('id', 'prob', 'user', 'example', 'created_at', 'updated_at', 'text')
class RelationTypesSerializer(serializers.ModelSerializer):
def validate(self, attrs):
@ -289,13 +234,3 @@ class RelationTypesSerializer(serializers.ModelSerializer):
class Meta:
model = RelationTypes
fields = ('id', 'color', 'name')
class AnnotationRelationsSerializer(serializers.ModelSerializer):
    """Serializer for annotation relations.

    No custom validation is applied: the inherited ``validate`` is used as is.
    """

    class Meta:
        model = AnnotationRelations
        fields = (
            'id',
            'annotation_id_1',
            'annotation_id_2',
            'type',
            'user',
            'timestamp',
        )

125
backend/api/tests/test_models.py

@ -3,9 +3,8 @@ from django.db.utils import IntegrityError
from django.test import TestCase
from model_mommy import mommy
from api.models import (IMAGE_CLASSIFICATION, SEQUENCE_LABELING, Category,
CategoryType, ExampleState, Span, SpanType, TextLabel,
generate_random_hex_color)
from api.models import (IMAGE_CLASSIFICATION, SEQUENCE_LABELING, CategoryType,
ExampleState, generate_random_hex_color)
from .api.utils import prepare_project
@ -61,103 +60,6 @@ class TestLabel(TestCase):
self.fail(msg=ValidationError)
class TestCategory(TestCase):
    """Database-level uniqueness of category annotations."""

    def test_uniqueness(self):
        existing = mommy.make('Category')
        duplicate = Category(example=existing.example, user=existing.user, label=existing.label)
        with self.assertRaises(IntegrityError):
            duplicate.save()
class TestSpan(TestCase):
    """Offset constraints and the overlap rule of the Span model."""

    # Ranges that all intersect the span (5, 10).
    CLASHING_RANGES = [(5, 10), (5, 11), (4, 10), (6, 9), (9, 15), (0, 6)]

    def setUp(self):
        self.project = prepare_project(SEQUENCE_LABELING, allow_overlapping=False)
        self.example = mommy.make('Example', project=self.project.item)
        self.user = self.project.users[0]

    def _make_own_span(self, start, end):
        """Create a span of ``self.user`` on ``self.example``."""
        return mommy.make('Span', example=self.example, start_offset=start, end_offset=end, user=self.user)

    def test_start_offset_is_not_negative(self):
        with self.assertRaises(IntegrityError):
            mommy.make('Span', start_offset=-1, end_offset=0)

    def test_end_offset_is_not_negative(self):
        with self.assertRaises(IntegrityError):
            mommy.make('Span', start_offset=-2, end_offset=-1)

    def test_start_offset_is_less_than_end_offset(self):
        with self.assertRaises(IntegrityError):
            mommy.make('Span', start_offset=0, end_offset=0)

    def test_unique_constraint(self):
        # Adjacent, non-intersecting ranges are all accepted.
        self._make_own_span(5, 10)
        self._make_own_span(0, 5)
        self._make_own_span(10, 15)

    def test_unique_constraint_violated(self):
        self._make_own_span(5, 10)
        for start, end in self.CLASHING_RANGES:
            with self.assertRaises(ValidationError):
                self._make_own_span(start, end)

    def test_unique_constraint_if_overlapping_is_allowed(self):
        project = prepare_project(SEQUENCE_LABELING, allow_overlapping=True)
        example = mommy.make('Example', project=project.item)
        user = project.users[0]
        mommy.make('Span', example=example, start_offset=5, end_offset=10, user=user)
        for start, end in self.CLASHING_RANGES:
            mommy.make('Span', example=example, start_offset=start, end_offset=end, user=user)

    def test_update(self):
        # Saving an existing span must not clash with itself.
        span = mommy.make('Span', example=self.example, start_offset=0, end_offset=5)
        span.end_offset = 6
        span.save()
class TestSpanWithoutCollaborativeMode(TestCase):
    """Outside collaborative mode, different users may create identical spans."""

    def setUp(self):
        self.project = prepare_project(SEQUENCE_LABELING, False, allow_overlapping=False)
        self.example = mommy.make('Example', project=self.project.item)

    def test_allow_users_to_create_same_spans(self):
        first_user, second_user = self.project.users[0], self.project.users[1]
        mommy.make('Span', example=self.example, start_offset=5, end_offset=10, user=first_user)
        mommy.make('Span', example=self.example, start_offset=5, end_offset=10, user=second_user)
class TestSpanWithCollaborativeMode(TestCase):
    """In collaborative mode, identical spans are allowed only if overlapping is."""

    def test_deny_users_to_create_same_spans(self):
        project = prepare_project(SEQUENCE_LABELING, True, allow_overlapping=False)
        example = mommy.make('Example', project=project.item)
        shared = dict(example=example, start_offset=5, end_offset=10)
        mommy.make('Span', user=project.users[0], **shared)
        with self.assertRaises(ValidationError):
            mommy.make('Span', user=project.users[1], **shared)

    def test_allow_users_to_create_same_spans_if_overlapping_is_allowed(self):
        project = prepare_project(SEQUENCE_LABELING, True, allow_overlapping=True)
        example = mommy.make('Example', project=project.item)
        shared = dict(example=example, start_offset=5, end_offset=10)
        mommy.make('Span', user=project.users[0], **shared)
        mommy.make('Span', user=project.users[1], **shared)
class TestSeq2seqAnnotation(TestCase):
    """Database-level uniqueness of text labels."""

    def test_uniqueness(self):
        existing = mommy.make('TextLabel')
        duplicate = TextLabel(example=existing.example, user=existing.user, text=existing.text)
        with self.assertRaises(IntegrityError):
            duplicate.save()
class TestGeneratedColor(TestCase):
def test_length(self):
@ -219,29 +121,6 @@ class TestExampleState(TestCase):
self.assertEqual(progress, {'total': 2, 'progress': expected_progress})
class TestLabelDistribution(TestCase):
    """``calc_label_distribution`` counts spans per user and per label text."""

    def setUp(self):
        self.project = prepare_project(SEQUENCE_LABELING, allow_overlapping=False)
        self.example = mommy.make('Example', project=self.project.item)
        self.user = self.project.users[0]

    def test_calc_label_distribution(self):
        label_a = mommy.make('SpanType', text='labelA', project=self.project.item)
        label_b = mommy.make('SpanType', text='labelB', project=self.project.item)
        mommy.make('Span', example=self.example, start_offset=5, end_offset=10, user=self.user, label=label_a)
        mommy.make('Span', example=self.example, start_offset=10, end_offset=15, user=self.user, label=label_b)

        distribution = Span.objects.calc_label_distribution(
            examples=self.project.item.examples.all(),
            members=self.project.users,
            labels=SpanType.objects.all()
        )

        # Everyone starts at zero for every label; only self.user has annotated.
        expected = {}
        for user in self.project.users:
            expected[user.username] = {label.text: 0 for label in SpanType.objects.all()}
        expected[self.user.username][label_a.text] = 1
        expected[self.user.username][label_b.text] = 1
        self.assertEqual(distribution, expected)
class TestExample(TestCase):
def test_text_project_returns_text_as_data_property(self):

142
backend/api/tests/test_span.py

@ -1,142 +0,0 @@
import abc
from django.test import TestCase
from model_mommy import mommy
from api.models import SEQUENCE_LABELING, Span
from .api.utils import prepare_project
class TestSpanAnnotation(abc.ABC, TestCase):
    """Shared fixture for ``Span.objects.can_annotate`` tests.

    Subclasses set ``overlapping`` and ``collaborative`` to choose the project
    configuration under test.
    """

    overlapping = False
    collaborative = False

    @classmethod
    def setUpTestData(cls):
        cls.project = prepare_project(
            SEQUENCE_LABELING,
            allow_overlapping=cls.overlapping,
            collaborative_annotation=cls.collaborative
        )
        cls.example = mommy.make('Example', project=cls.project.item)
        cls.label_type = mommy.make('SpanType', project=cls.project.item)
        members = cls.project.users
        cls.user = members[0]
        cls.another_user = members[1]
        # The candidate span is built but never saved.
        cls.span = Span(
            example=cls.example,
            label=cls.label_type,
            user=cls.user,
            start_offset=0,
            end_offset=5
        )

    def test_can_annotate_span_to_unannotated_data(self):
        self.assertTrue(Span.objects.can_annotate(self.span, self.project.item))
class NonCollaborativeMixin:
    """Checks shared by the non-collaborative project configurations."""

    def test_allow_another_user_to_annotate_same_span(self):
        # An identical span by a different user must not block the candidate.
        same_range = dict(start_offset=self.span.start_offset, end_offset=self.span.end_offset)
        mommy.make('Span', example=self.example, label=self.label_type, user=self.another_user, **same_range)
        self.assertTrue(Span.objects.can_annotate(self.span, self.project.item))
class TestNonOverlappingSpanAnnotation(TestSpanAnnotation, NonCollaborativeMixin):
    """Non-collaborative project with overlapping disabled."""

    overlapping = False
    collaborative = False

    def test_cannot_annotate_same_span_to_annotated_data(self):
        same_range = dict(start_offset=self.span.start_offset, end_offset=self.span.end_offset)
        mommy.make('Span', example=self.example, label=self.label_type, user=self.user, **same_range)
        self.assertFalse(Span.objects.can_annotate(self.span, self.project.item))

    def test_cannot_annotate_different_span_type_to_annotated_data(self):
        # No label passed: the existing span gets a label generated by mommy.
        same_range = dict(start_offset=self.span.start_offset, end_offset=self.span.end_offset)
        mommy.make('Span', example=self.example, user=self.user, **same_range)
        self.assertFalse(Span.objects.can_annotate(self.span, self.project.item))
class TestOverlappingSpanAnnotation(TestSpanAnnotation, NonCollaborativeMixin):
    """Non-collaborative project with overlapping enabled."""

    overlapping = True
    collaborative = False

    def test_can_annotate_same_span_to_annotated_data(self):
        same_range = dict(start_offset=self.span.start_offset, end_offset=self.span.end_offset)
        mommy.make('Span', example=self.example, label=self.label_type, user=self.user, **same_range)
        self.assertTrue(Span.objects.can_annotate(self.span, self.project.item))
class TestCollaborativeNonOverlappingSpanAnnotation(TestSpanAnnotation):
    """Collaborative project with overlapping disabled."""

    overlapping = False
    collaborative = True

    def test_deny_another_user_to_annotate_same_span_type(self):
        same_range = dict(start_offset=self.span.start_offset, end_offset=self.span.end_offset)
        mommy.make('Span', example=self.example, label=self.label_type, user=self.another_user, **same_range)
        self.assertFalse(Span.objects.can_annotate(self.span, self.project.item))

    def test_deny_another_user_to_annotate_different_span_type(self):
        # No label passed: the existing span gets a label generated by mommy.
        same_range = dict(start_offset=self.span.start_offset, end_offset=self.span.end_offset)
        mommy.make('Span', example=self.example, user=self.another_user, **same_range)
        self.assertFalse(Span.objects.can_annotate(self.span, self.project.item))
class TestCollaborativeOverlappingSpanAnnotation(TestSpanAnnotation):
    """Collaborative project with overlapping enabled."""

    overlapping = True
    collaborative = True

    def test_allow_another_user_to_annotate_same_span(self):
        same_range = dict(start_offset=self.span.start_offset, end_offset=self.span.end_offset)
        mommy.make('Span', example=self.example, label=self.label_type, user=self.another_user, **same_range)
        self.assertTrue(Span.objects.can_annotate(self.span, self.project.item))

46
backend/api/urls.py

@ -2,7 +2,6 @@ from django.urls import include, path
from .views import (comment, example, example_state, health, label, project,
tag, task)
from .views.tasks import category, relation, span, text
urlpatterns_project = [
path(
@ -60,51 +59,6 @@ urlpatterns_project = [
view=label.RelationTypeDetail.as_view(),
name='relation_type_detail'
),
path(
route='annotation_relations',
view=relation.RelationList.as_view(),
name='relation_types_list'
),
path(
route='annotation_relation-upload',
view=relation.RelationUploadAPI.as_view(),
name='annotation_relation-upload'
),
path(
route='annotation_relations/<int:annotation_relation_id>',
view=relation.RelationDetail.as_view(),
name='annotation_relation_detail'
),
path(
route='examples/<int:example_id>/categories',
view=category.CategoryListAPI.as_view(),
name='category_list'
),
path(
route='examples/<int:example_id>/categories/<int:annotation_id>',
view=category.CategoryDetailAPI.as_view(),
name='category_detail'
),
path(
route='examples/<int:example_id>/spans',
view=span.SpanListAPI.as_view(),
name='span_list'
),
path(
route='examples/<int:example_id>/spans/<int:annotation_id>',
view=span.SpanDetailAPI.as_view(),
name='span_detail'
),
path(
route='examples/<int:example_id>/texts',
view=text.TextLabelListAPI.as_view(),
name='text_list'
),
path(
route='examples/<int:example_id>/texts/<int:annotation_id>',
view=text.TextLabelDetailAPI.as_view(),
name='text_detail'
),
path(
route='tags',
view=tag.TagList.as_view(),

63
backend/api/views/tasks/base.py

@ -1,63 +0,0 @@
from functools import partial
from django.core.exceptions import ValidationError
from django.shortcuts import get_object_or_404
from rest_framework import generics, status
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from members.permissions import IsInProjectOrAdmin
from ...models import Project
from ...permissions import CanEditAnnotation
class BaseListAPI(generics.ListCreateAPIView):
    """List, create and bulk-delete the annotations of one example.

    Subclasses set ``annotation_class`` (the model) and ``serializer_class``.
    """

    annotation_class = None
    pagination_class = None
    permission_classes = [IsAuthenticated & IsInProjectOrAdmin]
    swagger_schema = None

    @property
    def project(self):
        """The project named by the ``project_id`` URL kwarg (404 if missing)."""
        return get_object_or_404(Project, pk=self.kwargs['project_id'])

    def get_queryset(self):
        """Return the example's annotations, limited to the requester unless collaborative."""
        queryset = self.annotation_class.objects.filter(example=self.kwargs['example_id'])
        if not self.project.collaborative_annotation:
            queryset = queryset.filter(user=self.request.user)
        return queryset

    def create(self, request, *args, **kwargs):
        """Create an annotation on the URL's example.

        A Django ``ValidationError`` raised while saving is turned into an
        HTTP 400 response carrying the error messages.
        """
        # The serializer requires ``example``; take it from the URL.
        # NOTE(review): assumes request.data is mutable (e.g. JSON body) — confirm.
        request.data['example'] = self.kwargs['example_id']
        try:
            # Unpack args/kwargs; the original passed the tuple and the dict
            # themselves as two positional arguments.
            response = super().create(request, *args, **kwargs)
        except ValidationError as err:
            response = Response({'detail': err.messages}, status=status.HTTP_400_BAD_REQUEST)
        return response

    def perform_create(self, serializer):
        """Save with the example from the URL and the requesting user as owner."""
        serializer.save(example_id=self.kwargs['example_id'], user=self.request.user)

    def delete(self, request, *args, **kwargs):
        """Delete every annotation returned by ``get_queryset`` and answer 204."""
        queryset = self.get_queryset()
        queryset.all().delete()
        return Response(status=status.HTTP_204_NO_CONTENT)
class BaseDetailAPI(generics.RetrieveUpdateDestroyAPIView):
    """Retrieve, update or delete the annotation named by ``annotation_id``."""

    lookup_url_kwarg = 'annotation_id'
    swagger_schema = None

    @property
    def project(self):
        """The project named by the ``project_id`` URL kwarg (404 if missing)."""
        return get_object_or_404(Project, pk=self.kwargs['project_id'])

    def get_permissions(self):
        """Pick the permission set from the project's collaboration mode.

        Outside collaborative mode the ``CanEditAnnotation`` check is added.
        """
        base_permission = IsAuthenticated & IsInProjectOrAdmin
        if not self.project.collaborative_annotation:
            owner_check = partial(CanEditAnnotation, self.queryset)
            self.permission_classes = [base_permission & owner_check]
        else:
            self.permission_classes = [base_permission]
        return super().get_permissions()

18
backend/api/views/tasks/category.py

@ -1,18 +0,0 @@
from ...models import Category
from ...serializers import CategorySerializer
from .base import BaseDetailAPI, BaseListAPI
class CategoryListAPI(BaseListAPI):
    """List and create category annotations for one example."""

    annotation_class = Category
    serializer_class = CategorySerializer

    def create(self, request, *args, **kwargs):
        """Create a category; in single-class projects existing ones are deleted first."""
        if self.project.single_class_classification:
            # NOTE(review): this delete is not rolled back here if the create
            # that follows fails — confirm whether that is acceptable.
            self.get_queryset().delete()
        # Unpack args/kwargs; the original passed the tuple and the dict
        # themselves as two positional arguments.
        return super().create(request, *args, **kwargs)
class CategoryDetailAPI(BaseDetailAPI):
    """Detail endpoint for a single category annotation."""

    serializer_class = CategorySerializer
    queryset = Category.objects.all()

63
backend/api/views/tasks/relation.py

@ -1,63 +0,0 @@
import json
from django.db import IntegrityError, transaction
from django.shortcuts import get_object_or_404
from rest_framework import generics, status
from rest_framework.exceptions import ParseError
from rest_framework.parsers import MultiPartParser
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView
from members.permissions import IsInProjectReadOnlyOrAdmin, IsProjectAdmin
from ...exceptions import AnnotationRelationValidationError
from ...models import AnnotationRelations, Project
from ...serializers import AnnotationRelationsSerializer
class RelationList(generics.ListCreateAPIView):
    """List, create and bulk-delete the annotation relations of one project."""

    serializer_class = AnnotationRelationsSerializer
    pagination_class = None
    permission_classes = [IsAuthenticated & IsInProjectReadOnlyOrAdmin]

    def get_queryset(self):
        """Return the relations of the project named in the URL (404 if missing)."""
        project = get_object_or_404(Project, pk=self.kwargs['project_id'])
        return project.annotation_relations.all()

    def perform_create(self, serializer):
        """Save the new relation under the project named in the URL."""
        project = get_object_or_404(Project, pk=self.kwargs['project_id'])
        serializer.save(project=project)

    def delete(self, request, *args, **kwargs):
        """Delete the relations whose ids are listed under ``ids`` in the body.

        Only relations of the URL's project are deleted. The original filtered
        over all relations, so ids from other projects were deleted too.

        Raises:
            ParseError: the request body has no ``ids`` entry.
        """
        if 'ids' not in request.data:
            # Previously a KeyError, i.e. an HTTP 500.
            raise ParseError('ids is required.')
        delete_ids = request.data['ids']
        self.get_queryset().filter(pk__in=delete_ids).delete()
        return Response(status=status.HTTP_204_NO_CONTENT)
class RelationDetail(generics.RetrieveUpdateDestroyAPIView):
    """Retrieve, update or delete one annotation relation of a project."""

    queryset = AnnotationRelations.objects.all()
    serializer_class = AnnotationRelationsSerializer
    lookup_url_kwarg = 'annotation_relation_id'
    permission_classes = [IsAuthenticated & IsInProjectReadOnlyOrAdmin]

    def get_queryset(self):
        """Limit the lookup to relations of the project named in the URL.

        Without this, a relation of any project could be addressed by id
        through another project's URL.
        """
        return super().get_queryset().filter(project_id=self.kwargs['project_id'])
class RelationUploadAPI(APIView):
    """Create annotation relations in bulk from an uploaded JSON file."""

    parser_classes = (MultiPartParser,)
    permission_classes = [IsAuthenticated & IsProjectAdmin]

    @transaction.atomic
    def post(self, request, *args, **kwargs):
        """Parse the ``file`` upload as a JSON list and save it under the project.

        Raises:
            ParseError: no ``file`` part was sent, or it is not valid JSON.
            AnnotationRelationValidationError: saving hit an ``IntegrityError``.
        """
        if 'file' not in request.data:
            raise ParseError('Empty content')
        project = get_object_or_404(Project, pk=kwargs['project_id'])
        try:
            # Read the uploaded file itself; the original handed the whole
            # request.data mapping to json.load, which has no read() method.
            annotation_relations = json.load(request.data['file'])
            serializer = AnnotationRelationsSerializer(data=annotation_relations, many=True)
            serializer.is_valid(raise_exception=True)
            serializer.save(project=project)
            return Response(status=status.HTTP_201_CREATED)
        except json.decoder.JSONDecodeError:
            raise ParseError('The file format is invalid.')
        except IntegrityError:
            raise AnnotationRelationValidationError

13
backend/api/views/tasks/span.py

@ -1,13 +0,0 @@
from ...models import Span
from ...serializers import SpanSerializer
from .base import BaseDetailAPI, BaseListAPI
class SpanListAPI(BaseListAPI):
    """List endpoint for the span annotations of one example."""

    serializer_class = SpanSerializer
    annotation_class = Span
class SpanDetailAPI(BaseDetailAPI):
    """Detail endpoint for a single span annotation."""

    serializer_class = SpanSerializer
    queryset = Span.objects.all()

13
backend/api/views/tasks/text.py

@ -1,13 +0,0 @@
from ...models import TextLabel
from ...serializers import TextLabelSerializer
from .base import BaseDetailAPI, BaseListAPI
class TextLabelListAPI(BaseListAPI):
    """List endpoint for the text labels of one example."""

    serializer_class = TextLabelSerializer
    annotation_class = TextLabel
class TextLabelDetailAPI(BaseDetailAPI):
    """Detail endpoint for a single text label."""

    serializer_class = TextLabelSerializer
    queryset = TextLabel.objects.all()

1
backend/app/settings.py

@ -59,6 +59,7 @@ INSTALLED_APPS = [
'data_import.apps.DataImportConfig',
'data_export.apps.DataExportConfig',
'auto_labeling.apps.AutoLabelingConfig',
'labels.apps.LabelsConfig',
'rest_framework',
'rest_framework.authtoken',
'django_filters',

1
backend/app/urls.py

@ -48,6 +48,7 @@ urlpatterns += [
path('v1/projects/<int:project_id>/', include('members.urls')),
path('v1/projects/<int:project_id>/metrics/', include('metrics.urls')),
path('v1/projects/<int:project_id>/', include('auto_labeling.urls')),
path('v1/projects/<int:project_id>/', include('labels.urls')),
path('swagger/', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
re_path('', TemplateView.as_view(template_name='index.html')),
]

7
backend/auto_labeling/pipeline/labels.py

@ -4,7 +4,8 @@ from typing import List
from auto_labeling_pipeline.labels import Labels
from django.contrib.auth.models import User
from api.models import Project, Example, Annotation, CategoryType, Category, SpanType, Span, TextLabel
from api.models import Project, Example, CategoryType, SpanType
from labels.models import Label, Category, Span, TextLabel
class LabelCollection(abc.ABC):
@ -14,7 +15,7 @@ class LabelCollection(abc.ABC):
def __init__(self, labels):
self.labels = labels
def transform(self, project: Project, example: Example, user: User) -> List[Annotation]:
def transform(self, project: Project, example: Example, user: User) -> List[Label]:
mapping = {
c.text: c for c in self.label_type.objects.filter(project=project)
}
@ -47,7 +48,7 @@ class Spans(LabelCollection):
class Texts(LabelCollection):
model = TextLabel
def transform(self, project: Project, example: Example, user: User) -> List[Annotation]:
def transform(self, project: Project, example: Example, user: User) -> List[Label]:
annotations = []
for label in self.labels:
label['example'] = example

2
backend/auto_labeling/tests/test_views.py

@ -8,7 +8,7 @@ from rest_framework import status
from rest_framework.reverse import reverse
from api.models import DOCUMENT_CLASSIFICATION, SEQUENCE_LABELING, SEQ2SEQ
from api.models import Category, Span, TextLabel
from labels.models import Category, Span, TextLabel
from api.tests.api.utils import CRUDMixin, make_doc, prepare_project
from auto_labeling.pipeline.labels import Categories, Spans, Texts

6
backend/data_import/pipeline/labels.py

@ -3,10 +3,10 @@ from typing import Any, Dict, Optional, Union
from pydantic import BaseModel, validator
from api.models import Category, CategoryType
from api.models import CategoryType
from api.models import Label as LabelModel
from api.models import Project, Span, SpanType
from api.models import TextLabel as TL
from api.models import Project, SpanType
from labels.models import Category, Span, TextLabel as TL
class Label(BaseModel, abc.ABC):

4
backend/data_import/tests/test_tasks.py

@ -5,8 +5,8 @@ from django.test import TestCase
from data_import.celery_tasks import import_dataset
from api.models import (DOCUMENT_CLASSIFICATION,
INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
SEQUENCE_LABELING, Category, CategoryType, Example, Span,
SpanType)
SEQUENCE_LABELING, CategoryType, Example, SpanType)
from labels.models import Category, Span
from api.tests.api.utils import prepare_project

backend/api/views/tasks/__init__.py → backend/labels/__init__.py

23
backend/labels/admin.py

@ -0,0 +1,23 @@
from django.contrib import admin
from .models import Category, Span, TextLabel
class SpanAdmin(admin.ModelAdmin):
    """Admin options for ``Span``: change-list columns and default ordering."""
    # Columns shown in the admin change list.
    list_display = ('example', 'label', 'start_offset', 'user')
    # Order the change list by the related example.
    ordering = ('example',)
class CategoryAdmin(admin.ModelAdmin):
    """Admin options for ``Category``: change-list columns and default ordering."""
    # Columns shown in the admin change list.
    list_display = ('example', 'label', 'user')
    # Order the change list by the related example.
    ordering = ('example',)
class TextLabelAdmin(admin.ModelAdmin):
    """Admin options for ``TextLabel``: change-list columns and default ordering."""
    # Columns shown in the admin change list.
    list_display = ('example', 'text', 'user')
    # Order the change list by the related example.
    ordering = ('example',)
# Register each label model with the default admin site, paired with its
# ModelAdmin options class.
admin.site.register(Category, CategoryAdmin)
admin.site.register(Span, SpanAdmin)
admin.site.register(TextLabel, TextLabelAdmin)

6
backend/labels/apps.py

@ -0,0 +1,6 @@
from django.apps import AppConfig
class LabelsConfig(AppConfig):
    """Django application configuration for the ``labels`` app."""
    # Implicit primary keys of this app's models use 64-bit auto fields.
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'labels'

76
backend/labels/managers.py

@ -0,0 +1,76 @@
from django.db.models import Manager, Count
class LabelManager(Manager):
    """Base manager shared by the label models.

    Provides the label-distribution statistic and the "may this label be
    added?" helpers. Subclasses must implement ``can_annotate``.
    """

    def calc_label_distribution(self, examples, members, labels):
        """Calculate label distribution.

        Counts found for a user who is not in ``members`` are ignored, so
        leftover labels from such a user cannot raise ``KeyError``.

        Args:
            examples: example queryset.
            members: user queryset.
            labels: label queryset.

        Returns:
            label distribution per user.

        Examples:
            >>> self.calc_label_distribution(examples, members, labels)
            {'admin': {'positive': 10, 'negative': 5}}
        """
        # Start every (member, label) pair at zero so absent pairs still appear.
        distribution = {member.username: {label.text: 0 for label in labels} for member in members}
        items = self.filter(example_id__in=examples)\
            .values('user__username', 'label__text')\
            .annotate(count=Count('label__text'))
        for item in items:
            per_user = distribution.get(item['user__username'])
            if per_user is None:
                # The query is filtered by example only, so it can return
                # labels of users that are not among ``members``.
                continue
            per_user[item['label__text']] = item['count']
        return distribution

    def get_labels(self, label, project):
        """Return the existing labels that ``label`` has to be checked against.

        In a collaborative project these are all labels on the same example;
        otherwise only the ones created by the same user.
        """
        if project.collaborative_annotation:
            return self.filter(example=label.example)
        else:
            return self.filter(example=label.example, user=label.user)

    def can_annotate(self, label, project) -> bool:
        """Tell whether ``label`` may be added to ``project``; subclasses implement this."""
        raise NotImplementedError('Please implement this method in the subclass')

    def filter_annotatable_labels(self, labels, project):
        """Return the labels from ``labels`` for which ``can_annotate`` is true."""
        return [label for label in labels if self.can_annotate(label, project)]
class CategoryManager(LabelManager):
    """Manager for ``Category`` labels."""

    def can_annotate(self, label, project) -> bool:
        """Return True when ``label`` does not clash with an existing category.

        With single-class classification any existing category blocks a new
        one; otherwise only a category with the same label type does.
        """
        exclusive = project.single_class_classification
        existing = self.get_labels(label, project)
        if not exclusive:
            existing = existing.filter(label=label.label)
        return not existing.exists()
class SpanManager(LabelManager):
    """Manager for ``Span`` labels."""

    def can_annotate(self, label, project) -> bool:
        """Return True when ``label`` may be added to ``project``.

        Always true if the project allows overlapping spans; otherwise true
        only if no existing span overlaps ``label``.
        """
        allow_overlap = getattr(project, 'allow_overlapping', False)
        existing = self.get_labels(label, project)
        if allow_overlap:
            return True
        return not any(span.is_overlapping(label) for span in existing)
class TextLabelManager(LabelManager):
    """Manager for ``TextLabel`` labels."""

    def can_annotate(self, label, project) -> bool:
        """Return True when no existing text label carries the same text as ``label``."""
        existing = self.get_labels(label, project)
        return not any(text.is_same_text(label) for text in existing)

118
backend/labels/migrations/0001_initial.py

@ -0,0 +1,118 @@
# Generated by Django 3.2.11 on 2022-01-27 00:32
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import django.db.models.expressions
class Migration(migrations.Migration):
    """Initial migration of the ``labels`` app.

    Every operation sits in the ``state_operations`` of a single
    ``SeparateDatabaseAndState`` whose ``database_operations`` list is empty,
    so applying it changes only Django's migration state and runs no SQL.
    """

    initial = True
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        # NOTE(review): presumably api 0031 prepares the underlying tables for
        # these models -- confirm against that migration.
        ('api', '0031_auto_20220127_0032'),
    ]
    operations = [
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.CreateModel(
                    name='Span',
                    fields=[
                        ('id',
                         models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                        ('prob', models.FloatField(default=0.0)),
                        ('manual', models.BooleanField(default=False)),
                        ('created_at', models.DateTimeField(auto_now_add=True)),
                        ('updated_at', models.DateTimeField(auto_now=True)),
                        ('start_offset', models.IntegerField()),
                        ('end_offset', models.IntegerField()),
                        ('example', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='spans',
                                                      to='api.example')),
                        ('label', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='api.spantype')),
                        ('user',
                         models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
                    ],
                ),
                migrations.CreateModel(
                    name='Category',
                    fields=[
                        ('id',
                         models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                        ('prob', models.FloatField(default=0.0)),
                        ('manual', models.BooleanField(default=False)),
                        ('created_at', models.DateTimeField(auto_now_add=True)),
                        ('updated_at', models.DateTimeField(auto_now=True)),
                        ('example',
                         models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='categories',
                                           to='api.example')),
                        (
                            'label', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='api.categorytype')),
                        ('user',
                         models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
                    ],
                ),
                # Still under its old name here.
                migrations.CreateModel(
                    name='AnnotationRelations',
                    fields=[
                        ('id',
                         models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                        ('annotation_id_1', models.IntegerField()),
                        ('annotation_id_2', models.IntegerField()),
                        ('timestamp', models.DateTimeField()),
                        ('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE,
                                                      related_name='annotation_relations', to='api.project')),
                        ('type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE,
                                                   related_name='annotation_relations', to='api.relationtypes')),
                        ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE,
                                                   related_name='annotation_relations', to=settings.AUTH_USER_MODEL)),
                    ],
                ),
                migrations.CreateModel(
                    name='TextLabel',
                    fields=[
                        ('id',
                         models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                        ('prob', models.FloatField(default=0.0)),
                        ('manual', models.BooleanField(default=False)),
                        ('created_at', models.DateTimeField(auto_now_add=True)),
                        ('updated_at', models.DateTimeField(auto_now=True)),
                        ('text', models.TextField()),
                        ('example', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='texts',
                                                      to='api.example')),
                        ('user',
                         models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
                    ],
                    options={
                        'unique_together': {('example', 'user', 'text')},
                    },
                ),
                # Span check constraints: non-negative offsets and start < end.
                migrations.AddConstraint(
                    model_name='span',
                    constraint=models.CheckConstraint(check=models.Q(('start_offset__gte', 0)),
                                                      name='startOffset >= 0'),
                ),
                migrations.AddConstraint(
                    model_name='span',
                    constraint=models.CheckConstraint(check=models.Q(('end_offset__gte', 0)), name='endOffset >= 0'),
                ),
                migrations.AddConstraint(
                    model_name='span',
                    constraint=models.CheckConstraint(
                        check=models.Q(('start_offset__lt', django.db.models.expressions.F('end_offset'))),
                        name='start < end'),
                ),
                migrations.AlterUniqueTogether(
                    name='category',
                    unique_together={('example', 'user', 'label')},
                ),
                migrations.AlterUniqueTogether(
                    name='annotationrelations',
                    unique_together={('annotation_id_1', 'annotation_id_2', 'type', 'project')},
                ),
            ],
            # Intentionally empty: no SQL is executed by this migration.
            database_operations=[]
        )
    ]

20
backend/labels/migrations/0002_rename_annotationrelations_relation.py

@ -0,0 +1,20 @@
# Generated by Django 3.2.11 on 2022-01-27 02:39
from django.conf import settings
from django.db import migrations
class Migration(migrations.Migration):
    """Rename the ``AnnotationRelations`` model of the ``labels`` app to ``Relation``."""

    dependencies = [
        ('api', '0031_auto_20220127_0032'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('labels', '0001_initial'),
    ]
    operations = [
        # A regular operation, i.e. not wrapped in SeparateDatabaseAndState.
        migrations.RenameModel(
            old_name='AnnotationRelations',
            new_name='Relation',
        ),
    ]

0
backend/labels/migrations/__init__.py

119
backend/labels/models.py

@ -0,0 +1,119 @@
from django.contrib.auth.models import User
from django.core.exceptions import ValidationError
from django.db import models
from .managers import LabelManager, CategoryManager, SpanManager, TextLabelManager
from api.models import Example, CategoryType, SpanType, RelationTypes, Project
class Label(models.Model):
    """Abstract base model holding the fields common to every label type."""
    objects = LabelManager()
    prob = models.FloatField(default=0.0)
    manual = models.BooleanField(default=False)
    # Deleting the user deletes the label as well (CASCADE).
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    # Set once when the row is created.
    created_at = models.DateTimeField(auto_now_add=True)
    # Refreshed on every save.
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        # No table of its own; concrete label models inherit the fields.
        abstract = True
class Category(Label):
    """A ``CategoryType`` assigned to an example by a user."""
    objects = CategoryManager()
    # Reverse accessor on Example is ``categories``.
    example = models.ForeignKey(
        to=Example,
        on_delete=models.CASCADE,
        related_name='categories'
    )
    label = models.ForeignKey(to=CategoryType, on_delete=models.CASCADE)

    class Meta:
        # A user can attach a given category type to an example only once.
        unique_together = (
            'example',
            'user',
            'label'
        )
class Span(Label):
    """A ``SpanType`` assigned to the character range of an example.

    The overlap checks treat the range as half-open
    ``[start_offset, end_offset)``: a span ending at 5 does not overlap one
    starting at 5.
    """
    objects = SpanManager()
    # Reverse accessor on Example is ``spans``.
    example = models.ForeignKey(
        to=Example,
        on_delete=models.CASCADE,
        related_name='spans'
    )
    label = models.ForeignKey(to=SpanType, on_delete=models.CASCADE)
    start_offset = models.IntegerField()
    end_offset = models.IntegerField()

    def validate_unique(self, exclude=None):
        """Reject this span if it overlaps another one and the project forbids that.

        When the project allows overlapping, only the inherited uniqueness
        checks run. Otherwise the span is compared with the other spans of
        the same example: all of them in a collaborative project, only this
        user's spans in a non-collaborative one.

        Raises:
            ValidationError: if a forbidden overlap exists.
        """
        allow_overlapping = getattr(self.example.project, 'allow_overlapping', False)
        is_collaborative = self.example.project.collaborative_annotation
        if allow_overlapping:
            super().validate_unique(exclude=exclude)
            return

        # Three ways another span can overlap this one: it starts inside
        # this span, it ends inside this span, or it covers this span.
        overlapping_span = Span.objects.exclude(id=self.id).filter(example=self.example).filter(
            models.Q(start_offset__gte=self.start_offset, start_offset__lt=self.end_offset) |
            models.Q(end_offset__gt=self.start_offset, end_offset__lte=self.end_offset) |
            models.Q(start_offset__lte=self.start_offset, end_offset__gte=self.end_offset)
        )
        if not is_collaborative:
            # Users annotate independently: only their own spans count.
            overlapping_span = overlapping_span.filter(user=self.user)
        if overlapping_span.exists():
            raise ValidationError('This overlapping is not allowed in this project.')

    def save(self, force_insert=False, force_update=False, using=None,
             update_fields=None):
        """Run full model validation, then save.

        ``full_clean`` is called first so that the overlap check in
        ``validate_unique`` applies to every save, not only to form input.
        """
        self.full_clean()
        # Forward by keyword so the call does not depend on the parameter
        # order of ``Model.save``.
        super().save(
            force_insert=force_insert,
            force_update=force_update,
            using=using,
            update_fields=update_fields,
        )

    def is_overlapping(self, other: 'Span') -> bool:
        """Return True if this span and ``other`` share at least one position."""
        return (other.start_offset <= self.start_offset < other.end_offset) or\
            (other.start_offset < self.end_offset <= other.end_offset) or\
            (self.start_offset < other.start_offset and other.end_offset < self.end_offset)

    class Meta:
        constraints = [
            models.CheckConstraint(check=models.Q(start_offset__gte=0), name='startOffset >= 0'),
            models.CheckConstraint(check=models.Q(end_offset__gte=0), name='endOffset >= 0'),
            models.CheckConstraint(check=models.Q(start_offset__lt=models.F('end_offset')), name='start < end')
        ]
class TextLabel(Label):
    """A free-text label attached to an example by a user."""
    objects = TextLabelManager()
    # Reverse accessor on Example is ``texts``.
    example = models.ForeignKey(
        to=Example,
        on_delete=models.CASCADE,
        related_name='texts'
    )
    text = models.TextField()

    def is_same_text(self, other: 'TextLabel') -> bool:
        """Return True if ``other`` carries exactly the same text."""
        return self.text == other.text

    class Meta:
        # A user can attach a given text to an example only once.
        unique_together = (
            'example',
            'user',
            'text'
        )
class Relation(models.Model):
    """A typed relation between two annotations, stored by their integer ids."""
    annotation_id_1 = models.IntegerField()
    annotation_id_2 = models.IntegerField()
    type = models.ForeignKey(RelationTypes, related_name='annotation_relations', on_delete=models.CASCADE)
    timestamp = models.DateTimeField()
    user = models.ForeignKey(User, related_name='annotation_relations', on_delete=models.CASCADE)
    project = models.ForeignKey(Project, related_name='annotation_relations', on_delete=models.CASCADE)

    def __str__(self):
        """Return the string form of the instance's attribute dictionary."""
        return str(self.__dict__)

    class Meta:
        # The same typed pair may exist only once per project.
        unique_together = ('annotation_id_1', 'annotation_id_2', 'type', 'project')

15
backend/labels/permissions.py

@ -0,0 +1,15 @@
from rest_framework.permissions import BasePermission
class CanEditLabel(BasePermission):
    """Allow access to superusers and to the user who owns the addressed label."""

    def __init__(self, queryset):
        """Remember the queryset in which label ownership is looked up."""
        super().__init__()
        self.queryset = queryset

    def has_permission(self, request, view):
        """Return True for superusers or when the label belongs to the requester.

        The label id is taken from the ``annotation_id`` URL keyword argument.
        """
        requester = request.user
        if not requester.is_superuser:
            label_id = view.kwargs.get('annotation_id')
            return self.queryset.filter(id=label_id, user=requester).exists()
        return True

69
backend/labels/serializers.py

@ -0,0 +1,69 @@
from rest_framework import serializers
from api.models import CategoryType, Example, SpanType
from .models import Category, Span, TextLabel, Relation
class CategorySerializer(serializers.ModelSerializer):
    """Serializer for ``Category``; ``label`` and ``example`` are given by primary key."""
    label = serializers.PrimaryKeyRelatedField(queryset=CategoryType.objects.all())
    example = serializers.PrimaryKeyRelatedField(queryset=Example.objects.all())

    class Meta:
        model = Category
        fields = (
            'id',
            'prob',
            'user',
            'example',
            'created_at',
            'updated_at',
            'label',
        )
        # ``user`` appears in output but cannot be set through the payload.
        read_only_fields = ('user',)
class SpanSerializer(serializers.ModelSerializer):
    """Serializer for ``Span``; ``label`` and ``example`` are given by primary key."""
    label = serializers.PrimaryKeyRelatedField(queryset=SpanType.objects.all())
    example = serializers.PrimaryKeyRelatedField(queryset=Example.objects.all())

    class Meta:
        model = Span
        fields = (
            'id',
            'prob',
            'user',
            'example',
            'created_at',
            'updated_at',
            'label',
            'start_offset',
            'end_offset',
        )
        # ``user`` appears in output but cannot be set through the payload.
        read_only_fields = ('user',)
class TextLabelSerializer(serializers.ModelSerializer):
    """Serializer for ``TextLabel``; ``example`` is given by primary key."""
    example = serializers.PrimaryKeyRelatedField(queryset=Example.objects.all())

    class Meta:
        model = TextLabel
        fields = (
            'id',
            'prob',
            'user',
            'example',
            'created_at',
            'updated_at',
            'text',
        )
        # ``user`` appears in output but cannot be set through the payload.
        read_only_fields = ('user',)
class RelationSerializer(serializers.ModelSerializer):
    """Serializer for ``Relation``.

    Object-level validation is the inherited ``ModelSerializer.validate``;
    no override is needed because nothing is added to it.
    """

    class Meta:
        model = Relation
        fields = ('id', 'annotation_id_1', 'annotation_id_2', 'type', 'user', 'timestamp')

0
backend/labels/tests/__init__.py

backend/api/tests/test_category.py → backend/labels/tests/test_category.py

@ -1,14 +1,15 @@
import abc
from django.db import IntegrityError
from django.test import TestCase
from model_mommy import mommy
from api.models import DOCUMENT_CLASSIFICATION, Category
from api.models import DOCUMENT_CLASSIFICATION
from labels.models import Category
from api.tests.api.utils import prepare_project
from .api.utils import prepare_project
class TestCategoryAnnotation(abc.ABC, TestCase):
class TestCategoryLabeling(abc.ABC, TestCase):
exclusive = True
collaborative = False
@ -53,7 +54,7 @@ class NonCollaborativeMixin:
self.assertTrue(can_annotate)
class TestExclusiveCategoryAnnotation(TestCategoryAnnotation, NonCollaborativeMixin):
class TestExclusiveCategoryLabeling(TestCategoryLabeling, NonCollaborativeMixin):
exclusive = True
collaborative = False
@ -63,7 +64,7 @@ class TestExclusiveCategoryAnnotation(TestCategoryAnnotation, NonCollaborativeMi
self.assertFalse(can_annotate)
class TestNonExclusiveCategoryAnnotation(TestCategoryAnnotation, NonCollaborativeMixin):
class TestNonExclusiveCategoryLabeling(TestCategoryLabeling, NonCollaborativeMixin):
exclusive = False
collaborative = False
@ -86,7 +87,7 @@ class CollaborativeMixin:
self.assertFalse(can_annotate)
class TestCollaborativeExclusiveCategoryAnnotation(TestCategoryAnnotation, CollaborativeMixin):
class TestCollaborativeExclusiveCategoryLabeling(TestCategoryLabeling, CollaborativeMixin):
exclusive = True
collaborative = True
@ -100,7 +101,7 @@ class TestCollaborativeExclusiveCategoryAnnotation(TestCategoryAnnotation, Colla
self.assertFalse(can_annotate)
class TestCollaborativeNonExclusiveCategoryAnnotation(TestCategoryAnnotation, CollaborativeMixin):
class TestCollaborativeNonExclusiveCategoryLabeling(TestCategoryLabeling, CollaborativeMixin):
exclusive = False
collaborative = True
@ -112,3 +113,11 @@ class TestCollaborativeNonExclusiveCategoryAnnotation(TestCategoryAnnotation, Co
)
can_annotate = Category.objects.can_annotate(self.category, self.project.item)
self.assertTrue(can_annotate)
class TestCategory(TestCase):
    """Database-level checks for the ``Category`` model."""

    def test_uniqueness(self):
        """Saving a second category with the same example, user and label fails."""
        existing = mommy.make('Category')
        duplicate = Category(example=existing.example, user=existing.user, label=existing.label)
        with self.assertRaises(IntegrityError):
            duplicate.save()

246
backend/labels/tests/test_span.py

@ -0,0 +1,246 @@
import abc
from django.core.exceptions import ValidationError
from django.db import IntegrityError
from django.test import TestCase
from model_mommy import mommy
from api.models import SEQUENCE_LABELING, SpanType
from labels.models import Span
from api.tests.api.utils import prepare_project
class TestSpanLabeling(abc.ABC, TestCase):
    """Shared fixture for the ``Span.objects.can_annotate`` tests.

    Subclasses set ``overlapping`` and ``collaborative`` to choose the
    project configuration under test.
    """
    overlapping = False
    collaborative = False

    @classmethod
    def setUpTestData(cls):
        """Create the project, one example, one span type and an unsaved candidate span."""
        cls.project = prepare_project(
            SEQUENCE_LABELING,
            allow_overlapping=cls.overlapping,
            collaborative_annotation=cls.collaborative
        )
        cls.example = mommy.make('Example', project=cls.project.item)
        cls.label_type = mommy.make('SpanType', project=cls.project.item)
        members = cls.project.users
        cls.user, cls.another_user = members[0], members[1]
        # The candidate is never saved; each test asks whether it could be.
        cls.span = Span(
            example=cls.example,
            label=cls.label_type,
            user=cls.user,
            start_offset=0,
            end_offset=5
        )

    def test_can_annotate_span_to_unannotated_data(self):
        """With no existing spans the candidate is accepted."""
        self.assertTrue(Span.objects.can_annotate(self.span, self.project.item))
class NonCollaborativeMixin:
    """Test shared by the non-collaborative span test cases."""

    def test_allow_another_user_to_annotate_same_span(self):
        """A span saved by another user does not block the candidate."""
        same_range = {
            'start_offset': self.span.start_offset,
            'end_offset': self.span.end_offset,
        }
        mommy.make(
            'Span',
            example=self.example,
            label=self.label_type,
            user=self.another_user,
            **same_range
        )
        self.assertTrue(Span.objects.can_annotate(self.span, self.project.item))
class TestNonOverlappingSpanLabeling(TestSpanLabeling, NonCollaborativeMixin):
    """Non-collaborative project in which overlapping spans are forbidden."""
    overlapping = False
    collaborative = False

    def test_cannot_annotate_same_span_to_annotated_data(self):
        """The user's own span with the same range and type blocks the candidate."""
        same_range = {
            'start_offset': self.span.start_offset,
            'end_offset': self.span.end_offset,
        }
        mommy.make(
            'Span',
            example=self.example,
            label=self.label_type,
            user=self.user,
            **same_range
        )
        self.assertFalse(Span.objects.can_annotate(self.span, self.project.item))

    def test_cannot_annotate_different_span_type_to_annotated_data(self):
        """The user's own span with the same range blocks the candidate whatever its type."""
        same_range = {
            'start_offset': self.span.start_offset,
            'end_offset': self.span.end_offset,
        }
        # No ``label`` is passed, so mommy generates the span type itself.
        mommy.make(
            'Span',
            example=self.example,
            user=self.user,
            **same_range
        )
        self.assertFalse(Span.objects.can_annotate(self.span, self.project.item))
class TestOverlappingSpanLabeling(TestSpanLabeling, NonCollaborativeMixin):
    """Non-collaborative project in which overlapping spans are allowed."""
    overlapping = True
    collaborative = False

    def test_can_annotate_same_span_to_annotated_data(self):
        """An identical span by the same user does not block the candidate."""
        same_range = {
            'start_offset': self.span.start_offset,
            'end_offset': self.span.end_offset,
        }
        mommy.make(
            'Span',
            example=self.example,
            label=self.label_type,
            user=self.user,
            **same_range
        )
        self.assertTrue(Span.objects.can_annotate(self.span, self.project.item))
class TestCollaborativeNonOverlappingSpanLabeling(TestSpanLabeling):
    """Collaborative project in which overlapping spans are forbidden."""
    overlapping = False
    collaborative = True

    def test_deny_another_user_to_annotate_same_span_type(self):
        """Another user's span with the same range and type blocks the candidate."""
        same_range = {
            'start_offset': self.span.start_offset,
            'end_offset': self.span.end_offset,
        }
        mommy.make(
            'Span',
            example=self.example,
            label=self.label_type,
            user=self.another_user,
            **same_range
        )
        self.assertFalse(Span.objects.can_annotate(self.span, self.project.item))

    def test_deny_another_user_to_annotate_different_span_type(self):
        """Another user's span with the same range blocks the candidate whatever its type."""
        same_range = {
            'start_offset': self.span.start_offset,
            'end_offset': self.span.end_offset,
        }
        # No ``label`` is passed, so mommy generates the span type itself.
        mommy.make(
            'Span',
            example=self.example,
            user=self.another_user,
            **same_range
        )
        self.assertFalse(Span.objects.can_annotate(self.span, self.project.item))
class TestCollaborativeOverlappingSpanLabeling(TestSpanLabeling):
    """Collaborative project in which overlapping spans are allowed."""
    overlapping = True
    collaborative = True

    def test_allow_another_user_to_annotate_same_span(self):
        """Another user's identical span does not block the candidate."""
        same_range = {
            'start_offset': self.span.start_offset,
            'end_offset': self.span.end_offset,
        }
        mommy.make(
            'Span',
            example=self.example,
            label=self.label_type,
            user=self.another_user,
            **same_range
        )
        self.assertTrue(Span.objects.can_annotate(self.span, self.project.item))
class TestSpan(TestCase):
    """Model-level checks for ``Span``: offset constraints and overlap validation."""

    def setUp(self):
        """Create a sequence-labeling project that forbids overlapping spans."""
        self.project = prepare_project(SEQUENCE_LABELING, allow_overlapping=False)
        self.example = mommy.make('Example', project=self.project.item)
        self.user = self.project.users[0]

    def test_start_offset_is_not_negative(self):
        with self.assertRaises(IntegrityError):
            mommy.make('Span', start_offset=-1, end_offset=0)

    def test_end_offset_is_not_negative(self):
        with self.assertRaises(IntegrityError):
            mommy.make('Span', start_offset=-2, end_offset=-1)

    def test_start_offset_is_less_than_end_offset(self):
        with self.assertRaises(IntegrityError):
            mommy.make('Span', start_offset=0, end_offset=0)

    def test_unique_constraint(self):
        """Adjacent spans that only touch at a boundary are accepted."""
        mommy.make('Span', example=self.example, start_offset=5, end_offset=10, user=self.user)
        mommy.make('Span', example=self.example, start_offset=0, end_offset=5, user=self.user)
        mommy.make('Span', example=self.example, start_offset=10, end_offset=15, user=self.user)

    def test_unique_constraint_violated(self):
        """Every range overlapping an existing span is rejected."""
        mommy.make('Span', example=self.example, start_offset=5, end_offset=10, user=self.user)
        spans = [(5, 10), (5, 11), (4, 10), (6, 9), (9, 15), (0, 6)]
        for start_offset, end_offset in spans:
            # subTest reports each range separately instead of stopping at
            # the first failing one.
            with self.subTest(start_offset=start_offset, end_offset=end_offset):
                with self.assertRaises(ValidationError):
                    mommy.make(
                        'Span',
                        example=self.example,
                        start_offset=start_offset,
                        end_offset=end_offset,
                        user=self.user
                    )

    def test_unique_constraint_if_overlapping_is_allowed(self):
        """The same overlapping ranges are accepted when the project allows overlap."""
        project = prepare_project(SEQUENCE_LABELING, allow_overlapping=True)
        example = mommy.make('Example', project=project.item)
        user = project.users[0]
        mommy.make('Span', example=example, start_offset=5, end_offset=10, user=user)
        spans = [(5, 10), (5, 11), (4, 10), (6, 9), (9, 15), (0, 6)]
        for start_offset, end_offset in spans:
            mommy.make('Span', example=example, start_offset=start_offset, end_offset=end_offset, user=user)

    def test_update(self):
        """Saving an existing span again does not clash with itself."""
        span = mommy.make('Span', example=self.example, start_offset=0, end_offset=5)
        span.end_offset = 6
        span.save()
        # Check that the new value actually reached the database.
        span.refresh_from_db()
        self.assertEqual(span.end_offset, 6)
class TestSpanWithoutCollaborativeMode(TestCase):
    """Span creation in a non-collaborative project that forbids overlap."""

    def setUp(self):
        self.project = prepare_project(SEQUENCE_LABELING, False, allow_overlapping=False)
        self.example = mommy.make('Example', project=self.project.item)

    def test_allow_users_to_create_same_spans(self):
        """Two different users may each save the same range."""
        first_user, second_user = self.project.users[0], self.project.users[1]
        mommy.make('Span', example=self.example, start_offset=5, end_offset=10, user=first_user)
        mommy.make('Span', example=self.example, start_offset=5, end_offset=10, user=second_user)
class TestSpanWithCollaborativeMode(TestCase):
    """Span creation in collaborative projects."""

    def test_deny_users_to_create_same_spans(self):
        """Without overlap, a second user cannot save a range that is already taken."""
        project = prepare_project(SEQUENCE_LABELING, True, allow_overlapping=False)
        example = mommy.make('Example', project=project.item)
        same_range = {'start_offset': 5, 'end_offset': 10}
        mommy.make('Span', example=example, user=project.users[0], **same_range)
        with self.assertRaises(ValidationError):
            mommy.make('Span', example=example, user=project.users[1], **same_range)

    def test_allow_users_to_create_same_spans_if_overlapping_is_allowed(self):
        """With overlap allowed, two users can save the same range."""
        project = prepare_project(SEQUENCE_LABELING, True, allow_overlapping=True)
        example = mommy.make('Example', project=project.item)
        same_range = {'start_offset': 5, 'end_offset': 10}
        mommy.make('Span', example=example, user=project.users[0], **same_range)
        mommy.make('Span', example=example, user=project.users[1], **same_range)
class TestLabelDistribution(TestCase):
    """Checks ``Span.objects.calc_label_distribution``."""

    def setUp(self):
        self.project = prepare_project(SEQUENCE_LABELING, allow_overlapping=False)
        self.example = mommy.make('Example', project=self.project.item)
        self.user = self.project.users[0]

    def test_calc_label_distribution(self):
        """One span per span type gives a count of 1 for its author and 0 for everyone else."""
        label_a = mommy.make('SpanType', text='labelA', project=self.project.item)
        label_b = mommy.make('SpanType', text='labelB', project=self.project.item)
        mommy.make('Span', example=self.example, start_offset=5, end_offset=10, user=self.user, label=label_a)
        mommy.make('Span', example=self.example, start_offset=10, end_offset=15, user=self.user, label=label_b)
        actual = Span.objects.calc_label_distribution(
            examples=self.project.item.examples.all(),
            members=self.project.users,
            labels=SpanType.objects.all()
        )
        # Zero for every (user, span type) pair, then the two created spans.
        expected = {}
        for member in self.project.users:
            expected[member.username] = {label.text: 0 for label in SpanType.objects.all()}
        expected[self.user.username][label_a.text] = 1
        expected[self.user.username][label_b.text] = 1
        self.assertEqual(actual, expected)

backend/api/tests/test_text_label.py → backend/labels/tests/test_text_label.py

@ -1,14 +1,15 @@
import abc
from django.db import IntegrityError
from django.test import TestCase
from model_mommy import mommy
from api.models import SEQ2SEQ, TextLabel
from api.models import SEQ2SEQ
from labels.models import TextLabel
from api.tests.api.utils import prepare_project
from .api.utils import prepare_project
class TestTextLabelAnnotation(abc.ABC, TestCase):
class TestTextLabeling(abc.ABC, TestCase):
collaborative = False
@classmethod
@ -31,8 +32,15 @@ class TestTextLabelAnnotation(abc.ABC, TestCase):
can_annotate = TextLabel.objects.can_annotate(self.text_label, self.project.item)
self.assertTrue(can_annotate)
def test_uniqueness(self):
    """Saving a second text label with the same example, user and text fails."""
    existing = mommy.make('TextLabel')
    duplicate = TextLabel(example=existing.example, user=existing.user, text=existing.text)
    with self.assertRaises(IntegrityError):
        duplicate.save()
class TestNonCollaborativeTextLabelAnnotation(TestTextLabelAnnotation):
class TestNonCollaborativeTextLabeling(TestTextLabeling):
collaborative = False
def test_cannot_annotate_same_text_to_annotated_data(self):
@ -61,7 +69,7 @@ class TestNonCollaborativeTextLabelAnnotation(TestTextLabelAnnotation):
self.assertTrue(can_annotate)
class TestCollaborativeTextLabelAnnotation(TestTextLabelAnnotation):
class TestCollaborativeTextLabeling(TestTextLabeling):
collaborative = True
def test_deny_another_user_to_annotate_same_text(self):

backend/api/tests/api/test_annotation.py → backend/labels/tests/test_views.py

@ -1,14 +1,13 @@
from rest_framework import status
from rest_framework.reverse import reverse
from api.models import (DOCUMENT_CLASSIFICATION, SEQ2SEQ, SEQUENCE_LABELING,
Category, Span, TextLabel)
from api.models import (DOCUMENT_CLASSIFICATION, SEQ2SEQ, SEQUENCE_LABELING)
from labels.models import Category, Span, TextLabel
from api.tests.api.utils import (CRUDMixin, make_annotation, make_doc, make_label,
make_user, prepare_project)
from .utils import (CRUDMixin, make_annotation, make_doc, make_label,
make_user, prepare_project)
class TestAnnotationList:
class TestLabelList:
model = Category
task = DOCUMENT_CLASSIFICATION
view_name = 'annotation_list'
@ -43,13 +42,13 @@ class TestAnnotationList:
self.assertEqual(count, 2) # delete only own annotation
class TestCategoryList(TestAnnotationList, CRUDMixin):
class TestCategoryList(TestLabelList, CRUDMixin):
model = Category
task = DOCUMENT_CLASSIFICATION
view_name = 'category_list'
class TestSpanList(TestAnnotationList, CRUDMixin):
class TestSpanList(TestLabelList, CRUDMixin):
model = Span
task = SEQUENCE_LABELING
view_name = 'span_list'
@ -59,13 +58,13 @@ class TestSpanList(TestAnnotationList, CRUDMixin):
make_annotation(cls.task, doc=doc, user=member, start_offset=0, end_offset=1)
class TestTextList(TestAnnotationList, CRUDMixin):
class TestTextList(TestLabelList, CRUDMixin):
model = TextLabel
task = SEQ2SEQ
view_name = 'text_list'
class TestSharedAnnotationList:
class TestSharedLabelList:
model = Category
task = DOCUMENT_CLASSIFICATION
view_name = 'annotation_list'
@ -93,13 +92,13 @@ class TestSharedAnnotationList:
self.assertEqual(count, 0) # delete all annotation in the doc
class TestSharedCategoryList(TestSharedAnnotationList, CRUDMixin):
class TestSharedCategoryList(TestSharedLabelList, CRUDMixin):
model = Category
task = DOCUMENT_CLASSIFICATION
view_name = 'category_list'
class TestSharedSpanList(TestSharedAnnotationList, CRUDMixin):
class TestSharedSpanList(TestSharedLabelList, CRUDMixin):
model = Span
task = SEQUENCE_LABELING
view_name = 'span_list'
@ -117,13 +116,13 @@ class TestSharedSpanList(TestSharedAnnotationList, CRUDMixin):
cls.start_offset += 1
class TestSharedTextList(TestSharedAnnotationList, CRUDMixin):
class TestSharedTextList(TestSharedLabelList, CRUDMixin):
model = TextLabel
task = SEQ2SEQ
view_name = 'text_list'
class TestAnnotationCreation:
class TestDataLabeling:
task = DOCUMENT_CLASSIFICATION
view_name = 'annotation_list'
@ -149,11 +148,11 @@ class TestAnnotationCreation:
self.assert_create(expected=status.HTTP_403_FORBIDDEN)
class TestCategoryCreation(TestAnnotationCreation, CRUDMixin):
class TestCategoryCreation(TestDataLabeling, CRUDMixin):
view_name = 'category_list'
class TestSpanCreation(TestAnnotationCreation, CRUDMixin):
class TestSpanCreation(TestDataLabeling, CRUDMixin):
task = SEQUENCE_LABELING
view_name = 'span_list'
@ -162,7 +161,7 @@ class TestSpanCreation(TestAnnotationCreation, CRUDMixin):
return {'label': label.id, 'start_offset': 0, 'end_offset': 1}
class TestTextLabelCreation(TestAnnotationCreation, CRUDMixin):
class TestTextLabelCreation(TestDataLabeling, CRUDMixin):
task = SEQ2SEQ
view_name = 'text_list'
@ -170,7 +169,7 @@ class TestTextLabelCreation(TestAnnotationCreation, CRUDMixin):
return {'text': 'example'}
class TestAnnotationDetail:
class TestLabelDetail:
task = SEQUENCE_LABELING
view_name = 'annotation_detail'
@ -226,7 +225,7 @@ class TestAnnotationDetail:
self.assert_delete(self.non_member, status.HTTP_403_FORBIDDEN)
class TestCategoryDetail(TestAnnotationDetail, CRUDMixin):
class TestCategoryDetail(TestLabelDetail, CRUDMixin):
task = DOCUMENT_CLASSIFICATION
view_name = 'category_detail'
@ -234,12 +233,12 @@ class TestCategoryDetail(TestAnnotationDetail, CRUDMixin):
return make_annotation(task=self.task, doc=doc, user=self.project.users[0])
class TestSpanDetail(TestAnnotationDetail, CRUDMixin):
class TestSpanDetail(TestLabelDetail, CRUDMixin):
task = SEQUENCE_LABELING
view_name = 'span_detail'
class TestTextDetail(TestAnnotationDetail, CRUDMixin):
class TestTextDetail(TestLabelDetail, CRUDMixin):
task = SEQ2SEQ
view_name = 'text_detail'
@ -251,7 +250,7 @@ class TestTextDetail(TestAnnotationDetail, CRUDMixin):
return make_annotation(task=self.task, doc=doc, user=self.project.users[0])
class TestSharedAnnotationDetail:
class TestSharedLabelDetail:
task = DOCUMENT_CLASSIFICATION
view_name = 'annotation_detail'
@ -278,11 +277,11 @@ class TestSharedAnnotationDetail:
self.assert_delete(self.project.users[1], status.HTTP_204_NO_CONTENT)
class TestSharedCategoryDetail(TestSharedAnnotationDetail, CRUDMixin):
class TestSharedCategoryDetail(TestSharedLabelDetail, CRUDMixin):
view_name = 'category_detail'
class TestSharedSpanDetail(TestSharedAnnotationDetail, CRUDMixin):
class TestSharedSpanDetail(TestSharedLabelDetail, CRUDMixin):
task = SEQUENCE_LABELING
view_name = 'span_detail'
@ -290,7 +289,7 @@ class TestSharedSpanDetail(TestSharedAnnotationDetail, CRUDMixin):
return make_annotation(self.task, doc=doc, user=member, start_offset=0, end_offset=1)
class TestSharedTextDetail(TestSharedAnnotationDetail, CRUDMixin):
class TestSharedTextDetail(TestSharedLabelDetail, CRUDMixin):
task = SEQ2SEQ
view_name = 'text_detail'

50
backend/labels/urls.py

@ -0,0 +1,50 @@
from django.urls import path
from .views import CategoryListAPI, CategoryDetailAPI
from .views import SpanListAPI, SpanDetailAPI
from .views import TextLabelListAPI, TextLabelDetailAPI
from .views import RelationList, RelationDetail
# Routes of the labels app. The views read ``project_id`` from their URL
# kwargs, so this module is presumably included under a prefix that captures
# it (e.g. ``projects/<int:project_id>/``) -- confirm in the root URLconf.
urlpatterns = [
    # Relations.
    path('annotation_relations', RelationList.as_view(), name='relation_list'),
    path(
        'annotation_relations/<int:annotation_id>',
        RelationDetail.as_view(),
        name='relation_detail',
    ),

    # Category labels of an example.
    path(
        'examples/<int:example_id>/categories',
        CategoryListAPI.as_view(),
        name='category_list',
    ),
    path(
        'examples/<int:example_id>/categories/<int:annotation_id>',
        CategoryDetailAPI.as_view(),
        name='category_detail',
    ),

    # Span labels of an example.
    path(
        'examples/<int:example_id>/spans',
        SpanListAPI.as_view(),
        name='span_list',
    ),
    path(
        'examples/<int:example_id>/spans/<int:annotation_id>',
        SpanDetailAPI.as_view(),
        name='span_detail',
    ),

    # Text labels of an example.
    path(
        'examples/<int:example_id>/texts',
        TextLabelListAPI.as_view(),
        name='text_list',
    ),
    path(
        'examples/<int:example_id>/texts/<int:annotation_id>',
        TextLabelDetailAPI.as_view(),
        name='text_detail',
    ),
]

125
backend/labels/views.py

@ -0,0 +1,125 @@
from functools import partial
from django.core.exceptions import ValidationError
from django.shortcuts import get_object_or_404
from rest_framework import generics, status
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from api.models import Project
from labels.models import Category, Span, TextLabel, Relation
from members.permissions import IsInProjectOrAdmin, IsInProjectReadOnlyOrAdmin
from .permissions import CanEditLabel
from .serializers import CategorySerializer, SpanSerializer, TextLabelSerializer, RelationSerializer
class BaseListAPI(generics.ListCreateAPIView):
    """List, create and bulk-delete the labels attached to one example.

    Concrete subclasses set ``label_class`` (the label model to query) and
    ``serializer_class``. The example and project are identified by the
    ``example_id`` and ``project_id`` URL kwargs.
    """

    label_class = None
    pagination_class = None
    permission_classes = [IsAuthenticated & IsInProjectOrAdmin]
    swagger_schema = None

    @property
    def project(self):
        """Return the project named in the URL, or raise a 404."""
        return get_object_or_404(Project, pk=self.kwargs['project_id'])

    def get_queryset(self):
        """Return the labels of the example that the requester may see.

        When the project is not in collaborative-annotation mode, only the
        requesting user's own labels are returned.
        """
        queryset = self.label_class.objects.filter(example=self.kwargs['example_id'])
        if not self.project.collaborative_annotation:
            queryset = queryset.filter(user=self.request.user)
        return queryset

    def create(self, request, *args, **kwargs):
        """Create a label for the example taken from the URL.

        A Django ``ValidationError`` raised while creating the label is
        reported as a 400 response carrying the error messages.
        """
        # The example always comes from the URL, never from the payload.
        request.data['example'] = self.kwargs['example_id']
        try:
            # Unpack the extra arguments; passing the tuple and dict
            # themselves would hand them over as two positional values.
            response = super().create(request, *args, **kwargs)
        except ValidationError as err:
            response = Response({'detail': err.messages}, status=status.HTTP_400_BAD_REQUEST)
        return response

    def perform_create(self, serializer):
        """Save the label bound to the URL's example and the requesting user."""
        serializer.save(example_id=self.kwargs['example_id'], user=self.request.user)

    def delete(self, request, *args, **kwargs):
        """Delete every label returned by ``get_queryset`` and answer 204."""
        self.get_queryset().delete()
        return Response(status=status.HTTP_204_NO_CONTENT)
class BaseDetailAPI(generics.RetrieveUpdateDestroyAPIView):
    """Retrieve, update or delete a single label.

    The label is looked up by the ``annotation_id`` URL kwarg. Concrete
    subclasses provide ``queryset`` and ``serializer_class``.
    """

    swagger_schema = None
    lookup_url_kwarg = 'annotation_id'

    @property
    def project(self):
        """Return the project named in the URL, or raise a 404."""
        return get_object_or_404(Project, pk=self.kwargs['project_id'])

    def get_permissions(self):
        """Choose the permission set from the project's annotation mode.

        Collaborative projects only require an authenticated project member
        (or admin). Otherwise ``CanEditLabel`` is added on top, bound to this
        view's queryset -- presumably restricting edits to the label's owner;
        confirm in ``labels/permissions.py``.
        """
        needs_edit_check = not self.project.collaborative_annotation
        permission = IsAuthenticated & IsInProjectOrAdmin
        if needs_edit_check:
            permission = permission & partial(CanEditLabel, self.queryset)
        self.permission_classes = [permission]
        return super().get_permissions()
class CategoryListAPI(BaseListAPI):
    """List/create endpoint for the category labels of an example."""

    label_class = Category
    serializer_class = CategorySerializer

    def create(self, request, *args, **kwargs):
        """Create a category label.

        When the project has ``single_class_classification`` set, the labels
        currently visible through ``get_queryset`` are deleted first, so the
        new label replaces them.
        """
        if self.project.single_class_classification:
            self.get_queryset().delete()
        # Unpack the extra arguments rather than passing the tuple and dict
        # themselves as two positional values.
        return super().create(request, *args, **kwargs)
class CategoryDetailAPI(BaseDetailAPI):
    """Detail endpoint (retrieve/update/delete) for one category label."""

    serializer_class = CategorySerializer
    queryset = Category.objects.all()
class SpanListAPI(BaseListAPI):
    """List/create endpoint for the span labels of an example."""

    serializer_class = SpanSerializer
    label_class = Span
class SpanDetailAPI(BaseDetailAPI):
    """Detail endpoint (retrieve/update/delete) for one span label."""

    serializer_class = SpanSerializer
    queryset = Span.objects.all()
class TextLabelListAPI(BaseListAPI):
    """List/create endpoint for the text labels of an example."""

    serializer_class = TextLabelSerializer
    label_class = TextLabel
class TextLabelDetailAPI(BaseDetailAPI):
    """Detail endpoint (retrieve/update/delete) for one text label."""

    serializer_class = TextLabelSerializer
    queryset = TextLabel.objects.all()
class RelationList(generics.ListCreateAPIView):
    """List, create and bulk-delete the relations of a project.

    The project is identified by the ``project_id`` URL kwarg.
    """

    serializer_class = RelationSerializer
    pagination_class = None
    permission_classes = [IsAuthenticated & IsInProjectReadOnlyOrAdmin]

    def get_queryset(self):
        """Return the relations of the project named in the URL (404 if absent)."""
        project = get_object_or_404(Project, pk=self.kwargs['project_id'])
        return project.annotation_relations

    def perform_create(self, serializer):
        """Save the new relation bound to the project named in the URL."""
        project = get_object_or_404(Project, pk=self.kwargs['project_id'])
        serializer.save(project=project)

    def delete(self, request, *args, **kwargs):
        """Delete the relations whose ids are listed under ``ids`` in the body.

        Only relations of the project named in the URL are deleted; ids that
        belong to other projects are ignored. A body without an ``ids`` entry
        is answered with a 400 response.
        """
        try:
            delete_ids = request.data['ids']
        except (KeyError, TypeError):
            return Response(
                {'detail': "The 'ids' field is required."},
                status=status.HTTP_400_BAD_REQUEST,
            )
        project = get_object_or_404(Project, pk=self.kwargs['project_id'])
        # Scope the deletion to this project: the permission check covers only
        # the project in the URL, so unscoped ids could hit other projects.
        Relation.objects.filter(project=project, pk__in=delete_ids).delete()
        return Response(status=status.HTTP_204_NO_CONTENT)
class RelationDetail(generics.RetrieveUpdateDestroyAPIView):
    """Retrieve, update or delete one relation, looked up by ``annotation_id``."""

    permission_classes = [IsAuthenticated & IsInProjectReadOnlyOrAdmin]
    lookup_url_kwarg = 'annotation_id'
    serializer_class = RelationSerializer
    queryset = Relation.objects.all()

7
backend/metrics/views.py

@ -5,7 +5,8 @@ from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView
from api.models import Example, ExampleState, Annotation, Label, Category, CategoryType, Span, SpanType
from api.models import Example, ExampleState, CategoryType, SpanType, Label as LabelType
from labels.models import Label, Category, Span
from members.models import Member
from members.permissions import IsInProjectReadOnlyOrAdmin
@ -32,8 +33,8 @@ class MemberProgressAPI(APIView):
class LabelDistribution(abc.ABC, APIView):
permission_classes = [IsAuthenticated & IsInProjectReadOnlyOrAdmin]
model = Annotation
label_type = Label
model = Label
label_type = LabelType
def get(self, request, *args, **kwargs):
labels = self.label_type.objects.filter(project=self.kwargs['project_id'])

Loading…
Cancel
Save