You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

254 lines
8.4 KiB

5 years ago
3 years ago
6 years ago
6 years ago
5 years ago
5 years ago
5 years ago
6 years ago
5 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
3 years ago
6 years ago
3 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
  1. import string
  2. from auto_labeling_pipeline.models import RequestModelFactory
  3. from django.contrib.auth.models import User
  4. from django.core.exceptions import ValidationError
  5. from django.db import models
  6. from polymorphic.models import PolymorphicModel
  7. from .managers import (AnnotationManager, RoleMappingManager,
  8. Seq2seqAnnotationManager)
  # Identifiers for the supported project types.
  DOCUMENT_CLASSIFICATION = 'DocumentClassification'
  SEQUENCE_LABELING = 'SequenceLabeling'
  SEQ2SEQ = 'Seq2seq'
  SPEECH2TEXT = 'Speech2text'

  # (stored value, human-readable name) pairs, in display order.
  PROJECT_CHOICES = (
      (DOCUMENT_CLASSIFICATION, 'document classification'),
      (SEQUENCE_LABELING, 'sequence labeling'),
      (SEQ2SEQ, 'sequence to sequence'),
      (SPEECH2TEXT, 'speech to text'),
  )
  class Project(PolymorphicModel):
      """Polymorphic base model for an annotation project.

      Subclasses are expected to override ``get_annotation_class``; the
      implementation here always raises ``NotImplementedError``.
      """

      name = models.CharField(max_length=100)
      description = models.TextField(default='')
      guideline = models.TextField(default='', blank=True)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)
      users = models.ManyToManyField(User, related_name='projects')
      project_type = models.CharField(
          max_length=30,
          choices=PROJECT_CHOICES,
      )
      randomize_document_order = models.BooleanField(default=False)
      collaborative_annotation = models.BooleanField(default=False)
      single_class_classification = models.BooleanField(default=False)

      def __str__(self):
          """Return the project name."""
          return self.name

      def get_annotation_class(self):
          """Return the annotation model class for this project type.

          Raises:
              NotImplementedError: always, on the base class.
          """
          raise NotImplementedError()
  class TextClassificationProject(Project):
      """Project type whose annotations are ``DocumentAnnotation`` rows."""

      def get_annotation_class(self):
          """Return the annotation model class used by this project type."""
          annotation_model = DocumentAnnotation
          return annotation_model
  class SequenceLabelingProject(Project):
      """Project type whose annotations are ``SequenceAnnotation`` rows."""

      def get_annotation_class(self):
          """Return the annotation model class used by this project type."""
          annotation_model = SequenceAnnotation
          return annotation_model
  class Seq2seqProject(Project):
      """Project type whose annotations are ``Seq2seqAnnotation`` rows."""

      def get_annotation_class(self):
          """Return the annotation model class used by this project type."""
          annotation_model = Seq2seqAnnotation
          return annotation_model
  class Speech2textProject(Project):
      """Project type whose annotations are ``Speech2textAnnotation`` rows."""

      def get_annotation_class(self):
          """Return the annotation model class used by this project type."""
          annotation_model = Speech2textAnnotation
          return annotation_model
  class Label(models.Model):
      """A label that can be assigned within a project.

      A label may carry a keyboard shortcut made of an optional prefix key
      (``prefix_key``) and a single-character suffix key (``suffix_key``).
      Label text is unique per project.
      """

      # Allowed modifier combinations for a shortcut.
      PREFIX_KEYS = (
          ('ctrl', 'ctrl'),
          ('shift', 'shift'),
          ('ctrl shift', 'ctrl shift')
      )
      # Allowed shortcut characters: the digits followed by lowercase ASCII letters.
      SUFFIX_KEYS = tuple(
          (c, c) for c in string.digits + string.ascii_lowercase
      )

      text = models.CharField(max_length=100)
      prefix_key = models.CharField(max_length=10, blank=True, null=True, choices=PREFIX_KEYS)
      suffix_key = models.CharField(max_length=1, blank=True, null=True, choices=SUFFIX_KEYS)
      project = models.ForeignKey(Project, related_name='labels', on_delete=models.CASCADE)
      background_color = models.CharField(max_length=7, default='#209cee')
      text_color = models.CharField(max_length=7, default='#ffffff')
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      def __str__(self):
          """Return the label text."""
          return self.text

      def clean(self):
          """Validate the keyboard shortcut of this label.

          Raises:
              ValidationError: if a prefix key is set without a suffix key, or
                  if another label of the same project already uses the same
                  prefix/suffix combination.
          """
          # A prefix (modifier) key without a suffix key is rejected.
          if self.prefix_key and not self.suffix_key:
              raise ValidationError('Shortcut key may not have a suffix key.')

          # Each shortcut (prefix key + suffix key) can only be assigned to one
          # label per project. Model.full_clean() calls clean() even when field
          # validation has already failed, so the project may be unset here;
          # dereferencing it would raise instead of reporting a validation
          # error, hence the project_id guard.
          has_shortcut = self.suffix_key or self.prefix_key
          if has_shortcut and self.project_id is not None:
              other_labels = self.project.labels.exclude(id=self.id)
              if other_labels.filter(suffix_key=self.suffix_key, prefix_key=self.prefix_key).exists():
                  raise ValidationError('A label with this shortcut already exists in the project')

          super().clean()

      class Meta:
          unique_together = (
              ('project', 'text'),
          )
  class Document(models.Model):
      """A piece of text belonging to a project, plus its metadata."""

      text = models.TextField()
      project = models.ForeignKey(
          Project,
          related_name='documents',
          on_delete=models.CASCADE,
      )
      meta = models.JSONField(default=dict)
      filename = models.FilePathField(default='')
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)
      annotations_approved_by = models.ForeignKey(
          User,
          on_delete=models.SET_NULL,
          null=True,
          blank=True,
      )

      def __str__(self):
          """Return the first 50 characters of the document text."""
          preview = self.text[:50]
          return preview

      @property
      def comment_count(self):
          """Number of ``Comment`` rows attached to this document."""
          attached = Comment.objects.filter(document=self.id)
          return attached.count()
  class Comment(models.Model):
      """A comment attached to a document, ordered newest first."""

      text = models.TextField()
      document = models.ForeignKey(Document, related_name='comments', on_delete=models.CASCADE)
      # The author is nullable, so a comment may exist without a user.
      user = models.ForeignKey(User, on_delete=models.CASCADE, null=True)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      @property
      def username(self):
          """Username of the comment author, or ``None`` when no user is set."""
          # ``user`` is declared with null=True; guard against a missing author
          # instead of failing with AttributeError on ``None.username``.
          author = self.user
          if author is None:
              return None
          return author.username

      @property
      def document_text(self):
          """Text of the document this comment belongs to."""
          return self.document.text

      class Meta:
          ordering = ('-created_at', )
  class Tag(models.Model):
      """A free-text tag attached to a project."""

      text = models.TextField()
      project = models.ForeignKey(
          Project,
          related_name='tags',
          on_delete=models.CASCADE,
      )

      def __str__(self):
          """Return the tag text."""
          return self.text
  class Annotation(models.Model):
      """Abstract base holding the fields shared by all annotation models."""

      objects = AnnotationManager()

      # NOTE(review): ``prob`` presumably stores a model confidence and
      # ``manual`` marks human-made annotations -- confirm against callers.
      prob = models.FloatField(default=0.0)
      manual = models.BooleanField(default=False)
      user = models.ForeignKey(User, on_delete=models.CASCADE)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      class Meta:
          # No table is created for this model; concrete subclasses inherit its fields.
          abstract = True
  class DocumentAnnotation(Annotation):
      """Annotation that assigns a label to a whole document."""

      document = models.ForeignKey(
          Document,
          related_name='doc_annotations',
          on_delete=models.CASCADE,
      )
      label = models.ForeignKey(Label, on_delete=models.CASCADE)

      class Meta:
          # A user can attach a given label to a given document only once.
          unique_together = ('document', 'user', 'label')
  class SequenceAnnotation(Annotation):
      """Annotation that assigns a label to a span of a document.

      The span is described by ``start_offset`` and ``end_offset``;
      ``start_offset`` must be strictly smaller than ``end_offset``.
      """

      document = models.ForeignKey(Document, related_name='seq_annotations', on_delete=models.CASCADE)
      label = models.ForeignKey(Label, on_delete=models.CASCADE)
      start_offset = models.IntegerField()
      end_offset = models.IntegerField()

      def clean(self):
          """Validate that the span is non-empty and correctly ordered.

          Raises:
              ValidationError: if ``start_offset`` is greater than or equal to
                  ``end_offset``.
          """
          super().clean()
          # Model.full_clean() calls clean() even when field validation has
          # already failed, so either offset may still be None here. Comparing
          # None with an int raises TypeError; leave the missing value to be
          # reported by field validation instead.
          if self.start_offset is None or self.end_offset is None:
              return
          if self.start_offset >= self.end_offset:
              raise ValidationError('start_offset is after end_offset')

      class Meta:
          unique_together = ('document', 'user', 'label', 'start_offset', 'end_offset')
  class Seq2seqAnnotation(Annotation):
      """Annotation that attaches a text of up to 500 characters to a document."""

      # Replaces the manager inherited from Annotation with a specialised one.
      objects = Seq2seqAnnotationManager()

      document = models.ForeignKey(
          Document,
          related_name='seq2seq_annotations',
          on_delete=models.CASCADE,
      )
      text = models.CharField(max_length=500)

      class Meta:
          # A user cannot attach the same text to the same document twice.
          unique_together = ('document', 'user', 'text')
  class Speech2textAnnotation(Annotation):
      """Annotation that attaches free text to a document."""

      document = models.ForeignKey(
          Document,
          related_name='speech2text_annotations',
          on_delete=models.CASCADE,
      )
      text = models.TextField()

      class Meta:
          # At most one annotation per user and document.
          unique_together = ('document', 'user')
  class Role(models.Model):
      """A uniquely named role that can be mapped to users in a project."""

      name = models.CharField(
          max_length=100,
          unique=True,
      )
      description = models.TextField(default='')
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      def __str__(self):
          """Return the role name."""
          return self.name
  class RoleMapping(models.Model):
      """Assignment of one role to one user within one project.

      A user can hold at most one role per project.
      """

      user = models.ForeignKey(User, related_name='role_mappings', on_delete=models.CASCADE)
      project = models.ForeignKey(Project, related_name='role_mappings', on_delete=models.CASCADE)
      role = models.ForeignKey(Role, on_delete=models.CASCADE)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)
      objects = RoleMappingManager()

      def clean(self):
          """Reject a second role mapping for the same user and project.

          Raises:
              ValidationError: if another mapping already assigns this user a
                  role in this project.
          """
          super().clean()
          # Model.full_clean() calls clean() even when field validation has
          # already failed, so user/project may be unset here. Dereferencing a
          # missing relation would raise instead of reporting a validation
          # error, so the duplicate check is skipped in that case.
          if self.project_id is None or self.user_id is None:
              return
          # ``role_mappings`` is already scoped to this project; exclude this
          # row so that re-validating an existing mapping does not fail.
          other_rolemappings = self.project.role_mappings.exclude(id=self.id)
          if other_rolemappings.filter(user_id=self.user_id).exists():
              raise ValidationError('This user is already assigned to a role in this project.')

      class Meta:
          unique_together = ("user", "project")
  class AutoLabelingConfig(models.Model):
      """Auto-labeling configuration attached to a project.

      ``model_name`` must be a name that ``RequestModelFactory.find`` accepts;
      this is checked in ``clean_fields``.
      """

      model_name = models.CharField(max_length=100)
      model_attrs = models.JSONField(default=dict)
      template = models.TextField(default='')
      label_mapping = models.JSONField(default=dict)
      project = models.ForeignKey(Project, related_name='auto_labeling_config', on_delete=models.CASCADE)
      default = models.BooleanField(default=False)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      def __str__(self):
          """Return the configured model name."""
          return self.model_name

      def clean_fields(self, exclude=None):
          """Run the standard field validation, then check the model name.

          Args:
              exclude: field names to skip, forwarded to the parent implementation.

          Raises:
              ValidationError: if the lookup of ``model_name`` raises
                  ``NameError`` (unknown name) or any other exception.
          """
          super().clean_fields(exclude=exclude)
          try:
              RequestModelFactory.find(self.model_name)
          except NameError as err:
              # Chain the original error so the root cause stays in the traceback.
              raise ValidationError(
                  f'The specified model name {self.model_name} does not exist.'
              ) from err
          except Exception as err:
              # Any other lookup failure is also reported as a validation error.
              raise ValidationError('The attributes does not match the model.') from err