You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

438 lines
12 KiB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
4 years ago
4 years ago
6 years ago
4 years ago
4 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
  1. import abc
  2. import random
  3. import string
  4. import uuid
  5. from django.contrib.auth.models import User
  6. from django.core.exceptions import ValidationError
  7. from django.db import models
  8. from polymorphic.models import PolymorphicModel
  9. from .managers import AnnotationManager, ExampleManager, ExampleStateManager
  10. DOCUMENT_CLASSIFICATION = 'DocumentClassification'
  11. SEQUENCE_LABELING = 'SequenceLabeling'
  12. SEQ2SEQ = 'Seq2seq'
  13. SPEECH2TEXT = 'Speech2text'
  14. IMAGE_CLASSIFICATION = 'ImageClassification'
  15. INTENT_DETECTION_AND_SLOT_FILLING = 'IntentDetectionAndSlotFilling'
  16. PROJECT_CHOICES = (
  17. (DOCUMENT_CLASSIFICATION, 'document classification'),
  18. (SEQUENCE_LABELING, 'sequence labeling'),
  19. (SEQ2SEQ, 'sequence to sequence'),
  20. (INTENT_DETECTION_AND_SLOT_FILLING, 'intent detection and slot filling'),
  21. (SPEECH2TEXT, 'speech to text'),
  22. (IMAGE_CLASSIFICATION, 'image classification')
  23. )
  24. class Project(PolymorphicModel):
  25. name = models.CharField(max_length=100)
  26. description = models.TextField(default='')
  27. guideline = models.TextField(default='', blank=True)
  28. created_at = models.DateTimeField(auto_now_add=True)
  29. updated_at = models.DateTimeField(auto_now=True)
  30. created_by = models.ForeignKey(
  31. User,
  32. on_delete=models.SET_NULL,
  33. null=True,
  34. )
  35. project_type = models.CharField(max_length=30, choices=PROJECT_CHOICES)
  36. random_order = models.BooleanField(default=False)
  37. collaborative_annotation = models.BooleanField(default=False)
  38. single_class_classification = models.BooleanField(default=False)
  39. @property
  40. @abc.abstractmethod
  41. def is_text_project(self) -> bool:
  42. return False
  43. @property
  44. def can_define_label(self) -> bool:
  45. """Whether or not the project can define label(ignoring the type of label)"""
  46. return False
  47. @property
  48. def can_define_relation(self) -> bool:
  49. """Whether or not the project can define relation."""
  50. return False
  51. @property
  52. def can_define_category(self) -> bool:
  53. """Whether or not the project can define category."""
  54. return False
  55. @property
  56. def can_define_span(self) -> bool:
  57. """Whether or not the project can define span."""
  58. return False
  59. def __str__(self):
  60. return self.name
  61. class TextClassificationProject(Project):
  62. @property
  63. def is_text_project(self) -> bool:
  64. return True
  65. @property
  66. def can_define_label(self) -> bool:
  67. return True
  68. @property
  69. def can_define_category(self) -> bool:
  70. return True
  71. class SequenceLabelingProject(Project):
  72. allow_overlapping = models.BooleanField(default=False)
  73. grapheme_mode = models.BooleanField(default=False)
  74. @property
  75. def is_text_project(self) -> bool:
  76. return True
  77. @property
  78. def can_define_label(self) -> bool:
  79. return True
  80. @property
  81. def can_define_span(self) -> bool:
  82. return True
  83. class Seq2seqProject(Project):
  84. @property
  85. def is_text_project(self) -> bool:
  86. return True
  87. class IntentDetectionAndSlotFillingProject(Project):
  88. @property
  89. def is_text_project(self) -> bool:
  90. return True
  91. @property
  92. def can_define_label(self) -> bool:
  93. return True
  94. @property
  95. def can_define_category(self) -> bool:
  96. return True
  97. @property
  98. def can_define_span(self) -> bool:
  99. return True
  100. class Speech2textProject(Project):
  101. @property
  102. def is_text_project(self) -> bool:
  103. return False
  104. class ImageClassificationProject(Project):
  105. @property
  106. def is_text_project(self) -> bool:
  107. return False
  108. @property
  109. def can_define_label(self) -> bool:
  110. return True
  111. @property
  112. def can_define_category(self) -> bool:
  113. return True
  114. def generate_random_hex_color():
  115. return f'#{random.randint(0, 0xFFFFFF):06x}'
  116. class Label(models.Model):
  117. text = models.CharField(max_length=100, db_index=True)
  118. prefix_key = models.CharField(
  119. max_length=10,
  120. blank=True,
  121. null=True,
  122. choices=(
  123. ('ctrl', 'ctrl'),
  124. ('shift', 'shift'),
  125. ('ctrl shift', 'ctrl shift')
  126. )
  127. )
  128. suffix_key = models.CharField(
  129. max_length=1,
  130. blank=True,
  131. null=True,
  132. choices=tuple(
  133. (c, c) for c in string.digits + string.ascii_lowercase
  134. )
  135. )
  136. project = models.ForeignKey(
  137. to=Project,
  138. on_delete=models.CASCADE,
  139. # related_name='labels'
  140. )
  141. background_color = models.CharField(max_length=7, default=generate_random_hex_color)
  142. text_color = models.CharField(max_length=7, default='#ffffff')
  143. created_at = models.DateTimeField(auto_now_add=True, db_index=True)
  144. updated_at = models.DateTimeField(auto_now=True)
  145. def __str__(self):
  146. return self.text
  147. @property
  148. def labels(self):
  149. raise NotImplementedError()
  150. def clean(self):
  151. # Don't allow shortcut key not to have a suffix key.
  152. if self.prefix_key and not self.suffix_key:
  153. message = 'Shortcut key may not have a suffix key.'
  154. raise ValidationError(message)
  155. # each shortcut (prefix key + suffix key) can only be assigned to one label
  156. if self.suffix_key or self.prefix_key:
  157. other_labels = self.labels.exclude(id=self.id)
  158. if other_labels.filter(suffix_key=self.suffix_key, prefix_key=self.prefix_key).exists():
  159. message = 'A label with the shortcut already exists in the project.'
  160. raise ValidationError(message)
  161. super().clean()
  162. class Meta:
  163. abstract = True
  164. constraints = [
  165. models.UniqueConstraint(
  166. fields=['project', 'text'],
  167. name='%(app_label)s_%(class)s_is_unique'
  168. )
  169. ]
  170. ordering = ['created_at']
  171. class CategoryType(Label):
  172. @property
  173. def labels(self):
  174. return CategoryType.objects.filter(project=self.project)
  175. class SpanType(Label):
  176. @property
  177. def labels(self):
  178. return SpanType.objects.filter(project=self.project)
  179. class Example(models.Model):
  180. objects = ExampleManager()
  181. uuid = models.UUIDField(default=uuid.uuid4, editable=False, db_index=True, unique=True)
  182. meta = models.JSONField(default=dict)
  183. filename = models.FileField(default='.', max_length=1024)
  184. project = models.ForeignKey(
  185. to=Project,
  186. on_delete=models.CASCADE,
  187. related_name='examples'
  188. )
  189. annotations_approved_by = models.ForeignKey(
  190. to=User,
  191. on_delete=models.SET_NULL,
  192. null=True,
  193. blank=True
  194. )
  195. text = models.TextField(null=True, blank=True)
  196. created_at = models.DateTimeField(auto_now_add=True, db_index=True)
  197. updated_at = models.DateTimeField(auto_now=True)
  198. @property
  199. def comment_count(self):
  200. return Comment.objects.filter(example=self.id).count()
  201. def is_labeled(self, is_collaborative, user):
  202. if is_collaborative:
  203. for model in Annotation.__subclasses__():
  204. if model.objects.filter(example=self.id).exists():
  205. return True
  206. else:
  207. for model in Annotation.__subclasses__():
  208. if model.objects.filter(example=self.id, user=user).exists():
  209. return True
  210. return False
  211. class Meta:
  212. ordering = ['created_at']
  213. class ExampleState(models.Model):
  214. objects = ExampleStateManager()
  215. example = models.ForeignKey(
  216. to=Example,
  217. on_delete=models.CASCADE,
  218. related_name='states'
  219. )
  220. confirmed_by = models.ForeignKey(
  221. to=User,
  222. on_delete=models.CASCADE
  223. )
  224. confirmed_at = models.DateTimeField(auto_now=True)
  225. class Meta:
  226. unique_together = (('example', 'confirmed_by'),)
  227. class Comment(models.Model):
  228. text = models.TextField()
  229. example = models.ForeignKey(
  230. to=Example,
  231. on_delete=models.CASCADE,
  232. related_name='comments'
  233. )
  234. user = models.ForeignKey(
  235. to=User,
  236. on_delete=models.CASCADE,
  237. null=True
  238. )
  239. created_at = models.DateTimeField(auto_now_add=True, db_index=True)
  240. updated_at = models.DateTimeField(auto_now=True)
  241. @property
  242. def username(self):
  243. return self.user.username
  244. class Meta:
  245. ordering = ['created_at']
  246. class Tag(models.Model):
  247. text = models.TextField()
  248. project = models.ForeignKey(
  249. to=Project,
  250. on_delete=models.CASCADE,
  251. related_name='tags'
  252. )
  253. def __str__(self):
  254. return self.text
  255. class Annotation(models.Model):
  256. objects = AnnotationManager()
  257. prob = models.FloatField(default=0.0)
  258. manual = models.BooleanField(default=False)
  259. user = models.ForeignKey(User, on_delete=models.CASCADE)
  260. created_at = models.DateTimeField(auto_now_add=True)
  261. updated_at = models.DateTimeField(auto_now=True)
  262. class Meta:
  263. abstract = True
  264. class Category(Annotation):
  265. example = models.ForeignKey(
  266. to=Example,
  267. on_delete=models.CASCADE,
  268. related_name='categories'
  269. )
  270. label = models.ForeignKey(to=CategoryType, on_delete=models.CASCADE)
  271. class Meta:
  272. unique_together = (
  273. 'example',
  274. 'user',
  275. 'label'
  276. )
  277. class Span(Annotation):
  278. example = models.ForeignKey(
  279. to=Example,
  280. on_delete=models.CASCADE,
  281. related_name='spans'
  282. )
  283. label = models.ForeignKey(to=SpanType, on_delete=models.CASCADE)
  284. start_offset = models.IntegerField()
  285. end_offset = models.IntegerField()
  286. def validate_unique(self, exclude=None):
  287. allow_overlapping = getattr(self.example.project, 'allow_overlapping', False)
  288. is_collaborative = self.example.project.collaborative_annotation
  289. if allow_overlapping:
  290. super().validate_unique(exclude=exclude)
  291. return
  292. overlapping_span = Span.objects.exclude(id=self.id).filter(example=self.example).filter(
  293. models.Q(start_offset__gte=self.start_offset, start_offset__lt=self.end_offset) |
  294. models.Q(end_offset__gt=self.start_offset, end_offset__lte=self.end_offset) |
  295. models.Q(start_offset__lte=self.start_offset, end_offset__gte=self.end_offset)
  296. )
  297. if is_collaborative:
  298. if overlapping_span.exists():
  299. raise ValidationError('This overlapping is not allowed in this project.')
  300. else:
  301. if overlapping_span.filter(user=self.user).exists():
  302. raise ValidationError('This overlapping is not allowed in this project.')
  303. def save(self, force_insert=False, force_update=False, using=None,
  304. update_fields=None):
  305. self.full_clean()
  306. super().save(force_insert, force_update, using, update_fields)
  307. class Meta:
  308. constraints = [
  309. models.CheckConstraint(check=models.Q(start_offset__gte=0), name='startOffset >= 0'),
  310. models.CheckConstraint(check=models.Q(end_offset__gte=0), name='endOffset >= 0'),
  311. models.CheckConstraint(check=models.Q(start_offset__lt=models.F('end_offset')), name='start < end')
  312. ]
  313. class TextLabel(Annotation):
  314. example = models.ForeignKey(
  315. to=Example,
  316. on_delete=models.CASCADE,
  317. related_name='texts'
  318. )
  319. text = models.TextField()
  320. class Meta:
  321. unique_together = (
  322. 'example',
  323. 'user',
  324. 'text'
  325. )
  326. class RelationTypes(models.Model):
  327. color = models.TextField()
  328. name = models.TextField()
  329. project = models.ForeignKey(Project, related_name='relation_types', on_delete=models.CASCADE)
  330. def __str__(self):
  331. return self.name
  332. class Meta:
  333. unique_together = ('color', 'name')
  334. class AnnotationRelations(models.Model):
  335. annotation_id_1 = models.IntegerField()
  336. annotation_id_2 = models.IntegerField()
  337. type = models.ForeignKey(RelationTypes, related_name='annotation_relations', on_delete=models.CASCADE)
  338. timestamp = models.DateTimeField()
  339. user = models.ForeignKey(User, related_name='annotation_relations', on_delete=models.CASCADE)
  340. project = models.ForeignKey(Project, related_name='annotation_relations', on_delete=models.CASCADE)
  341. def __str__(self):
  342. return self.__dict__.__str__()
  343. class Meta:
  344. unique_together = ('annotation_id_1', 'annotation_id_2', 'type', 'project')