You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

442 lines
12 KiB

5 years ago
2 years ago
6 years ago
5 years ago
5 years ago
6 years ago
5 years ago
5 years ago
5 years ago
6 years ago
6 years ago
3 years ago
3 years ago
6 years ago
3 years ago
3 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
2 years ago
6 years ago
6 years ago
2 years ago
6 years ago
6 years ago
  1. import abc
  2. import random
  3. import string
  4. import uuid
  5. from django.contrib.auth.models import User
  6. from django.core.exceptions import ValidationError
  7. from django.db import models
  8. from polymorphic.models import PolymorphicModel
  9. from .managers import (AnnotationManager, CategoryManager, ExampleManager,
  10. ExampleStateManager, SpanManager)
  11. DOCUMENT_CLASSIFICATION = 'DocumentClassification'
  12. SEQUENCE_LABELING = 'SequenceLabeling'
  13. SEQ2SEQ = 'Seq2seq'
  14. SPEECH2TEXT = 'Speech2text'
  15. IMAGE_CLASSIFICATION = 'ImageClassification'
  16. INTENT_DETECTION_AND_SLOT_FILLING = 'IntentDetectionAndSlotFilling'
  17. PROJECT_CHOICES = (
  18. (DOCUMENT_CLASSIFICATION, 'document classification'),
  19. (SEQUENCE_LABELING, 'sequence labeling'),
  20. (SEQ2SEQ, 'sequence to sequence'),
  21. (INTENT_DETECTION_AND_SLOT_FILLING, 'intent detection and slot filling'),
  22. (SPEECH2TEXT, 'speech to text'),
  23. (IMAGE_CLASSIFICATION, 'image classification')
  24. )
  25. class Project(PolymorphicModel):
  26. name = models.CharField(max_length=100)
  27. description = models.TextField(default='')
  28. guideline = models.TextField(default='', blank=True)
  29. created_at = models.DateTimeField(auto_now_add=True)
  30. updated_at = models.DateTimeField(auto_now=True)
  31. created_by = models.ForeignKey(
  32. User,
  33. on_delete=models.SET_NULL,
  34. null=True,
  35. )
  36. project_type = models.CharField(max_length=30, choices=PROJECT_CHOICES)
  37. random_order = models.BooleanField(default=False)
  38. collaborative_annotation = models.BooleanField(default=False)
  39. single_class_classification = models.BooleanField(default=False)
  40. @property
  41. @abc.abstractmethod
  42. def is_text_project(self) -> bool:
  43. return False
  44. @property
  45. def can_define_label(self) -> bool:
  46. """Whether or not the project can define label(ignoring the type of label)"""
  47. return False
  48. @property
  49. def can_define_relation(self) -> bool:
  50. """Whether or not the project can define relation."""
  51. return False
  52. @property
  53. def can_define_category(self) -> bool:
  54. """Whether or not the project can define category."""
  55. return False
  56. @property
  57. def can_define_span(self) -> bool:
  58. """Whether or not the project can define span."""
  59. return False
  60. def __str__(self):
  61. return self.name
  62. class TextClassificationProject(Project):
  63. @property
  64. def is_text_project(self) -> bool:
  65. return True
  66. @property
  67. def can_define_label(self) -> bool:
  68. return True
  69. @property
  70. def can_define_category(self) -> bool:
  71. return True
  72. class SequenceLabelingProject(Project):
  73. allow_overlapping = models.BooleanField(default=False)
  74. grapheme_mode = models.BooleanField(default=False)
  75. @property
  76. def is_text_project(self) -> bool:
  77. return True
  78. @property
  79. def can_define_label(self) -> bool:
  80. return True
  81. @property
  82. def can_define_span(self) -> bool:
  83. return True
  84. class Seq2seqProject(Project):
  85. @property
  86. def is_text_project(self) -> bool:
  87. return True
  88. class IntentDetectionAndSlotFillingProject(Project):
  89. @property
  90. def is_text_project(self) -> bool:
  91. return True
  92. @property
  93. def can_define_label(self) -> bool:
  94. return True
  95. @property
  96. def can_define_category(self) -> bool:
  97. return True
  98. @property
  99. def can_define_span(self) -> bool:
  100. return True
  101. class Speech2textProject(Project):
  102. @property
  103. def is_text_project(self) -> bool:
  104. return False
  105. class ImageClassificationProject(Project):
  106. @property
  107. def is_text_project(self) -> bool:
  108. return False
  109. @property
  110. def can_define_label(self) -> bool:
  111. return True
  112. @property
  113. def can_define_category(self) -> bool:
  114. return True
  115. def generate_random_hex_color():
  116. return f'#{random.randint(0, 0xFFFFFF):06x}'
  117. class Label(models.Model):
  118. text = models.CharField(max_length=100, db_index=True)
  119. prefix_key = models.CharField(
  120. max_length=10,
  121. blank=True,
  122. null=True,
  123. choices=(
  124. ('ctrl', 'ctrl'),
  125. ('shift', 'shift'),
  126. ('ctrl shift', 'ctrl shift')
  127. )
  128. )
  129. suffix_key = models.CharField(
  130. max_length=1,
  131. blank=True,
  132. null=True,
  133. choices=tuple(
  134. (c, c) for c in string.digits + string.ascii_lowercase
  135. )
  136. )
  137. project = models.ForeignKey(
  138. to=Project,
  139. on_delete=models.CASCADE,
  140. # related_name='labels'
  141. )
  142. background_color = models.CharField(max_length=7, default=generate_random_hex_color)
  143. text_color = models.CharField(max_length=7, default='#ffffff')
  144. created_at = models.DateTimeField(auto_now_add=True, db_index=True)
  145. updated_at = models.DateTimeField(auto_now=True)
  146. def __str__(self):
  147. return self.text
  148. @property
  149. def labels(self):
  150. raise NotImplementedError()
  151. def clean(self):
  152. # Don't allow shortcut key not to have a suffix key.
  153. if self.prefix_key and not self.suffix_key:
  154. message = 'Shortcut key may not have a suffix key.'
  155. raise ValidationError(message)
  156. # each shortcut (prefix key + suffix key) can only be assigned to one label
  157. if self.suffix_key or self.prefix_key:
  158. other_labels = self.labels.exclude(id=self.id)
  159. if other_labels.filter(suffix_key=self.suffix_key, prefix_key=self.prefix_key).exists():
  160. message = 'A label with the shortcut already exists in the project.'
  161. raise ValidationError(message)
  162. super().clean()
  163. class Meta:
  164. abstract = True
  165. constraints = [
  166. models.UniqueConstraint(
  167. fields=['project', 'text'],
  168. name='%(app_label)s_%(class)s_is_unique'
  169. )
  170. ]
  171. ordering = ['created_at']
  172. class CategoryType(Label):
  173. @property
  174. def labels(self):
  175. return CategoryType.objects.filter(project=self.project)
  176. class SpanType(Label):
  177. @property
  178. def labels(self):
  179. return SpanType.objects.filter(project=self.project)
  180. class Example(models.Model):
  181. objects = ExampleManager()
  182. uuid = models.UUIDField(default=uuid.uuid4, editable=False, db_index=True, unique=True)
  183. meta = models.JSONField(default=dict)
  184. filename = models.FileField(default='.', max_length=1024)
  185. project = models.ForeignKey(
  186. to=Project,
  187. on_delete=models.CASCADE,
  188. related_name='examples'
  189. )
  190. annotations_approved_by = models.ForeignKey(
  191. to=User,
  192. on_delete=models.SET_NULL,
  193. null=True,
  194. blank=True
  195. )
  196. text = models.TextField(null=True, blank=True)
  197. created_at = models.DateTimeField(auto_now_add=True, db_index=True)
  198. updated_at = models.DateTimeField(auto_now=True)
  199. @property
  200. def comment_count(self):
  201. return Comment.objects.filter(example=self.id).count()
  202. @property
  203. def data(self):
  204. if self.project.is_text_project:
  205. return self.text
  206. else:
  207. return str(self.filename)
  208. class Meta:
  209. ordering = ['created_at']
  210. class ExampleState(models.Model):
  211. objects = ExampleStateManager()
  212. example = models.ForeignKey(
  213. to=Example,
  214. on_delete=models.CASCADE,
  215. related_name='states'
  216. )
  217. confirmed_by = models.ForeignKey(
  218. to=User,
  219. on_delete=models.CASCADE
  220. )
  221. confirmed_at = models.DateTimeField(auto_now=True)
  222. class Meta:
  223. unique_together = (('example', 'confirmed_by'),)
  224. class Comment(models.Model):
  225. text = models.TextField()
  226. example = models.ForeignKey(
  227. to=Example,
  228. on_delete=models.CASCADE,
  229. related_name='comments'
  230. )
  231. user = models.ForeignKey(
  232. to=User,
  233. on_delete=models.CASCADE,
  234. null=True
  235. )
  236. created_at = models.DateTimeField(auto_now_add=True, db_index=True)
  237. updated_at = models.DateTimeField(auto_now=True)
  238. @property
  239. def username(self):
  240. return self.user.username
  241. class Meta:
  242. ordering = ['created_at']
  243. class Tag(models.Model):
  244. text = models.TextField()
  245. project = models.ForeignKey(
  246. to=Project,
  247. on_delete=models.CASCADE,
  248. related_name='tags'
  249. )
  250. def __str__(self):
  251. return self.text
  252. class Annotation(models.Model):
  253. objects = AnnotationManager()
  254. prob = models.FloatField(default=0.0)
  255. manual = models.BooleanField(default=False)
  256. user = models.ForeignKey(User, on_delete=models.CASCADE)
  257. created_at = models.DateTimeField(auto_now_add=True)
  258. updated_at = models.DateTimeField(auto_now=True)
  259. class Meta:
  260. abstract = True
  261. class Category(Annotation):
  262. objects = CategoryManager()
  263. example = models.ForeignKey(
  264. to=Example,
  265. on_delete=models.CASCADE,
  266. related_name='categories'
  267. )
  268. label = models.ForeignKey(to=CategoryType, on_delete=models.CASCADE)
  269. class Meta:
  270. unique_together = (
  271. 'example',
  272. 'user',
  273. 'label'
  274. )
  275. class Span(Annotation):
  276. objects = SpanManager()
  277. example = models.ForeignKey(
  278. to=Example,
  279. on_delete=models.CASCADE,
  280. related_name='spans'
  281. )
  282. label = models.ForeignKey(to=SpanType, on_delete=models.CASCADE)
  283. start_offset = models.IntegerField()
  284. end_offset = models.IntegerField()
  285. def validate_unique(self, exclude=None):
  286. allow_overlapping = getattr(self.example.project, 'allow_overlapping', False)
  287. is_collaborative = self.example.project.collaborative_annotation
  288. if allow_overlapping:
  289. super().validate_unique(exclude=exclude)
  290. return
  291. overlapping_span = Span.objects.exclude(id=self.id).filter(example=self.example).filter(
  292. models.Q(start_offset__gte=self.start_offset, start_offset__lt=self.end_offset) |
  293. models.Q(end_offset__gt=self.start_offset, end_offset__lte=self.end_offset) |
  294. models.Q(start_offset__lte=self.start_offset, end_offset__gte=self.end_offset)
  295. )
  296. if is_collaborative:
  297. if overlapping_span.exists():
  298. raise ValidationError('This overlapping is not allowed in this project.')
  299. else:
  300. if overlapping_span.filter(user=self.user).exists():
  301. raise ValidationError('This overlapping is not allowed in this project.')
  302. def save(self, force_insert=False, force_update=False, using=None,
  303. update_fields=None):
  304. self.full_clean()
  305. super().save(force_insert, force_update, using, update_fields)
  306. def is_overlapping(self, other: 'Span'):
  307. return (other.start_offset <= self.start_offset < other.end_offset) or\
  308. (other.start_offset < self.end_offset <= other.end_offset) or\
  309. (self.start_offset < other.start_offset and other.end_offset < self.end_offset)
  310. class Meta:
  311. constraints = [
  312. models.CheckConstraint(check=models.Q(start_offset__gte=0), name='startOffset >= 0'),
  313. models.CheckConstraint(check=models.Q(end_offset__gte=0), name='endOffset >= 0'),
  314. models.CheckConstraint(check=models.Q(start_offset__lt=models.F('end_offset')), name='start < end')
  315. ]
  316. class TextLabel(Annotation):
  317. example = models.ForeignKey(
  318. to=Example,
  319. on_delete=models.CASCADE,
  320. related_name='texts'
  321. )
  322. text = models.TextField()
  323. class Meta:
  324. unique_together = (
  325. 'example',
  326. 'user',
  327. 'text'
  328. )
  329. class RelationTypes(models.Model):
  330. color = models.TextField()
  331. name = models.TextField()
  332. project = models.ForeignKey(Project, related_name='relation_types', on_delete=models.CASCADE)
  333. def __str__(self):
  334. return self.name
  335. class Meta:
  336. unique_together = ('color', 'name')
  337. class AnnotationRelations(models.Model):
  338. annotation_id_1 = models.IntegerField()
  339. annotation_id_2 = models.IntegerField()
  340. type = models.ForeignKey(RelationTypes, related_name='annotation_relations', on_delete=models.CASCADE)
  341. timestamp = models.DateTimeField()
  342. user = models.ForeignKey(User, related_name='annotation_relations', on_delete=models.CASCADE)
  343. project = models.ForeignKey(Project, related_name='annotation_relations', on_delete=models.CASCADE)
  344. def __str__(self):
  345. return self.__dict__.__str__()
  346. class Meta:
  347. unique_together = ('annotation_id_1', 'annotation_id_2', 'type', 'project')