You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

356 lines
9.8 KiB

5 years ago
3 years ago
6 years ago
6 years ago
5 years ago
5 years ago
6 years ago
5 years ago
5 years ago
5 years ago
6 years ago
6 years ago
3 years ago
3 years ago
6 years ago
3 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
  1. import string
  2. from auto_labeling_pipeline.models import RequestModelFactory
  3. from django.contrib.auth.models import User
  4. from django.core.exceptions import ValidationError
  5. from django.db import models
  6. from polymorphic.models import PolymorphicModel
  7. from .managers import (AnnotationManager, RoleMappingManager,
  8. Seq2seqAnnotationManager)
  9. DOCUMENT_CLASSIFICATION = 'DocumentClassification'
  10. SEQUENCE_LABELING = 'SequenceLabeling'
  11. SEQ2SEQ = 'Seq2seq'
  12. SPEECH2TEXT = 'Speech2text'
  13. IMAGE_CLASSIFICATION = 'ImageClassification'
  14. PROJECT_CHOICES = (
  15. (DOCUMENT_CLASSIFICATION, 'document classification'),
  16. (SEQUENCE_LABELING, 'sequence labeling'),
  17. (SEQ2SEQ, 'sequence to sequence'),
  18. (SPEECH2TEXT, 'speech to text'),
  19. (IMAGE_CLASSIFICATION, 'image classification')
  20. )
  21. class Project(PolymorphicModel):
  22. name = models.CharField(max_length=100)
  23. description = models.TextField(default='')
  24. guideline = models.TextField(default='', blank=True)
  25. created_at = models.DateTimeField(auto_now_add=True)
  26. updated_at = models.DateTimeField(auto_now=True)
  27. users = models.ManyToManyField(User, related_name='projects')
  28. project_type = models.CharField(max_length=30, choices=PROJECT_CHOICES)
  29. random_order = models.BooleanField(default=False)
  30. collaborative_annotation = models.BooleanField(default=False)
  31. single_class_classification = models.BooleanField(default=False)
  32. def get_annotation_class(self):
  33. raise NotImplementedError()
  34. def __str__(self):
  35. return self.name
  36. class TextClassificationProject(Project):
  37. def get_annotation_class(self):
  38. return Category
  39. class SequenceLabelingProject(Project):
  40. def get_annotation_class(self):
  41. return Span
  42. class Seq2seqProject(Project):
  43. def get_annotation_class(self):
  44. return TextLabel
  45. class Speech2textProject(Project):
  46. def get_annotation_class(self):
  47. return TextLabel
  48. class ImageClassificationProject(Project):
  49. def get_annotation_class(self):
  50. return Category
  51. class Label(models.Model):
  52. text = models.CharField(max_length=100)
  53. prefix_key = models.CharField(
  54. max_length=10,
  55. blank=True,
  56. null=True,
  57. choices=(
  58. ('ctrl', 'ctrl'),
  59. ('shift', 'shift'),
  60. ('ctrl shift', 'ctrl shift')
  61. )
  62. )
  63. suffix_key = models.CharField(
  64. max_length=1,
  65. blank=True,
  66. null=True,
  67. choices=tuple(
  68. (c, c) for c in string.digits + string.ascii_lowercase
  69. )
  70. )
  71. project = models.ForeignKey(
  72. to=Project,
  73. on_delete=models.CASCADE,
  74. related_name='labels'
  75. )
  76. background_color = models.CharField(max_length=7, default='#209cee')
  77. text_color = models.CharField(max_length=7, default='#ffffff')
  78. created_at = models.DateTimeField(auto_now_add=True)
  79. updated_at = models.DateTimeField(auto_now=True)
  80. def __str__(self):
  81. return self.text
  82. def clean(self):
  83. # Don't allow shortcut key not to have a suffix key.
  84. if self.prefix_key and not self.suffix_key:
  85. message = 'Shortcut key may not have a suffix key.'
  86. raise ValidationError(message)
  87. # each shortcut (prefix key + suffix key) can only be assigned to one label
  88. if self.suffix_key or self.prefix_key:
  89. other_labels = self.project.labels.exclude(id=self.id)
  90. if other_labels.filter(suffix_key=self.suffix_key, prefix_key=self.prefix_key).exists():
  91. message = 'A label with the shortcut already exists in the project.'
  92. raise ValidationError(message)
  93. super().clean()
  94. class Meta:
  95. unique_together = (
  96. ('project', 'text'),
  97. )
  98. class Example(models.Model):
  99. meta = models.JSONField(default=dict)
  100. filename = models.FileField(default='.')
  101. project = models.ForeignKey(
  102. to=Project,
  103. on_delete=models.CASCADE,
  104. related_name='examples'
  105. )
  106. annotations_approved_by = models.ForeignKey(
  107. to=User,
  108. on_delete=models.SET_NULL,
  109. null=True,
  110. blank=True
  111. )
  112. text = models.TextField(null=True, blank=True)
  113. created_at = models.DateTimeField(auto_now_add=True)
  114. updated_at = models.DateTimeField(auto_now=True)
  115. @property
  116. def comment_count(self):
  117. return Comment.objects.filter(example=self.id).count()
  118. class Comment(models.Model):
  119. text = models.TextField()
  120. example = models.ForeignKey(
  121. to=Example,
  122. on_delete=models.CASCADE,
  123. related_name='comments'
  124. )
  125. user = models.ForeignKey(
  126. to=User,
  127. on_delete=models.CASCADE,
  128. null=True
  129. )
  130. created_at = models.DateTimeField(auto_now_add=True)
  131. updated_at = models.DateTimeField(auto_now=True)
  132. @property
  133. def username(self):
  134. return self.user.username
  135. class Meta:
  136. ordering = ('-created_at', )
  137. class Tag(models.Model):
  138. text = models.TextField()
  139. project = models.ForeignKey(
  140. to=Project,
  141. on_delete=models.CASCADE,
  142. related_name='tags'
  143. )
  144. def __str__(self):
  145. return self.text
  146. class Annotation(models.Model):
  147. objects = AnnotationManager()
  148. prob = models.FloatField(default=0.0)
  149. manual = models.BooleanField(default=False)
  150. user = models.ForeignKey(User, on_delete=models.CASCADE)
  151. created_at = models.DateTimeField(auto_now_add=True)
  152. updated_at = models.DateTimeField(auto_now=True)
  153. class Meta:
  154. abstract = True
  155. class Category(Annotation):
  156. example = models.ForeignKey(
  157. to=Example,
  158. on_delete=models.CASCADE,
  159. related_name='categories'
  160. )
  161. label = models.ForeignKey(
  162. to=Label,
  163. on_delete=models.CASCADE
  164. )
  165. class Meta:
  166. unique_together = (
  167. 'example',
  168. 'user',
  169. 'label'
  170. )
  171. class Span(Annotation):
  172. example = models.ForeignKey(
  173. to=Example,
  174. on_delete=models.CASCADE,
  175. related_name='spans'
  176. )
  177. label = models.ForeignKey(
  178. to=Label,
  179. on_delete=models.CASCADE
  180. )
  181. start_offset = models.IntegerField()
  182. end_offset = models.IntegerField()
  183. def clean(self):
  184. if self.start_offset >= self.end_offset:
  185. raise ValidationError('start_offset > end_offset')
  186. class Meta:
  187. unique_together = (
  188. 'example',
  189. 'user',
  190. 'label',
  191. 'start_offset',
  192. 'end_offset'
  193. )
  194. class TextLabel(Annotation):
  195. objects = Seq2seqAnnotationManager()
  196. example = models.ForeignKey(
  197. to=Example,
  198. on_delete=models.CASCADE,
  199. related_name='texts'
  200. )
  201. text = models.TextField()
  202. class Meta:
  203. unique_together = (
  204. 'example',
  205. 'user',
  206. 'text'
  207. )
  208. class Role(models.Model):
  209. name = models.CharField(max_length=100, unique=True)
  210. description = models.TextField(default='')
  211. created_at = models.DateTimeField(auto_now_add=True)
  212. updated_at = models.DateTimeField(auto_now=True)
  213. def __str__(self):
  214. return self.name
  215. class RoleMapping(models.Model):
  216. user = models.ForeignKey(
  217. to=User,
  218. on_delete=models.CASCADE,
  219. related_name='role_mappings'
  220. )
  221. project = models.ForeignKey(
  222. to=Project,
  223. on_delete=models.CASCADE,
  224. related_name='role_mappings'
  225. )
  226. role = models.ForeignKey(
  227. to=Role,
  228. on_delete=models.CASCADE
  229. )
  230. created_at = models.DateTimeField(auto_now_add=True)
  231. updated_at = models.DateTimeField(auto_now=True)
  232. objects = RoleMappingManager()
  233. def clean(self):
  234. other_rolemappings = self.project.role_mappings.exclude(id=self.id)
  235. if other_rolemappings.filter(user=self.user, project=self.project).exists():
  236. message = 'This user is already assigned to a role in this project.'
  237. raise ValidationError(message)
  238. class Meta:
  239. unique_together = ("user", "project")
  240. class AutoLabelingConfig(models.Model):
  241. model_name = models.CharField(max_length=100)
  242. model_attrs = models.JSONField(default=dict)
  243. template = models.TextField(default='')
  244. label_mapping = models.JSONField(default=dict)
  245. project = models.ForeignKey(
  246. to=Project,
  247. on_delete=models.CASCADE,
  248. related_name='auto_labeling_config'
  249. )
  250. default = models.BooleanField(default=False)
  251. created_at = models.DateTimeField(auto_now_add=True)
  252. updated_at = models.DateTimeField(auto_now=True)
  253. def __str__(self):
  254. return self.model_name
  255. def clean_fields(self, exclude=None):
  256. super().clean_fields(exclude=exclude)
  257. try:
  258. RequestModelFactory.find(self.model_name)
  259. except NameError:
  260. message = f'The specified model name {self.model_name} does not exist.'
  261. raise ValidationError(message)
  262. except Exception:
  263. message = 'The attributes does not match the model.'
  264. raise ValidationError(message)
  265. class RelationTypes(models.Model):
  266. color = models.TextField()
  267. name = models.TextField()
  268. project = models.ForeignKey(Project, related_name='relation_types', on_delete=models.CASCADE)
  269. def __str__(self):
  270. return self.name
  271. class Meta:
  272. unique_together = ('color', 'name')
  273. class AnnotationRelations(models.Model):
  274. annotation_id_1 = models.IntegerField()
  275. annotation_id_2 = models.IntegerField()
  276. type = models.ForeignKey(RelationTypes, related_name='annotation_relations', on_delete=models.CASCADE)
  277. timestamp = models.DateTimeField()
  278. user = models.ForeignKey(User, related_name='annotation_relations', on_delete=models.CASCADE)
  279. project = models.ForeignKey(Project, related_name='annotation_relations', on_delete=models.CASCADE)
  280. def __str__(self):
  281. return self.__dict__.__str__()
  282. class Meta:
  283. unique_together = ('annotation_id_1', 'annotation_id_2', 'type', 'project')