You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

341 lines
9.0 KiB

5 years ago
3 years ago
6 years ago
6 years ago
5 years ago
5 years ago
6 years ago
5 years ago
5 years ago
5 years ago
6 years ago
6 years ago
3 years ago
3 years ago
6 years ago
3 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
  1. import string
  2. from auto_labeling_pipeline.models import RequestModelFactory
  3. from django.contrib.auth.models import User
  4. from django.core.exceptions import ValidationError
  5. from django.db import models
  6. from polymorphic.models import PolymorphicModel
  7. from .managers import (AnnotationManager, RoleMappingManager,
  8. Seq2seqAnnotationManager)
  # Identifiers of the supported project types.
  DOCUMENT_CLASSIFICATION = 'DocumentClassification'
  SEQUENCE_LABELING = 'SequenceLabeling'
  SEQ2SEQ = 'Seq2seq'
  SPEECH2TEXT = 'Speech2text'
  IMAGE_CLASSIFICATION = 'ImageClassification'

  # (stored value, human-readable name) pairs, in display order.
  PROJECT_CHOICES = (
      (DOCUMENT_CLASSIFICATION, 'document classification'),
      (SEQUENCE_LABELING, 'sequence labeling'),
      (SEQ2SEQ, 'sequence to sequence'),
      (SPEECH2TEXT, 'speech to text'),
      (IMAGE_CLASSIFICATION, 'image classification'),
  )
  class Project(PolymorphicModel):
      """Base model for an annotation project.

      Concrete project types subclass this model and override
      ``get_annotation_class`` to name the annotation model they use.
      """

      name = models.CharField(max_length=100)
      description = models.TextField(default='')
      guideline = models.TextField(default='', blank=True)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)
      # Users attached to the project; the reverse accessor is ``user.projects``.
      users = models.ManyToManyField(to=User, related_name='projects')
      # One of the values listed in PROJECT_CHOICES.
      project_type = models.CharField(choices=PROJECT_CHOICES, max_length=30)
      random_order = models.BooleanField(default=False)
      collaborative_annotation = models.BooleanField(default=False)
      single_class_classification = models.BooleanField(default=False)

      def __str__(self):
          """Return the project name."""
          return self.name

      def get_annotation_class(self):
          """Return the annotation model class for this project type.

          Raises:
              NotImplementedError: always; subclasses provide the real answer.
          """
          raise NotImplementedError()
  class TextClassificationProject(Project):
      """Project type whose annotations are ``Category`` objects."""

      def get_annotation_class(self):
          """Return the ``Category`` annotation model."""
          annotation_class = Category
          return annotation_class
  class SequenceLabelingProject(Project):
      """Project type whose annotations are ``Span`` objects."""

      def get_annotation_class(self):
          """Return the ``Span`` annotation model."""
          annotation_class = Span
          return annotation_class
  class Seq2seqProject(Project):
      """Project type whose annotations are ``TextLabel`` objects."""

      def get_annotation_class(self):
          """Return the ``TextLabel`` annotation model."""
          annotation_class = TextLabel
          return annotation_class
  class Speech2textProject(Project):
      """Project type whose annotations are ``TextLabel`` objects."""

      def get_annotation_class(self):
          """Return the ``TextLabel`` annotation model."""
          annotation_class = TextLabel
          return annotation_class
  class ImageClassificationProject(Project):
      """Project type whose annotations are ``Category`` objects."""

      def get_annotation_class(self):
          """Return the ``Category`` annotation model."""
          annotation_class = Category
          return annotation_class
  class Label(models.Model):
      """A label belonging to one project, with an optional keyboard shortcut.

      The shortcut is made of an optional modifier (``prefix_key``) and a
      single digit or lowercase ASCII letter (``suffix_key``). Label text is
      unique within a project.
      """

      text = models.CharField(max_length=100)
      # Optional modifier part of the shortcut.
      prefix_key = models.CharField(
          max_length=10,
          blank=True,
          null=True,
          choices=(
              ('ctrl', 'ctrl'),
              ('shift', 'shift'),
              ('ctrl shift', 'ctrl shift')
          )
      )
      # Single-character part of the shortcut: 0-9 or a-z.
      suffix_key = models.CharField(
          max_length=1,
          blank=True,
          null=True,
          choices=tuple(
              (c, c) for c in string.digits + string.ascii_lowercase
          )
      )
      project = models.ForeignKey(
          to=Project,
          on_delete=models.CASCADE,
          related_name='labels'
      )
      background_color = models.CharField(max_length=7, default='#209cee')
      text_color = models.CharField(max_length=7, default='#ffffff')
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      def __str__(self):
          """Return the label text."""
          return self.text

      def clean(self):
          """Validate the keyboard shortcut of this label.

          Raises:
              ValidationError: if a prefix key is set without a suffix key, or
                  if another label of the same project already uses the same
                  prefix/suffix combination.
          """
          # A modifier on its own is not a usable shortcut: a prefix key
          # requires a suffix key.
          if self.prefix_key and not self.suffix_key:
              message = 'Shortcut key must have a suffix key.'
              raise ValidationError(message)

          # Each shortcut (prefix key + suffix key) can only be assigned to
          # one label; this label itself is excluded from the comparison.
          if self.suffix_key or self.prefix_key:
              other_labels = self.project.labels.exclude(id=self.id)
              if other_labels.filter(suffix_key=self.suffix_key, prefix_key=self.prefix_key).exists():
                  message = 'A label with the shortcut already exists in the project.'
                  raise ValidationError(message)

          super().clean()

      class Meta:
          unique_together = (
              ('project', 'text'),
          )
  class Example(PolymorphicModel):
      """Base model for one item to annotate inside a project."""

      meta = models.JSONField(default=dict)
      filename = models.FileField(default='.')
      project = models.ForeignKey(
          Project,
          related_name='examples',
          on_delete=models.CASCADE,
      )
      # Cleared (set to NULL) when the approving user is deleted.
      annotations_approved_by = models.ForeignKey(
          User,
          blank=True,
          null=True,
          on_delete=models.SET_NULL,
      )
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      @property
      def comment_count(self):
          """Return the number of ``Comment`` rows pointing at this example."""
          related_comments = Comment.objects.filter(example=self.id)
          return related_comments.count()
  class Document(Example):
      """An example that carries text content."""

      text = models.TextField()

      def __str__(self):
          """Return the first 50 characters of the text."""
          preview_length = 50
          return self.text[:preview_length]
  class Image(Example):
      """An example backed by the ``filename`` file field it inherits."""

      def __str__(self):
          """Return the stored file name as a string.

          ``self.filename`` is a file-field value object, not a ``str``;
          ``__str__`` must return a real string or ``str(image)`` raises
          ``TypeError``, hence the explicit conversion.
          """
          return str(self.filename)
  class Comment(models.Model):
      """A textual comment on an example; listed newest first."""

      text = models.TextField()
      example = models.ForeignKey(
          to=Example,
          on_delete=models.CASCADE,
          related_name='comments'
      )
      # Nullable: a comment may exist without an attached user.
      user = models.ForeignKey(
          to=User,
          on_delete=models.CASCADE,
          null=True
      )
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      @property
      def username(self):
          """Return the author's username, or ``None`` when no user is set.

          ``user`` is declared with ``null=True``, so it must be checked
          before dereferencing to avoid an ``AttributeError``.
          """
          author = self.user
          if author is None:
              return None
          return author.username

      class Meta:
          ordering = ('-created_at', )
  class Tag(models.Model):
      """A free-text tag attached to a project."""

      text = models.TextField()
      project = models.ForeignKey(
          Project,
          related_name='tags',
          on_delete=models.CASCADE,
      )

      def __str__(self):
          """Return the tag text."""
          return self.text
  class Annotation(models.Model):
      """Abstract base holding the fields shared by every annotation model."""

      objects = AnnotationManager()

      prob = models.FloatField(default=0.0)
      manual = models.BooleanField(default=False)
      user = models.ForeignKey(to=User, on_delete=models.CASCADE)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      class Meta:
          # No table is created for this model; concrete subclasses inherit its fields.
          abstract = True
  class Category(Annotation):
      """Annotation that assigns one label to a whole example."""

      example = models.ForeignKey(
          Example,
          related_name='categories',
          on_delete=models.CASCADE,
      )
      label = models.ForeignKey(Label, on_delete=models.CASCADE)

      class Meta:
          # A user may assign a given label to a given example only once.
          unique_together = ('example', 'user', 'label')
  class Span(Annotation):
      """Annotation that labels the range ``start_offset``..``end_offset`` of an example."""

      example = models.ForeignKey(
          to=Example,
          on_delete=models.CASCADE,
          related_name='spans'
      )
      label = models.ForeignKey(
          to=Label,
          on_delete=models.CASCADE
      )
      start_offset = models.IntegerField()
      end_offset = models.IntegerField()

      def clean(self):
          """Check that the span is non-empty and correctly ordered.

          Raises:
              ValidationError: if ``start_offset`` is not strictly less than
                  ``end_offset``.
          """
          # Model.full_clean() calls clean() even when field validation has
          # already failed, so either offset may still be None here. The
          # missing value is reported by field validation; comparing None
          # would only raise TypeError.
          if self.start_offset is None or self.end_offset is None:
              return
          if self.start_offset >= self.end_offset:
              raise ValidationError('start_offset must be less than end_offset.')

      class Meta:
          unique_together = (
              'example',
              'user',
              'label',
              'start_offset',
              'end_offset'
          )
  class TextLabel(Annotation):
      """Annotation that attaches a piece of text to an example."""

      # Replaces the manager inherited from Annotation.
      objects = Seq2seqAnnotationManager()

      example = models.ForeignKey(
          Example,
          related_name='texts',
          on_delete=models.CASCADE,
      )
      text = models.TextField()

      class Meta:
          # A user cannot attach the same text to the same example twice.
          unique_together = ('example', 'user', 'text')
  class Role(models.Model):
      """A named role; names are unique across all roles."""

      name = models.CharField(unique=True, max_length=100)
      description = models.TextField(default='')
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      def __str__(self):
          """Return the role name."""
          return self.name
  class RoleMapping(models.Model):
      """Assignment of one role to one user within one project."""

      user = models.ForeignKey(
          User,
          related_name='role_mappings',
          on_delete=models.CASCADE,
      )
      project = models.ForeignKey(
          Project,
          related_name='role_mappings',
          on_delete=models.CASCADE,
      )
      role = models.ForeignKey(Role, on_delete=models.CASCADE)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)
      objects = RoleMappingManager()

      def clean(self):
          """Reject a second role mapping for the same user in the same project.

          Raises:
              ValidationError: if another mapping of this project already
                  references this user.
          """
          # Leave this row out so that re-validating a saved mapping passes.
          siblings = self.project.role_mappings.exclude(id=self.id)
          duplicates = siblings.filter(user=self.user, project=self.project)
          if duplicates.exists():
              raise ValidationError('This user is already assigned to a role in this project.')

      class Meta:
          unique_together = ("user", "project")
  class AutoLabelingConfig(models.Model):
      """Auto-labeling settings stored for a project."""

      model_name = models.CharField(max_length=100)
      model_attrs = models.JSONField(default=dict)
      template = models.TextField(default='')
      label_mapping = models.JSONField(default=dict)
      project = models.ForeignKey(
          to=Project,
          on_delete=models.CASCADE,
          related_name='auto_labeling_config'
      )
      default = models.BooleanField(default=False)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      def __str__(self):
          """Return the configured model name."""
          return self.model_name

      def clean_fields(self, exclude=None):
          """Run the standard field validation, then check ``model_name``.

          Args:
              exclude: forwarded unchanged to ``Model.clean_fields``.

          Raises:
              ValidationError: if ``RequestModelFactory.find`` raises for
                  ``model_name``. The original exception is chained as the
                  cause so the underlying failure stays visible in tracebacks.
          """
          super().clean_fields(exclude=exclude)
          try:
              # NOTE(review): presumably looks the request model up by name;
              # only the NameError case is reported as an unknown model name.
              RequestModelFactory.find(self.model_name)
          except NameError as err:
              message = f'The specified model name {self.model_name} does not exist.'
              raise ValidationError(message) from err
          except Exception as err:
              message = 'The attributes does not match the model.'
              raise ValidationError(message) from err