You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

107 lines
3.7 KiB

2 years ago
  1. from django.db.models import Count, Manager
  2. class AnnotationManager(Manager):
  3. def calc_label_distribution(self, examples, members, labels):
  4. """Calculate label distribution.
  5. Args:
  6. examples: example queryset.
  7. members: user queryset.
  8. labels: label queryset.
  9. Returns:
  10. label distribution per user.
  11. Examples:
  12. >>> self.calc_label_distribution(examples, members, labels)
  13. {'admin': {'positive': 10, 'negative': 5}}
  14. """
  15. distribution = {member.username: {label.text: 0 for label in labels} for member in members}
  16. items = self.filter(example_id__in=examples)\
  17. .values('user__username', 'label__text')\
  18. .annotate(count=Count('label__text'))
  19. for item in items:
  20. username = item['user__username']
  21. label = item['label__text']
  22. count = item['count']
  23. distribution[username][label] = count
  24. return distribution
  25. def get_labels(self, label, project):
  26. if project.collaborative_annotation:
  27. return self.filter(example=label.example)
  28. else:
  29. return self.filter(example=label.example, user=label.user)
  30. def can_annotate(self, label, project) -> bool:
  31. raise NotImplementedError('Please implement this method in the subclass')
  32. def filter_annotatable_labels(self, labels, project):
  33. return [label for label in labels if self.can_annotate(label, project)]
  34. class CategoryManager(AnnotationManager):
  35. def can_annotate(self, label, project) -> bool:
  36. is_exclusive = project.single_class_classification
  37. categories = self.get_labels(label, project)
  38. if is_exclusive:
  39. return not categories.exists()
  40. else:
  41. return not categories.filter(label=label.label).exists()
  42. class SpanManager(AnnotationManager):
  43. def can_annotate(self, label, project) -> bool:
  44. overlapping = getattr(project, 'allow_overlapping', False)
  45. spans = self.get_labels(label, project)
  46. if overlapping:
  47. return True
  48. for span in spans:
  49. if span.is_overlapping(label):
  50. return False
  51. return True
  52. class ExampleManager(Manager):
  53. def bulk_create(self, objs, batch_size=None, ignore_conflicts=False):
  54. super().bulk_create(objs, batch_size=batch_size, ignore_conflicts=ignore_conflicts)
  55. uuids = [data.uuid for data in objs]
  56. examples = self.in_bulk(uuids, field_name='uuid')
  57. return [examples[uid] for uid in uuids]
  58. class ExampleStateManager(Manager):
  59. def count_done(self, examples, user=None):
  60. if user:
  61. queryset = self.filter(example_id__in=examples, confirmed_by=user)
  62. else:
  63. queryset = self.filter(example_id__in=examples)
  64. return queryset.distinct().values('example').count()
  65. def measure_member_progress(self, examples, members):
  66. done_count = self.filter(example_id__in=examples)\
  67. .values('confirmed_by__username')\
  68. .annotate(total=Count('confirmed_by'))
  69. response = {
  70. 'total': examples.count(),
  71. 'progress': [
  72. {
  73. 'user': obj['confirmed_by__username'],
  74. 'done': obj['total']
  75. } for obj in done_count
  76. ]
  77. }
  78. members_with_progress = {o['confirmed_by__username'] for o in done_count}
  79. for member in members:
  80. if member.username not in members_with_progress:
  81. response['progress'].append({
  82. 'user': member.username,
  83. 'done': 0
  84. })
  85. return response