You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

55 lines
1.5 KiB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
  1. from typing import List
  2. from projects.models import Project
  3. from .labels import Label, SpanLabel
  4. class Cleaner:
  5. def __init__(self, project: Project):
  6. pass
  7. def clean(self, labels: List[Label]) -> List[Label]:
  8. return labels
  9. @property
  10. def message(self) -> str:
  11. return ""
  12. class SpanCleaner(Cleaner):
  13. def __init__(self, project: Project):
  14. super().__init__(project)
  15. self.allow_overlapping = getattr(project, "allow_overlapping", False)
  16. def clean(self, labels: List[SpanLabel]) -> List[SpanLabel]: # type: ignore
  17. if self.allow_overlapping:
  18. return labels
  19. labels.sort(key=lambda label: label.start_offset)
  20. last_offset = -1
  21. new_labels = []
  22. for label in labels:
  23. if label.start_offset >= last_offset:
  24. last_offset = label.end_offset
  25. new_labels.append(label)
  26. return new_labels
  27. @property
  28. def message(self) -> str:
  29. return "This project cannot allow label overlapping. It's cleaned."
  30. class CategoryCleaner(Cleaner):
  31. def __init__(self, project: Project):
  32. super().__init__(project)
  33. self.exclusive = getattr(project, "single_class_classification", False)
  34. def clean(self, labels: List[Label]) -> List[Label]:
  35. if self.exclusive:
  36. return labels[:1]
  37. else:
  38. return labels
  39. @property
  40. def message(self) -> str:
  41. return "This project only one label can apply but multiple label found. It's cleaned."