mirror of https://github.com/doccano/doccano.git
Hironsan
2 years ago
8 changed files with 134 additions and 23 deletions
Split View
Diff Options
-
31backend/api/models.py
-
9backend/api/tasks.py
-
1backend/api/tests/data/sequence_labeling/example_overlapping.jsonl
-
37backend/api/tests/test_models.py
-
6backend/api/tests/test_tasks.py
-
46backend/api/views/upload/cleaners.py
-
13backend/api/views/upload/dataset.py
-
14backend/api/views/upload/factory.py
@ -0,0 +1 @@ |
|||
{"text": "exampleA", "label": [[0, 1, "LOC"], [0, 1, "LOC"]], "meta": {"wikiPageID": 1}} |
@ -0,0 +1,46 @@ |
|||
from typing import List |
|||
|
|||
from ...models import Project |
|||
from .label import CategoryLabel, Label, OffsetLabel |
|||
|
|||
|
|||
class Cleaner: |
|||
|
|||
def __init__(self, project: Project): |
|||
pass |
|||
|
|||
def clean(self, labels: List[Label]) -> List[Label]: |
|||
return labels |
|||
|
|||
|
|||
class SpanCleaner(Cleaner): |
|||
|
|||
def __init__(self, project: Project): |
|||
super().__init__(project) |
|||
self.allow_overlapping = getattr(project, 'allow_overlapping', False) |
|||
|
|||
def clean(self, labels: List[OffsetLabel]) -> List[OffsetLabel]: |
|||
if self.allow_overlapping: |
|||
return labels |
|||
|
|||
labels.sort(key=lambda label: label.start_offset) |
|||
last_offset = -1 |
|||
new_labels = [] |
|||
for label in labels: |
|||
if label.start_offset >= last_offset: |
|||
last_offset = label.end_offset |
|||
new_labels.append(label) |
|||
return new_labels |
|||
|
|||
|
|||
class CategoryCleaner(Cleaner): |
|||
|
|||
def __init__(self, project: Project): |
|||
super().__init__(project) |
|||
self.exclusive = getattr(project, 'single_class_classification', False) |
|||
|
|||
def clean(self, labels: List[CategoryLabel]) -> List[CategoryLabel]: |
|||
if self.exclusive: |
|||
return labels[:1] |
|||
else: |
|||
return labels |
Write
Preview
Loading…
Cancel
Save