diff --git a/app/server/models.py b/app/server/models.py index ac7f7365..6ff5d630 100644 --- a/app/server/models.py +++ b/app/server/models.py @@ -142,7 +142,7 @@ class Label(models.Model): ('shift', 'shift'), ('ctrl shift', 'ctrl shift') ) - SUFFIX_KEYS = ( + SUFFIX_KEYS = tuple( (c, c) for c in string.ascii_lowercase ) diff --git a/app/server/serializers.py b/app/server/serializers.py index 78b57a75..8acd0aff 100644 --- a/app/server/serializers.py +++ b/app/server/serializers.py @@ -34,12 +34,17 @@ class LabelSerializer(serializers.ModelSerializer): raise ValidationError('Shortcut key may not have a suffix key.') # Don't allow to save same shortcut key when prefix_key is null. - context = self.context['request'].parser_context - project_id = context['kwargs'].get('project_id') - if Label.objects.filter(suffix_key=suffix_key, - prefix_key__isnull=True, - project=project_id).exists(): - raise ValidationError('Duplicate key.') + try: + context = self.context['request'].parser_context + project_id = context['kwargs']['project_id'] + except (AttributeError, KeyError): + pass # unit tests don't always have the correct context set up + else: + if Label.objects.filter(suffix_key=suffix_key, + prefix_key__isnull=True, + project=project_id).exists(): + raise ValidationError('Duplicate key.') + return super().validate(attrs) class Meta: diff --git a/app/server/tests/data/classification.jsonl b/app/server/tests/data/classification.jsonl index db749e9f..13a16369 100644 --- a/app/server/tests/data/classification.jsonl +++ b/app/server/tests/data/classification.jsonl @@ -1,3 +1,4 @@ {"text": "example", "labels": ["positive"], "meta": {"wikiPageID": 1}} {"text": "example", "labels": ["positive", "negative"], "meta": {"wikiPageID": 2}} {"text": "example", "labels": ["negative"], "meta": {"wikiPageID": 3}} +{"text": "example", "labels": ["neutral"], "meta": {"wikiPageID": 4}} diff --git a/app/server/tests/test_api.py b/app/server/tests/test_api.py index 4e7d65f8..c448ca4b 100644 --- a/app/server/tests/test_api.py +++ b/app/server/tests/test_api.py @@ -747,8 +747,9 @@ class TestUploader(APITestCase): expected_status=status.HTTP_201_CREATED) self.label_test_helper(self.classification_labels_url, expected_labels=[ - {'text': 'positive'}, - {'text': 'negative'}, + {'text': 'positive', 'suffix_key': 'p', 'prefix_key': None}, + {'text': 'negative', 'suffix_key': 'n', 'prefix_key': None}, + {'text': 'neutral', 'suffix_key': 'n', 'prefix_key': 'ctrl'}, ]) def test_can_upload_labeling_jsonl(self): @@ -758,9 +759,9 @@ class TestUploader(APITestCase): expected_status=status.HTTP_201_CREATED) self.label_test_helper(self.labeling_labels_url, expected_labels=[ - {'text': 'LOC'}, - {'text': 'ORG'}, - {'text': 'PER'}, + {'text': 'LOC', 'suffix_key': 'l', 'prefix_key': None}, + {'text': 'ORG', 'suffix_key': 'o', 'prefix_key': None}, + {'text': 'PER', 'suffix_key': 'p', 'prefix_key': None}, ]) def test_can_upload_seq2seq_jsonl(self): diff --git a/app/server/utils.py b/app/server/utils.py index 439cc870..4d60ae5a 100644 --- a/app/server/utils.py +++ b/app/server/utils.py @@ -74,16 +74,60 @@ class BaseStorage(object): """ return [label for label in labels if label not in created] - def to_serializer_format(self, labels): - """Exclude created labels. + @classmethod + def to_serializer_format(cls, labels, created): + """Convert a label to model dictionary. + + Also assigns shortkeys for each label that don't clash with existing + label shortkeys. Example: >>> labels = ["positive"] - >>> self.to_serializer_format(labels) - [{"text": "negative"}] - ``` + >>> created = {} + >>> BaseStorage.to_serializer_format(labels, created) + [{"text": "positive", "suffix_key": "p", "prefix_key": None}] + """ + existing_shortkeys = {(label.suffix_key, label.prefix_key) + for label in created.values()} + + serializer_labels = [] + + for label in sorted(labels): + serializer_label = {'text': label} + + shortkey = cls.get_shortkey(label, existing_shortkeys) + if shortkey: + serializer_label['suffix_key'] = shortkey[0] + serializer_label['prefix_key'] = shortkey[1] + existing_shortkeys.add(shortkey) + + serializer_labels.append(serializer_label) + + return serializer_labels + + @classmethod + def get_shortkey(cls, label, existing_shortkeys): + """Find the first non existing shortkey for the label. + + Example without existing shortkey: + >>> BaseStorage.get_shortkey("positive", set()) + ("p", None) + + Example with existing shortkey: + >>> BaseStorage.get_shortkey("positive", {("p", None)}) + ("p", "ctrl") """ - return [{'text': label} for label in labels] + model_prefix_keys = [key for (key, _) in Label.PREFIX_KEYS] + prefix_keys = [None] + model_prefix_keys + + model_suffix_keys = {key for (key, _) in Label.SUFFIX_KEYS} + suffix_keys = [key for key in label.lower() if key in model_suffix_keys] + + for shortkey in itertools.product(suffix_keys, prefix_keys): + if shortkey not in existing_shortkeys: + return shortkey + + return None def update_saved_labels(self, saved, new): """Update saved labels. @@ -120,7 +164,7 @@ class ClassificationStorage(BaseStorage): labels = self.extract_label(data) unique_labels = self.extract_unique_labels(labels) unique_labels = self.exclude_created_labels(unique_labels, saved_labels) - unique_labels = self.to_serializer_format(unique_labels) + unique_labels = self.to_serializer_format(unique_labels, saved_labels) new_labels = self.save_label(unique_labels) saved_labels = self.update_saved_labels(saved_labels, new_labels) annotations = self.make_annotations(docs, labels, saved_labels) @@ -170,7 +214,7 @@ class SequenceLabelingStorage(BaseStorage): labels = self.extract_label(data) unique_labels = self.extract_unique_labels(labels) unique_labels = self.exclude_created_labels(unique_labels, saved_labels) - unique_labels = self.to_serializer_format(unique_labels) + unique_labels = self.to_serializer_format(unique_labels, saved_labels) new_labels = self.save_label(unique_labels) saved_labels = self.update_saved_labels(saved_labels, new_labels) annotations = self.make_annotations(docs, labels, saved_labels)