From 112fba1a7cfb2f4e583acbe3fe0455b723215c40 Mon Sep 17 00:00:00 2001 From: Clemens Wolff Date: Wed, 15 May 2019 10:35:44 -0400 Subject: [PATCH 1/4] Add tests for label creation on corpus import --- app/server/tests/test_api.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/app/server/tests/test_api.py b/app/server/tests/test_api.py index e8d80a51..4e7d65f8 100644 --- a/app/server/tests/test_api.py +++ b/app/server/tests/test_api.py @@ -682,7 +682,9 @@ class TestUploader(APITestCase): users=[super_user], project_type=SEQUENCE_LABELING) cls.seq2seq_project = mommy.make('server.Seq2seqProject', users=[super_user], project_type=SEQ2SEQ) cls.classification_url = reverse(viewname='doc_uploader', args=[cls.classification_project.id]) + cls.classification_labels_url = reverse(viewname='label_list', args=[cls.classification_project.id]) cls.labeling_url = reverse(viewname='doc_uploader', args=[cls.labeling_project.id]) + cls.labeling_labels_url = reverse(viewname='label_list', args=[cls.labeling_project.id]) cls.seq2seq_url = reverse(viewname='doc_uploader', args=[cls.seq2seq_project.id]) def setUp(self): @@ -694,6 +696,14 @@ class TestUploader(APITestCase): response = self.client.post(url, data={'file': f, 'format': format}) self.assertEqual(response.status_code, expected_status) + def label_test_helper(self, url, expected_labels): + expected_keys = {key for label in expected_labels for key in label} + + actual_labels = [{key: value for (key, value) in label.items() if key in expected_keys} + for label in self.client.get(url).json()] + + self.assertCountEqual(actual_labels, expected_labels) + def test_can_upload_conll_format_file(self): self.upload_test_helper(url=self.labeling_url, filename='labeling.conll', @@ -736,12 +746,23 @@ class TestUploader(APITestCase): format='json', expected_status=status.HTTP_201_CREATED) + self.label_test_helper(self.classification_labels_url, expected_labels=[ + {'text': 'positive'}, + {'text': 'negative'}, + ]) + def test_can_upload_labeling_jsonl(self): self.upload_test_helper(url=self.labeling_url, filename='labeling.jsonl', format='json', expected_status=status.HTTP_201_CREATED) + self.label_test_helper(self.labeling_labels_url, expected_labels=[ + {'text': 'LOC'}, + {'text': 'ORG'}, + {'text': 'PER'}, + ]) + def test_can_upload_seq2seq_jsonl(self): self.upload_test_helper(url=self.seq2seq_url, filename='seq2seq.jsonl', From e37e16bfad4c67d37b1467da43417dbb3e115202 Mon Sep 17 00:00:00 2001 From: Clemens Wolff Date: Wed, 15 May 2019 10:42:08 -0400 Subject: [PATCH 2/4] Auto-generate label shortkeys on corpus import --- app/server/models.py | 2 +- app/server/serializers.py | 17 +++--- app/server/tests/data/classification.jsonl | 1 + app/server/tests/test_api.py | 11 ++-- app/server/utils.py | 60 +++++++++++++++++++--- 5 files changed, 71 insertions(+), 20 deletions(-) diff --git a/app/server/models.py b/app/server/models.py index ac7f7365..6ff5d630 100644 --- a/app/server/models.py +++ b/app/server/models.py @@ -142,7 +142,7 @@ class Label(models.Model): ('shift', 'shift'), ('ctrl shift', 'ctrl shift') ) - SUFFIX_KEYS = ( + SUFFIX_KEYS = tuple( (c, c) for c in string.ascii_lowercase ) diff --git a/app/server/serializers.py b/app/server/serializers.py index 78b57a75..8acd0aff 100644 --- a/app/server/serializers.py +++ b/app/server/serializers.py @@ -34,12 +34,17 @@ class LabelSerializer(serializers.ModelSerializer): raise ValidationError('Shortcut key may not have a suffix key.') # Don't allow to save same shortcut key when prefix_key is null. - context = self.context['request'].parser_context - project_id = context['kwargs'].get('project_id') - if Label.objects.filter(suffix_key=suffix_key, - prefix_key__isnull=True, - project=project_id).exists(): - raise ValidationError('Duplicate key.') + try: + context = self.context['request'].parser_context + project_id = context['kwargs']['project_id'] + except (AttributeError, KeyError): + pass # unit tests don't always have the correct context set up + else: + if Label.objects.filter(suffix_key=suffix_key, + prefix_key__isnull=True, + project=project_id).exists(): + raise ValidationError('Duplicate key.') + return super().validate(attrs) class Meta: diff --git a/app/server/tests/data/classification.jsonl b/app/server/tests/data/classification.jsonl index db749e9f..13a16369 100644 --- a/app/server/tests/data/classification.jsonl +++ b/app/server/tests/data/classification.jsonl @@ -1,3 +1,4 @@ {"text": "example", "labels": ["positive"], "meta": {"wikiPageID": 1}} {"text": "example", "labels": ["positive", "negative"], "meta": {"wikiPageID": 2}} {"text": "example", "labels": ["negative"], "meta": {"wikiPageID": 3}} +{"text": "example", "labels": ["neutral"], "meta": {"wikiPageID": 4}} diff --git a/app/server/tests/test_api.py b/app/server/tests/test_api.py index 4e7d65f8..c448ca4b 100644 --- a/app/server/tests/test_api.py +++ b/app/server/tests/test_api.py @@ -747,8 +747,9 @@ class TestUploader(APITestCase): expected_status=status.HTTP_201_CREATED) self.label_test_helper(self.classification_labels_url, expected_labels=[ - {'text': 'positive'}, - {'text': 'negative'}, + {'text': 'positive', 'suffix_key': 'p', 'prefix_key': None}, + {'text': 'negative', 'suffix_key': 'n', 'prefix_key': None}, + {'text': 'neutral', 'suffix_key': 'n', 'prefix_key': 'ctrl'}, ]) def test_can_upload_labeling_jsonl(self): @@ -758,9 +759,9 @@ class TestUploader(APITestCase): expected_status=status.HTTP_201_CREATED) self.label_test_helper(self.labeling_labels_url, expected_labels=[ - {'text': 'LOC'}, - {'text': 'ORG'}, - {'text': 'PER'}, + {'text': 'LOC', 'suffix_key': 'l', 'prefix_key': None}, + {'text': 'ORG', 'suffix_key': 'o', 'prefix_key': None}, + {'text': 'PER', 'suffix_key': 'p', 'prefix_key': None}, ]) def test_can_upload_seq2seq_jsonl(self): diff --git a/app/server/utils.py b/app/server/utils.py index 439cc870..4d60ae5a 100644 --- a/app/server/utils.py +++ b/app/server/utils.py @@ -74,16 +74,60 @@ class BaseStorage(object): """ return [label for label in labels if label not in created] - def to_serializer_format(self, labels): - """Exclude created labels. + @classmethod + def to_serializer_format(cls, labels, created): + """Convert a label to model dictionary. + + Also assigns shortkeys for each label that don't clash with existing + label shortkeys. Example: >>> labels = ["positive"] - >>> self.to_serializer_format(labels) - [{"text": "negative"}] - ``` + >>> created = {} + >>> BaseStorage.to_serializer_format(labels, created) + [{"text": "positive", "suffix_key": "p", "prefix_key": None}] + """ + existing_shortkeys = {(label.suffix_key, label.prefix_key) + for label in created.values()} + + serializer_labels = [] + + for label in sorted(labels): + serializer_label = {'text': label} + + shortkey = cls.get_shortkey(label, existing_shortkeys) + if shortkey: + serializer_label['suffix_key'] = shortkey[0] + serializer_label['prefix_key'] = shortkey[1] + existing_shortkeys.add(shortkey) + + serializer_labels.append(serializer_label) + + return serializer_labels + + @classmethod + def get_shortkey(cls, label, existing_shortkeys): + """Find the first non existing shortkey for the label. + + Example without existing shortkey: + >>> BaseStorage.get_shortkey("positive", set()) + ("p", None) + + Example with existing shortkey: + >>> BaseStorage.get_shortkey("positive", {("p", None)}) + ("p", "ctrl") """ - return [{'text': label} for label in labels] + model_prefix_keys = [key for (key, _) in Label.PREFIX_KEYS] + prefix_keys = [None] + model_prefix_keys + + model_suffix_keys = {key for (key, _) in Label.SUFFIX_KEYS} + suffix_keys = [key for key in label.lower() if key in model_suffix_keys] + + for shortkey in itertools.product(suffix_keys, prefix_keys): + if shortkey not in existing_shortkeys: + return shortkey + + return None def update_saved_labels(self, saved, new): """Update saved labels. @@ -120,7 +164,7 @@ class ClassificationStorage(BaseStorage): labels = self.extract_label(data) unique_labels = self.extract_unique_labels(labels) unique_labels = self.exclude_created_labels(unique_labels, saved_labels) - unique_labels = self.to_serializer_format(unique_labels) + unique_labels = self.to_serializer_format(unique_labels, saved_labels) new_labels = self.save_label(unique_labels) saved_labels = self.update_saved_labels(saved_labels, new_labels) annotations = self.make_annotations(docs, labels, saved_labels) @@ -170,7 +214,7 @@ class SequenceLabelingStorage(BaseStorage): labels = self.extract_label(data) unique_labels = self.extract_unique_labels(labels) unique_labels = self.exclude_created_labels(unique_labels, saved_labels) - unique_labels = self.to_serializer_format(unique_labels) + unique_labels = self.to_serializer_format(unique_labels, saved_labels) new_labels = self.save_label(unique_labels) saved_labels = self.update_saved_labels(saved_labels, new_labels) annotations = self.make_annotations(docs, labels, saved_labels) From 606ecfa90bd681ceb7ffb6c359fa362a5ef3e0a5 Mon Sep 17 00:00:00 2001 From: Clemens Wolff Date: Wed, 15 May 2019 14:03:31 -0400 Subject: [PATCH 3/4] Auto-generate label colors on corpus import --- app/server/static/js/label.vue | 2 +- app/server/tests/test_api.py | 42 ++++++++++++++++++++++++---------- app/server/utils.py | 22 ++++++++++++++++++ 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/app/server/static/js/label.vue b/app/server/static/js/label.vue index 5f01a336..fd36e8f5 100644 --- a/app/server/static/js/label.vue +++ b/app/server/static/js/label.vue @@ -203,7 +203,7 @@ export default { methods: { generateColor() { - const color = (Math.random() * 0xFFFFFF | 0).toString(16); // eslint-disable-line no-bitwise + const color = Math.floor(Math.random() * 0xFFFFFF).toString(16); const randomColor = '#' + ('000000' + color).slice(-6); return randomColor; }, diff --git a/app/server/tests/test_api.py b/app/server/tests/test_api.py index c448ca4b..3177b5c9 100644 --- a/app/server/tests/test_api.py +++ b/app/server/tests/test_api.py @@ -696,14 +696,20 @@ class TestUploader(APITestCase): response = self.client.post(url, data={'file': f, 'format': format}) self.assertEqual(response.status_code, expected_status) - def label_test_helper(self, url, expected_labels): + def label_test_helper(self, url, expected_labels, expected_label_keys): expected_keys = {key for label in expected_labels for key in label} + response = self.client.get(url).json() + actual_labels = [{key: value for (key, value) in label.items() if key in expected_keys} - for label in self.client.get(url).json()] + for label in response] self.assertCountEqual(actual_labels, expected_labels) + for label in response: + for expected_label_key in expected_label_keys: + self.assertIsNotNone(label.get(expected_label_key)) + def test_can_upload_conll_format_file(self): self.upload_test_helper(url=self.labeling_url, filename='labeling.conll', @@ -746,11 +752,17 @@ class TestUploader(APITestCase): format='json', expected_status=status.HTTP_201_CREATED) - self.label_test_helper(self.classification_labels_url, expected_labels=[ - {'text': 'positive', 'suffix_key': 'p', 'prefix_key': None}, - {'text': 'negative', 'suffix_key': 'n', 'prefix_key': None}, - {'text': 'neutral', 'suffix_key': 'n', 'prefix_key': 'ctrl'}, - ]) + self.label_test_helper( + url=self.classification_labels_url, + expected_labels=[ + {'text': 'positive', 'suffix_key': 'p', 'prefix_key': None}, + {'text': 'negative', 'suffix_key': 'n', 'prefix_key': None}, + {'text': 'neutral', 'suffix_key': 'n', 'prefix_key': 'ctrl'}, + ], + expected_label_keys=[ + 'background_color', + 'text_color', + ]) def test_can_upload_labeling_jsonl(self): self.upload_test_helper(url=self.labeling_url, @@ -758,11 +770,17 @@ class TestUploader(APITestCase): format='json', expected_status=status.HTTP_201_CREATED) - self.label_test_helper(self.labeling_labels_url, expected_labels=[ - {'text': 'LOC', 'suffix_key': 'l', 'prefix_key': None}, - {'text': 'ORG', 'suffix_key': 'o', 'prefix_key': None}, - {'text': 'PER', 'suffix_key': 'p', 'prefix_key': None}, - ]) + self.label_test_helper( + url=self.labeling_labels_url, + expected_labels=[ + {'text': 'LOC', 'suffix_key': 'l', 'prefix_key': None}, + {'text': 'ORG', 'suffix_key': 'o', 'prefix_key': None}, + {'text': 'PER', 'suffix_key': 'p', 'prefix_key': None}, + ], + expected_label_keys=[ + 'background_color', + 'text_color', + ]) def test_can_upload_seq2seq_jsonl(self): self.upload_test_helper(url=self.seq2seq_url, diff --git a/app/server/utils.py b/app/server/utils.py index 4d60ae5a..21423939 100644 --- a/app/server/utils.py +++ b/app/server/utils.py @@ -4,6 +4,8 @@ import itertools import json import re from collections import defaultdict +from math import floor +from random import random from django.db import transaction from rest_framework.renderers import JSONRenderer @@ -101,6 +103,11 @@ class BaseStorage(object): serializer_label['prefix_key'] = shortkey[1] existing_shortkeys.add(shortkey) + background_color = cls.generate_color() + text_color = cls.black_or_white(background_color) + serializer_label['background_color'] = background_color + serializer_label['text_color'] = text_color + serializer_labels.append(serializer_label) return serializer_labels @@ -129,6 +136,21 @@ class BaseStorage(object): return None + @classmethod + def generate_color(cls): + """Port of `label.vue:generateColor`.""" + color = hex(int(floor(random() * 0xFFFFFF)))[2:] + random_color = '#' + ('000000' + color)[-6:] + return random_color + + @classmethod + def black_or_white(cls, hexcolor): + """Port of `label.vue:blackOrWhite`.""" + r = int(hexcolor[1:3], 16) + g = int(hexcolor[3:5], 16) + b = int(hexcolor[5:7], 16) + return '#ffffff' if (((r * 299) + (g * 587) + (b * 114)) / 1000) < 128 else '#000000' + def update_saved_labels(self, saved, new): """Update saved labels. From c996d1040200900488f1cb6dca4d3918ba08bebc Mon Sep 17 00:00:00 2001 From: Clemens Wolff Date: Mon, 20 May 2019 09:15:00 -0400 Subject: [PATCH 4/4] Extract color utility class --- app/server/tests/test_utils.py | 22 ++++++++++++ app/server/utils.py | 65 +++++++++++++++++++++++----------- 2 files changed, 67 insertions(+), 20 deletions(-) create mode 100644 app/server/tests/test_utils.py diff --git a/app/server/tests/test_utils.py b/app/server/tests/test_utils.py new file mode 100644 index 00000000..f6c6ac96 --- /dev/null +++ b/app/server/tests/test_utils.py @@ -0,0 +1,22 @@ +from django.test import TestCase + +from server.utils import Color + + +class TestColor(TestCase): + def test_random_color(self): + color = Color.random() + self.assertTrue(0 <= color.red <= 255) + self.assertTrue(0 <= color.green <= 255) + self.assertTrue(0 <= color.blue <= 255) + + def test_hex(self): + color = Color(red=255, green=192, blue=203) + self.assertEqual(color.hex, '#ffc0cb') + + def test_contrast_color(self): + color = Color(red=255, green=192, blue=203) + self.assertEqual(color.contrast_color.hex, '#000000') + + color = Color(red=199, green=21, blue=133) + self.assertEqual(color.contrast_color.hex, '#ffffff') diff --git a/app/server/utils.py b/app/server/utils.py index 21423939..236d8880 100644 --- a/app/server/utils.py +++ b/app/server/utils.py @@ -5,7 +5,7 @@ import json import re from collections import defaultdict from math import floor -from random import random +from random import Random from django.db import transaction from rest_framework.renderers import JSONRenderer @@ -103,10 +103,9 @@ class BaseStorage(object): serializer_label['prefix_key'] = shortkey[1] existing_shortkeys.add(shortkey) - background_color = cls.generate_color() - text_color = cls.black_or_white(background_color) - serializer_label['background_color'] = background_color - serializer_label['text_color'] = text_color + color = Color.random() + serializer_label['background_color'] = color.hex + serializer_label['text_color'] = color.contrast_color.hex serializer_labels.append(serializer_label) @@ -136,21 +135,6 @@ class BaseStorage(object): return None - @classmethod - def generate_color(cls): - """Port of `label.vue:generateColor`.""" - color = hex(int(floor(random() * 0xFFFFFF)))[2:] - random_color = '#' + ('000000' + color)[-6:] - return random_color - - @classmethod - def black_or_white(cls, hexcolor): - """Port of `label.vue:blackOrWhite`.""" - r = int(hexcolor[1:3], 16) - g = int(hexcolor[3:5], 16) - b = int(hexcolor[5:7], 16) - return '#ffffff' if (((r * 299) + (g * 587) + (b * 114)) / 1000) < 128 else '#000000' - def update_saved_labels(self, saved, new): """Update saved labels. @@ -510,3 +494,44 @@ class CSVPainter(JSONPainter): for a in annotations: res.append({**d, **a}) return res + + +class Color: + def __init__(self, red, green, blue): + self.red = red + self.green = green + self.blue = blue + + @property + def contrast_color(self): + """Generate black or white color. + + Ensure that text and background color combinations provide + sufficient contrast when viewed by someone having color deficits or + when viewed on a black and white screen. + + Algorithm from w3c: + * https://www.w3.org/TR/AERT/#color-contrast + """ + return Color.white() if self.brightness < 128 else Color.black() + + @property + def brightness(self): + return ((self.red * 299) + (self.green * 587) + (self.blue * 114)) / 1000 + + @property + def hex(self): + return '#{:02x}{:02x}{:02x}'.format(self.red, self.green, self.blue) + + @classmethod + def white(cls): + return cls(red=255, green=255, blue=255) + + @classmethod + def black(cls): + return cls(red=0, green=0, blue=0) + + @classmethod + def random(cls, seed=None): + rgb = Random(seed).choices(range(256), k=3) + return cls(*rgb)