doccano/app/api/tests/test_utils.py

import io

from django.test import TestCase

from seqeval.metrics.sequence_labeling import get_entities

from ..models import Label, Document
from ..utils import BaseStorage, ClassificationStorage, SequenceLabelingStorage, Seq2seqStorage, CoNLLParser
from ..utils import Color, iterable_to_io


class TestColor(TestCase):
    def test_random_color(self):
        color = Color.random()
        self.assertTrue(0 <= color.red <= 255)
        self.assertTrue(0 <= color.green <= 255)
        self.assertTrue(0 <= color.blue <= 255)

    def test_hex(self):
        color = Color(red=255, green=192, blue=203)
        self.assertEqual(color.hex, '#ffc0cb')

    def test_contrast_color(self):
        color = Color(red=255, green=192, blue=203)
        self.assertEqual(color.contrast_color.hex, '#000000')

        color = Color(red=199, green=21, blue=133)
        self.assertEqual(color.contrast_color.hex, '#ffffff')


class TestBaseStorage(TestCase):
    def test_extract_label(self):
        data = [{'labels': ['positive']}, {'labels': ['negative']}]

        actual = BaseStorage.extract_label(data)

        self.assertEqual(actual, [['positive'], ['negative']])

    def test_exclude_created_labels(self):
        labels = ['positive', 'negative']
        created = {'positive': Label(text='positive')}

        actual = BaseStorage.exclude_created_labels(labels, created)

        self.assertEqual(actual, ['negative'])

    def test_to_serializer_format(self):
        labels = ['positive']
        created = {}

        actual = BaseStorage.to_serializer_format(labels, created, random_seed=123)

        self.assertEqual(actual, [{
            'text': 'positive',
            'prefix_key': None,
            'suffix_key': 'p',
            'background_color': '#0d1668',
            'text_color': '#ffffff',
        }])

    def test_get_shortkey_without_existing_shortkey(self):
        label = 'positive'
        created = {}

        actual = BaseStorage.get_shortkey(label, created)

        self.assertEqual(actual, ('p', None))

    def test_get_shortkey_with_existing_shortkey(self):
        label = 'positive'
        created = {('p', None)}

        actual = BaseStorage.get_shortkey(label, created)

        self.assertEqual(actual, ('p', 'ctrl'))

    def test_update_saved_labels(self):
        saved = {'positive': Label(text='positive', text_color='#000000')}
        new = [Label(text='positive', text_color='#ffffff')]

        actual = BaseStorage.update_saved_labels(saved, new)

        self.assertEqual(actual['positive'].text_color, '#ffffff')


class TestClassificationStorage(TestCase):
    def test_extract_unique_labels(self):
        labels = [['positive'], ['positive', 'negative'], ['negative']]

        actual = ClassificationStorage.extract_unique_labels(labels)

        self.assertCountEqual(actual, ['positive', 'negative'])

    def test_make_annotations(self):
        docs = [Document(text='a', id=1), Document(text='b', id=2), Document(text='c', id=3)]
        labels = [['positive'], ['positive', 'negative'], ['negative']]
        saved_labels = {'positive': Label(text='positive', id=1), 'negative': Label(text='negative', id=2)}

        actual = ClassificationStorage.make_annotations(docs, labels, saved_labels)

        self.assertCountEqual(actual, [
            {'document': 1, 'label': 1},
            {'document': 2, 'label': 1},
            {'document': 2, 'label': 2},
            {'document': 3, 'label': 2},
        ])


class TestSequenceLabelingStorage(TestCase):
    def test_extract_unique_labels(self):
        labels = [[[0, 1, 'LOC']], [[3, 4, 'ORG']]]

        actual = SequenceLabelingStorage.extract_unique_labels(labels)

        self.assertCountEqual(actual, ['LOC', 'ORG'])

    def test_make_annotations(self):
        docs = [Document(text='a', id=1), Document(text='b', id=2)]
        labels = [[[0, 1, 'LOC']], [[3, 4, 'ORG']]]
        saved_labels = {'LOC': Label(text='LOC', id=1), 'ORG': Label(text='ORG', id=2)}

        actual = SequenceLabelingStorage.make_annotations(docs, labels, saved_labels)

        self.assertEqual(actual, [
            {'document': 1, 'label': 1, 'start_offset': 0, 'end_offset': 1},
            {'document': 2, 'label': 2, 'start_offset': 3, 'end_offset': 4},
        ])


class TestSeq2seqStorage(TestCase):
    def test_make_annotations(self):
        docs = [Document(text='a', id=1), Document(text='b', id=2)]
        labels = [['Hello!'], ['How are you?', "What's up?"]]

        actual = Seq2seqStorage.make_annotations(docs, labels)

        self.assertEqual(actual, [
            {'document': 1, 'text': 'Hello!'},
            {'document': 2, 'text': 'How are you?'},
            {'document': 2, 'text': "What's up?"},
        ])


class TestCoNLLParser(TestCase):
    def test_calc_char_offset(self):
        words = ['EU', 'rejects', 'German', 'call']
        tags = ['B-ORG', 'O', 'B-MISC', 'O']

        entities = get_entities(tags)
        actual = CoNLLParser.calc_char_offset(words, tags)

        self.assertEqual(entities, [('ORG', 0, 0), ('MISC', 2, 2)])

        self.assertEqual(actual, {
            'text': 'EU rejects German call',
            'labels': [[0, 2, 'ORG'], [11, 17, 'MISC']]
        })


class TestIterableToIO(TestCase):
    def test(self):
        def iterable():
            yield b'fo'
            yield b'o\nbar\n'
            yield b'baz\nrest'

        stream = iterable_to_io(iterable())
        stream = io.TextIOWrapper(stream)

        self.assertEqual(stream.readlines(), ['foo\n', 'bar\n', 'baz\n', 'rest'])