diff --git a/README.md b/README.md index ec3dba48..93430988 100644 --- a/README.md +++ b/README.md @@ -81,9 +81,9 @@ git clone https://github.com/chakki-works/doccano.git cd doccano ``` -To install doccano, there are two options: +To install doccano, there are three options: -**Option1: Pull the Docker image** +**Option1: Pull the production Docker image** ```bash docker pull chakkiworks/doccano @@ -108,37 +108,10 @@ npm run build # npm start # for developers ``` -Next we need to make migration. Run the following command: +**Option3: Pull the development Docker-Compose images** ```bash -python manage.py migrate -``` - -Next we need to create a user who can login to the admin site. Run the following command: - - -```bash -python manage.py createsuperuser -``` - -Enter your desired username and press enter. - -```bash -Username: admin -``` - -You will then be prompted for your desired email address: - -```bash -Email address: admin@example.com -``` - -The final step is to enter your password. You will be asked to enter your password twice, the second time as a confirmation of the first. - -```bash -Password: ********** -Password (again): ********* -Superuser created successfully. +docker-compose pull ``` ## Usage @@ -165,10 +138,32 @@ docker exec doccano tools/create-admin.sh "admin" "admin@example.com" "password" **Option2: Running Django development server** +Before running, we need to make migration. Run the following command: + +```bash +python manage.py migrate +``` + +Next we need to create a user who can login to the admin site. Run the following command: + +```bash +python manage.py create_admin --noinput --username "admin" --email "admin@example.com" --password "password" +``` + +Finally, to start the server, run the following command: + ```bash python manage.py runserver ``` +**Option3: Running the development Docker-Compose stack** + +We can use docker-compose to set up the webpack server, django server, database, etc. all in one command: + +```bash +docker-compose up +``` + Now, open a Web browser and go to . You should see the login screen: Login Form diff --git a/app/server/models.py b/app/server/models.py index ac7f7365..6ff5d630 100644 --- a/app/server/models.py +++ b/app/server/models.py @@ -142,7 +142,7 @@ class Label(models.Model): ('shift', 'shift'), ('ctrl shift', 'ctrl shift') ) - SUFFIX_KEYS = ( + SUFFIX_KEYS = tuple( (c, c) for c in string.ascii_lowercase ) diff --git a/app/server/serializers.py b/app/server/serializers.py index 78b57a75..8acd0aff 100644 --- a/app/server/serializers.py +++ b/app/server/serializers.py @@ -34,12 +34,17 @@ class LabelSerializer(serializers.ModelSerializer): raise ValidationError('Shortcut key may not have a suffix key.') # Don't allow to save same shortcut key when prefix_key is null. - context = self.context['request'].parser_context - project_id = context['kwargs'].get('project_id') - if Label.objects.filter(suffix_key=suffix_key, - prefix_key__isnull=True, - project=project_id).exists(): - raise ValidationError('Duplicate key.') + try: + context = self.context['request'].parser_context + project_id = context['kwargs']['project_id'] + except (AttributeError, KeyError): + pass # unit tests don't always have the correct context set up + else: + if Label.objects.filter(suffix_key=suffix_key, + prefix_key__isnull=True, + project=project_id).exists(): + raise ValidationError('Duplicate key.') + return super().validate(attrs) class Meta: diff --git a/app/server/static/js/label.vue b/app/server/static/js/label.vue index 5f01a336..fd36e8f5 100644 --- a/app/server/static/js/label.vue +++ b/app/server/static/js/label.vue @@ -203,7 +203,7 @@ export default { methods: { generateColor() { - const color = (Math.random() * 0xFFFFFF | 0).toString(16); // eslint-disable-line no-bitwise + const color = Math.floor(Math.random() * 0xFFFFFF).toString(16); const randomColor = '#' + ('000000' + color).slice(-6); return randomColor; }, diff --git a/app/server/tests/data/classification.jsonl b/app/server/tests/data/classification.jsonl index db749e9f..13a16369 100644 --- a/app/server/tests/data/classification.jsonl +++ b/app/server/tests/data/classification.jsonl @@ -1,3 +1,4 @@ {"text": "example", "labels": ["positive"], "meta": {"wikiPageID": 1}} {"text": "example", "labels": ["positive", "negative"], "meta": {"wikiPageID": 2}} {"text": "example", "labels": ["negative"], "meta": {"wikiPageID": 3}} +{"text": "example", "labels": ["neutral"], "meta": {"wikiPageID": 4}} diff --git a/app/server/tests/test_api.py b/app/server/tests/test_api.py index e8d80a51..3177b5c9 100644 --- a/app/server/tests/test_api.py +++ b/app/server/tests/test_api.py @@ -682,7 +682,9 @@ class TestUploader(APITestCase): users=[super_user], project_type=SEQUENCE_LABELING) cls.seq2seq_project = mommy.make('server.Seq2seqProject', users=[super_user], project_type=SEQ2SEQ) cls.classification_url = reverse(viewname='doc_uploader', args=[cls.classification_project.id]) + cls.classification_labels_url = reverse(viewname='label_list', args=[cls.classification_project.id]) cls.labeling_url = reverse(viewname='doc_uploader', args=[cls.labeling_project.id]) + cls.labeling_labels_url = reverse(viewname='label_list', args=[cls.labeling_project.id]) cls.seq2seq_url = reverse(viewname='doc_uploader', args=[cls.seq2seq_project.id]) def setUp(self): @@ -694,6 +696,20 @@ class TestUploader(APITestCase): response = self.client.post(url, data={'file': f, 'format': format}) self.assertEqual(response.status_code, expected_status) + def label_test_helper(self, url, expected_labels, expected_label_keys): + expected_keys = {key for label in expected_labels for key in label} + + response = self.client.get(url).json() + + actual_labels = [{key: value for (key, value) in label.items() if key in expected_keys} + for label in response] + + self.assertCountEqual(actual_labels, expected_labels) + + for label in response: + for expected_label_key in expected_label_keys: + self.assertIsNotNone(label.get(expected_label_key)) + def test_can_upload_conll_format_file(self): self.upload_test_helper(url=self.labeling_url, filename='labeling.conll', @@ -736,12 +752,36 @@ class TestUploader(APITestCase): format='json', expected_status=status.HTTP_201_CREATED) + self.label_test_helper( + url=self.classification_labels_url, + expected_labels=[ + {'text': 'positive', 'suffix_key': 'p', 'prefix_key': None}, + {'text': 'negative', 'suffix_key': 'n', 'prefix_key': None}, + {'text': 'neutral', 'suffix_key': 'n', 'prefix_key': 'ctrl'}, + ], + expected_label_keys=[ + 'background_color', + 'text_color', + ]) + def test_can_upload_labeling_jsonl(self): self.upload_test_helper(url=self.labeling_url, filename='labeling.jsonl', format='json', expected_status=status.HTTP_201_CREATED) + self.label_test_helper( + url=self.labeling_labels_url, + expected_labels=[ + {'text': 'LOC', 'suffix_key': 'l', 'prefix_key': None}, + {'text': 'ORG', 'suffix_key': 'o', 'prefix_key': None}, + {'text': 'PER', 'suffix_key': 'p', 'prefix_key': None}, + ], + expected_label_keys=[ + 'background_color', + 'text_color', + ]) + def test_can_upload_seq2seq_jsonl(self): self.upload_test_helper(url=self.seq2seq_url, filename='seq2seq.jsonl', diff --git a/app/server/tests/test_config.py b/app/server/tests/test_config.py index 2ba3a27d..14a68071 100644 --- a/app/server/tests/test_config.py +++ b/app/server/tests/test_config.py @@ -24,15 +24,23 @@ class TestDatabaseUrl(TestCase): with setenv('DATABASE_URL', 'pgsql://u:p@h/d?sslmode=require'): self._assert_sslmode_is('require') + def test_database_url_with_complex_user(self): + with setenv('DATABASE_URL', 'pgsql://user%40host:p@h/d'): + self._assert_user_is('user@host') + def _assert_sslmode_is(self, expected): - reload(settings) actual = settings.DATABASES['default'].get('OPTIONS', {}).get('sslmode') self.assertEqual(actual, expected) + def _assert_user_is(self, expected): + actual = settings.DATABASES['default'].get('USER', '') + self.assertEqual(actual, expected) + @contextmanager def setenv(key, value): environ[key] = value + reload(settings) yield del environ[key] diff --git a/app/server/tests/test_utils.py b/app/server/tests/test_utils.py new file mode 100644 index 00000000..f6c6ac96 --- /dev/null +++ b/app/server/tests/test_utils.py @@ -0,0 +1,22 @@ +from django.test import TestCase + +from server.utils import Color + + +class TestColor(TestCase): + def test_random_color(self): + color = Color.random() + self.assertTrue(0 <= color.red <= 255) + self.assertTrue(0 <= color.green <= 255) + self.assertTrue(0 <= color.blue <= 255) + + def test_hex(self): + color = Color(red=255, green=192, blue=203) + self.assertEqual(color.hex, '#ffc0cb') + + def test_contrast_color(self): + color = Color(red=255, green=192, blue=203) + self.assertEqual(color.contrast_color.hex, '#000000') + + color = Color(red=199, green=21, blue=133) + self.assertEqual(color.contrast_color.hex, '#ffffff') diff --git a/app/server/utils.py b/app/server/utils.py index 439cc870..236d8880 100644 --- a/app/server/utils.py +++ b/app/server/utils.py @@ -4,6 +4,8 @@ import itertools import json import re from collections import defaultdict +from math import floor +from random import Random from django.db import transaction from rest_framework.renderers import JSONRenderer @@ -74,16 +76,64 @@ class BaseStorage(object): """ return [label for label in labels if label not in created] - def to_serializer_format(self, labels): - """Exclude created labels. + @classmethod + def to_serializer_format(cls, labels, created): + """Convert a label to model dictionary. + + Also assigns shortkeys for each label that don't clash with existing + label shortkeys. Example: >>> labels = ["positive"] - >>> self.to_serializer_format(labels) - [{"text": "negative"}] - ``` + >>> created = {} + >>> BaseStorage.to_serializer_format(labels, created) + [{"text": "positive", "suffix_key": "p", "prefix_key": None}] """ - return [{'text': label} for label in labels] + existing_shortkeys = {(label.suffix_key, label.prefix_key) + for label in created.values()} + + serializer_labels = [] + + for label in sorted(labels): + serializer_label = {'text': label} + + shortkey = cls.get_shortkey(label, existing_shortkeys) + if shortkey: + serializer_label['suffix_key'] = shortkey[0] + serializer_label['prefix_key'] = shortkey[1] + existing_shortkeys.add(shortkey) + + color = Color.random() + serializer_label['background_color'] = color.hex + serializer_label['text_color'] = color.contrast_color.hex + + serializer_labels.append(serializer_label) + + return serializer_labels + + @classmethod + def get_shortkey(cls, label, existing_shortkeys): + """Find the first non existing shortkey for the label. + + Example without existing shortkey: + >>> BaseStorage.get_shortkey("positive", set()) + ("p", None) + + Example with existing shortkey: + >>> BaseStorage.get_shortkey("positive", {("p", None)}) + ("p", "ctrl") + """ + model_prefix_keys = [key for (key, _) in Label.PREFIX_KEYS] + prefix_keys = [None] + model_prefix_keys + + model_suffix_keys = {key for (key, _) in Label.SUFFIX_KEYS} + suffix_keys = [key for key in label.lower() if key in model_suffix_keys] + + for shortkey in itertools.product(suffix_keys, prefix_keys): + if shortkey not in existing_shortkeys: + return shortkey + + return None def update_saved_labels(self, saved, new): """Update saved labels. @@ -120,7 +170,7 @@ class ClassificationStorage(BaseStorage): labels = self.extract_label(data) unique_labels = self.extract_unique_labels(labels) unique_labels = self.exclude_created_labels(unique_labels, saved_labels) - unique_labels = self.to_serializer_format(unique_labels) + unique_labels = self.to_serializer_format(unique_labels, saved_labels) new_labels = self.save_label(unique_labels) saved_labels = self.update_saved_labels(saved_labels, new_labels) annotations = self.make_annotations(docs, labels, saved_labels) @@ -170,7 +220,7 @@ class SequenceLabelingStorage(BaseStorage): labels = self.extract_label(data) unique_labels = self.extract_unique_labels(labels) unique_labels = self.exclude_created_labels(unique_labels, saved_labels) - unique_labels = self.to_serializer_format(unique_labels) + unique_labels = self.to_serializer_format(unique_labels, saved_labels) new_labels = self.save_label(unique_labels) saved_labels = self.update_saved_labels(saved_labels, new_labels) annotations = self.make_annotations(docs, labels, saved_labels) @@ -444,3 +494,44 @@ class CSVPainter(JSONPainter): for a in annotations: res.append({**d, **a}) return res + + +class Color: + def __init__(self, red, green, blue): + self.red = red + self.green = green + self.blue = blue + + @property + def contrast_color(self): + """Generate black or white color. + + Ensure that text and background color combinations provide + sufficient contrast when viewed by someone having color deficits or + when viewed on a black and white screen. + + Algorithm from w3c: + * https://www.w3.org/TR/AERT/#color-contrast + """ + return Color.white() if self.brightness < 128 else Color.black() + + @property + def brightness(self): + return ((self.red * 299) + (self.green * 587) + (self.blue * 114)) / 1000 + + @property + def hex(self): + return '#{:02x}{:02x}{:02x}'.format(self.red, self.green, self.blue) + + @classmethod + def white(cls): + return cls(red=255, green=255, blue=255) + + @classmethod + def black(cls): + return cls(red=0, green=0, blue=0) + + @classmethod + def random(cls, seed=None): + rgb = Random(seed).choices(range(256), k=3) + return cls(*rgb) diff --git a/app/server/webpack.config.js b/app/server/webpack.config.js index 7477d93a..8f260bcc 100644 --- a/app/server/webpack.config.js +++ b/app/server/webpack.config.js @@ -4,6 +4,9 @@ const VueLoaderPlugin = require('vue-loader/lib/plugin') const devMode = process.env.DEBUG !== 'False'; const hotReload = process.env.HOT_RELOAD === '1'; +const webpackHost = process.env.WEBPACK_HOST || '127.0.0.1'; +const webpackPort = process.env.WEBPACK_PORT ? parseInt(process.env.WEBPACK_PORT, 10) : 8080; +const pollMillis = process.env.WEBPACK_POLL_MILLIS ? parseInt(process.env.WEBPACK_POLL_MILLIS, 10) : false; module.exports = { mode: devMode ? 'development' : 'production', @@ -27,16 +30,21 @@ module.exports = { 'download_text_classification': './static/js/download_text_classification.js', }, output: { - publicPath: hotReload ? 'http://localhost:8080/' : '', + publicPath: hotReload ? `http://127.0.0.1:${webpackPort}/` : '', path: __dirname + '/static/bundle', filename: '[name].js' }, devtool: devMode ? 'cheap-eval-source-map' : 'source-map', devServer: { + port: webpackPort, + host: webpackHost, hot: true, quiet: false, headers: { 'Access-Control-Allow-Origin': '*' } }, + watchOptions: { + poll: pollMillis, + }, module: { rules: [ { diff --git a/azuredeploy.json b/azuredeploy.json index fe286a6b..487ed3c3 100644 --- a/azuredeploy.json +++ b/azuredeploy.json @@ -118,9 +118,12 @@ "databaseSkuTier": "GeneralPurpose", "databaseSkuFamily": "Gen5", "databaseSkuName": "[concat('GP_', variables('databaseSkuFamily'), '_', parameters('databaseCores'))]", - "databaseConnectionString": "[concat('pgsql://', parameters('adminUserName'), '@', variables('databaseServerName'), ':', parameters('adminPassword'), '@', variables('databaseServerName'), '.postgres.database.azure.com:5432/', parameters('databaseName'))]", "databaseVersion": "9.6", + "databaseServerPort": 5432, "databaseServerName": "[concat(parameters('appName'),'-state')]", + "databaseUserCredentials" : "[concat(uriComponent(concat(parameters('adminUserName'), '@', variables('databaseServerName'))), ':', parameters('adminPassword'))]", + "databaseFqdn" : "[concat( variables('databaseServerName'), '.postgres.database.azure.com:', variables('databaseServerPort'))]", + "databaseConnectionString": "[concat('pgsql://', variables('databaseUserCredentials'), '@', variables('databaseFqdn'), '/', parameters('databaseName'))]", "setupScriptName": "[concat(parameters('appName'),'-setup')]", "appServicePlanName": "[concat(parameters('appName'),'-hosting')]", "analyticsName": "[concat(parameters('appName'),'-analytics')]", @@ -128,7 +131,8 @@ "password": "[parameters('dockerRegistryPassword')]", "username": "[parameters('dockerRegistryUserName')]", "server": "[parameters('dockerRegistry')]" - } + }, + "appFqdn": "[concat(parameters('appName'),'.azurewebsites.net')]" }, "resources": [ { @@ -308,5 +312,15 @@ "serverFarmId": "[resourceId('Microsoft.Web/serverfarms', variables('appServicePlanName'))]" } } - ] + ], + "outputs": { + "appServer": { + "type": "string", + "value": "[concat(variables('appFqdn'))]" + }, + "databaseServer": { + "type": "string", + "value": "[variables('databaseFqdn')]" + } + } } diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..500e4239 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,42 @@ +version: "3" +services: + + django: + image: python:3.6 + volumes: + - .:/src + - venv:/src/venv + command: ["/src/tools/dev-django.sh", "0.0.0.0:8000"] + environment: + ADMIN_USERNAME: "admin" + ADMIN_PASSWORD: "password" + ADMIN_EMAIL: "admin@example.com" + DATABASE_URL: "postgres://doccano:doccano@postgres:5432/doccano?sslmode=disable" + ports: + - 8000:8000 + + webpack: + image: node:8 + volumes: + - .:/src + - node_modules:/src/app/server/node_modules + command: ["/src/tools/dev-webpack.sh"] + environment: + WEBPACK_HOST: "0.0.0.0" + WEBPACK_PORT: "8080" + WEBPACK_POLL_MILLIS: "1000" + ports: + - 8080:8080 + + postgres: + image: postgres:9.6 + environment: + POSTGRES_USER: "doccano" + POSTGRES_PASSWORD: "doccano" + POSTGRES_DB: "doccano" + ports: + - 5432:5432 + +volumes: + node_modules: + venv: diff --git a/tools/dev-django.sh b/tools/dev-django.sh new file mode 100755 index 00000000..a6d70df6 --- /dev/null +++ b/tools/dev-django.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +set -o errexit + +root="$(dirname "$0")/.." +app="${root}/app" +venv="${root}/venv" + +if [[ ! -f "${venv}/bin/python" ]]; then + echo "Creating virtualenv" + mkdir -p "${venv}" + python3 -m venv "${venv}" + "${venv}/bin/pip" install --upgrade pip setuptools +fi + +echo "Installing dependencies" +"${venv}/bin/pip" install -r "${root}/requirements.txt" + +echo "Initializing database" +"${venv}/bin/python" "${app}/manage.py" wait_for_db +"${venv}/bin/python" "${app}/manage.py" migrate + +if [[ -n "${ADMIN_USERNAME}" ]] && [[ -n "${ADMIN_PASSWORD}" ]] && [[ -n "${ADMIN_EMAIL}" ]]; then + "${venv}/bin/python" "${app}/manage.py" create_admin \ + --username "${ADMIN_USERNAME}" \ + --password "${ADMIN_PASSWORD}" \ + --email "${ADMIN_EMAIL}" \ + --noinput \ + || true +fi + +echo "Starting django" +"${venv}/bin/python" -u "${app}/manage.py" runserver "$@" diff --git a/tools/dev-webpack.sh b/tools/dev-webpack.sh new file mode 100755 index 00000000..fea3cf3e --- /dev/null +++ b/tools/dev-webpack.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -o errexit + +root="$(dirname "$0")/.." +server="${root}/app/server" + +( + cd "${server}" + + if [[ ! -d node_modules/.bin ]]; then + echo "Installing dependencies" + npm install + fi + + echo "Starting webpack" + npm start +)