diff --git a/README.md b/README.md
index ec3dba48..93430988 100644
--- a/README.md
+++ b/README.md
@@ -81,9 +81,9 @@ git clone https://github.com/chakki-works/doccano.git
cd doccano
```
-To install doccano, there are two options:
+To install doccano, there are three options:
-**Option1: Pull the Docker image**
+**Option1: Pull the production Docker image**
```bash
docker pull chakkiworks/doccano
@@ -108,37 +108,10 @@ npm run build
# npm start # for developers
```
-Next we need to make migration. Run the following command:
+**Option3: Pull the development Docker-Compose images**
```bash
-python manage.py migrate
-```
-
-Next we need to create a user who can login to the admin site. Run the following command:
-
-
-```bash
-python manage.py createsuperuser
-```
-
-Enter your desired username and press enter.
-
-```bash
-Username: admin
-```
-
-You will then be prompted for your desired email address:
-
-```bash
-Email address: admin@example.com
-```
-
-The final step is to enter your password. You will be asked to enter your password twice, the second time as a confirmation of the first.
-
-```bash
-Password: **********
-Password (again): *********
-Superuser created successfully.
+docker-compose pull
```
## Usage
@@ -165,10 +138,32 @@ docker exec doccano tools/create-admin.sh "admin" "admin@example.com" "password"
**Option2: Running Django development server**
+Before running the server, we need to apply the database migrations. Run the following command:
+
+```bash
+python manage.py migrate
+```
+
+Next, we need to create a user who can log in to the admin site. Run the following command:
+
+```bash
+python manage.py create_admin --noinput --username "admin" --email "admin@example.com" --password "password"
+```
+
+Finally, to start the server, run the following command:
+
```bash
python manage.py runserver
```
+**Option3: Running the development Docker-Compose stack**
+
+We can use docker-compose to set up the webpack server, the Django server, the database, etc., all in one command:
+
+```bash
+docker-compose up
+```
+
Now, open a Web browser and go to <http://127.0.0.1:8000/login/>. You should see the login screen:
diff --git a/app/server/models.py b/app/server/models.py
index ac7f7365..6ff5d630 100644
--- a/app/server/models.py
+++ b/app/server/models.py
@@ -142,7 +142,7 @@ class Label(models.Model):
('shift', 'shift'),
('ctrl shift', 'ctrl shift')
)
-    SUFFIX_KEYS = (
+    # Built as a tuple: a bare generator expression can only be iterated once.
+    SUFFIX_KEYS = tuple(
         (c, c) for c in string.ascii_lowercase
     )
diff --git a/app/server/serializers.py b/app/server/serializers.py
index 78b57a75..8acd0aff 100644
--- a/app/server/serializers.py
+++ b/app/server/serializers.py
@@ -34,12 +34,17 @@ class LabelSerializer(serializers.ModelSerializer):
raise ValidationError('Shortcut key may not have a suffix key.')
# Don't allow to save same shortcut key when prefix_key is null.
- context = self.context['request'].parser_context
- project_id = context['kwargs'].get('project_id')
- if Label.objects.filter(suffix_key=suffix_key,
- prefix_key__isnull=True,
- project=project_id).exists():
- raise ValidationError('Duplicate key.')
+ try:
+ context = self.context['request'].parser_context
+ project_id = context['kwargs']['project_id']
+ except (AttributeError, KeyError):
+ pass # unit tests don't always have the correct context set up
+ else:
+ if Label.objects.filter(suffix_key=suffix_key,
+ prefix_key__isnull=True,
+ project=project_id).exists():
+ raise ValidationError('Duplicate key.')
+
return super().validate(attrs)
class Meta:
diff --git a/app/server/static/js/label.vue b/app/server/static/js/label.vue
index 5f01a336..fd36e8f5 100644
--- a/app/server/static/js/label.vue
+++ b/app/server/static/js/label.vue
@@ -203,7 +203,7 @@ export default {
methods: {
generateColor() {
- const color = (Math.random() * 0xFFFFFF | 0).toString(16); // eslint-disable-line no-bitwise
+ const color = Math.floor(Math.random() * 0xFFFFFF).toString(16);
const randomColor = '#' + ('000000' + color).slice(-6);
return randomColor;
},
diff --git a/app/server/tests/data/classification.jsonl b/app/server/tests/data/classification.jsonl
index db749e9f..13a16369 100644
--- a/app/server/tests/data/classification.jsonl
+++ b/app/server/tests/data/classification.jsonl
@@ -1,3 +1,4 @@
{"text": "example", "labels": ["positive"], "meta": {"wikiPageID": 1}}
{"text": "example", "labels": ["positive", "negative"], "meta": {"wikiPageID": 2}}
{"text": "example", "labels": ["negative"], "meta": {"wikiPageID": 3}}
+{"text": "example", "labels": ["neutral"], "meta": {"wikiPageID": 4}}
diff --git a/app/server/tests/test_api.py b/app/server/tests/test_api.py
index e8d80a51..3177b5c9 100644
--- a/app/server/tests/test_api.py
+++ b/app/server/tests/test_api.py
@@ -682,7 +682,9 @@ class TestUploader(APITestCase):
users=[super_user], project_type=SEQUENCE_LABELING)
cls.seq2seq_project = mommy.make('server.Seq2seqProject', users=[super_user], project_type=SEQ2SEQ)
cls.classification_url = reverse(viewname='doc_uploader', args=[cls.classification_project.id])
+ cls.classification_labels_url = reverse(viewname='label_list', args=[cls.classification_project.id])
cls.labeling_url = reverse(viewname='doc_uploader', args=[cls.labeling_project.id])
+ cls.labeling_labels_url = reverse(viewname='label_list', args=[cls.labeling_project.id])
cls.seq2seq_url = reverse(viewname='doc_uploader', args=[cls.seq2seq_project.id])
def setUp(self):
@@ -694,6 +696,20 @@ class TestUploader(APITestCase):
response = self.client.post(url, data={'file': f, 'format': format})
self.assertEqual(response.status_code, expected_status)
+ def label_test_helper(self, url, expected_labels, expected_label_keys):
+ expected_keys = {key for label in expected_labels for key in label}
+
+ response = self.client.get(url).json()
+
+ actual_labels = [{key: value for (key, value) in label.items() if key in expected_keys}
+ for label in response]
+
+ self.assertCountEqual(actual_labels, expected_labels)
+
+ for label in response:
+ for expected_label_key in expected_label_keys:
+ self.assertIsNotNone(label.get(expected_label_key))
+
def test_can_upload_conll_format_file(self):
self.upload_test_helper(url=self.labeling_url,
filename='labeling.conll',
@@ -736,12 +752,36 @@ class TestUploader(APITestCase):
format='json',
expected_status=status.HTTP_201_CREATED)
+ self.label_test_helper(
+ url=self.classification_labels_url,
+ expected_labels=[
+ {'text': 'positive', 'suffix_key': 'p', 'prefix_key': None},
+ {'text': 'negative', 'suffix_key': 'n', 'prefix_key': None},
+ {'text': 'neutral', 'suffix_key': 'n', 'prefix_key': 'ctrl'},
+ ],
+ expected_label_keys=[
+ 'background_color',
+ 'text_color',
+ ])
+
     def test_can_upload_labeling_jsonl(self):
+        """Upload labeling.jsonl, then check the labels listed for the project.
+
+        LOC, ORG and PER are each expected with a shortkey and no prefix
+        key, and with non-None background and text colors.
+        """
         self.upload_test_helper(url=self.labeling_url,
                                 filename='labeling.jsonl',
                                 format='json',
                                 expected_status=status.HTTP_201_CREATED)
+        self.label_test_helper(
+            url=self.labeling_labels_url,
+            expected_labels=[
+                {'text': 'LOC', 'suffix_key': 'l', 'prefix_key': None},
+                {'text': 'ORG', 'suffix_key': 'o', 'prefix_key': None},
+                {'text': 'PER', 'suffix_key': 'p', 'prefix_key': None},
+            ],
+            expected_label_keys=[
+                'background_color',
+                'text_color',
+            ])
+
def test_can_upload_seq2seq_jsonl(self):
self.upload_test_helper(url=self.seq2seq_url,
filename='seq2seq.jsonl',
diff --git a/app/server/tests/test_config.py b/app/server/tests/test_config.py
index 2ba3a27d..14a68071 100644
--- a/app/server/tests/test_config.py
+++ b/app/server/tests/test_config.py
@@ -24,15 +24,23 @@ class TestDatabaseUrl(TestCase):
with setenv('DATABASE_URL', 'pgsql://u:p@h/d?sslmode=require'):
self._assert_sslmode_is('require')
+ def test_database_url_with_complex_user(self):
+ with setenv('DATABASE_URL', 'pgsql://user%40host:p@h/d'):
+ self._assert_user_is('user@host')
+
     def _assert_sslmode_is(self, expected):
+        """Assert the `sslmode` option of the default database (None if unset).
+
+        Settings are not reloaded here; `setenv` reloads them after setting
+        the environment variable.
+        """
-        reload(settings)
         actual = settings.DATABASES['default'].get('OPTIONS', {}).get('sslmode')
         self.assertEqual(actual, expected)
+ def _assert_user_is(self, expected):
+ actual = settings.DATABASES['default'].get('USER', '')
+ self.assertEqual(actual, expected)
+
@contextmanager
def setenv(key, value):
environ[key] = value
+ reload(settings)
yield
del environ[key]
diff --git a/app/server/tests/test_utils.py b/app/server/tests/test_utils.py
new file mode 100644
index 00000000..f6c6ac96
--- /dev/null
+++ b/app/server/tests/test_utils.py
@@ -0,0 +1,22 @@
+from django.test import TestCase
+
+from server.utils import Color
+
+
+class TestColor(TestCase):
+ def test_random_color(self):
+ color = Color.random()
+ self.assertTrue(0 <= color.red <= 255)
+ self.assertTrue(0 <= color.green <= 255)
+ self.assertTrue(0 <= color.blue <= 255)
+
+ def test_hex(self):
+ color = Color(red=255, green=192, blue=203)
+ self.assertEqual(color.hex, '#ffc0cb')
+
+ def test_contrast_color(self):
+ color = Color(red=255, green=192, blue=203)
+ self.assertEqual(color.contrast_color.hex, '#000000')
+
+ color = Color(red=199, green=21, blue=133)
+ self.assertEqual(color.contrast_color.hex, '#ffffff')
diff --git a/app/server/utils.py b/app/server/utils.py
index 439cc870..236d8880 100644
--- a/app/server/utils.py
+++ b/app/server/utils.py
@@ -4,6 +4,8 @@ import itertools
import json
import re
from collections import defaultdict
+from math import floor
+from random import Random
from django.db import transaction
from rest_framework.renderers import JSONRenderer
@@ -74,16 +76,64 @@ class BaseStorage(object):
"""
return [label for label in labels if label not in created]
-    def to_serializer_format(self, labels):
-        """Exclude created labels.
+    @classmethod
+    def to_serializer_format(cls, labels, created):
+        """Convert label texts to dictionaries for the label serializer.
+
+        Each label is also assigned a shortkey that does not clash with the
+        shortkeys of the labels in `created` or of labels processed earlier
+        in this call. Every dictionary additionally carries a random
+        "background_color" and a contrasting "text_color"; the example below
+        omits these two entries because their values are random.
         Example:
             >>> labels = ["positive"]
-            >>> self.to_serializer_format(labels)
-            [{"text": "negative"}]
-        ```
+            >>> created = {}
+            >>> BaseStorage.to_serializer_format(labels, created)
+            [{"text": "positive", "suffix_key": "p", "prefix_key": None}]
         """
-        return [{'text': label} for label in labels]
+        existing_shortkeys = {(label.suffix_key, label.prefix_key)
+                              for label in created.values()}
+
+        serializer_labels = []
+
+        # Iterating in sorted order makes the shortkey assignment independent
+        # of the order in which the labels were passed in.
+        for label in sorted(labels):
+            serializer_label = {'text': label}
+
+            # get_shortkey returns None when no free shortkey exists; the
+            # label is then emitted without suffix_key/prefix_key entries.
+            shortkey = cls.get_shortkey(label, existing_shortkeys)
+            if shortkey:
+                serializer_label['suffix_key'] = shortkey[0]
+                serializer_label['prefix_key'] = shortkey[1]
+                existing_shortkeys.add(shortkey)
+
+            color = Color.random()
+            serializer_label['background_color'] = color.hex
+            serializer_label['text_color'] = color.contrast_color.hex
+
+            serializer_labels.append(serializer_label)
+
+        return serializer_labels
+
+ @classmethod
+ def get_shortkey(cls, label, existing_shortkeys):
+ """Find the first non existing shortkey for the label.
+
+ Example without existing shortkey:
+ >>> BaseStorage.get_shortkey("positive", set())
+ ("p", None)
+
+ Example with existing shortkey:
+ >>> BaseStorage.get_shortkey("positive", {("p", None)})
+ ("p", "ctrl")
+ """
+ model_prefix_keys = [key for (key, _) in Label.PREFIX_KEYS]
+ prefix_keys = [None] + model_prefix_keys
+
+ model_suffix_keys = {key for (key, _) in Label.SUFFIX_KEYS}
+ suffix_keys = [key for key in label.lower() if key in model_suffix_keys]
+
+ for shortkey in itertools.product(suffix_keys, prefix_keys):
+ if shortkey not in existing_shortkeys:
+ return shortkey
+
+ return None
def update_saved_labels(self, saved, new):
"""Update saved labels.
@@ -120,7 +170,7 @@ class ClassificationStorage(BaseStorage):
labels = self.extract_label(data)
unique_labels = self.extract_unique_labels(labels)
unique_labels = self.exclude_created_labels(unique_labels, saved_labels)
- unique_labels = self.to_serializer_format(unique_labels)
+ unique_labels = self.to_serializer_format(unique_labels, saved_labels)
new_labels = self.save_label(unique_labels)
saved_labels = self.update_saved_labels(saved_labels, new_labels)
annotations = self.make_annotations(docs, labels, saved_labels)
@@ -170,7 +220,7 @@ class SequenceLabelingStorage(BaseStorage):
labels = self.extract_label(data)
unique_labels = self.extract_unique_labels(labels)
unique_labels = self.exclude_created_labels(unique_labels, saved_labels)
- unique_labels = self.to_serializer_format(unique_labels)
+ unique_labels = self.to_serializer_format(unique_labels, saved_labels)
new_labels = self.save_label(unique_labels)
saved_labels = self.update_saved_labels(saved_labels, new_labels)
annotations = self.make_annotations(docs, labels, saved_labels)
@@ -444,3 +494,44 @@ class CSVPainter(JSONPainter):
for a in annotations:
res.append({**d, **a})
return res
+
+
+class Color:
+ def __init__(self, red, green, blue):
+ self.red = red
+ self.green = green
+ self.blue = blue
+
+ @property
+ def contrast_color(self):
+ """Generate black or white color.
+
+ Ensure that text and background color combinations provide
+ sufficient contrast when viewed by someone having color deficits or
+ when viewed on a black and white screen.
+
+ Algorithm from w3c:
+ * https://www.w3.org/TR/AERT/#color-contrast
+ """
+ return Color.white() if self.brightness < 128 else Color.black()
+
+ @property
+ def brightness(self):
+ return ((self.red * 299) + (self.green * 587) + (self.blue * 114)) / 1000
+
+ @property
+ def hex(self):
+ return '#{:02x}{:02x}{:02x}'.format(self.red, self.green, self.blue)
+
+ @classmethod
+ def white(cls):
+ return cls(red=255, green=255, blue=255)
+
+ @classmethod
+ def black(cls):
+ return cls(red=0, green=0, blue=0)
+
+ @classmethod
+ def random(cls, seed=None):
+ rgb = Random(seed).choices(range(256), k=3)
+ return cls(*rgb)
diff --git a/app/server/webpack.config.js b/app/server/webpack.config.js
index 7477d93a..8f260bcc 100644
--- a/app/server/webpack.config.js
+++ b/app/server/webpack.config.js
@@ -4,6 +4,9 @@ const VueLoaderPlugin = require('vue-loader/lib/plugin')
const devMode = process.env.DEBUG !== 'False';
const hotReload = process.env.HOT_RELOAD === '1';
+// Host passed to devServer.host; defaults to 127.0.0.1 when WEBPACK_HOST is unset.
+const webpackHost = process.env.WEBPACK_HOST || '127.0.0.1';
+// Port used for devServer.port and the hot-reload publicPath; defaults to 8080.
+const webpackPort = process.env.WEBPACK_PORT ? parseInt(process.env.WEBPACK_PORT, 10) : 8080;
+// Value passed to watchOptions.poll: the parsed WEBPACK_POLL_MILLIS, or false when it is unset.
+const pollMillis = process.env.WEBPACK_POLL_MILLIS ? parseInt(process.env.WEBPACK_POLL_MILLIS, 10) : false;
module.exports = {
mode: devMode ? 'development' : 'production',
@@ -27,16 +30,21 @@ module.exports = {
'download_text_classification': './static/js/download_text_classification.js',
},
output: {
- publicPath: hotReload ? 'http://localhost:8080/' : '',
+ publicPath: hotReload ? `http://127.0.0.1:${webpackPort}/` : '',
path: __dirname + '/static/bundle',
filename: '[name].js'
},
devtool: devMode ? 'cheap-eval-source-map' : 'source-map',
devServer: {
+ port: webpackPort,
+ host: webpackHost,
hot: true,
quiet: false,
headers: { 'Access-Control-Allow-Origin': '*' }
},
+ watchOptions: {
+ poll: pollMillis,
+ },
module: {
rules: [
{
diff --git a/azuredeploy.json b/azuredeploy.json
index fe286a6b..487ed3c3 100644
--- a/azuredeploy.json
+++ b/azuredeploy.json
@@ -118,9 +118,12 @@
"databaseSkuTier": "GeneralPurpose",
"databaseSkuFamily": "Gen5",
"databaseSkuName": "[concat('GP_', variables('databaseSkuFamily'), '_', parameters('databaseCores'))]",
- "databaseConnectionString": "[concat('pgsql://', parameters('adminUserName'), '@', variables('databaseServerName'), ':', parameters('adminPassword'), '@', variables('databaseServerName'), '.postgres.database.azure.com:5432/', parameters('databaseName'))]",
"databaseVersion": "9.6",
+ "databaseServerPort": 5432,
"databaseServerName": "[concat(parameters('appName'),'-state')]",
+    "databaseUserCredentials": "[concat(uriComponent(concat(parameters('adminUserName'), '@', variables('databaseServerName'))), ':', uriComponent(parameters('adminPassword')))]",
+ "databaseFqdn" : "[concat( variables('databaseServerName'), '.postgres.database.azure.com:', variables('databaseServerPort'))]",
+ "databaseConnectionString": "[concat('pgsql://', variables('databaseUserCredentials'), '@', variables('databaseFqdn'), '/', parameters('databaseName'))]",
"setupScriptName": "[concat(parameters('appName'),'-setup')]",
"appServicePlanName": "[concat(parameters('appName'),'-hosting')]",
"analyticsName": "[concat(parameters('appName'),'-analytics')]",
@@ -128,7 +131,8 @@
"password": "[parameters('dockerRegistryPassword')]",
"username": "[parameters('dockerRegistryUserName')]",
"server": "[parameters('dockerRegistry')]"
- }
+ },
+ "appFqdn": "[concat(parameters('appName'),'.azurewebsites.net')]"
},
"resources": [
{
@@ -308,5 +312,15 @@
"serverFarmId": "[resourceId('Microsoft.Web/serverfarms', variables('appServicePlanName'))]"
}
}
- ]
+ ],
+ "outputs": {
+ "appServer": {
+ "type": "string",
+ "value": "[concat(variables('appFqdn'))]"
+ },
+ "databaseServer": {
+ "type": "string",
+ "value": "[variables('databaseFqdn')]"
+ }
+ }
}
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..500e4239
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,42 @@
+# Development stack: Django server, webpack dev server and PostgreSQL.
+version: "3"
+services:
+
+  # Django development server, started by tools/dev-django.sh on 0.0.0.0:8000.
+  django:
+    image: python:3.6
+    volumes:
+      - .:/src
+      # Named volume mounted over /src/venv inside the bind-mounted checkout.
+      - venv:/src/venv
+    command: ["/src/tools/dev-django.sh", "0.0.0.0:8000"]
+    environment:
+      # dev-django.sh creates this admin user when all three values are set.
+      ADMIN_USERNAME: "admin"
+      ADMIN_PASSWORD: "password"
+      ADMIN_EMAIL: "admin@example.com"
+      # Points at the postgres service below, using its user, password and database.
+      DATABASE_URL: "postgres://doccano:doccano@postgres:5432/doccano?sslmode=disable"
+    ports:
+      - 8000:8000
+
+  # Webpack dev server, started by tools/dev-webpack.sh.
+  webpack:
+    image: node:8
+    volumes:
+      - .:/src
+      # Named volume mounted over node_modules inside the bind-mounted checkout.
+      - node_modules:/src/app/server/node_modules
+    command: ["/src/tools/dev-webpack.sh"]
+    environment:
+      # Read by app/server/webpack.config.js (dev server host/port, watch polling).
+      WEBPACK_HOST: "0.0.0.0"
+      WEBPACK_PORT: "8080"
+      WEBPACK_POLL_MILLIS: "1000"
+    ports:
+      - 8080:8080
+
+  # Database referenced by the django service's DATABASE_URL.
+  postgres:
+    image: postgres:9.6
+    environment:
+      POSTGRES_USER: "doccano"
+      POSTGRES_PASSWORD: "doccano"
+      POSTGRES_DB: "doccano"
+    ports:
+      - 5432:5432
+
+volumes:
+  node_modules:
+  venv:
diff --git a/tools/dev-django.sh b/tools/dev-django.sh
new file mode 100755
index 00000000..a6d70df6
--- /dev/null
+++ b/tools/dev-django.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+# Development entrypoint for the Django server: prepares a virtualenv,
+# installs the requirements, migrates the database, optionally creates an
+# admin user and finally starts `manage.py runserver`.
+# All script arguments are passed through to `runserver`.
+
+# Abort on the first failing command.
+set -o errexit
+
+root="$(dirname "$0")/.."
+app="${root}/app"
+venv="${root}/venv"
+
+# Create the virtualenv only when it does not contain a Python interpreter yet.
+if [[ ! -f "${venv}/bin/python" ]]; then
+  echo "Creating virtualenv"
+  mkdir -p "${venv}"
+  python3 -m venv "${venv}"
+  "${venv}/bin/pip" install --upgrade pip setuptools
+fi
+
+echo "Installing dependencies"
+"${venv}/bin/pip" install -r "${root}/requirements.txt"
+
+echo "Initializing database"
+"${venv}/bin/python" "${app}/manage.py" wait_for_db
+"${venv}/bin/python" "${app}/manage.py" migrate
+
+# Create the admin user only when all three ADMIN_* variables are non-empty.
+# `|| true` keeps errexit from aborting the script when create_admin fails
+# (presumably because the user already exists on a restart -- TODO confirm).
+if [[ -n "${ADMIN_USERNAME}" ]] && [[ -n "${ADMIN_PASSWORD}" ]] && [[ -n "${ADMIN_EMAIL}" ]]; then
+  "${venv}/bin/python" "${app}/manage.py" create_admin \
+    --username "${ADMIN_USERNAME}" \
+    --password "${ADMIN_PASSWORD}" \
+    --email "${ADMIN_EMAIL}" \
+    --noinput \
+  || true
+fi
+
+echo "Starting django"
+# -u: run Python with unbuffered output.
+"${venv}/bin/python" -u "${app}/manage.py" runserver "$@"
diff --git a/tools/dev-webpack.sh b/tools/dev-webpack.sh
new file mode 100755
index 00000000..fea3cf3e
--- /dev/null
+++ b/tools/dev-webpack.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+# Development entrypoint for webpack: installs the npm dependencies on first
+# use and then runs `npm start` in app/server.
+
+# Abort on the first failing command.
+set -o errexit
+
+root="$(dirname "$0")/.."
+server="${root}/app/server"
+
+# Run in a subshell so the directory change does not outlive this group.
+(
+  cd "${server}"
+
+  # Install dependencies only when node_modules/.bin does not exist yet.
+  if [[ ! -d node_modules/.bin ]]; then
+    echo "Installing dependencies"
+    npm install
+  fi
+
+  echo "Starting webpack"
+  npm start
+)