Browse Source

Merge branch 'master' into enhancement/azure-pipelines

pull/208/head
Hiroki Nakayama 5 years ago
committed by GitHub
parent
commit
c0d9895340
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 329 additions and 52 deletions
  1. 57
      README.md
  2. 2
      app/server/models.py
  3. 17
      app/server/serializers.py
  4. 2
      app/server/static/js/label.vue
  5. 1
      app/server/tests/data/classification.jsonl
  6. 40
      app/server/tests/test_api.py
  7. 10
      app/server/tests/test_config.py
  8. 22
      app/server/tests/test_utils.py
  9. 107
      app/server/utils.py
  10. 10
      app/server/webpack.config.js
  11. 20
      azuredeploy.json
  12. 42
      docker-compose.yml
  13. 33
      tools/dev-django.sh
  14. 18
      tools/dev-webpack.sh

57
README.md

@ -81,9 +81,9 @@ git clone https://github.com/chakki-works/doccano.git
cd doccano
```
To install doccano, there are two options:
To install doccano, there are three options:
**Option1: Pull the Docker image**
**Option1: Pull the production Docker image**
```bash
docker pull chakkiworks/doccano
@ -108,37 +108,10 @@ npm run build
# npm start # for developers
```
Next we need to make migration. Run the following command:
**Option3: Pull the development Docker-Compose images**
```bash
python manage.py migrate
```
Next we need to create a user who can login to the admin site. Run the following command:
```bash
python manage.py createsuperuser
```
Enter your desired username and press enter.
```bash
Username: admin
```
You will then be prompted for your desired email address:
```bash
Email address: admin@example.com
```
The final step is to enter your password. You will be asked to enter your password twice, the second time as a confirmation of the first.
```bash
Password: **********
Password (again): *********
Superuser created successfully.
docker-compose pull
```
## Usage
@ -165,10 +138,32 @@ docker exec doccano tools/create-admin.sh "admin" "admin@example.com" "password"
**Option2: Running Django development server**
Before running the server, we need to apply the database migrations. Run the following command:
```bash
python manage.py migrate
```
Next, we need to create a user who can log in to the admin site. Run the following command:
```bash
python manage.py create_admin --noinput --username "admin" --email "admin@example.com" --password "password"
```
Finally, to start the server, run the following command:
```bash
python manage.py runserver
```
**Option3: Running the development Docker-Compose stack**
We can use docker-compose to set up the webpack server, django server, database, etc. all in one command:
```bash
docker-compose up
```
Now, open a Web browser and go to <http://127.0.0.1:8000/login/>. You should see the login screen:
<img src="./docs/login_form.png" alt="Login Form" width=400>

2
app/server/models.py

@ -142,7 +142,7 @@ class Label(models.Model):
('shift', 'shift'),
('ctrl shift', 'ctrl shift')
)
SUFFIX_KEYS = (
SUFFIX_KEYS = tuple(
(c, c) for c in string.ascii_lowercase
)

17
app/server/serializers.py

@ -34,12 +34,17 @@ class LabelSerializer(serializers.ModelSerializer):
raise ValidationError('Shortcut key may not have a suffix key.')
# Don't allow to save same shortcut key when prefix_key is null.
context = self.context['request'].parser_context
project_id = context['kwargs'].get('project_id')
if Label.objects.filter(suffix_key=suffix_key,
prefix_key__isnull=True,
project=project_id).exists():
raise ValidationError('Duplicate key.')
try:
context = self.context['request'].parser_context
project_id = context['kwargs']['project_id']
except (AttributeError, KeyError):
pass # unit tests don't always have the correct context set up
else:
if Label.objects.filter(suffix_key=suffix_key,
prefix_key__isnull=True,
project=project_id).exists():
raise ValidationError('Duplicate key.')
return super().validate(attrs)
class Meta:

2
app/server/static/js/label.vue

@ -203,7 +203,7 @@ export default {
methods: {
generateColor() {
const color = (Math.random() * 0xFFFFFF | 0).toString(16); // eslint-disable-line no-bitwise
const color = Math.floor(Math.random() * 0xFFFFFF).toString(16);
const randomColor = '#' + ('000000' + color).slice(-6);
return randomColor;
},

1
app/server/tests/data/classification.jsonl

@ -1,3 +1,4 @@
{"text": "example", "labels": ["positive"], "meta": {"wikiPageID": 1}}
{"text": "example", "labels": ["positive", "negative"], "meta": {"wikiPageID": 2}}
{"text": "example", "labels": ["negative"], "meta": {"wikiPageID": 3}}
{"text": "example", "labels": ["neutral"], "meta": {"wikiPageID": 4}}

40
app/server/tests/test_api.py

@ -682,7 +682,9 @@ class TestUploader(APITestCase):
users=[super_user], project_type=SEQUENCE_LABELING)
cls.seq2seq_project = mommy.make('server.Seq2seqProject', users=[super_user], project_type=SEQ2SEQ)
cls.classification_url = reverse(viewname='doc_uploader', args=[cls.classification_project.id])
cls.classification_labels_url = reverse(viewname='label_list', args=[cls.classification_project.id])
cls.labeling_url = reverse(viewname='doc_uploader', args=[cls.labeling_project.id])
cls.labeling_labels_url = reverse(viewname='label_list', args=[cls.labeling_project.id])
cls.seq2seq_url = reverse(viewname='doc_uploader', args=[cls.seq2seq_project.id])
def setUp(self):
@ -694,6 +696,20 @@ class TestUploader(APITestCase):
response = self.client.post(url, data={'file': f, 'format': format})
self.assertEqual(response.status_code, expected_status)
def label_test_helper(self, url, expected_labels, expected_label_keys):
    """Assert that the labels served at ``url`` match the expectations.

    Args:
        url: Endpoint that returns a JSON list of label dictionaries.
        expected_labels: List of dictionaries. Each returned label is
            reduced to the fields mentioned in any of these dictionaries
            and the reduced labels must equal this list, ignoring order.
        expected_label_keys: Field names that must be present with a
            non-``None`` value on every returned label.
    """
    labels = self.client.get(url).json()

    # Only the fields named by the expectations take part in the comparison.
    compared_fields = set()
    for expected in expected_labels:
        compared_fields.update(expected)

    trimmed = []
    for label in labels:
        trimmed.append({field: label[field]
                        for field in label if field in compared_fields})
    self.assertCountEqual(trimmed, expected_labels)

    # Generated fields (e.g. colors) cannot be predicted, only required.
    for label in labels:
        for required_field in expected_label_keys:
            self.assertIsNotNone(label.get(required_field))
def test_can_upload_conll_format_file(self):
self.upload_test_helper(url=self.labeling_url,
filename='labeling.conll',
@ -736,12 +752,36 @@ class TestUploader(APITestCase):
format='json',
expected_status=status.HTTP_201_CREATED)
self.label_test_helper(
url=self.classification_labels_url,
expected_labels=[
{'text': 'positive', 'suffix_key': 'p', 'prefix_key': None},
{'text': 'negative', 'suffix_key': 'n', 'prefix_key': None},
{'text': 'neutral', 'suffix_key': 'n', 'prefix_key': 'ctrl'},
],
expected_label_keys=[
'background_color',
'text_color',
])
def test_can_upload_labeling_jsonl(self):
self.upload_test_helper(url=self.labeling_url,
filename='labeling.jsonl',
format='json',
expected_status=status.HTTP_201_CREATED)
self.label_test_helper(
url=self.labeling_labels_url,
expected_labels=[
{'text': 'LOC', 'suffix_key': 'l', 'prefix_key': None},
{'text': 'ORG', 'suffix_key': 'o', 'prefix_key': None},
{'text': 'PER', 'suffix_key': 'p', 'prefix_key': None},
],
expected_label_keys=[
'background_color',
'text_color',
])
def test_can_upload_seq2seq_jsonl(self):
self.upload_test_helper(url=self.seq2seq_url,
filename='seq2seq.jsonl',

10
app/server/tests/test_config.py

@ -24,15 +24,23 @@ class TestDatabaseUrl(TestCase):
with setenv('DATABASE_URL', 'pgsql://u:p@h/d?sslmode=require'):
self._assert_sslmode_is('require')
def test_database_url_with_complex_user(self):
    """A percent-encoded ``@`` in the URL user part is decoded."""
    encoded_url = 'pgsql://user%40host:p@h/d'
    with setenv('DATABASE_URL', encoded_url):
        self._assert_user_is('user@host')
def _assert_sslmode_is(self, expected):
    """Reload settings and compare the default database's ``sslmode``.

    A missing ``OPTIONS`` mapping or ``sslmode`` entry compares as ``None``.
    """
    reload(settings)
    options = settings.DATABASES['default'].get('OPTIONS', {})
    self.assertEqual(options.get('sslmode'), expected)
def _assert_user_is(self, expected):
    """Compare the default database's ``USER`` (``''`` when absent)."""
    default_database = settings.DATABASES['default']
    self.assertEqual(default_database.get('USER', ''), expected)
@contextmanager
def setenv(key, value):
    """Temporarily set an environment variable and reload ``settings``.

    The variable is set and ``settings`` is reloaded before the managed
    block runs. On exit the variable is put back to its previous state:
    restored to its old value if it had one, removed otherwise.

    Args:
        key: Name of the environment variable.
        value: Value to assign for the duration of the block.

    Note:
        ``settings`` is not reloaded on exit, matching the previous
        behaviour; only the environment is restored.
    """
    missing = object()  # sentinel: distinguishes "unset" from any real value
    previous = environ.get(key, missing)
    environ[key] = value
    try:
        reload(settings)
        yield
    finally:
        # Always clean up, even when the block (e.g. a failing assertion)
        # or the reload raises, so the variable cannot leak into other tests.
        if previous is missing:
            environ.pop(key, None)
        else:
            environ[key] = previous

22
app/server/tests/test_utils.py

@ -0,0 +1,22 @@
from django.test import TestCase
from server.utils import Color
class TestColor(TestCase):
    """Unit tests for the ``Color`` helper imported from ``server.utils``."""

    def test_random_color(self):
        """Every channel of a random color lies within 0..255."""
        random_color = Color.random()
        channels = (random_color.red, random_color.green, random_color.blue)
        for channel in channels:
            self.assertTrue(0 <= channel <= 255)

    def test_hex(self):
        """``hex`` renders the channels as a lowercase ``#rrggbb`` string."""
        pink = Color(red=255, green=192, blue=203)
        self.assertEqual(pink.hex, '#ffc0cb')

    def test_contrast_color(self):
        """Light colors contrast with black, dark colors with white."""
        light = Color(red=255, green=192, blue=203)
        self.assertEqual(light.contrast_color.hex, '#000000')

        dark = Color(red=199, green=21, blue=133)
        self.assertEqual(dark.contrast_color.hex, '#ffffff')

107
app/server/utils.py

@ -4,6 +4,8 @@ import itertools
import json
import re
from collections import defaultdict
from math import floor
from random import Random
from django.db import transaction
from rest_framework.renderers import JSONRenderer
@ -74,16 +76,64 @@ class BaseStorage(object):
"""
return [label for label in labels if label not in created]
def to_serializer_format(self, labels):
"""Exclude created labels.
@classmethod
def to_serializer_format(cls, labels, created):
"""Convert a label to model dictionary.
Also assigns shortkeys for each label that don't clash with existing
label shortkeys.
Example:
>>> labels = ["positive"]
>>> self.to_serializer_format(labels)
[{"text": "negative"}]
```
>>> created = {}
>>> BaseStorage.to_serializer_format(labels, created)
[{"text": "positive", "suffix_key": "p", "prefix_key": None}]
"""
return [{'text': label} for label in labels]
existing_shortkeys = {(label.suffix_key, label.prefix_key)
for label in created.values()}
serializer_labels = []
for label in sorted(labels):
serializer_label = {'text': label}
shortkey = cls.get_shortkey(label, existing_shortkeys)
if shortkey:
serializer_label['suffix_key'] = shortkey[0]
serializer_label['prefix_key'] = shortkey[1]
existing_shortkeys.add(shortkey)
color = Color.random()
serializer_label['background_color'] = color.hex
serializer_label['text_color'] = color.contrast_color.hex
serializer_labels.append(serializer_label)
return serializer_labels
@classmethod
def get_shortkey(cls, label, existing_shortkeys):
    """Pick the first unused ``(suffix_key, prefix_key)`` pair for a label.

    Candidate suffix keys are the characters of the lower-cased label that
    are valid ``Label.SUFFIX_KEYS``, in label order. For each of them the
    prefix keys are tried in turn, starting with ``None`` (no prefix) and
    then following the order of ``Label.PREFIX_KEYS``.

    Returns the first pair not contained in ``existing_shortkeys``, or
    ``None`` when every candidate is taken.

    Example without existing shortkey:
    >>> BaseStorage.get_shortkey("positive", set())
    ("p", None)

    Example with existing shortkey:
    >>> BaseStorage.get_shortkey("positive", {("p", None)})
    ("p", "ctrl")
    """
    allowed_suffixes = {suffix for (suffix, _) in Label.SUFFIX_KEYS}

    # ``None`` (a bare key without modifier) is always tried first.
    prefix_candidates = [None]
    prefix_candidates.extend(prefix for (prefix, _) in Label.PREFIX_KEYS)

    for char in label.lower():
        if char not in allowed_suffixes:
            continue
        for prefix in prefix_candidates:
            candidate = (char, prefix)
            if candidate not in existing_shortkeys:
                return candidate

    return None
def update_saved_labels(self, saved, new):
"""Update saved labels.
@ -120,7 +170,7 @@ class ClassificationStorage(BaseStorage):
labels = self.extract_label(data)
unique_labels = self.extract_unique_labels(labels)
unique_labels = self.exclude_created_labels(unique_labels, saved_labels)
unique_labels = self.to_serializer_format(unique_labels)
unique_labels = self.to_serializer_format(unique_labels, saved_labels)
new_labels = self.save_label(unique_labels)
saved_labels = self.update_saved_labels(saved_labels, new_labels)
annotations = self.make_annotations(docs, labels, saved_labels)
@ -170,7 +220,7 @@ class SequenceLabelingStorage(BaseStorage):
labels = self.extract_label(data)
unique_labels = self.extract_unique_labels(labels)
unique_labels = self.exclude_created_labels(unique_labels, saved_labels)
unique_labels = self.to_serializer_format(unique_labels)
unique_labels = self.to_serializer_format(unique_labels, saved_labels)
new_labels = self.save_label(unique_labels)
saved_labels = self.update_saved_labels(saved_labels, new_labels)
annotations = self.make_annotations(docs, labels, saved_labels)
@ -444,3 +494,44 @@ class CSVPainter(JSONPainter):
for a in annotations:
res.append({**d, **a})
return res
class Color:
    """An RGB color with helpers for hex rendering and contrast selection.

    Instances compare and hash by their channel values, so two colors with
    the same ``red``/``green``/``blue`` are interchangeable.

    Args:
        red: Red channel, expected to be an integer in 0..255.
        green: Green channel, expected to be an integer in 0..255.
        blue: Blue channel, expected to be an integer in 0..255.
    """

    def __init__(self, red, green, blue):
        self.red = red
        self.green = green
        self.blue = blue

    def __repr__(self):
        return '{}(red={!r}, green={!r}, blue={!r})'.format(
            type(self).__name__, self.red, self.green, self.blue)

    def __eq__(self, other):
        if not isinstance(other, Color):
            return NotImplemented
        return self._channels == other._channels

    def __hash__(self):
        # Defined together with __eq__ so instances stay usable in sets/dicts.
        return hash(self._channels)

    @property
    def _channels(self):
        """The ``(red, green, blue)`` tuple used for equality and hashing."""
        return (self.red, self.green, self.blue)

    @property
    def contrast_color(self):
        """Generate black or white color.

        Ensure that text and background color combinations provide
        sufficient contrast when viewed by someone having color deficits or
        when viewed on a black and white screen.

        Algorithm from w3c:
        * https://www.w3.org/TR/AERT/#color-contrast
        """
        return Color.white() if self.brightness < 128 else Color.black()

    @property
    def brightness(self):
        """Perceived brightness as a weighted sum of the channels.

        The weights 299/587/114 sum to 1000, so the result stays on the
        same scale as the input channels.
        """
        return ((self.red * 299) + (self.green * 587) + (self.blue * 114)) / 1000

    @property
    def hex(self):
        """The color as a lowercase ``#rrggbb`` string."""
        return '#{:02x}{:02x}{:02x}'.format(self.red, self.green, self.blue)

    @classmethod
    def white(cls):
        """Return pure white (255, 255, 255)."""
        return cls(red=255, green=255, blue=255)

    @classmethod
    def black(cls):
        """Return pure black (0, 0, 0)."""
        return cls(red=0, green=0, blue=0)

    @classmethod
    def random(cls, seed=None):
        """Return a color with three channels drawn from 0..255.

        Args:
            seed: Optional seed passed to ``random.Random``; the same seed
                always yields the same color.
        """
        rgb = Random(seed).choices(range(256), k=3)
        return cls(*rgb)

10
app/server/webpack.config.js

@ -4,6 +4,9 @@ const VueLoaderPlugin = require('vue-loader/lib/plugin')
const devMode = process.env.DEBUG !== 'False';
const hotReload = process.env.HOT_RELOAD === '1';
const webpackHost = process.env.WEBPACK_HOST || '127.0.0.1';
const webpackPort = process.env.WEBPACK_PORT ? parseInt(process.env.WEBPACK_PORT, 10) : 8080;
const pollMillis = process.env.WEBPACK_POLL_MILLIS ? parseInt(process.env.WEBPACK_POLL_MILLIS, 10) : false;
module.exports = {
mode: devMode ? 'development' : 'production',
@ -27,16 +30,21 @@ module.exports = {
'download_text_classification': './static/js/download_text_classification.js',
},
output: {
publicPath: hotReload ? 'http://localhost:8080/' : '',
publicPath: hotReload ? `http://127.0.0.1:${webpackPort}/` : '',
path: __dirname + '/static/bundle',
filename: '[name].js'
},
devtool: devMode ? 'cheap-eval-source-map' : 'source-map',
devServer: {
port: webpackPort,
host: webpackHost,
hot: true,
quiet: false,
headers: { 'Access-Control-Allow-Origin': '*' }
},
watchOptions: {
poll: pollMillis,
},
module: {
rules: [
{

20
azuredeploy.json

@ -118,9 +118,12 @@
"databaseSkuTier": "GeneralPurpose",
"databaseSkuFamily": "Gen5",
"databaseSkuName": "[concat('GP_', variables('databaseSkuFamily'), '_', parameters('databaseCores'))]",
"databaseConnectionString": "[concat('pgsql://', parameters('adminUserName'), '@', variables('databaseServerName'), ':', parameters('adminPassword'), '@', variables('databaseServerName'), '.postgres.database.azure.com:5432/', parameters('databaseName'))]",
"databaseVersion": "9.6",
"databaseServerPort": 5432,
"databaseServerName": "[concat(parameters('appName'),'-state')]",
"databaseUserCredentials" : "[concat(uriComponent(concat(parameters('adminUserName'), '@', variables('databaseServerName'))), ':', uriComponent(parameters('adminPassword')))]",
"databaseFqdn" : "[concat( variables('databaseServerName'), '.postgres.database.azure.com:', variables('databaseServerPort'))]",
"databaseConnectionString": "[concat('pgsql://', variables('databaseUserCredentials'), '@', variables('databaseFqdn'), '/', parameters('databaseName'))]",
"setupScriptName": "[concat(parameters('appName'),'-setup')]",
"appServicePlanName": "[concat(parameters('appName'),'-hosting')]",
"analyticsName": "[concat(parameters('appName'),'-analytics')]",
@ -128,7 +131,8 @@
"password": "[parameters('dockerRegistryPassword')]",
"username": "[parameters('dockerRegistryUserName')]",
"server": "[parameters('dockerRegistry')]"
}
},
"appFqdn": "[concat(parameters('appName'),'.azurewebsites.net')]"
},
"resources": [
{
@ -308,5 +312,15 @@
"serverFarmId": "[resourceId('Microsoft.Web/serverfarms', variables('appServicePlanName'))]"
}
}
]
],
"outputs": {
"appServer": {
"type": "string",
"value": "[concat(variables('appFqdn'))]"
},
"databaseServer": {
"type": "string",
"value": "[variables('databaseFqdn')]"
}
}
}

42
docker-compose.yml

@ -0,0 +1,42 @@
# Development stack: Django server, webpack dev server and PostgreSQL.
version: "3"

services:

  django:
    image: python:3.6
    volumes:
      # The repository is mounted at /src; the virtualenv lives in a named
      # volume so it is not written into the checkout.
      - .:/src
      - venv:/src/venv
    command: ["/src/tools/dev-django.sh", "0.0.0.0:8000"]
    environment:
      # Read by tools/dev-django.sh to create the admin user.
      ADMIN_USERNAME: "admin"
      ADMIN_PASSWORD: "password"
      ADMIN_EMAIL: "admin@example.com"
      # Credentials and database name match the postgres service below.
      DATABASE_URL: "postgres://doccano:doccano@postgres:5432/doccano?sslmode=disable"
    ports:
      # Port mappings are quoted so YAML always reads them as strings.
      - "8000:8000"

  webpack:
    image: node:8
    volumes:
      - .:/src
      - node_modules:/src/app/server/node_modules
    command: ["/src/tools/dev-webpack.sh"]
    environment:
      # Read by app/server/webpack.config.js.
      WEBPACK_HOST: "0.0.0.0"
      WEBPACK_PORT: "8080"
      WEBPACK_POLL_MILLIS: "1000"
    ports:
      - "8080:8080"

  postgres:
    image: postgres:9.6
    environment:
      POSTGRES_USER: "doccano"
      POSTGRES_PASSWORD: "doccano"
      POSTGRES_DB: "doccano"
    ports:
      - "5432:5432"

volumes:
  node_modules:
  venv:

33
tools/dev-django.sh

@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Development entrypoint for the Django server: prepares the virtualenv,
# installs dependencies, initializes the database, optionally creates an
# admin user and finally runs `manage.py runserver` with the given arguments.

set -o errexit

root="$(dirname "$0")/.."
app="${root}/app"
venv="${root}/venv"

# Create the virtualenv only when its interpreter is missing, so repeated
# starts reuse the existing environment.
if [[ ! -f "${venv}/bin/python" ]]; then
  echo "Creating virtualenv"
  mkdir -p "${venv}"
  python3 -m venv "${venv}"
  "${venv}/bin/pip" install --upgrade pip setuptools
fi

echo "Installing dependencies"
"${venv}/bin/pip" install -r "${root}/requirements.txt"

echo "Initializing database"
# NOTE(review): wait_for_db presumably blocks until the database accepts
# connections - confirm against the management command.
"${venv}/bin/python" "${app}/manage.py" wait_for_db
"${venv}/bin/python" "${app}/manage.py" migrate

# The admin user is created only when all three ADMIN_* variables are set.
if [[ -n "${ADMIN_USERNAME}" ]] && [[ -n "${ADMIN_PASSWORD}" ]] && [[ -n "${ADMIN_EMAIL}" ]]; then
  # Best effort: a failure here must not stop the server from starting,
  # but it is reported instead of being silently discarded.
  "${venv}/bin/python" "${app}/manage.py" create_admin \
    --username "${ADMIN_USERNAME}" \
    --password "${ADMIN_PASSWORD}" \
    --email "${ADMIN_EMAIL}" \
    --noinput \
  || echo "Admin user was not created (it may already exist); continuing" >&2
fi

echo "Starting django"
# exec replaces this shell with the server process so that signals sent to
# the script (e.g. when the container is stopped) reach Django directly.
exec "${venv}/bin/python" -u "${app}/manage.py" runserver "$@"

18
tools/dev-webpack.sh

@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Development entrypoint for webpack: installs the npm dependencies on
# first run and then starts the npm "start" script.

set -o errexit

repo_root="$(dirname "$0")/.."
server_dir="${repo_root}/app/server"

# Nothing runs after npm, so changing directory in the main shell is
# equivalent to doing it in a subshell.
cd "${server_dir}"

# A missing node_modules/.bin is taken to mean nothing is installed yet.
if [[ ! -d node_modules/.bin ]]; then
  echo "Installing dependencies"
  npm install
fi

echo "Starting webpack"
npm start
Loading…
Cancel
Save