Browse Source

Merge branch 'master' into #f1175

# Conflicts:
#	frontend/rules/index.js
pull/1175/head
Hironsan 3 years ago
parent
commit
0520004de7
9 changed files with 97 additions and 22 deletions
  1. 5
      app/api/tests/test_api.py
  2. 40
      app/api/utils.py
  3. 14
      app/api/views.py
  4. 39
      frontend/components/organisms/documents/DocumentUploadForm.vue
  5. 2
      frontend/i18n/en/projects/dataset.js
  6. 2
      frontend/i18n/en/projects/errors.js
  7. 2
      frontend/rules/index.js
  8. 3
      frontend/services/document.service.js
  9. 12
      frontend/store/projects.js

5
app/api/tests/test_api.py

@ -1617,6 +1617,11 @@ class TestDownloader(APITestCase):
format='plain',
expected_status=status.HTTP_400_BAD_REQUEST)
def test_can_download_classification_fasttext(self):
    """Downloading the classification project with format 'txt' returns 200 OK."""
    request_kwargs = dict(
        url=self.classification_url,
        format='txt',
        expected_status=status.HTTP_200_OK,
    )
    self.download_test_helper(**request_kwargs)
class TestStatisticsAPI(APITestCase, TestUtilsMixin):

40
app/api/utils.py

@ -13,7 +13,7 @@ from django.db import transaction
from django.conf import settings
from colour import Color
import pyexcel
from rest_framework.renderers import JSONRenderer
from rest_framework.renderers import JSONRenderer, BaseRenderer
from seqeval.metrics.sequence_labeling import get_entities
from .exceptions import FileParseException
@ -497,6 +497,44 @@ class JSONLRenderer(JSONRenderer):
allow_nan=not self.strict) + '\n'
class FastTextPainter(object):
    """Formats documents as ``__label__``-prefixed plain-text lines."""

    @staticmethod
    def paint_labels(documents, labels):
        """Return one output line per document.

        Each line is the document's labels, written as ``__label__<text>``
        and separated by spaces, followed by the document text. Spaces in a
        label's text become underscores and newlines in the document text
        become spaces. Documents without annotations yield the bare text.

        Args:
            documents: Documents to export; serialized with
                ``DocumentSerializer``.
            labels: Labels the annotations refer to; serialized with
                ``LabelSerializer``.

        Returns:
            list[str]: The formatted lines, in document order.

        Raises:
            KeyError: If an annotation references a label id that is not
                present in ``labels``.
        """
        # Index label text by id once so each annotation is an O(1) lookup
        # rather than a scan over every serialized label.
        label_text_by_id = {}
        for label in LabelSerializer(labels, many=True).data:
            # Keep the first entry should an id ever repeat.
            label_text_by_id.setdefault(label['id'], label['text'])

        data = []
        for d in DocumentSerializer(documents, many=True).data:
            prefixes = [
                '__label__{}'.format(
                    label_text_by_id[a['label']].replace(' ', '_'))
                for a in d['annotations']
            ]
            # One document per output line: flatten embedded newlines.
            text = d['text'].replace('\n', ' ')
            if prefixes:
                data.append('{} {}'.format(' '.join(prefixes), text))
            else:
                data.append(text)
        return data
class PlainTextRenderer(BaseRenderer):
    """Renders response data as newline-terminated plain-text lines."""

    media_type = 'text/plain'
    format = 'txt'
    charset = 'utf-8'

    def render(self, data, accepted_media_type=None, renderer_context=None):
        """Encode ``data`` as one line per item.

        Args:
            data: ``None``, a single item, or a list of items. A non-list
                value is treated as a one-element list.
            accepted_media_type: Unused.
            renderer_context: Unused.

        Returns:
            bytes: Every item followed by ``'\\n'``, encoded with
            ``self.charset``; empty bytes when ``data`` is ``None``.
        """
        if data is None:
            return bytes()
        if not isinstance(data, list):
            data = [data]
        # str() is a no-op for strings but lets non-string payloads (such as
        # a dict body) render instead of raising TypeError on concatenation.
        return ''.join(str(d) + '\n' for d in data).encode(self.charset)
class JSONPainter(object):
def paint(self, documents):

14
app/api/views.py

@ -24,9 +24,9 @@ from .models import Project, Label, Document, RoleMapping, Role, Comment
from .permissions import IsProjectAdmin, IsAnnotatorAndReadOnly, IsAnnotator, IsAnnotationApproverAndReadOnly, IsOwnAnnotation, IsAnnotationApprover, IsOwnComment
from .serializers import ProjectSerializer, LabelSerializer, DocumentSerializer, UserSerializer, ApproverSerializer, CommentSerializer
from .serializers import ProjectPolymorphicSerializer, RoleMappingSerializer, RoleSerializer
from .utils import CSVParser, ExcelParser, JSONParser, PlainTextParser, CoNLLParser, AudioParser, FastTextParser, iterable_to_io
from .utils import JSONLRenderer
from .utils import JSONPainter, CSVPainter
from .utils import CSVParser, ExcelParser, JSONParser, PlainTextParser, FastTextParser, CoNLLParser, AudioParser, iterable_to_io
from .utils import JSONLRenderer, PlainTextRenderer
from .utils import JSONPainter, CSVPainter, FastTextPainter
IsInProjectReadOnlyOrAdmin = (IsAnnotatorAndReadOnly | IsAnnotationApproverAndReadOnly | IsProjectAdmin)
IsInProjectOrAdmin = (IsAnnotator | IsAnnotationApprover | IsProjectAdmin)
@ -391,7 +391,7 @@ class CloudUploadAPI(APIView):
class TextDownloadAPI(APIView):
permission_classes = TextUploadAPI.permission_classes
renderer_classes = (CSVRenderer, JSONLRenderer)
renderer_classes = (CSVRenderer, JSONLRenderer, PlainTextRenderer)
def get(self, request, *args, **kwargs):
format = request.query_params.get('q')
@ -407,9 +407,9 @@ class TextDownloadAPI(APIView):
# jsonl-textlabel format prints text labels while jsonl format prints annotations with label ids
# jsonl-textlabel format - "labels": [[0, 15, "PERSON"], ..]
# jsonl format - "annotations": [{"label": 5, "start_offset": 0, "end_offset": 2, "user": 1},..]
if format == 'jsonl':
if format in ('jsonl', 'txt'):
labels = project.labels.all()
data = JSONPainter.paint_labels(documents, labels)
data = painter.paint_labels(documents, labels)
else:
data = painter.paint(documents)
return Response(data)
@ -419,6 +419,8 @@ class TextDownloadAPI(APIView):
return CSVPainter()
elif format == 'jsonl' or format == 'json':
return JSONPainter()
elif format == 'txt':
return FastTextPainter()
else:
raise ValidationError('format {} is invalid.'.format(format))

39
frontend/components/organisms/documents/DocumentUploadForm.vue

@ -18,7 +18,7 @@
type="error"
dismissible
>
{{ $t('errors.fileCannotUpload') }}
{{ $t('errors.fileCannotUpload') + errorMsg }}
</v-alert>
<h2>{{ $t('dataset.importDataMessage1') }}</h2>
<v-radio-group
@ -45,6 +45,7 @@
<h2>{{ $t('dataset.importDataMessage2') }}</h2>
<v-file-input
v-model="file"
multiple
:accept="acceptType"
:rules="uploadFileRules($t('rules.uploadFileRules'))"
:label="$t('labels.filePlaceholder')"
@ -81,7 +82,9 @@ export default {
selectedFormat: null,
fileFormatRules,
uploadFileRules,
showError: false
showError: false,
errors: [],
errorMsg: ''
}
},
@ -107,18 +110,32 @@ export default {
},
create() {
if (this.validate()) {
this.uploadDocument({
projectId: this.$route.params.id,
format: this.selectedFormat.type,
file: this.file
})
.then((response) => {
this.reset()
this.cancel()
this.errors = []
const promises = []
const id = this.$route.params.id
const type = this.selectedFormat.type
this.file.forEach((item) => {
promises.push({
projectId: id,
format: type,
file: item
})
.catch(() => {
})
let p = Promise.resolve()
promises.forEach((item) => {
p = p.then(() => this.uploadDocument(item)).catch(() => {
this.errors.push(item.file.name)
this.showError = true
})
})
p.finally(() => {
if (!this.errors.length) {
this.reset()
this.cancel()
} else {
this.errorMsg = this.errors.join(', ')
}
})
}
}
}

2
frontend/i18n/en/projects/dataset.js

@ -9,7 +9,7 @@ export default {
annotate: 'Annotate',
importDataTitle: 'Upload Data',
importDataMessage1: 'Select a file format',
importDataMessage2: 'Select a file',
importDataMessage2: 'Select file(s)',
importDataPlaceholder: 'File input',
exportDataTitle: 'Export Data',
exportDataMessage: 'Select a file format',

2
frontend/i18n/en/projects/errors.js

@ -1,5 +1,5 @@
export default {
fileCannotUpload: 'The file could not be uploaded. Maybe invalid format.\n Please check available formats carefully.',
fileCannotUpload: 'The file(s) could not be uploaded. Maybe invalid format.\n Please check available formats and the following file(s): ',
labelCannotCreate: 'The label could not be created.\n You cannot use the same label name or shortcut key.',
invalidUserOrPass: 'Incorrect username or password, or something went wrong.'
}

2
frontend/rules/index.js

@ -57,7 +57,7 @@ export const fileFormatRules = (msg) => {
/**
 * Validation rules for the document upload file input.
 *
 * Accepts either a single file or an array of files (multi-select input).
 *
 * @param {Object} msg - Messages: `fileRequired` and `fileLessThan1MB`.
 * @returns {Function[]} Rules returning `true` or an error message.
 */
export const uploadFileRules = (msg) => {
  // Upper bound on each file's size, in bytes.
  const maxBytes = 100000000
  const toList = v => (Array.isArray(v) ? v : [v])
  return [
    // An empty array is truthy, so check its length explicitly.
    v => (!!v && toList(v).length > 0) || msg.fileRequired,
    // Every selected file must be under the limit, not just one of them.
    v => !v || toList(v).every(file => file.size < maxBytes) || msg.fileLessThan1MB
  ]
}

3
frontend/services/document.service.js

@ -34,6 +34,9 @@ class DocumentService {
if (format === 'csv') {
headers.Accept = 'text/csv; charset=utf-8'
headers['Content-Type'] = 'text/csv; charset=utf-8'
} else if (format === 'txt') {
headers.Accept = 'text/plain; charset=utf-8'
headers['Content-Type'] = 'text/plain; charset=utf-8'
} else {
headers.Accept = 'application/json'
headers['Content-Type'] = 'application/json'

12
frontend/store/projects.js

@ -180,6 +180,11 @@ export const getters = {
text: 'JSONL(Text label)',
suffix: 'jsonl'
}
const fastText = {
type: 'txt',
text: 'FastText',
suffix: 'txt'
}
if (state.current.project_type === 'DocumentClassification') {
json.examples = [
'{"id": 1, "text": "Terrible customer service.", "annotations": [{"id": 1, "label": 1, "user": 1}]}\n',
@ -192,9 +197,14 @@ export const getters = {
'2,"Really great transaction.",2,1\n',
'3,"Great price.",2,1'
]
fastText.examples = [
'__label__pet dog cat \n',
'__label__car VW BMW'
]
return [
csv,
json
json,
fastText
]
} else if (state.current.project_type === 'SequenceLabeling') {
json.examples = [

Loading…
Cancel
Save