diff --git a/app/api/tests/test_api.py b/app/api/tests/test_api.py
index 3095f549..4ecaed8f 100644
--- a/app/api/tests/test_api.py
+++ b/app/api/tests/test_api.py
@@ -1617,6 +1617,11 @@ class TestDownloader(APITestCase):
                                   format='plain',
                                   expected_status=status.HTTP_400_BAD_REQUEST)
 
+    def test_can_download_classification_fasttext(self):
+        self.download_test_helper(url=self.classification_url,
+                                  format='txt',
+                                  expected_status=status.HTTP_200_OK)
+
 
 class TestStatisticsAPI(APITestCase, TestUtilsMixin):
 
diff --git a/app/api/utils.py b/app/api/utils.py
index 977cba5a..7edda553 100644
--- a/app/api/utils.py
+++ b/app/api/utils.py
@@ -13,7 +13,7 @@ from django.db import transaction
 from django.conf import settings
 from colour import Color
 import pyexcel
-from rest_framework.renderers import JSONRenderer
+from rest_framework.renderers import JSONRenderer, BaseRenderer
 from seqeval.metrics.sequence_labeling import get_entities
 
 from .exceptions import FileParseException
@@ -497,6 +497,55 @@ class JSONLRenderer(JSONRenderer):
                              allow_nan=not self.strict) + '\n'
 
 
+class FastTextPainter(object):
+    """Serialise classified documents as fastText training lines."""
+
+    @staticmethod
+    def paint_labels(documents, labels):
+        """Return one fastText-formatted line per document.
+
+        Every annotation contributes a ``__label__<name>`` prefix (spaces in
+        the label name become underscores) and newlines in the text become
+        spaces, so each document occupies exactly one output line.  A
+        document without annotations is emitted as its bare text.
+        """
+        serializer = DocumentSerializer(documents, many=True)
+        serializer_labels = LabelSerializer(labels, many=True)
+        # Index label names by id once rather than rescanning the label
+        # list for every annotation.
+        label_text_by_id = {x['id']: x['text'] for x in serializer_labels.data}
+        data = []
+        for d in serializer.data:
+            tags = ['__label__{}'.format(label_text_by_id[a['label']].replace(' ', '_'))
+                    for a in d['annotations']]
+            text = d['text'].replace('\n', ' ')
+            data.append(' '.join(tags + [text]))
+        return data
+
+
+class PlainTextRenderer(BaseRenderer):
+    """Render a list of values as newline-terminated UTF-8 plain text."""
+
+    media_type = 'text/plain'
+    format = 'txt'
+    charset = 'utf-8'
+
+    def render(self, data, accepted_media_type=None, renderer_context=None):
+        """Encode ``data`` as one line per item.
+
+        ``None`` yields an empty body and a non-list value is treated as a
+        single line.  Items are formatted with ``str()`` semantics so a
+        non-string payload does not raise ``TypeError`` on concatenation.
+        """
+        if data is None:
+            return b''
+
+        if not isinstance(data, list):
+            data = [data]
+
+        return ''.join('{}\n'.format(d) for d in data).encode(self.charset)
+
+
 class JSONPainter(object):
 
     def paint(self, documents):
diff --git a/app/api/views.py b/app/api/views.py
index aaa8ae9d..a4f0707c 100644
--- a/app/api/views.py
+++ b/app/api/views.py
@@ -24,9 +24,9 @@ from .models import Project, Label, Document, RoleMapping, Role, Comment
 from .permissions import IsProjectAdmin, IsAnnotatorAndReadOnly, IsAnnotator, IsAnnotationApproverAndReadOnly, IsOwnAnnotation, IsAnnotationApprover, IsOwnComment
 from .serializers import ProjectSerializer, LabelSerializer, DocumentSerializer, UserSerializer, ApproverSerializer, CommentSerializer
 from .serializers import ProjectPolymorphicSerializer, RoleMappingSerializer, RoleSerializer
-from .utils import CSVParser, ExcelParser, JSONParser, PlainTextParser, CoNLLParser, AudioParser, FastTextParser, iterable_to_io
-from .utils import JSONLRenderer
-from .utils import JSONPainter, CSVPainter
+from .utils import CSVParser, ExcelParser, JSONParser, PlainTextParser, FastTextParser, CoNLLParser, AudioParser, iterable_to_io
+from .utils import JSONLRenderer, PlainTextRenderer
+from .utils import JSONPainter, CSVPainter, FastTextPainter
 
 IsInProjectReadOnlyOrAdmin = (IsAnnotatorAndReadOnly | IsAnnotationApproverAndReadOnly | IsProjectAdmin)
 IsInProjectOrAdmin = (IsAnnotator | IsAnnotationApprover | IsProjectAdmin)
@@ -391,7 +391,7 @@ class CloudUploadAPI(APIView):
 
 class TextDownloadAPI(APIView):
     permission_classes = TextUploadAPI.permission_classes
-    renderer_classes = (CSVRenderer, JSONLRenderer)
+    renderer_classes = (CSVRenderer, JSONLRenderer, PlainTextRenderer)
 
     def get(self, request, *args, **kwargs):
         format = request.query_params.get('q')
@@ -407,9 +407,9 @@ class TextDownloadAPI(APIView):
         # jsonl-textlabel format prints text labels while jsonl format prints annotations with label ids
         # jsonl-textlabel format - "labels": [[0, 15, "PERSON"], ..]
         # jsonl format - "annotations": [{"label": 5, "start_offset": 0, "end_offset": 2, "user": 1},..]
-        if format == 'jsonl':
+        if format in ('jsonl', 'txt'):
             labels = project.labels.all()
-            data = JSONPainter.paint_labels(documents, labels)
+            data = painter.paint_labels(documents, labels)
         else:
             data = painter.paint(documents)
         return Response(data)
@@ -419,6 +419,8 @@ class TextDownloadAPI(APIView):
             return CSVPainter()
         elif format == 'jsonl' or format == 'json':
             return JSONPainter()
+        elif format == 'txt':
+            return FastTextPainter()
         else:
             raise ValidationError('format {} is invalid.'.format(format))
 
diff --git a/frontend/components/organisms/documents/DocumentUploadForm.vue b/frontend/components/organisms/documents/DocumentUploadForm.vue
index 623ae1d3..6159728a 100644
--- a/frontend/components/organisms/documents/DocumentUploadForm.vue
+++ b/frontend/components/organisms/documents/DocumentUploadForm.vue
@@ -18,7 +18,7 @@
           type="error"
           dismissible
         >
-          {{ $t('errors.fileCannotUpload') }}
+          {{ $t('errors.fileCannotUpload') + errorMsg }}

{{ $t('dataset.importDataMessage1') }}

{{ $t('dataset.importDataMessage2') }} { - this.reset() - this.cancel() + this.errors = [] + const promises = [] + const id = this.$route.params.id + const type = this.selectedFormat.type + this.file.forEach((item) => { + promises.push({ + projectId: id, + format: type, + file: item }) - .catch(() => { + }) + let p = Promise.resolve() + promises.forEach((item) => { + p = p.then(() => this.uploadDocument(item)).catch(() => { + this.errors.push(item.file.name) this.showError = true }) + }) + p.finally(() => { + if (!this.errors.length) { + this.reset() + this.cancel() + } else { + this.errorMsg = this.errors.join(', ') + } + }) } } } diff --git a/frontend/i18n/en/projects/dataset.js b/frontend/i18n/en/projects/dataset.js index 2d7959df..b9930dab 100644 --- a/frontend/i18n/en/projects/dataset.js +++ b/frontend/i18n/en/projects/dataset.js @@ -9,7 +9,7 @@ export default { annotate: 'Annotate', importDataTitle: 'Upload Data', importDataMessage1: 'Select a file format', - importDataMessage2: 'Select a file', + importDataMessage2: 'Select file(s)', importDataPlaceholder: 'File input', exportDataTitle: 'Export Data', exportDataMessage: 'Select a file format', diff --git a/frontend/i18n/en/projects/errors.js b/frontend/i18n/en/projects/errors.js index f84d04a9..fcb93846 100644 --- a/frontend/i18n/en/projects/errors.js +++ b/frontend/i18n/en/projects/errors.js @@ -1,5 +1,5 @@ export default { - fileCannotUpload: 'The file could not be uploaded. Maybe invalid format.\n Please check available formats carefully.', + fileCannotUpload: 'The file(s) could not be uploaded. Maybe invalid format.\n Please check available formats and the following file(s): ', labelCannotCreate: 'The label could not be created.\n You cannot use the same label name or shortcut key.', invalidUserOrPass: 'Incorrect username or password, or something went wrong.' 
} diff --git a/frontend/rules/index.js b/frontend/rules/index.js index c7fd3e8b..edaad7e8 100644 --- a/frontend/rules/index.js +++ b/frontend/rules/index.js @@ -57,7 +57,7 @@ export const fileFormatRules = (msg) => { export const uploadFileRules = (msg) => { return [ v => !!v || msg.fileRequired, - v => !v || v.size < 100000000 || msg.fileLessThan1MB + v => !v || v.some(file => file.size < 100000000) || msg.fileLessThan1MB ] } diff --git a/frontend/services/document.service.js b/frontend/services/document.service.js index efe1c0af..6eaa0503 100644 --- a/frontend/services/document.service.js +++ b/frontend/services/document.service.js @@ -34,6 +34,9 @@ class DocumentService { if (format === 'csv') { headers.Accept = 'text/csv; charset=utf-8' headers['Content-Type'] = 'text/csv; charset=utf-8' + } else if (format === 'txt') { + headers.Accept = 'text/plain; charset=utf-8' + headers['Content-Type'] = 'text/plain; charset=utf-8' } else { headers.Accept = 'application/json' headers['Content-Type'] = 'application/json' diff --git a/frontend/store/projects.js b/frontend/store/projects.js index 5622be20..48d057c0 100644 --- a/frontend/store/projects.js +++ b/frontend/store/projects.js @@ -180,6 +180,11 @@ export const getters = { text: 'JSONL(Text label)', suffix: 'jsonl' } + const fastText = { + type: 'txt', + text: 'FastText', + suffix: 'txt' + } if (state.current.project_type === 'DocumentClassification') { json.examples = [ '{"id": 1, "text": "Terrible customer service.", "annotations": [{"id": 1, "label": 1, "user": 1}]}\n', @@ -192,9 +197,14 @@ export const getters = { '2,"Really great transaction.",2,1\n', '3,"Great price.",2,1' ] + fastText.examples = [ + '__label__pet dog cat \n', + '__label__car VW BMW' + ] return [ csv, - json + json, + fastText ] } else if (state.current.project_type === 'SequenceLabeling') { json.examples = [