diff --git a/app/api/tests/test_api.py b/app/api/tests/test_api.py index 3095f549..4ecaed8f 100644 --- a/app/api/tests/test_api.py +++ b/app/api/tests/test_api.py @@ -1617,6 +1617,11 @@ class TestDownloader(APITestCase): format='plain', expected_status=status.HTTP_400_BAD_REQUEST) + def test_can_download_classification_fasttext(self): + self.download_test_helper(url=self.classification_url, + format='txt', + expected_status=status.HTTP_200_OK) + class TestStatisticsAPI(APITestCase, TestUtilsMixin): diff --git a/app/api/utils.py b/app/api/utils.py index 977cba5a..7edda553 100644 --- a/app/api/utils.py +++ b/app/api/utils.py @@ -13,7 +13,7 @@ from django.db import transaction from django.conf import settings from colour import Color import pyexcel -from rest_framework.renderers import JSONRenderer +from rest_framework.renderers import JSONRenderer, BaseRenderer from seqeval.metrics.sequence_labeling import get_entities from .exceptions import FileParseException @@ -497,6 +497,44 @@ class JSONLRenderer(JSONRenderer): allow_nan=not self.strict) + '\n' +class FastTextPainter(object): + + @staticmethod + def paint_labels(documents, labels): + serializer = DocumentSerializer(documents, many=True) + serializer_labels = LabelSerializer(labels, many=True) + data = [] + for d in serializer.data: + labels = [] + for a in d['annotations']: + label_obj = [x for x in serializer_labels.data if x['id'] == a['label']][0] + labels.append('__label__{}'.format(label_obj['text'].replace(' ', '_'))) + text = d['text'].replace('\n', ' ') + if labels: + data.append('{} {}'.format(' '.join(labels), text)) + else: + data.append(text) + return data + + +class PlainTextRenderer(BaseRenderer): + media_type = 'text/plain' + format = 'txt' + charset = 'utf-8' + + def render(self, data, accepted_media_type=None, renderer_context=None): + if data is None: + return bytes() + + if not isinstance(data, list): + data = [data] + + buffer = io.BytesIO() + for d in data: + buffer.write((d + '\n').encode(self.charset)) + return buffer.getvalue() + + class JSONPainter(object): def paint(self, documents): diff --git a/app/api/views.py b/app/api/views.py index aaa8ae9d..a4f0707c 100644 --- a/app/api/views.py +++ b/app/api/views.py @@ -24,9 +24,9 @@ from .models import Project, Label, Document, RoleMapping, Role, Comment from .permissions import IsProjectAdmin, IsAnnotatorAndReadOnly, IsAnnotator, IsAnnotationApproverAndReadOnly, IsOwnAnnotation, IsAnnotationApprover, IsOwnComment from .serializers import ProjectSerializer, LabelSerializer, DocumentSerializer, UserSerializer, ApproverSerializer, CommentSerializer from .serializers import ProjectPolymorphicSerializer, RoleMappingSerializer, RoleSerializer -from .utils import CSVParser, ExcelParser, JSONParser, PlainTextParser, CoNLLParser, AudioParser, FastTextParser, iterable_to_io -from .utils import JSONLRenderer -from .utils import JSONPainter, CSVPainter +from .utils import CSVParser, ExcelParser, JSONParser, PlainTextParser, FastTextParser, CoNLLParser, AudioParser, iterable_to_io +from .utils import JSONLRenderer, PlainTextRenderer +from .utils import JSONPainter, CSVPainter, FastTextPainter IsInProjectReadOnlyOrAdmin = (IsAnnotatorAndReadOnly | IsAnnotationApproverAndReadOnly | IsProjectAdmin) IsInProjectOrAdmin = (IsAnnotator | IsAnnotationApprover | IsProjectAdmin) @@ -391,7 +391,7 @@ class CloudUploadAPI(APIView): class TextDownloadAPI(APIView): permission_classes = TextUploadAPI.permission_classes - renderer_classes = (CSVRenderer, JSONLRenderer) + renderer_classes = (CSVRenderer, JSONLRenderer, PlainTextRenderer) def get(self, request, *args, **kwargs): format = request.query_params.get('q') @@ -407,9 +407,9 @@ class TextDownloadAPI(APIView): # jsonl-textlabel format prints text labels while jsonl format prints annotations with label ids # jsonl-textlabel format - "labels": [[0, 15, "PERSON"], ..] # jsonl format - "annotations": [{"label": 5, "start_offset": 0, "end_offset": 2, "user": 1},..] - if format == 'jsonl': + if format in ('jsonl', 'txt'): labels = project.labels.all() - data = JSONPainter.paint_labels(documents, labels) + data = painter.paint_labels(documents, labels) else: data = painter.paint(documents) return Response(data) @@ -419,6 +419,8 @@ class TextDownloadAPI(APIView): return CSVPainter() elif format == 'jsonl' or format == 'json': return JSONPainter() + elif format == 'txt': + return FastTextPainter() else: raise ValidationError('format {} is invalid.'.format(format)) diff --git a/frontend/components/organisms/documents/DocumentUploadForm.vue b/frontend/components/organisms/documents/DocumentUploadForm.vue index 623ae1d3..6159728a 100644 --- a/frontend/components/organisms/documents/DocumentUploadForm.vue +++ b/frontend/components/organisms/documents/DocumentUploadForm.vue @@ -18,7 +18,7 @@ type="error" dismissible > - {{ $t('errors.fileCannotUpload') }} + {{ $t('errors.fileCannotUpload') + errorMsg }}