You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

161 lines
5.9 KiB

3 years ago
3 years ago
3 years ago
  1. from django.conf import settings
  2. from django.shortcuts import get_object_or_404, redirect
  3. from libcloud import DriverType, get_driver
  4. from libcloud.storage.types import (ContainerDoesNotExistError,
  5. ObjectDoesNotExistError)
  6. from rest_framework import status
  7. from rest_framework.exceptions import ParseError, ValidationError
  8. from rest_framework.parsers import MultiPartParser
  9. from rest_framework.permissions import IsAuthenticated
  10. from rest_framework.response import Response
  11. from rest_framework.views import APIView
  12. from rest_framework_csv.renderers import CSVRenderer
  13. from ..models import Project
  14. from ..permissions import IsProjectAdmin
  15. from ..utils import (AudioParser, CoNLLParser, CSVPainter, CSVParser,
  16. ExcelParser, FastTextPainter, FastTextParser,
  17. JSONLRenderer, JSONPainter, JSONParser, PlainTextParser,
  18. PlainTextRenderer, iterable_to_io)
  class Features(APIView):
      """Tell authenticated clients which optional features are switched on."""

      permission_classes = (IsAuthenticated,)

      def get(self, request, *args, **kwargs):
          """Return the feature flags.

          ``cloud_upload`` is the truthiness of the
          ``CLOUD_BROWSER_APACHE_LIBCLOUD_PROVIDER`` setting.
          """
          cloud_upload_enabled = bool(settings.CLOUD_BROWSER_APACHE_LIBCLOUD_PROVIDER)
          payload = {'cloud_upload': cloud_upload_enabled}
          return Response(payload)
  class TextUploadAPI(APIView):
      """Accept a multipart upload and store its parsed contents in a project."""

      parser_classes = (MultiPartParser,)
      permission_classes = [IsAuthenticated & IsProjectAdmin]

      def post(self, request, *args, **kwargs):
          """Import the uploaded ``file`` using the parser named by ``format``.

          Raises:
              ParseError: the request carries no ``file`` field.
              ValidationError: the ``format`` field is missing or names an
                  unknown format.

          Returns:
              An empty ``201 Created`` response once the data has been saved.
          """
          if 'file' not in request.data:
              raise ParseError('Empty content')
          # Without this check a request lacking 'format' fails on the
          # subscript below with an unhandled KeyError (HTTP 500) instead of
          # a client error.
          if 'format' not in request.data:
              raise ValidationError('format is missing')

          self.save_file(
              user=request.user,
              file=request.data['file'],
              file_format=request.data['format'],
              project_id=kwargs['project_id'],
          )
          return Response(status=status.HTTP_201_CREATED)

      @classmethod
      def save_file(cls, user, file, file_format, project_id):
          """Parse ``file`` as ``file_format`` and save the result on behalf of ``user``.

          The project is looked up first, so an unknown ``project_id`` yields a
          404 before the format is examined. Also called by ``CloudUploadAPI``.
          """
          project = get_object_or_404(Project, pk=project_id)
          parser = cls.select_parser(file_format)
          data = parser.parse(file)
          storage = project.get_storage(data)
          storage.save(user)

      @classmethod
      def select_parser(cls, file_format):
          """Return a new parser instance for ``file_format``.

          Raises:
              ValidationError: ``file_format`` is not one of the known names.
          """
          # Every branch returns, so a flat sequence of guards is enough.
          if file_format == 'plain':
              return PlainTextParser()
          if file_format == 'csv':
              return CSVParser()
          if file_format == 'json':
              return JSONParser()
          if file_format == 'conll':
              return CoNLLParser()
          if file_format == 'excel':
              return ExcelParser()
          if file_format == 'audio':
              return AudioParser()
          if file_format == 'fastText':
              return FastTextParser()
          raise ValidationError('format {} is invalid.'.format(file_format))
  class CloudUploadAPI(APIView):
      """Import a file that lives in cloud storage into a project."""

      permission_classes = TextUploadAPI.permission_classes

      def get(self, request, *args, **kwargs):
          """Fetch a cloud object and import it via ``TextUploadAPI.save_file``.

          Required query parameters: ``project_id``, ``upload_format``,
          ``container`` and ``object``. The optional ``next`` parameter selects
          the response: ``about:blank`` gives an empty plain-text 201, any
          other non-empty value gives a redirect to it, and no value gives an
          empty 201.

          Raises:
              ValidationError: a required query parameter is missing, the
                  container or object does not exist, cloud upload is not
                  configured, or the upload format is unknown.
          """
          try:
              project_id = request.query_params['project_id']
              file_format = request.query_params['upload_format']
              cloud_container = request.query_params['container']
              cloud_object = request.query_params['object']
          except KeyError as ex:
              raise ValidationError('query parameter {} is missing'.format(ex)) from ex

          try:
              cloud_file = self.get_cloud_object_as_io(cloud_container, cloud_object)
          except ContainerDoesNotExistError as ex:
              raise ValidationError(
                  'cloud container {} does not exist'.format(cloud_container)
              ) from ex
          except ObjectDoesNotExistError as ex:
              raise ValidationError(
                  'cloud object {} does not exist'.format(cloud_object)
              ) from ex

          TextUploadAPI.save_file(
              user=request.user,
              file=cloud_file,
              file_format=file_format,
              project_id=project_id,
          )

          next_url = request.query_params.get('next')

          if next_url == 'about:blank':
              return Response(data='', content_type='text/plain', status=status.HTTP_201_CREATED)

          if next_url:
              # NOTE(review): next_url comes straight from the query string and
              # is not validated, so this redirect can point at any site.
              # Consider restricting it to allowed hosts.
              return redirect(next_url)

          return Response(status=status.HTTP_201_CREATED)

      @classmethod
      def get_cloud_object_as_io(cls, container_name, object_name):
          """Return the named cloud object wrapped by ``iterable_to_io``.

          The storage driver is chosen from the
          ``CLOUD_BROWSER_APACHE_LIBCLOUD_*`` settings. Errors raised by the
          container or object lookup propagate to the caller.

          Raises:
              ValidationError: the provider setting is empty or unset.
          """
          provider = settings.CLOUD_BROWSER_APACHE_LIBCLOUD_PROVIDER
          # The Features view treats this setting as possibly falsy; calling
          # .lower() on an unset value would crash with AttributeError.
          if not provider:
              raise ValidationError('cloud upload is not configured')

          account = settings.CLOUD_BROWSER_APACHE_LIBCLOUD_ACCOUNT
          key = settings.CLOUD_BROWSER_APACHE_LIBCLOUD_SECRET_KEY

          driver = get_driver(DriverType.STORAGE, provider.lower())
          client = driver(account, key)

          cloud_container = client.get_container(container_name)
          cloud_object = cloud_container.get_object(object_name)

          return iterable_to_io(cloud_object.as_stream())
  class TextDownloadAPI(APIView):
      """Export a project's documents in the format named by the ``q`` parameter."""

      permission_classes = TextUploadAPI.permission_classes
      renderer_classes = (CSVRenderer, JSONLRenderer, PlainTextRenderer)

      def get(self, request, *args, **kwargs):
          """Return the painted documents of the project given in the URL.

          Query parameters:
              q: export format, passed to ``select_painter``.
              onlyApproved: when exactly ``'true'``, documents whose
                  ``annotations_approved_by`` is ``None`` are left out.
          """
          params = request.query_params
          export_format = params.get('q')
          approved_flag = params.get('onlyApproved')
          project = get_object_or_404(Project, pk=self.kwargs['project_id'])

          if approved_flag == 'true':
              documents = project.documents.exclude(annotations_approved_by=None)
          else:
              documents = project.documents.all()

          painter = self.select_painter(export_format)

          # 'jsonl' and 'txt' are painted together with the project's labels;
          # every other format is painted from the documents alone.
          # The previous note here gave two example output shapes:
          #   "labels": [[0, 15, "PERSON"], ..]
          #   "annotations": [{"label": 5, "start_offset": 0, "end_offset": 2, "user": 1},..]
          # NOTE(review): presumably the first is the with-labels output and
          # the second the plain one; confirm against the painters.
          if export_format not in ('jsonl', 'txt'):
              return Response(painter.paint(documents))

          labels = project.labels.all()
          return Response(painter.paint_labels(documents, labels))

      def select_painter(self, format):
          """Return a new painter for ``format``.

          Raises:
              ValidationError: ``format`` is not ``csv``, ``jsonl``, ``json``
                  or ``txt``.
          """
          if format == 'csv':
              return CSVPainter()
          if format in ('jsonl', 'json'):
              return JSONPainter()
          if format == 'txt':
              return FastTextPainter()
          raise ValidationError('format {} is invalid.'.format(format))