You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

64 lines
2.4 KiB

  1. import os
  2. import shutil
  3. import uuid
  4. from celery import shared_task
  5. from celery.utils.log import get_task_logger
  6. from django.conf import settings
  7. from django.shortcuts import get_object_or_404
  8. from .pipeline.dataset import Dataset
  9. from .pipeline.factories import create_formatter, create_labels, create_writer
  10. from .pipeline.services import ExportApplicationService
  11. from data_export.models import ExportedExample
  12. from projects.models import Member, Project
  13. logger = get_task_logger(__name__)
  14. def create_collaborative_dataset(project: Project, dirpath: str, confirmed_only: bool, formatters, writer):
  15. is_text_project = project.is_text_project
  16. if confirmed_only:
  17. examples = ExportedExample.objects.confirmed(project)
  18. else:
  19. examples = ExportedExample.objects.filter(project=project)
  20. labels = create_labels(project, examples)
  21. dataset = Dataset(examples, labels, is_text_project)
  22. service = ExportApplicationService(dataset, formatters, writer)
  23. filepath = os.path.join(dirpath, f"all.{writer.extension}")
  24. service.export(filepath)
  25. def create_individual_dataset(project: Project, dirpath: str, confirmed_only: bool, formatters, writer):
  26. is_text_project = project.is_text_project
  27. members = Member.objects.filter(project=project)
  28. for member in members:
  29. if confirmed_only:
  30. examples = ExportedExample.objects.confirmed(project, user=member.user)
  31. else:
  32. examples = ExportedExample.objects.filter(project=project)
  33. labels = create_labels(project, examples, member.user)
  34. dataset = Dataset(examples, labels, is_text_project)
  35. service = ExportApplicationService(dataset, formatters, writer)
  36. filepath = os.path.join(dirpath, f"{member.username}.{writer.extension}")
  37. service.export(filepath)
  38. @shared_task
  39. def export_dataset(project_id, file_format: str, confirmed_only=False):
  40. project = get_object_or_404(Project, pk=project_id)
  41. dirpath = os.path.join(settings.MEDIA_ROOT, str(uuid.uuid4()))
  42. os.makedirs(dirpath, exist_ok=True)
  43. formatters = create_formatter(project, file_format)
  44. writer = create_writer(file_format)
  45. if project.collaborative_annotation:
  46. create_collaborative_dataset(project, dirpath, confirmed_only, formatters, writer)
  47. else:
  48. create_individual_dataset(project, dirpath, confirmed_only, formatters, writer)
  49. zip_file = shutil.make_archive(dirpath, "zip", dirpath)
  50. shutil.rmtree(dirpath)
  51. return zip_file