Browse Source

Replace zip_files with shutil.make_archive

pull/1799/head
Hironsan 3 years ago
parent
commit
e83bc924f1
2 changed files with 15 additions and 24 deletions
  1. 23
      backend/data_export/celery_tasks.py
  2. 16
      backend/data_export/pipeline/writers.py

23
backend/data_export/celery_tasks.py

@ -1,4 +1,6 @@
import os
import shutil
import uuid
from celery import shared_task
from celery.utils.log import get_task_logger
@ -8,7 +10,6 @@ from django.shortcuts import get_object_or_404
from .pipeline.dataset import Dataset
from .pipeline.factories import create_formatter, create_labels, create_writer
from .pipeline.services import ExportApplicationService
from .pipeline.writers import remove_files, zip_files
from data_export.models import ExportedExample
from projects.models import Member, Project
@ -27,15 +28,22 @@ def create_collaborative_dataset(project: Project, file_format: str, confirmed_o
formatters = create_formatter(project, file_format)
writer = create_writer(file_format)
service = ExportApplicationService(dataset, formatters, writer)
filepath = os.path.join(settings.MEDIA_ROOT, f"all.{writer.extension}")
dirname = str(uuid.uuid4())
dirpath = os.path.join(settings.MEDIA_ROOT, dirname)
os.makedirs(dirpath, exist_ok=True)
filepath = os.path.join(dirpath, f"all.{writer.extension}")
service.export(filepath)
return filepath
zip_file = shutil.make_archive(dirpath, "zip", dirpath)
shutil.rmtree(dirpath)
return zip_file
def create_individual_dataset(project: Project, file_format: str, confirmed_only: bool):
files = []
members = Member.objects.filter(project=project)
is_text_project = project.is_text_project
dirname = str(uuid.uuid4())
dirpath = os.path.join(settings.MEDIA_ROOT, dirname)
os.makedirs(dirpath, exist_ok=True)
for member in members:
if confirmed_only:
examples = ExportedExample.objects.confirmed(project, user=member.user)
@ -47,11 +55,10 @@ def create_individual_dataset(project: Project, file_format: str, confirmed_only
formatters = create_formatter(project, file_format)
writer = create_writer(file_format)
service = ExportApplicationService(dataset, formatters, writer)
filepath = os.path.join(settings.MEDIA_ROOT, f"{member.username}.{writer.extension}")
filepath = os.path.join(dirpath, f"{member.username}.{writer.extension}")
service.export(filepath)
files.append(filepath)
zip_file = zip_files(files, settings.MEDIA_ROOT)
remove_files(files)
zip_file = shutil.make_archive(dirpath, "zip", dirpath)
shutil.rmtree(dirpath)
return zip_file

16
backend/data_export/pipeline/writers.py

@ -1,24 +1,8 @@
import abc
import os
import uuid
import zipfile
import pandas as pd
def zip_files(files, dirname):
    """Bundle the given files into a new, uniquely named zip archive.

    The archive is created inside ``dirname`` under a random UUID4 file name
    and uses DEFLATE compression. Every file is stored under its base name
    only, so the directory layout of the inputs is not preserved.

    Args:
        files: Iterable of paths of the files to add to the archive.
        dirname: Directory in which the archive is written.

    Returns:
        The path of the created zip file.
    """
    archive_name = f"{uuid.uuid4()}.zip"
    archive_path = os.path.join(dirname, archive_name)
    archive = zipfile.ZipFile(archive_path, "w", compression=zipfile.ZIP_DEFLATED)
    with archive:
        for source in files:
            # Flatten the entry name: keep only the final path component.
            entry_name = os.path.basename(source)
            archive.write(filename=source, arcname=entry_name)
    return archive_path
def remove_files(files):
    """Delete every file whose path is listed in ``files``.

    Paths are removed in iteration order. Any error raised by ``os.remove``
    propagates to the caller and stops processing of the remaining paths.

    Args:
        files: Iterable of paths of the files to delete.
    """
    for path in files:
        os.remove(path)
class Writer(abc.ABC):
extension = ""

Loading…
Cancel
Save