117 lines
4.6 KiB

import os
import zipfile
import numpy as np
import pandas as pd
from django.test import TestCase, override_settings
from model_mommy import mommy
from pandas.testing import assert_frame_equal
from ..celery_tasks import export_dataset
from projects.models import DOCUMENT_CLASSIFICATION
from projects.tests.utils import prepare_project
def read_zip_content(file):
    """Load every non-empty CSV member of a zip archive into a DataFrame.

    Args:
        file: Path or file-like object accepted by ``zipfile.ZipFile``.

    Returns:
        dict: Maps each member's file name, minus its final extension, to the
        ``pandas.DataFrame`` parsed from that member. The tests in this module
        look the result up by username. Members for which ``pd.read_csv``
        raises ``EmptyDataError`` are left out of the mapping.
    """
    datasets = {}
    with zipfile.ZipFile(file) as archive:
        for member in archive.infolist():
            # Strip only the final extension so that names containing dots
            # (e.g. "john.doe.csv") are not cut off at the first dot.
            username, _ = os.path.splitext(member.filename)
            with archive.open(member) as f:
                try:
                    datasets[username] = pd.read_csv(f)
                except pd.errors.EmptyDataError:
                    # Nothing to parse in an empty member; skip it.
                    continue
    return datasets
@override_settings(MEDIA_URL=os.path.dirname(__file__))
class TestExportTask(TestCase):
    """Check the CSV output of ``export_dataset`` for a document classification project.

    Each test exports the fixture project with the third ``export_dataset``
    argument set to ``False`` ("unconfirmed" tests) or ``True`` ("confirmed"
    tests). Non-collaborative exports are read as a zip archive of per-user
    CSV files; collaborative exports are read as a single CSV file.
    """

    def prepare_data(self, collaborative=False):
        """Create the project fixture shared by all tests.

        ``example1`` gets one category from the admin and one from the
        annotator and is confirmed by the admin. ``example2`` has no category
        and no confirmation.

        Args:
            collaborative: Forwarded to ``prepare_project`` as
                ``collaborative_annotation``.
        """
        self.project = prepare_project(DOCUMENT_CLASSIFICATION, collaborative_annotation=collaborative)
        self.example1 = mommy.make("Example", project=self.project.item, text="confirmed")
        self.example2 = mommy.make("Example", project=self.project.item, text="unconfirmed")
        self.category1 = mommy.make("Category", example=self.example1, user=self.project.admin)
        self.category2 = mommy.make("Category", example=self.example1, user=self.project.annotator)
        mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)

    def _export(self, confirmed):
        """Export the project as CSV and schedule the produced file for removal.

        Args:
            confirmed: Passed through as the third positional argument of
                ``export_dataset``.

        Returns:
            Whatever ``export_dataset`` returns (used by the tests as a path).
        """
        file = export_dataset(self.project.id, "CSV", confirmed)
        # Register the removal immediately so the file is deleted even when a
        # later read or assertion in the test fails.
        self.addCleanup(os.remove, file)
        return file

    @staticmethod
    def _row(example, categories=np.nan):
        """Build one expected CSV row; ``categories`` defaults to NaN (empty cell)."""
        return {"id": example.id, "data": example.text, "categories": categories}

    def _joined_categories(self):
        """Return both category label texts, sorted and joined with ``#``."""
        return "#".join(sorted([self.category1.label.text, self.category2.label.text]))

    def _assert_datasets(self, expected_datasets, datasets):
        """Assert that every expected per-user frame is present and equal."""
        for username, dataset in expected_datasets.items():
            # Fail with a readable message instead of a bare KeyError.
            self.assertIn(username, datasets)
            assert_frame_equal(dataset, datasets[username])

    def test_unconfirmed_and_non_collaborative(self):
        """Each user's file lists both examples with only that user's category."""
        self.prepare_data()
        datasets = read_zip_content(self._export(False))
        expected_datasets = {
            self.project.admin.username: pd.DataFrame(
                [
                    self._row(self.example1, self.category1.label.text),
                    self._row(self.example2),
                ]
            ),
            self.project.approver.username: pd.DataFrame(
                [
                    self._row(self.example1),
                    self._row(self.example2),
                ]
            ),
            self.project.annotator.username: pd.DataFrame(
                [
                    self._row(self.example1, self.category2.label.text),
                    self._row(self.example2),
                ]
            ),
        }
        self._assert_datasets(expected_datasets, datasets)

    def test_unconfirmed_and_collaborative(self):
        """The single CSV lists both examples with all categories merged."""
        self.prepare_data(collaborative=True)
        dataset = pd.read_csv(self._export(False))
        expected_dataset = pd.DataFrame(
            [
                self._row(self.example1, self._joined_categories()),
                self._row(self.example2),
            ]
        )
        assert_frame_equal(dataset, expected_dataset)

    def test_confirmed_and_non_collaborative(self):
        """The admin's file contains only the example the admin confirmed."""
        self.prepare_data()
        datasets = read_zip_content(self._export(True))
        expected_datasets = {
            self.project.admin.username: pd.DataFrame(
                [
                    self._row(self.example1, self.category1.label.text),
                ]
            )
        }
        self._assert_datasets(expected_datasets, datasets)

    def test_confirmed_and_collaborative(self):
        """The single CSV contains only the confirmed example with merged categories."""
        self.prepare_data(collaborative=True)
        dataset = pd.read_csv(self._export(True))
        expected_dataset = pd.DataFrame(
            [
                self._row(self.example1, self._joined_categories()),
            ]
        )
        assert_frame_equal(dataset, expected_dataset)