# Mirror of https://github.com/doccano/doccano.git
# Topics: python, datasets, active-learning, text-annotation, dataset,
#   natural-language-processing, data-labeling, machine-learning, annotation-tool
# 117 lines, 4.6 KiB
import os
|
|
import zipfile
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from django.test import TestCase, override_settings
|
|
from model_mommy import mommy
|
|
from pandas.testing import assert_frame_equal
|
|
|
|
from ..celery_tasks import export_dataset
|
|
from projects.models import DOCUMENT_CLASSIFICATION
|
|
from projects.tests.utils import prepare_project
|
|
|
|
|
|
def read_zip_content(file):
    """Load every CSV member of a zip archive into a DataFrame.

    Args:
        file: Path or file-like object accepted by ``zipfile.ZipFile``.

    Returns:
        A dict mapping each member's file name, minus its final extension,
        to the ``DataFrame`` parsed from that member. Callers in this module
        look the result up by username. Members for which pandas raises
        ``EmptyDataError`` are left out of the result.
    """
    datasets = {}
    with zipfile.ZipFile(file) as archive:
        for member in archive.filelist:
            # Strip only the last extension so that names which themselves
            # contain dots (e.g. "john.doe.csv") are not truncated to "john".
            username = member.filename.rsplit(".", 1)[0]
            with archive.open(member) as f:
                try:
                    df = pd.read_csv(f)
                except pd.errors.EmptyDataError:
                    # Nothing to parse in this member; skip it.
                    continue
            datasets[username] = df
    return datasets
|
|
|
|
|
|
@override_settings(MEDIA_URL=os.path.dirname(__file__))
class TestExportTask(TestCase):
    """Check the CSV output of ``export_dataset`` for a document classification project.

    The four tests cover the combinations of the ``collaborative_annotation``
    project option and the boolean passed as the third argument of
    ``export_dataset`` (presumably a "confirmed only" switch, judging from the
    test names -- confirm against ``celery_tasks``).
    """

    def prepare_data(self, collaborative=False):
        """Create the project, two examples, two categories and one confirmation.

        Both categories are attached to ``example1``: one made by the project
        admin and one by the annotator. Only ``example1`` receives an
        ``ExampleState`` (confirmed by the admin); ``example2`` has neither
        labels nor a confirmation.

        Args:
            collaborative: Forwarded to ``prepare_project`` as
                ``collaborative_annotation``.
        """
        self.project = prepare_project(DOCUMENT_CLASSIFICATION, collaborative_annotation=collaborative)
        self.example1 = mommy.make("Example", project=self.project.item, text="confirmed")
        self.example2 = mommy.make("Example", project=self.project.item, text="unconfirmed")
        self.category1 = mommy.make("Category", example=self.example1, user=self.project.admin)
        self.category2 = mommy.make("Category", example=self.example1, user=self.project.annotator)
        mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)

    def _export_per_user(self, confirmed):
        """Export as CSV and return the zip content as a ``{name: DataFrame}`` dict.

        The exported file is removed even when reading it fails, so a broken
        export does not leave artifacts behind.
        """
        exported = export_dataset(self.project.id, "CSV", confirmed)
        try:
            return read_zip_content(exported)
        finally:
            os.remove(exported)

    def _export_single(self, confirmed):
        """Export as CSV and return the single exported file as a ``DataFrame``.

        The exported file is removed even when parsing it fails.
        """
        exported = export_dataset(self.project.id, "CSV", confirmed)
        try:
            return pd.read_csv(exported)
        finally:
            os.remove(exported)

    def test_unconfirmed_and_non_collaborative(self):
        """Each member gets a frame with both examples and only their own label."""
        self.prepare_data()
        datasets = self._export_per_user(False)
        expected_datasets = {
            self.project.admin.username: pd.DataFrame(
                [
                    {"id": self.example1.id, "data": self.example1.text, "categories": self.category1.label.text},
                    {"id": self.example2.id, "data": self.example2.text, "categories": np.nan},
                ]
            ),
            # The approver created no categories, so every row is unlabeled.
            self.project.approver.username: pd.DataFrame(
                [
                    {"id": self.example1.id, "data": self.example1.text, "categories": np.nan},
                    {"id": self.example2.id, "data": self.example2.text, "categories": np.nan},
                ]
            ),
            self.project.annotator.username: pd.DataFrame(
                [
                    {"id": self.example1.id, "data": self.example1.text, "categories": self.category2.label.text},
                    {"id": self.example2.id, "data": self.example2.text, "categories": np.nan},
                ]
            ),
        }
        for username, dataset in expected_datasets.items():
            assert_frame_equal(dataset, datasets[username])

    def test_unconfirmed_and_collaborative(self):
        """A single frame holds both examples; labels from all users are merged."""
        self.prepare_data(collaborative=True)
        dataset = self._export_single(False)
        expected_dataset = pd.DataFrame(
            [
                {
                    "id": self.example1.id,
                    "data": self.example1.text,
                    # Both users' labels appear in one cell, sorted and "#"-joined.
                    "categories": "#".join(sorted([self.category1.label.text, self.category2.label.text])),
                },
                {"id": self.example2.id, "data": self.example2.text, "categories": np.nan},
            ]
        )
        assert_frame_equal(dataset, expected_dataset)

    def test_confirmed_and_non_collaborative(self):
        """The admin's frame contains only the example the admin confirmed."""
        self.prepare_data()
        datasets = self._export_per_user(True)
        expected_datasets = {
            self.project.admin.username: pd.DataFrame(
                [
                    {
                        "id": self.example1.id,
                        "data": self.example1.text,
                        "categories": self.category1.label.text,
                    }
                ]
            )
        }
        for username, dataset in expected_datasets.items():
            assert_frame_equal(dataset, datasets[username])

    def test_confirmed_and_collaborative(self):
        """Only the confirmed example is exported, with labels from all users merged."""
        self.prepare_data(collaborative=True)
        dataset = self._export_single(True)
        expected_dataset = pd.DataFrame(
            [
                {
                    "id": self.example1.id,
                    "data": self.example1.text,
                    "categories": "#".join(sorted([self.category1.label.text, self.category2.label.text])),
                }
            ]
        )
        assert_frame_equal(dataset, expected_dataset)
|