You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

832 lines
37 KiB

  1. import os
  2. import zipfile
  3. import pandas as pd
  4. from django.test import TestCase, override_settings
  5. from model_mommy import mommy
  6. from ..celery_tasks import export_dataset
  7. from data_export.models import DATA
  8. from projects.models import (
  9. BOUNDING_BOX,
  10. DOCUMENT_CLASSIFICATION,
  11. IMAGE_CAPTIONING,
  12. IMAGE_CLASSIFICATION,
  13. INTENT_DETECTION_AND_SLOT_FILLING,
  14. SEGMENTATION,
  15. SEQ2SEQ,
  16. SEQUENCE_LABELING,
  17. SPEECH2TEXT,
  18. )
  19. from projects.tests.utils import prepare_project
  20. def read_zip_content(file):
  21. datasets = {}
  22. with zipfile.ZipFile(file) as z:
  23. for file in z.filelist:
  24. username = file.filename.split(".")[0]
  25. with z.open(file) as f:
  26. try:
  27. df = pd.read_json(f, lines=True)
  28. except pd.errors.EmptyDataError:
  29. continue
  30. datasets[username] = df.to_dict(orient="records")
  31. return datasets
  32. @override_settings(MEDIA_URL=os.path.dirname(__file__))
  33. class TestExport(TestCase):
  34. def export_dataset(self, confirmed_only=False):
  35. file = export_dataset(self.project.id, "JSONL", confirmed_only)
  36. if self.project.item.collaborative_annotation:
  37. dataset = pd.read_json(file, lines=True).to_dict(orient="records")
  38. else:
  39. dataset = read_zip_content(file)
  40. os.remove(file)
  41. return dataset
  42. def data_to_text(self, example):
  43. d = example.to_dict()
  44. d["text"] = d.pop(DATA)
  45. return d
  46. def data_to_filename(self, example):
  47. d = example.to_dict(is_text_project=False)
  48. d["filename"] = d.pop(DATA)
  49. return d
  50. class TestExportCategory(TestExport):
  51. def prepare_data(self, collaborative=False):
  52. self.project = prepare_project(DOCUMENT_CLASSIFICATION, collaborative_annotation=collaborative)
  53. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="example1")
  54. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="example2")
  55. self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
  56. self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
  57. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  58. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  59. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  60. self.data1 = self.data_to_text(self.example1)
  61. self.data2 = self.data_to_text(self.example2)
  62. def test_unconfirmed_and_non_collaborative(self):
  63. self.prepare_data()
  64. datasets = self.export_dataset()
  65. expected_datasets = {
  66. self.project.admin.username: [
  67. {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]},
  68. {**self.data2, "label": [], "Comments": []},
  69. ],
  70. self.project.approver.username: [
  71. {**self.data1, "label": [], "Comments": []},
  72. {**self.data2, "label": [], "Comments": []},
  73. ],
  74. self.project.annotator.username: [
  75. {**self.data1, "label": [self.category2.to_string()], "Comments": [self.comment2.to_string()]},
  76. {**self.data2, "label": [], "Comments": []},
  77. ],
  78. }
  79. for username, dataset in expected_datasets.items():
  80. self.assertEqual(datasets[username], dataset)
  81. def test_unconfirmed_and_collaborative(self):
  82. self.prepare_data(collaborative=True)
  83. dataset = self.export_dataset()
  84. expected_dataset = [
  85. {
  86. **self.data1,
  87. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  88. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  89. },
  90. {**self.data2, "label": [], "Comments": []},
  91. ]
  92. self.assertEqual(dataset, expected_dataset)
  93. def test_confirmed_and_non_collaborative(self):
  94. self.prepare_data()
  95. datasets = self.export_dataset(confirmed_only=True)
  96. expected_datasets = {
  97. self.project.admin.username: [
  98. {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]}
  99. ]
  100. }
  101. for username, dataset in expected_datasets.items():
  102. self.assertEqual(datasets[username], dataset)
  103. def test_confirmed_and_collaborative(self):
  104. self.prepare_data(collaborative=True)
  105. dataset = self.export_dataset(confirmed_only=True)
  106. expected_dataset = [
  107. {
  108. **self.data1,
  109. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  110. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  111. }
  112. ]
  113. self.assertEqual(dataset, expected_dataset)
  114. class TestExportSeq2seq(TestExport):
  115. def prepare_data(self, collaborative=False):
  116. self.project = prepare_project(SEQ2SEQ, collaborative_annotation=collaborative)
  117. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  118. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  119. self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
  120. self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
  121. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  122. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  123. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  124. self.data1 = self.data_to_text(self.example1)
  125. self.data2 = self.data_to_text(self.example2)
  126. def test_unconfirmed_and_non_collaborative(self):
  127. self.prepare_data()
  128. datasets = self.export_dataset()
  129. expected_datasets = {
  130. self.project.admin.username: [
  131. {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
  132. {**self.data2, "label": [], "Comments": []},
  133. ],
  134. self.project.approver.username: [
  135. {**self.data1, "label": [], "Comments": []},
  136. {**self.data2, "label": [], "Comments": []},
  137. ],
  138. self.project.annotator.username: [
  139. {**self.data1, "label": [self.text2.text], "Comments": [self.comment2.to_string()]},
  140. {**self.data2, "label": [], "Comments": []},
  141. ],
  142. }
  143. for username, dataset in expected_datasets.items():
  144. self.assertEqual(datasets[username], dataset)
  145. def test_unconfirmed_and_collaborative(self):
  146. self.prepare_data(collaborative=True)
  147. dataset = self.export_dataset()
  148. expected_dataset = [
  149. {
  150. **self.data1,
  151. "label": sorted([self.text1.text, self.text2.text]),
  152. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  153. },
  154. {**self.data2, "label": [], "Comments": []},
  155. ]
  156. self.assertEqual(dataset, expected_dataset)
  157. def test_confirmed_and_non_collaborative(self):
  158. self.prepare_data()
  159. datasets = self.export_dataset(confirmed_only=True)
  160. expected_datasets = {
  161. self.project.admin.username: [
  162. {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
  163. ],
  164. self.project.approver.username: [],
  165. self.project.annotator.username: [],
  166. }
  167. for username, dataset in datasets.items():
  168. self.assertEqual(dataset, expected_datasets[username])
  169. def test_confirmed_and_collaborative(self):
  170. self.prepare_data(collaborative=True)
  171. dataset = self.export_dataset(confirmed_only=True)
  172. expected_dataset = [
  173. {
  174. **self.data1,
  175. "label": sorted([self.text1.text, self.text2.text]),
  176. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  177. }
  178. ]
  179. self.assertEqual(dataset, expected_dataset)
  180. class TestExportIntentDetectionAndSlotFilling(TestExport):
  181. def prepare_data(self, collaborative=False):
  182. self.project = prepare_project(INTENT_DETECTION_AND_SLOT_FILLING, collaborative_annotation=collaborative)
  183. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  184. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  185. self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
  186. self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
  187. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  188. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  189. self.span = mommy.make(
  190. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
  191. )
  192. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  193. self.data1 = self.data_to_text(self.example1)
  194. self.data2 = self.data_to_text(self.example2)
  195. def test_unconfirmed_and_non_collaborative(self):
  196. self.prepare_data()
  197. datasets = self.export_dataset()
  198. expected_datasets = {
  199. self.project.admin.username: [
  200. {
  201. **self.data1,
  202. "entities": [list(self.span.to_tuple())],
  203. "cats": [self.category1.to_string()],
  204. "Comments": [self.comment1.to_string()],
  205. },
  206. {**self.data2, "entities": [], "cats": [], "Comments": []},
  207. ],
  208. self.project.annotator.username: [
  209. {
  210. **self.data1,
  211. "entities": [],
  212. "cats": [self.category2.to_string()],
  213. "Comments": [self.comment2.to_string()],
  214. },
  215. {**self.data2, "entities": [], "cats": [], "Comments": []},
  216. ],
  217. self.project.approver.username: [
  218. {**self.data1, "entities": [], "cats": [], "Comments": []},
  219. {**self.data2, "entities": [], "cats": [], "Comments": []},
  220. ],
  221. }
  222. for username, dataset in expected_datasets.items():
  223. self.assertEqual(dataset, datasets[username])
  224. def test_unconfirmed_and_collaborative(self):
  225. self.prepare_data(collaborative=True)
  226. dataset = self.export_dataset()
  227. expected_dataset = [
  228. {
  229. **self.data1,
  230. "entities": [list(self.span.to_tuple())],
  231. "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
  232. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  233. },
  234. {**self.data2, "entities": [], "cats": [], "Comments": []},
  235. ]
  236. self.assertEqual(dataset, expected_dataset)
  237. def test_confirmed_and_non_collaborative(self):
  238. self.prepare_data()
  239. datasets = self.export_dataset(confirmed_only=True)
  240. expected_datasets = {
  241. self.project.admin.username: [
  242. {
  243. **self.data1,
  244. "entities": [list(self.span.to_tuple())],
  245. "cats": [self.category1.to_string()],
  246. "Comments": [self.comment1.to_string()],
  247. },
  248. ],
  249. self.project.annotator.username: [],
  250. self.project.approver.username: [],
  251. }
  252. for username, dataset in expected_datasets.items():
  253. self.assertEqual(dataset, datasets[username])
  254. def test_confirmed_and_collaborative(self):
  255. self.prepare_data(collaborative=True)
  256. dataset = self.export_dataset(confirmed_only=True)
  257. expected_dataset = [
  258. {
  259. **self.data1,
  260. "entities": [list(self.span.to_tuple())],
  261. "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
  262. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  263. },
  264. ]
  265. self.assertEqual(dataset, expected_dataset)
  266. class TestExportSequenceLabeling(TestExport):
  267. def prepare_data(self, collaborative=False):
  268. self.project = prepare_project(SEQUENCE_LABELING, collaborative_annotation=collaborative)
  269. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  270. self.span1 = mommy.make(
  271. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
  272. )
  273. self.span2 = mommy.make(
  274. "ExportedSpan", example=self.example1, user=self.project.annotator, start_offset=1, end_offset=2
  275. )
  276. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  277. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  278. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  279. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  280. self.data1 = self.data_to_text(self.example1)
  281. self.data2 = self.data_to_text(self.example2)
  282. def test_unconfirmed_and_non_collaborative(self):
  283. self.prepare_data()
  284. datasets = self.export_dataset()
  285. expected_datasets = {
  286. self.project.admin.username: [
  287. {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": [self.comment1.to_string()]},
  288. {**self.data2, "label": [], "Comments": []},
  289. ],
  290. self.project.annotator.username: [
  291. {**self.data1, "label": [list(self.span2.to_tuple())], "Comments": [self.comment2.to_string()]},
  292. {**self.data2, "label": [], "Comments": []},
  293. ],
  294. self.project.approver.username: [
  295. {**self.data1, "label": [], "Comments": []},
  296. {**self.data2, "label": [], "Comments": []},
  297. ],
  298. }
  299. for username, dataset in expected_datasets.items():
  300. self.assertEqual(dataset, datasets[username])
  301. def test_unconfirmed_and_collaborative(self):
  302. self.prepare_data(collaborative=True)
  303. dataset = self.export_dataset()
  304. expected_dataset = [
  305. {
  306. **self.data1,
  307. "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
  308. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  309. },
  310. {**self.data2, "label": [], "Comments": []},
  311. ]
  312. self.assertEqual(dataset, expected_dataset)
  313. def test_confirmed_and_non_collaborative(self):
  314. self.prepare_data()
  315. datasets = self.export_dataset(confirmed_only=True)
  316. expected_datasets = {
  317. self.project.admin.username: [
  318. {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": [self.comment1.to_string()]},
  319. ],
  320. self.project.annotator.username: [],
  321. self.project.approver.username: [],
  322. }
  323. for username, dataset in expected_datasets.items():
  324. self.assertEqual(dataset, datasets[username])
  325. def test_confirmed_and_collaborative(self):
  326. self.prepare_data(collaborative=True)
  327. dataset = self.export_dataset(confirmed_only=True)
  328. expected_dataset = [
  329. {
  330. **self.data1,
  331. "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
  332. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  333. },
  334. ]
  335. self.assertEqual(dataset, expected_dataset)
  336. class TestExportSpeechToText(TestExport):
  337. def prepare_data(self, collaborative=False):
  338. self.project = prepare_project(SPEECH2TEXT, collaborative_annotation=collaborative)
  339. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  340. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  341. self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
  342. self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
  343. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  344. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  345. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  346. self.data1 = self.data_to_filename(self.example1)
  347. self.data2 = self.data_to_filename(self.example2)
  348. def test_unconfirmed_and_non_collaborative(self):
  349. self.prepare_data()
  350. datasets = self.export_dataset()
  351. expected_datasets = {
  352. self.project.admin.username: [
  353. {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
  354. {**self.data2, "label": [], "Comments": []},
  355. ],
  356. self.project.approver.username: [
  357. {**self.data1, "label": [], "Comments": []},
  358. {**self.data2, "label": [], "Comments": []},
  359. ],
  360. self.project.annotator.username: [
  361. {**self.data1, "label": [self.text2.text], "Comments": [self.comment2.to_string()]},
  362. {**self.data2, "label": [], "Comments": []},
  363. ],
  364. }
  365. for username, dataset in expected_datasets.items():
  366. self.assertEqual(datasets[username], dataset)
  367. def test_unconfirmed_and_collaborative(self):
  368. self.prepare_data(collaborative=True)
  369. dataset = self.export_dataset()
  370. expected_dataset = [
  371. {
  372. **self.data1,
  373. "label": sorted([self.text1.text, self.text2.text]),
  374. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  375. },
  376. {**self.data2, "label": [], "Comments": []},
  377. ]
  378. self.assertEqual(dataset, expected_dataset)
  379. def test_confirmed_and_non_collaborative(self):
  380. self.prepare_data()
  381. datasets = self.export_dataset(confirmed_only=True)
  382. expected_datasets = {
  383. self.project.admin.username: [
  384. {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
  385. ],
  386. self.project.annotator.username: [],
  387. self.project.approver.username: [],
  388. }
  389. for username, dataset in datasets.items():
  390. self.assertEqual(dataset, expected_datasets[username])
  391. def test_confirmed_and_collaborative(self):
  392. self.prepare_data(collaborative=True)
  393. dataset = self.export_dataset(confirmed_only=True)
  394. expected_dataset = [
  395. {
  396. **self.data1,
  397. "label": sorted([self.text1.text, self.text2.text]),
  398. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  399. }
  400. ]
  401. self.assertEqual(dataset, expected_dataset)
  402. class TestExportImageClassification(TestExport):
  403. def prepare_data(self, collaborative=False):
  404. self.project = prepare_project(IMAGE_CLASSIFICATION, collaborative_annotation=collaborative)
  405. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  406. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  407. self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
  408. self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
  409. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  410. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  411. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  412. self.data1 = self.data_to_filename(self.example1)
  413. self.data2 = self.data_to_filename(self.example2)
  414. def test_unconfirmed_and_non_collaborative(self):
  415. self.prepare_data()
  416. datasets = self.export_dataset()
  417. expected_datasets = {
  418. self.project.admin.username: [
  419. {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]},
  420. {**self.data2, "label": [], "Comments": []},
  421. ],
  422. self.project.approver.username: [
  423. {**self.data1, "label": [], "Comments": []},
  424. {**self.data2, "label": [], "Comments": []},
  425. ],
  426. self.project.annotator.username: [
  427. {**self.data1, "label": [self.category2.to_string()], "Comments": [self.comment2.to_string()]},
  428. {**self.data2, "label": [], "Comments": []},
  429. ],
  430. }
  431. for username, dataset in expected_datasets.items():
  432. self.assertEqual(datasets[username], dataset)
  433. def test_unconfirmed_and_collaborative(self):
  434. self.prepare_data(collaborative=True)
  435. dataset = self.export_dataset()
  436. expected_dataset = [
  437. {
  438. **self.data1,
  439. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  440. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  441. },
  442. {**self.data2, "label": [], "Comments": []},
  443. ]
  444. self.assertEqual(dataset, expected_dataset)
  445. def test_confirmed_and_non_collaborative(self):
  446. self.prepare_data()
  447. datasets = self.export_dataset(confirmed_only=True)
  448. expected_datasets = {
  449. self.project.admin.username: [
  450. {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]}
  451. ]
  452. }
  453. for username, dataset in expected_datasets.items():
  454. self.assertEqual(datasets[username], dataset)
  455. def test_confirmed_and_collaborative(self):
  456. self.prepare_data(collaborative=True)
  457. dataset = self.export_dataset(confirmed_only=True)
  458. expected_dataset = [
  459. {
  460. **self.data1,
  461. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  462. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  463. }
  464. ]
  465. self.assertEqual(dataset, expected_dataset)
  466. class TestExportBoundingBox(TestExport):
  467. def prepare_data(self, collaborative=False):
  468. self.project = prepare_project(BOUNDING_BOX, collaborative_annotation=collaborative)
  469. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  470. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  471. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  472. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  473. self.bbox1 = mommy.make(
  474. "ExportedBoundingBox", example=self.example1, user=self.project.admin, x=0, y=0, width=10, height=10
  475. )
  476. self.bbox2 = mommy.make(
  477. "ExportedBoundingBox", example=self.example1, user=self.project.annotator, x=10, y=10, width=20, height=20
  478. )
  479. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  480. self.data1 = self.data_to_filename(self.example1)
  481. self.data2 = self.data_to_filename(self.example2)
  482. def test_unconfirmed_and_non_collaborative(self):
  483. self.prepare_data()
  484. datasets = self.export_dataset()
  485. expected_datasets = {
  486. self.project.admin.username: [
  487. {
  488. **self.data1,
  489. "bbox": [self.bbox1.to_dict()],
  490. "Comments": [self.comment1.to_dict()],
  491. },
  492. {**self.data2, "bbox": [], "Comments": []},
  493. ],
  494. self.project.approver.username: [
  495. {**self.data1, "bbox": [], "Comments": []},
  496. {**self.data2, "bbox": [], "Comments": []},
  497. ],
  498. self.project.annotator.username: [
  499. {**self.data1, "bbox": [self.bbox2.to_dict()], "Comments": [self.comment2.to_dict()]},
  500. {**self.data2, "bbox": [], "Comments": []},
  501. ],
  502. }
  503. for username, dataset in expected_datasets.items():
  504. self.assertEqual(datasets[username], dataset)
  505. def test_unconfirmed_and_collaborative(self):
  506. self.prepare_data(collaborative=True)
  507. dataset = self.export_dataset()
  508. expected_dataset = [
  509. {
  510. **self.data1,
  511. "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
  512. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  513. },
  514. {**self.data2, "bbox": [], "Comments": []},
  515. ]
  516. self.assertEqual(dataset, expected_dataset)
  517. def test_confirmed_and_non_collaborative(self):
  518. self.prepare_data()
  519. datasets = self.export_dataset(confirmed_only=True)
  520. expected_datasets = {
  521. self.project.admin.username: [
  522. {**self.data1, "bbox": [self.bbox1.to_dict()], "Comments": [self.comment1.to_dict()]}
  523. ]
  524. }
  525. for username, dataset in expected_datasets.items():
  526. self.assertEqual(datasets[username], dataset)
  527. def test_confirmed_and_collaborative(self):
  528. self.prepare_data(collaborative=True)
  529. dataset = self.export_dataset(confirmed_only=True)
  530. expected_dataset = [
  531. {
  532. **self.data1,
  533. "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
  534. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  535. }
  536. ]
  537. self.assertEqual(dataset, expected_dataset)
  538. class TestExportSegmentation(TestExport):
  539. def prepare_data(self, collaborative=False):
  540. self.project = prepare_project(SEGMENTATION, collaborative_annotation=collaborative)
  541. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  542. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  543. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  544. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  545. self.seg1 = mommy.make("ExportedSegmentation", example=self.example1, user=self.project.admin, points=[0, 1])
  546. self.seg2 = mommy.make(
  547. "ExportedSegmentation", example=self.example1, user=self.project.annotator, points=[1, 2]
  548. )
  549. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  550. self.data1 = self.data_to_filename(self.example1)
  551. self.data2 = self.data_to_filename(self.example2)
  552. self.column = "segmentation"
  553. def test_unconfirmed_and_non_collaborative(self):
  554. self.prepare_data()
  555. datasets = self.export_dataset()
  556. expected_datasets = {
  557. self.project.admin.username: [
  558. {**self.data1, self.column: [self.seg1.to_dict()], "Comments": [self.comment1.to_dict()]},
  559. {**self.data2, self.column: [], "Comments": []},
  560. ],
  561. self.project.approver.username: [
  562. {**self.data1, self.column: [], "Comments": []},
  563. {**self.data2, self.column: [], "Comments": []},
  564. ],
  565. self.project.annotator.username: [
  566. {**self.data1, self.column: [self.seg2.to_dict()], "Comments": [self.comment2.to_dict()]},
  567. {**self.data2, self.column: [], "Comments": []},
  568. ],
  569. }
  570. for username, dataset in expected_datasets.items():
  571. self.assertEqual(datasets[username], dataset)
  572. def test_unconfirmed_and_collaborative(self):
  573. self.prepare_data(collaborative=True)
  574. dataset = self.export_dataset()
  575. expected_dataset = [
  576. {
  577. **self.data1,
  578. self.column: [self.seg1.to_dict(), self.seg2.to_dict()],
  579. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  580. },
  581. {**self.data2, self.column: [], "Comments": []},
  582. ]
  583. self.assertEqual(dataset, expected_dataset)
  584. def test_confirmed_and_non_collaborative(self):
  585. self.prepare_data()
  586. datasets = self.export_dataset(confirmed_only=True)
  587. expected_datasets = {
  588. self.project.admin.username: [
  589. {**self.data1, self.column: [self.seg1.to_dict()], "Comments": [self.comment1.to_dict()]}
  590. ]
  591. }
  592. for username, dataset in expected_datasets.items():
  593. self.assertEqual(datasets[username], dataset)
  594. def test_confirmed_and_collaborative(self):
  595. self.prepare_data(collaborative=True)
  596. dataset = self.export_dataset(confirmed_only=True)
  597. expected_dataset = [
  598. {
  599. **self.data1,
  600. self.column: [self.seg1.to_dict(), self.seg2.to_dict()],
  601. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  602. }
  603. ]
  604. self.assertEqual(dataset, expected_dataset)
  605. class TestExportImageCaptioning(TestExport):
  606. def prepare_data(self, collaborative=False):
  607. self.project = prepare_project(IMAGE_CAPTIONING, collaborative_annotation=collaborative)
  608. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  609. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  610. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  611. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  612. self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
  613. self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
  614. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  615. self.data1 = self.data_to_filename(self.example1)
  616. self.data2 = self.data_to_filename(self.example2)
  617. self.column = "label"
  618. def test_unconfirmed_and_non_collaborative(self):
  619. self.prepare_data()
  620. datasets = self.export_dataset()
  621. expected_datasets = {
  622. self.project.admin.username: [
  623. {**self.data1, self.column: [self.text1.text], "Comments": [self.comment1.to_string()]},
  624. {**self.data2, self.column: [], "Comments": []},
  625. ],
  626. self.project.approver.username: [
  627. {**self.data1, self.column: [], "Comments": []},
  628. {**self.data2, self.column: [], "Comments": []},
  629. ],
  630. self.project.annotator.username: [
  631. {**self.data1, self.column: [self.text2.text], "Comments": [self.comment2.to_string()]},
  632. {**self.data2, self.column: [], "Comments": []},
  633. ],
  634. }
  635. for username, dataset in expected_datasets.items():
  636. self.assertEqual(datasets[username], dataset)
  637. def test_unconfirmed_and_collaborative(self):
  638. self.prepare_data(collaborative=True)
  639. dataset = self.export_dataset()
  640. expected_dataset = [
  641. {
  642. **self.data1,
  643. self.column: sorted([self.text1.text, self.text2.text]),
  644. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  645. },
  646. {**self.data2, self.column: [], "Comments": []},
  647. ]
  648. self.assertEqual(dataset, expected_dataset)
  649. def test_confirmed_and_non_collaborative(self):
  650. self.prepare_data()
  651. datasets = self.export_dataset(confirmed_only=True)
  652. expected_datasets = {
  653. self.project.admin.username: [
  654. {**self.data1, self.column: [self.text1.text], "Comments": [self.comment1.to_string()]},
  655. ],
  656. self.project.approver.username: [],
  657. self.project.annotator.username: [],
  658. }
  659. for username, dataset in datasets.items():
  660. self.assertEqual(dataset, expected_datasets[username])
  661. def test_confirmed_and_collaborative(self):
  662. self.prepare_data(collaborative=True)
  663. dataset = self.export_dataset(confirmed_only=True)
  664. expected_dataset = [
  665. {
  666. **self.data1,
  667. self.column: sorted([self.text1.text, self.text2.text]),
  668. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  669. }
  670. ]
  671. self.assertEqual(dataset, expected_dataset)
  672. class TestExportRelation(TestExport):
  673. def prepare_data(self, collaborative=False):
  674. self.project = prepare_project(SEQUENCE_LABELING, use_relation=True, collaborative_annotation=collaborative)
  675. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="example")
  676. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  677. self.span1 = mommy.make(
  678. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
  679. )
  680. self.span2 = mommy.make(
  681. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=1, end_offset=2
  682. )
  683. self.span3 = mommy.make(
  684. "ExportedSpan", example=self.example1, user=self.project.annotator, start_offset=2, end_offset=3
  685. )
  686. self.relation = mommy.make(
  687. "ExportedRelation", from_id=self.span1, to_id=self.span2, example=self.example1, user=self.project.admin
  688. )
  689. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  690. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  691. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  692. self.data1 = self.data_to_text(self.example1)
  693. self.data2 = self.data_to_text(self.example2)
  694. def test_unconfirmed_and_non_collaborative(self):
  695. self.prepare_data()
  696. datasets = self.export_dataset()
  697. expected_datasets = {
  698. self.project.admin.username: [
  699. {
  700. **self.data1,
  701. "entities": [self.span1.to_dict(), self.span2.to_dict()],
  702. "relations": [self.relation.to_dict()],
  703. "Comments": [self.comment1.to_dict()],
  704. },
  705. {**self.data2, "entities": [], "relations": [], "Comments": []},
  706. ],
  707. self.project.annotator.username: [
  708. {
  709. **self.data1,
  710. "entities": [self.span3.to_dict()],
  711. "relations": [],
  712. "Comments": [self.comment2.to_dict()],
  713. },
  714. {**self.data2, "entities": [], "relations": [], "Comments": []},
  715. ],
  716. self.project.approver.username: [
  717. {**self.data1, "entities": [], "relations": [], "Comments": []},
  718. {**self.data2, "entities": [], "relations": [], "Comments": []},
  719. ],
  720. }
  721. for username, dataset in expected_datasets.items():
  722. self.assertEqual(datasets[username], dataset)
  723. def test_unconfirmed_and_collaborative(self):
  724. self.prepare_data(collaborative=True)
  725. dataset = self.export_dataset()
  726. expected_dataset = [
  727. {
  728. **self.data1,
  729. "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
  730. "relations": [self.relation.to_dict()],
  731. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  732. },
  733. {**self.data2, "entities": [], "relations": [], "Comments": []},
  734. ]
  735. self.assertEqual(dataset, expected_dataset)
  736. def test_confirmed_and_non_collaborative(self):
  737. self.prepare_data()
  738. datasets = self.export_dataset(confirmed_only=True)
  739. expected_datasets = {
  740. self.project.admin.username: [
  741. {
  742. **self.data1,
  743. "entities": [self.span1.to_dict(), self.span2.to_dict()],
  744. "relations": [self.relation.to_dict()],
  745. "Comments": [self.comment1.to_dict()],
  746. },
  747. ],
  748. self.project.annotator.username: [],
  749. self.project.approver.username: [],
  750. }
  751. for username, dataset in datasets.items():
  752. self.assertEqual(dataset, expected_datasets[username])
  753. def test_confirmed_and_collaborative(self):
  754. self.prepare_data(collaborative=True)
  755. dataset = self.export_dataset(confirmed_only=True)
  756. expected_dataset = [
  757. {
  758. **self.data1,
  759. "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
  760. "relations": [self.relation.to_dict()],
  761. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  762. }
  763. ]
  764. self.assertEqual(dataset, expected_dataset)