You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

784 lines
32 KiB

  1. import os
  2. import zipfile
  3. import pandas as pd
  4. from django.test import TestCase, override_settings
  5. from model_mommy import mommy
  6. from ..celery_tasks import export_dataset
  7. from data_export.models import DATA
  8. from projects.models import (
  9. BOUNDING_BOX,
  10. DOCUMENT_CLASSIFICATION,
  11. IMAGE_CAPTIONING,
  12. IMAGE_CLASSIFICATION,
  13. INTENT_DETECTION_AND_SLOT_FILLING,
  14. SEGMENTATION,
  15. SEQ2SEQ,
  16. SEQUENCE_LABELING,
  17. SPEECH2TEXT,
  18. )
  19. from projects.tests.utils import prepare_project
  20. def read_zip_content(file):
  21. datasets = {}
  22. with zipfile.ZipFile(file) as z:
  23. for file in z.filelist:
  24. username = file.filename.split(".")[0]
  25. with z.open(file) as f:
  26. try:
  27. df = pd.read_json(f, lines=True)
  28. except pd.errors.EmptyDataError:
  29. continue
  30. datasets[username] = df.to_dict(orient="records")
  31. return datasets
  32. @override_settings(MEDIA_URL=os.path.dirname(__file__))
  33. class TestExport(TestCase):
  34. def export_dataset(self, confirmed_only=False):
  35. file = export_dataset(self.project.id, "JSONL", confirmed_only)
  36. if self.project.item.collaborative_annotation:
  37. dataset = pd.read_json(file, lines=True).to_dict(orient="records")
  38. else:
  39. dataset = read_zip_content(file)
  40. os.remove(file)
  41. return dataset
  42. def data_to_text(self, example):
  43. d = example.to_dict()
  44. d["text"] = d.pop(DATA)
  45. return d
  46. def data_to_filename(self, example):
  47. d = example.to_dict(is_text_project=False)
  48. d["filename"] = d.pop(DATA)
  49. return d
  50. class TestExportCategory(TestExport):
  51. def prepare_data(self, collaborative=False):
  52. self.project = prepare_project(DOCUMENT_CLASSIFICATION, collaborative_annotation=collaborative)
  53. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="example1")
  54. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="example2")
  55. self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
  56. self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
  57. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  58. self.data1 = self.data_to_text(self.example1)
  59. self.data2 = self.data_to_text(self.example2)
  60. def test_unconfirmed_and_non_collaborative(self):
  61. self.prepare_data()
  62. datasets = self.export_dataset()
  63. expected_datasets = {
  64. self.project.admin.username: [
  65. {**self.data1, "label": [self.category1.to_string()]},
  66. {**self.data2, "label": []},
  67. ],
  68. self.project.approver.username: [
  69. {**self.data1, "label": []},
  70. {**self.data2, "label": []},
  71. ],
  72. self.project.annotator.username: [
  73. {**self.data1, "label": [self.category2.to_string()]},
  74. {**self.data2, "label": []},
  75. ],
  76. }
  77. for username, dataset in expected_datasets.items():
  78. self.assertEqual(datasets[username], dataset)
  79. def test_unconfirmed_and_collaborative(self):
  80. self.prepare_data(collaborative=True)
  81. dataset = self.export_dataset()
  82. expected_dataset = [
  83. {
  84. **self.data1,
  85. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  86. },
  87. {**self.data2, "label": []},
  88. ]
  89. self.assertEqual(dataset, expected_dataset)
  90. def test_confirmed_and_non_collaborative(self):
  91. self.prepare_data()
  92. datasets = self.export_dataset(confirmed_only=True)
  93. expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()]}]}
  94. for username, dataset in expected_datasets.items():
  95. self.assertEqual(datasets[username], dataset)
  96. def test_confirmed_and_collaborative(self):
  97. self.prepare_data(collaborative=True)
  98. dataset = self.export_dataset(confirmed_only=True)
  99. expected_dataset = [
  100. {
  101. **self.data1,
  102. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  103. }
  104. ]
  105. self.assertEqual(dataset, expected_dataset)
  106. class TestExportSeq2seq(TestExport):
  107. def prepare_data(self, collaborative=False):
  108. self.project = prepare_project(SEQ2SEQ, collaborative_annotation=collaborative)
  109. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  110. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  111. self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
  112. self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
  113. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  114. self.data1 = self.data_to_text(self.example1)
  115. self.data2 = self.data_to_text(self.example2)
  116. def test_unconfirmed_and_non_collaborative(self):
  117. self.prepare_data()
  118. datasets = self.export_dataset()
  119. expected_datasets = {
  120. self.project.admin.username: [
  121. {**self.data1, "label": [self.text1.text]},
  122. {**self.data2, "label": []},
  123. ],
  124. self.project.approver.username: [
  125. {**self.data1, "label": []},
  126. {**self.data2, "label": []},
  127. ],
  128. self.project.annotator.username: [
  129. {**self.data1, "label": [self.text2.text]},
  130. {**self.data2, "label": []},
  131. ],
  132. }
  133. for username, dataset in expected_datasets.items():
  134. self.assertEqual(datasets[username], dataset)
  135. def test_unconfirmed_and_collaborative(self):
  136. self.prepare_data(collaborative=True)
  137. dataset = self.export_dataset()
  138. expected_dataset = [
  139. {
  140. **self.data1,
  141. "label": sorted([self.text1.text, self.text2.text]),
  142. },
  143. {**self.data2, "label": []},
  144. ]
  145. self.assertEqual(dataset, expected_dataset)
  146. def test_confirmed_and_non_collaborative(self):
  147. self.prepare_data()
  148. datasets = self.export_dataset(confirmed_only=True)
  149. expected_datasets = {
  150. self.project.admin.username: [
  151. {**self.data1, "label": [self.text1.text]},
  152. ],
  153. self.project.approver.username: [],
  154. self.project.annotator.username: [],
  155. }
  156. for username, dataset in datasets.items():
  157. self.assertEqual(dataset, expected_datasets[username])
  158. def test_confirmed_and_collaborative(self):
  159. self.prepare_data(collaborative=True)
  160. dataset = self.export_dataset(confirmed_only=True)
  161. expected_dataset = [
  162. {
  163. **self.data1,
  164. "label": sorted([self.text1.text, self.text2.text]),
  165. }
  166. ]
  167. self.assertEqual(dataset, expected_dataset)
  168. class TestExportIntentDetectionAndSlotFilling(TestExport):
  169. def prepare_data(self, collaborative=False):
  170. self.project = prepare_project(INTENT_DETECTION_AND_SLOT_FILLING, collaborative_annotation=collaborative)
  171. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  172. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  173. self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
  174. self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
  175. self.span = mommy.make(
  176. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
  177. )
  178. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  179. self.data1 = self.data_to_text(self.example1)
  180. self.data2 = self.data_to_text(self.example2)
  181. def test_unconfirmed_and_non_collaborative(self):
  182. self.prepare_data()
  183. datasets = self.export_dataset()
  184. expected_datasets = {
  185. self.project.admin.username: [
  186. {
  187. **self.data1,
  188. "entities": [list(self.span.to_tuple())],
  189. "cats": [self.category1.to_string()],
  190. },
  191. {**self.data2, "entities": [], "cats": []},
  192. ],
  193. self.project.annotator.username: [
  194. {
  195. **self.data1,
  196. "entities": [],
  197. "cats": [self.category2.to_string()],
  198. },
  199. {**self.data2, "entities": [], "cats": []},
  200. ],
  201. self.project.approver.username: [
  202. {**self.data1, "entities": [], "cats": []},
  203. {**self.data2, "entities": [], "cats": []},
  204. ],
  205. }
  206. for username, dataset in expected_datasets.items():
  207. self.assertEqual(dataset, datasets[username])
  208. def test_unconfirmed_and_collaborative(self):
  209. self.prepare_data(collaborative=True)
  210. dataset = self.export_dataset()
  211. expected_dataset = [
  212. {
  213. **self.data1,
  214. "entities": [list(self.span.to_tuple())],
  215. "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
  216. },
  217. {**self.data2, "entities": [], "cats": []},
  218. ]
  219. self.assertEqual(dataset, expected_dataset)
  220. def test_confirmed_and_non_collaborative(self):
  221. self.prepare_data()
  222. datasets = self.export_dataset(confirmed_only=True)
  223. expected_datasets = {
  224. self.project.admin.username: [
  225. {
  226. **self.data1,
  227. "entities": [list(self.span.to_tuple())],
  228. "cats": [self.category1.to_string()],
  229. },
  230. ],
  231. self.project.annotator.username: [],
  232. self.project.approver.username: [],
  233. }
  234. for username, dataset in expected_datasets.items():
  235. self.assertEqual(dataset, datasets[username])
  236. def test_confirmed_and_collaborative(self):
  237. self.prepare_data(collaborative=True)
  238. dataset = self.export_dataset(confirmed_only=True)
  239. expected_dataset = [
  240. {
  241. **self.data1,
  242. "entities": [list(self.span.to_tuple())],
  243. "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
  244. },
  245. ]
  246. self.assertEqual(dataset, expected_dataset)
  247. class TestExportSequenceLabeling(TestExport):
  248. def prepare_data(self, collaborative=False):
  249. self.project = prepare_project(SEQUENCE_LABELING, collaborative_annotation=collaborative)
  250. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  251. self.span1 = mommy.make(
  252. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
  253. )
  254. self.span2 = mommy.make(
  255. "ExportedSpan", example=self.example1, user=self.project.annotator, start_offset=1, end_offset=2
  256. )
  257. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  258. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  259. self.data1 = self.data_to_text(self.example1)
  260. self.data2 = self.data_to_text(self.example2)
  261. def test_unconfirmed_and_non_collaborative(self):
  262. self.prepare_data()
  263. datasets = self.export_dataset()
  264. expected_datasets = {
  265. self.project.admin.username: [
  266. {**self.data1, "label": [list(self.span1.to_tuple())]},
  267. {**self.data2, "label": []},
  268. ],
  269. self.project.annotator.username: [
  270. {**self.data1, "label": [list(self.span2.to_tuple())]},
  271. {**self.data2, "label": []},
  272. ],
  273. self.project.approver.username: [
  274. {**self.data1, "label": []},
  275. {**self.data2, "label": []},
  276. ],
  277. }
  278. for username, dataset in expected_datasets.items():
  279. self.assertEqual(dataset, datasets[username])
  280. def test_unconfirmed_and_collaborative(self):
  281. self.prepare_data(collaborative=True)
  282. dataset = self.export_dataset()
  283. expected_dataset = [
  284. {
  285. **self.data1,
  286. "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
  287. },
  288. {**self.data2, "label": []},
  289. ]
  290. self.assertEqual(dataset, expected_dataset)
  291. def test_confirmed_and_non_collaborative(self):
  292. self.prepare_data()
  293. datasets = self.export_dataset(confirmed_only=True)
  294. expected_datasets = {
  295. self.project.admin.username: [
  296. {**self.data1, "label": [list(self.span1.to_tuple())]},
  297. ],
  298. self.project.annotator.username: [],
  299. self.project.approver.username: [],
  300. }
  301. for username, dataset in expected_datasets.items():
  302. self.assertEqual(dataset, datasets[username])
  303. def test_confirmed_and_collaborative(self):
  304. self.prepare_data(collaborative=True)
  305. dataset = self.export_dataset(confirmed_only=True)
  306. expected_dataset = [
  307. {
  308. **self.data1,
  309. "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
  310. },
  311. ]
  312. self.assertEqual(dataset, expected_dataset)
  313. class TestExportSpeechToText(TestExport):
  314. def prepare_data(self, collaborative=False):
  315. self.project = prepare_project(SPEECH2TEXT, collaborative_annotation=collaborative)
  316. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  317. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  318. self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
  319. self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
  320. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  321. self.data1 = self.data_to_filename(self.example1)
  322. self.data2 = self.data_to_filename(self.example2)
  323. def test_unconfirmed_and_non_collaborative(self):
  324. self.prepare_data()
  325. datasets = self.export_dataset()
  326. expected_datasets = {
  327. self.project.admin.username: [
  328. {**self.data1, "label": [self.text1.text]},
  329. {**self.data2, "label": []},
  330. ],
  331. self.project.approver.username: [
  332. {**self.data1, "label": []},
  333. {**self.data2, "label": []},
  334. ],
  335. self.project.annotator.username: [
  336. {**self.data1, "label": [self.text2.text]},
  337. {**self.data2, "label": []},
  338. ],
  339. }
  340. for username, dataset in expected_datasets.items():
  341. self.assertEqual(datasets[username], dataset)
  342. def test_unconfirmed_and_collaborative(self):
  343. self.prepare_data(collaborative=True)
  344. dataset = self.export_dataset()
  345. expected_dataset = [
  346. {
  347. **self.data1,
  348. "label": sorted([self.text1.text, self.text2.text]),
  349. },
  350. {**self.data2, "label": []},
  351. ]
  352. self.assertEqual(dataset, expected_dataset)
  353. def test_confirmed_and_non_collaborative(self):
  354. self.prepare_data()
  355. datasets = self.export_dataset(confirmed_only=True)
  356. expected_datasets = {
  357. self.project.admin.username: [
  358. {**self.data1, "label": [self.text1.text]},
  359. ],
  360. self.project.annotator.username: [],
  361. self.project.approver.username: [],
  362. }
  363. for username, dataset in datasets.items():
  364. self.assertEqual(dataset, expected_datasets[username])
  365. def test_confirmed_and_collaborative(self):
  366. self.prepare_data(collaborative=True)
  367. dataset = self.export_dataset(confirmed_only=True)
  368. expected_dataset = [
  369. {
  370. **self.data1,
  371. "label": sorted([self.text1.text, self.text2.text]),
  372. }
  373. ]
  374. self.assertEqual(dataset, expected_dataset)
  375. class TestExportImageClassification(TestExport):
  376. def prepare_data(self, collaborative=False):
  377. self.project = prepare_project(IMAGE_CLASSIFICATION, collaborative_annotation=collaborative)
  378. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  379. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  380. self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
  381. self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
  382. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  383. self.data1 = self.data_to_filename(self.example1)
  384. self.data2 = self.data_to_filename(self.example2)
  385. def test_unconfirmed_and_non_collaborative(self):
  386. self.prepare_data()
  387. datasets = self.export_dataset()
  388. expected_datasets = {
  389. self.project.admin.username: [
  390. {
  391. **self.data1,
  392. "label": [self.category1.to_string()],
  393. },
  394. {**self.data2, "label": []},
  395. ],
  396. self.project.approver.username: [
  397. {**self.data1, "label": []},
  398. {**self.data2, "label": []},
  399. ],
  400. self.project.annotator.username: [
  401. {
  402. **self.data1,
  403. "label": [self.category2.to_string()],
  404. },
  405. {**self.data2, "label": []},
  406. ],
  407. }
  408. for username, dataset in expected_datasets.items():
  409. self.assertEqual(datasets[username], dataset)
  410. def test_unconfirmed_and_collaborative(self):
  411. self.prepare_data(collaborative=True)
  412. dataset = self.export_dataset()
  413. expected_dataset = [
  414. {
  415. **self.data1,
  416. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  417. },
  418. {**self.data2, "label": []},
  419. ]
  420. self.assertEqual(dataset, expected_dataset)
  421. def test_confirmed_and_non_collaborative(self):
  422. self.prepare_data()
  423. datasets = self.export_dataset(confirmed_only=True)
  424. expected_datasets = {self.project.admin.username: [{**self.data1, "label": [self.category1.to_string()]}]}
  425. for username, dataset in expected_datasets.items():
  426. self.assertEqual(datasets[username], dataset)
  427. def test_confirmed_and_collaborative(self):
  428. self.prepare_data(collaborative=True)
  429. dataset = self.export_dataset(confirmed_only=True)
  430. expected_dataset = [
  431. {
  432. **self.data1,
  433. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  434. }
  435. ]
  436. self.assertEqual(dataset, expected_dataset)
  437. class TestExportBoundingBox(TestExport):
  438. def prepare_data(self, collaborative=False):
  439. self.project = prepare_project(BOUNDING_BOX, collaborative_annotation=collaborative)
  440. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  441. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  442. self.bbox1 = mommy.make(
  443. "ExportedBoundingBox", example=self.example1, user=self.project.admin, x=0, y=0, width=10, height=10
  444. )
  445. self.bbox2 = mommy.make(
  446. "ExportedBoundingBox", example=self.example1, user=self.project.annotator, x=10, y=10, width=20, height=20
  447. )
  448. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  449. self.data1 = self.data_to_filename(self.example1)
  450. self.data2 = self.data_to_filename(self.example2)
  451. def test_unconfirmed_and_non_collaborative(self):
  452. self.prepare_data()
  453. datasets = self.export_dataset()
  454. expected_datasets = {
  455. self.project.admin.username: [
  456. {
  457. **self.data1,
  458. "bbox": [self.bbox1.to_dict()],
  459. },
  460. {**self.data2, "bbox": []},
  461. ],
  462. self.project.approver.username: [
  463. {**self.data1, "bbox": []},
  464. {**self.data2, "bbox": []},
  465. ],
  466. self.project.annotator.username: [
  467. {
  468. **self.data1,
  469. "bbox": [self.bbox2.to_dict()],
  470. },
  471. {**self.data2, "bbox": []},
  472. ],
  473. }
  474. for username, dataset in expected_datasets.items():
  475. self.assertEqual(datasets[username], dataset)
  476. def test_unconfirmed_and_collaborative(self):
  477. self.prepare_data(collaborative=True)
  478. dataset = self.export_dataset()
  479. expected_dataset = [
  480. {
  481. **self.data1,
  482. "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
  483. },
  484. {**self.data2, "bbox": []},
  485. ]
  486. self.assertEqual(dataset, expected_dataset)
  487. def test_confirmed_and_non_collaborative(self):
  488. self.prepare_data()
  489. datasets = self.export_dataset(confirmed_only=True)
  490. expected_datasets = {self.project.admin.username: [{**self.data1, "bbox": [self.bbox1.to_dict()]}]}
  491. for username, dataset in expected_datasets.items():
  492. self.assertEqual(datasets[username], dataset)
  493. def test_confirmed_and_collaborative(self):
  494. self.prepare_data(collaborative=True)
  495. dataset = self.export_dataset(confirmed_only=True)
  496. expected_dataset = [
  497. {
  498. **self.data1,
  499. "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
  500. }
  501. ]
  502. self.assertEqual(dataset, expected_dataset)
  503. class TestExportSegmentation(TestExport):
  504. def prepare_data(self, collaborative=False):
  505. self.project = prepare_project(SEGMENTATION, collaborative_annotation=collaborative)
  506. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  507. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  508. self.seg1 = mommy.make("ExportedSegmentation", example=self.example1, user=self.project.admin, points=[0, 1])
  509. self.seg2 = mommy.make(
  510. "ExportedSegmentation", example=self.example1, user=self.project.annotator, points=[1, 2]
  511. )
  512. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  513. self.data1 = self.data_to_filename(self.example1)
  514. self.data2 = self.data_to_filename(self.example2)
  515. self.column = "segmentation"
  516. def test_unconfirmed_and_non_collaborative(self):
  517. self.prepare_data()
  518. datasets = self.export_dataset()
  519. expected_datasets = {
  520. self.project.admin.username: [
  521. {
  522. **self.data1,
  523. self.column: [self.seg1.to_dict()],
  524. },
  525. {**self.data2, self.column: []},
  526. ],
  527. self.project.approver.username: [
  528. {**self.data1, self.column: []},
  529. {**self.data2, self.column: []},
  530. ],
  531. self.project.annotator.username: [
  532. {
  533. **self.data1,
  534. self.column: [self.seg2.to_dict()],
  535. },
  536. {**self.data2, self.column: []},
  537. ],
  538. }
  539. for username, dataset in expected_datasets.items():
  540. self.assertEqual(datasets[username], dataset)
  541. def test_unconfirmed_and_collaborative(self):
  542. self.prepare_data(collaborative=True)
  543. dataset = self.export_dataset()
  544. expected_dataset = [
  545. {
  546. **self.data1,
  547. self.column: [self.seg1.to_dict(), self.seg2.to_dict()],
  548. },
  549. {**self.data2, self.column: []},
  550. ]
  551. self.assertEqual(dataset, expected_dataset)
  552. def test_confirmed_and_non_collaborative(self):
  553. self.prepare_data()
  554. datasets = self.export_dataset(confirmed_only=True)
  555. expected_datasets = {self.project.admin.username: [{**self.data1, self.column: [self.seg1.to_dict()]}]}
  556. for username, dataset in expected_datasets.items():
  557. self.assertEqual(datasets[username], dataset)
  558. def test_confirmed_and_collaborative(self):
  559. self.prepare_data(collaborative=True)
  560. dataset = self.export_dataset(confirmed_only=True)
  561. expected_dataset = [
  562. {
  563. **self.data1,
  564. self.column: [self.seg1.to_dict(), self.seg2.to_dict()],
  565. }
  566. ]
  567. self.assertEqual(dataset, expected_dataset)
  568. class TestExportImageCaptioning(TestExport):
  569. def prepare_data(self, collaborative=False):
  570. self.project = prepare_project(IMAGE_CAPTIONING, collaborative_annotation=collaborative)
  571. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  572. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  573. self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
  574. self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
  575. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  576. self.data1 = self.data_to_filename(self.example1)
  577. self.data2 = self.data_to_filename(self.example2)
  578. self.column = "label"
  579. def test_unconfirmed_and_non_collaborative(self):
  580. self.prepare_data()
  581. datasets = self.export_dataset()
  582. expected_datasets = {
  583. self.project.admin.username: [
  584. {**self.data1, self.column: [self.text1.text]},
  585. {**self.data2, self.column: []},
  586. ],
  587. self.project.approver.username: [
  588. {**self.data1, self.column: []},
  589. {**self.data2, self.column: []},
  590. ],
  591. self.project.annotator.username: [
  592. {**self.data1, self.column: [self.text2.text]},
  593. {**self.data2, self.column: []},
  594. ],
  595. }
  596. for username, dataset in expected_datasets.items():
  597. self.assertEqual(datasets[username], dataset)
  598. def test_unconfirmed_and_collaborative(self):
  599. self.prepare_data(collaborative=True)
  600. dataset = self.export_dataset()
  601. expected_dataset = [
  602. {
  603. **self.data1,
  604. self.column: sorted([self.text1.text, self.text2.text]),
  605. },
  606. {**self.data2, self.column: []},
  607. ]
  608. self.assertEqual(dataset, expected_dataset)
  609. def test_confirmed_and_non_collaborative(self):
  610. self.prepare_data()
  611. datasets = self.export_dataset(confirmed_only=True)
  612. expected_datasets = {
  613. self.project.admin.username: [
  614. {**self.data1, self.column: [self.text1.text]},
  615. ],
  616. self.project.approver.username: [],
  617. self.project.annotator.username: [],
  618. }
  619. for username, dataset in datasets.items():
  620. self.assertEqual(dataset, expected_datasets[username])
  621. def test_confirmed_and_collaborative(self):
  622. self.prepare_data(collaborative=True)
  623. dataset = self.export_dataset(confirmed_only=True)
  624. expected_dataset = [
  625. {
  626. **self.data1,
  627. self.column: sorted([self.text1.text, self.text2.text]),
  628. }
  629. ]
  630. self.assertEqual(dataset, expected_dataset)
  631. class TestExportRelation(TestExport):
  632. def prepare_data(self, collaborative=False):
  633. self.project = prepare_project(SEQUENCE_LABELING, use_relation=True, collaborative_annotation=collaborative)
  634. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="example")
  635. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  636. self.span1 = mommy.make(
  637. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
  638. )
  639. self.span2 = mommy.make(
  640. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=1, end_offset=2
  641. )
  642. self.span3 = mommy.make(
  643. "ExportedSpan", example=self.example1, user=self.project.annotator, start_offset=2, end_offset=3
  644. )
  645. self.relation = mommy.make(
  646. "ExportedRelation", from_id=self.span1, to_id=self.span2, example=self.example1, user=self.project.admin
  647. )
  648. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  649. self.data1 = self.data_to_text(self.example1)
  650. self.data2 = self.data_to_text(self.example2)
  651. def test_unconfirmed_and_non_collaborative(self):
  652. self.prepare_data()
  653. datasets = self.export_dataset()
  654. expected_datasets = {
  655. self.project.admin.username: [
  656. {
  657. **self.data1,
  658. "entities": [self.span1.to_dict(), self.span2.to_dict()],
  659. "relations": [self.relation.to_dict()],
  660. },
  661. {**self.data2, "entities": [], "relations": []},
  662. ],
  663. self.project.annotator.username: [
  664. {
  665. **self.data1,
  666. "entities": [self.span3.to_dict()],
  667. "relations": [],
  668. },
  669. {**self.data2, "entities": [], "relations": []},
  670. ],
  671. self.project.approver.username: [
  672. {**self.data1, "entities": [], "relations": []},
  673. {**self.data2, "entities": [], "relations": []},
  674. ],
  675. }
  676. for username, dataset in expected_datasets.items():
  677. self.assertEqual(datasets[username], dataset)
  678. def test_unconfirmed_and_collaborative(self):
  679. self.prepare_data(collaborative=True)
  680. dataset = self.export_dataset()
  681. expected_dataset = [
  682. {
  683. **self.data1,
  684. "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
  685. "relations": [self.relation.to_dict()],
  686. },
  687. {**self.data2, "entities": [], "relations": []},
  688. ]
  689. self.assertEqual(dataset, expected_dataset)
  690. def test_confirmed_and_non_collaborative(self):
  691. self.prepare_data()
  692. datasets = self.export_dataset(confirmed_only=True)
  693. expected_datasets = {
  694. self.project.admin.username: [
  695. {
  696. **self.data1,
  697. "entities": [self.span1.to_dict(), self.span2.to_dict()],
  698. "relations": [self.relation.to_dict()],
  699. },
  700. ],
  701. self.project.annotator.username: [],
  702. self.project.approver.username: [],
  703. }
  704. for username, dataset in datasets.items():
  705. self.assertEqual(dataset, expected_datasets[username])
  706. def test_confirmed_and_collaborative(self):
  707. self.prepare_data(collaborative=True)
  708. dataset = self.export_dataset(confirmed_only=True)
  709. expected_dataset = [
  710. {
  711. **self.data1,
  712. "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
  713. "relations": [self.relation.to_dict()],
  714. }
  715. ]
  716. self.assertEqual(dataset, expected_dataset)