You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

826 lines
37 KiB

  1. import os
  2. import zipfile
  3. import pandas as pd
  4. from django.test import TestCase, override_settings
  5. from model_mommy import mommy
  6. from ..celery_tasks import export_dataset
  7. from data_export.models import DATA
  8. from projects.models import ProjectType
  9. from projects.tests.utils import prepare_project
  10. def read_zip_content(file):
  11. datasets = {}
  12. with zipfile.ZipFile(file) as z:
  13. for file in z.filelist:
  14. username = file.filename.split(".")[0]
  15. with z.open(file) as f:
  16. try:
  17. df = pd.read_json(f, lines=True)
  18. except pd.errors.EmptyDataError:
  19. continue
  20. datasets[username] = df.to_dict(orient="records")
  21. return datasets
  22. @override_settings(MEDIA_URL=os.path.dirname(__file__))
  23. class TestExport(TestCase):
  24. def export_dataset(self, confirmed_only=False):
  25. file = export_dataset(self.project.id, "JSONL", confirmed_only)
  26. if self.project.item.collaborative_annotation:
  27. dataset = pd.read_json(file, lines=True).to_dict(orient="records")
  28. else:
  29. dataset = read_zip_content(file)
  30. os.remove(file)
  31. return dataset
  32. def data_to_text(self, example):
  33. d = example.to_dict()
  34. d["text"] = d.pop(DATA)
  35. return d
  36. def data_to_filename(self, example):
  37. d = example.to_dict(is_text_project=False)
  38. d["filename"] = d.pop(DATA)
  39. return d
  40. class TestExportCategory(TestExport):
  41. def prepare_data(self, collaborative=False):
  42. self.project = prepare_project(ProjectType.DOCUMENT_CLASSIFICATION, collaborative_annotation=collaborative)
  43. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="example1")
  44. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="example2")
  45. self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
  46. self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
  47. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  48. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  49. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  50. self.data1 = self.data_to_text(self.example1)
  51. self.data2 = self.data_to_text(self.example2)
  52. def test_unconfirmed_and_non_collaborative(self):
  53. self.prepare_data()
  54. datasets = self.export_dataset()
  55. expected_datasets = {
  56. self.project.admin.username: [
  57. {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]},
  58. {**self.data2, "label": [], "Comments": []},
  59. ],
  60. self.project.approver.username: [
  61. {**self.data1, "label": [], "Comments": []},
  62. {**self.data2, "label": [], "Comments": []},
  63. ],
  64. self.project.annotator.username: [
  65. {**self.data1, "label": [self.category2.to_string()], "Comments": [self.comment2.to_string()]},
  66. {**self.data2, "label": [], "Comments": []},
  67. ],
  68. }
  69. for username, dataset in expected_datasets.items():
  70. self.assertEqual(datasets[username], dataset)
  71. def test_unconfirmed_and_collaborative(self):
  72. self.prepare_data(collaborative=True)
  73. dataset = self.export_dataset()
  74. expected_dataset = [
  75. {
  76. **self.data1,
  77. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  78. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  79. },
  80. {**self.data2, "label": [], "Comments": []},
  81. ]
  82. self.assertEqual(dataset, expected_dataset)
  83. def test_confirmed_and_non_collaborative(self):
  84. self.prepare_data()
  85. datasets = self.export_dataset(confirmed_only=True)
  86. expected_datasets = {
  87. self.project.admin.username: [
  88. {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]}
  89. ]
  90. }
  91. for username, dataset in expected_datasets.items():
  92. self.assertEqual(datasets[username], dataset)
  93. def test_confirmed_and_collaborative(self):
  94. self.prepare_data(collaborative=True)
  95. dataset = self.export_dataset(confirmed_only=True)
  96. expected_dataset = [
  97. {
  98. **self.data1,
  99. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  100. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  101. }
  102. ]
  103. self.assertEqual(dataset, expected_dataset)
  104. class TestExportSeq2seq(TestExport):
  105. def prepare_data(self, collaborative=False):
  106. self.project = prepare_project(ProjectType.SEQ2SEQ, collaborative_annotation=collaborative)
  107. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  108. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  109. self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
  110. self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
  111. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  112. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  113. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  114. self.data1 = self.data_to_text(self.example1)
  115. self.data2 = self.data_to_text(self.example2)
  116. def test_unconfirmed_and_non_collaborative(self):
  117. self.prepare_data()
  118. datasets = self.export_dataset()
  119. expected_datasets = {
  120. self.project.admin.username: [
  121. {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
  122. {**self.data2, "label": [], "Comments": []},
  123. ],
  124. self.project.approver.username: [
  125. {**self.data1, "label": [], "Comments": []},
  126. {**self.data2, "label": [], "Comments": []},
  127. ],
  128. self.project.annotator.username: [
  129. {**self.data1, "label": [self.text2.text], "Comments": [self.comment2.to_string()]},
  130. {**self.data2, "label": [], "Comments": []},
  131. ],
  132. }
  133. for username, dataset in expected_datasets.items():
  134. self.assertEqual(datasets[username], dataset)
  135. def test_unconfirmed_and_collaborative(self):
  136. self.prepare_data(collaborative=True)
  137. dataset = self.export_dataset()
  138. expected_dataset = [
  139. {
  140. **self.data1,
  141. "label": sorted([self.text1.text, self.text2.text]),
  142. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  143. },
  144. {**self.data2, "label": [], "Comments": []},
  145. ]
  146. self.assertEqual(dataset, expected_dataset)
  147. def test_confirmed_and_non_collaborative(self):
  148. self.prepare_data()
  149. datasets = self.export_dataset(confirmed_only=True)
  150. expected_datasets = {
  151. self.project.admin.username: [
  152. {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
  153. ],
  154. self.project.approver.username: [],
  155. self.project.annotator.username: [],
  156. }
  157. for username, dataset in datasets.items():
  158. self.assertEqual(dataset, expected_datasets[username])
  159. def test_confirmed_and_collaborative(self):
  160. self.prepare_data(collaborative=True)
  161. dataset = self.export_dataset(confirmed_only=True)
  162. expected_dataset = [
  163. {
  164. **self.data1,
  165. "label": sorted([self.text1.text, self.text2.text]),
  166. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  167. }
  168. ]
  169. self.assertEqual(dataset, expected_dataset)
  170. class TestExportIntentDetectionAndSlotFilling(TestExport):
  171. def prepare_data(self, collaborative=False):
  172. self.project = prepare_project(
  173. ProjectType.INTENT_DETECTION_AND_SLOT_FILLING, collaborative_annotation=collaborative
  174. )
  175. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  176. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  177. self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
  178. self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
  179. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  180. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  181. self.span = mommy.make(
  182. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
  183. )
  184. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  185. self.data1 = self.data_to_text(self.example1)
  186. self.data2 = self.data_to_text(self.example2)
  187. def test_unconfirmed_and_non_collaborative(self):
  188. self.prepare_data()
  189. datasets = self.export_dataset()
  190. expected_datasets = {
  191. self.project.admin.username: [
  192. {
  193. **self.data1,
  194. "entities": [list(self.span.to_tuple())],
  195. "cats": [self.category1.to_string()],
  196. "Comments": [self.comment1.to_string()],
  197. },
  198. {**self.data2, "entities": [], "cats": [], "Comments": []},
  199. ],
  200. self.project.annotator.username: [
  201. {
  202. **self.data1,
  203. "entities": [],
  204. "cats": [self.category2.to_string()],
  205. "Comments": [self.comment2.to_string()],
  206. },
  207. {**self.data2, "entities": [], "cats": [], "Comments": []},
  208. ],
  209. self.project.approver.username: [
  210. {**self.data1, "entities": [], "cats": [], "Comments": []},
  211. {**self.data2, "entities": [], "cats": [], "Comments": []},
  212. ],
  213. }
  214. for username, dataset in expected_datasets.items():
  215. self.assertEqual(dataset, datasets[username])
  216. def test_unconfirmed_and_collaborative(self):
  217. self.prepare_data(collaborative=True)
  218. dataset = self.export_dataset()
  219. expected_dataset = [
  220. {
  221. **self.data1,
  222. "entities": [list(self.span.to_tuple())],
  223. "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
  224. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  225. },
  226. {**self.data2, "entities": [], "cats": [], "Comments": []},
  227. ]
  228. self.assertEqual(dataset, expected_dataset)
  229. def test_confirmed_and_non_collaborative(self):
  230. self.prepare_data()
  231. datasets = self.export_dataset(confirmed_only=True)
  232. expected_datasets = {
  233. self.project.admin.username: [
  234. {
  235. **self.data1,
  236. "entities": [list(self.span.to_tuple())],
  237. "cats": [self.category1.to_string()],
  238. "Comments": [self.comment1.to_string()],
  239. },
  240. ],
  241. self.project.annotator.username: [],
  242. self.project.approver.username: [],
  243. }
  244. for username, dataset in expected_datasets.items():
  245. self.assertEqual(dataset, datasets[username])
  246. def test_confirmed_and_collaborative(self):
  247. self.prepare_data(collaborative=True)
  248. dataset = self.export_dataset(confirmed_only=True)
  249. expected_dataset = [
  250. {
  251. **self.data1,
  252. "entities": [list(self.span.to_tuple())],
  253. "cats": sorted([self.category1.to_string(), self.category2.to_string()]),
  254. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  255. },
  256. ]
  257. self.assertEqual(dataset, expected_dataset)
  258. class TestExportSequenceLabeling(TestExport):
  259. def prepare_data(self, collaborative=False):
  260. self.project = prepare_project(ProjectType.SEQUENCE_LABELING, collaborative_annotation=collaborative)
  261. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  262. self.span1 = mommy.make(
  263. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
  264. )
  265. self.span2 = mommy.make(
  266. "ExportedSpan", example=self.example1, user=self.project.annotator, start_offset=1, end_offset=2
  267. )
  268. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  269. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  270. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  271. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  272. self.data1 = self.data_to_text(self.example1)
  273. self.data2 = self.data_to_text(self.example2)
  274. def test_unconfirmed_and_non_collaborative(self):
  275. self.prepare_data()
  276. datasets = self.export_dataset()
  277. expected_datasets = {
  278. self.project.admin.username: [
  279. {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": [self.comment1.to_string()]},
  280. {**self.data2, "label": [], "Comments": []},
  281. ],
  282. self.project.annotator.username: [
  283. {**self.data1, "label": [list(self.span2.to_tuple())], "Comments": [self.comment2.to_string()]},
  284. {**self.data2, "label": [], "Comments": []},
  285. ],
  286. self.project.approver.username: [
  287. {**self.data1, "label": [], "Comments": []},
  288. {**self.data2, "label": [], "Comments": []},
  289. ],
  290. }
  291. for username, dataset in expected_datasets.items():
  292. self.assertEqual(dataset, datasets[username])
  293. def test_unconfirmed_and_collaborative(self):
  294. self.prepare_data(collaborative=True)
  295. dataset = self.export_dataset()
  296. expected_dataset = [
  297. {
  298. **self.data1,
  299. "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
  300. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  301. },
  302. {**self.data2, "label": [], "Comments": []},
  303. ]
  304. self.assertEqual(dataset, expected_dataset)
  305. def test_confirmed_and_non_collaborative(self):
  306. self.prepare_data()
  307. datasets = self.export_dataset(confirmed_only=True)
  308. expected_datasets = {
  309. self.project.admin.username: [
  310. {**self.data1, "label": [list(self.span1.to_tuple())], "Comments": [self.comment1.to_string()]},
  311. ],
  312. self.project.annotator.username: [],
  313. self.project.approver.username: [],
  314. }
  315. for username, dataset in expected_datasets.items():
  316. self.assertEqual(dataset, datasets[username])
  317. def test_confirmed_and_collaborative(self):
  318. self.prepare_data(collaborative=True)
  319. dataset = self.export_dataset(confirmed_only=True)
  320. expected_dataset = [
  321. {
  322. **self.data1,
  323. "label": [list(self.span1.to_tuple()), list(self.span2.to_tuple())],
  324. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  325. },
  326. ]
  327. self.assertEqual(dataset, expected_dataset)
  328. class TestExportSpeechToText(TestExport):
  329. def prepare_data(self, collaborative=False):
  330. self.project = prepare_project(ProjectType.SPEECH2TEXT, collaborative_annotation=collaborative)
  331. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  332. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  333. self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
  334. self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
  335. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  336. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  337. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  338. self.data1 = self.data_to_filename(self.example1)
  339. self.data2 = self.data_to_filename(self.example2)
  340. def test_unconfirmed_and_non_collaborative(self):
  341. self.prepare_data()
  342. datasets = self.export_dataset()
  343. expected_datasets = {
  344. self.project.admin.username: [
  345. {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
  346. {**self.data2, "label": [], "Comments": []},
  347. ],
  348. self.project.approver.username: [
  349. {**self.data1, "label": [], "Comments": []},
  350. {**self.data2, "label": [], "Comments": []},
  351. ],
  352. self.project.annotator.username: [
  353. {**self.data1, "label": [self.text2.text], "Comments": [self.comment2.to_string()]},
  354. {**self.data2, "label": [], "Comments": []},
  355. ],
  356. }
  357. for username, dataset in expected_datasets.items():
  358. self.assertEqual(datasets[username], dataset)
  359. def test_unconfirmed_and_collaborative(self):
  360. self.prepare_data(collaborative=True)
  361. dataset = self.export_dataset()
  362. expected_dataset = [
  363. {
  364. **self.data1,
  365. "label": sorted([self.text1.text, self.text2.text]),
  366. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  367. },
  368. {**self.data2, "label": [], "Comments": []},
  369. ]
  370. self.assertEqual(dataset, expected_dataset)
  371. def test_confirmed_and_non_collaborative(self):
  372. self.prepare_data()
  373. datasets = self.export_dataset(confirmed_only=True)
  374. expected_datasets = {
  375. self.project.admin.username: [
  376. {**self.data1, "label": [self.text1.text], "Comments": [self.comment1.to_string()]},
  377. ],
  378. self.project.annotator.username: [],
  379. self.project.approver.username: [],
  380. }
  381. for username, dataset in datasets.items():
  382. self.assertEqual(dataset, expected_datasets[username])
  383. def test_confirmed_and_collaborative(self):
  384. self.prepare_data(collaborative=True)
  385. dataset = self.export_dataset(confirmed_only=True)
  386. expected_dataset = [
  387. {
  388. **self.data1,
  389. "label": sorted([self.text1.text, self.text2.text]),
  390. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  391. }
  392. ]
  393. self.assertEqual(dataset, expected_dataset)
  394. class TestExportImageClassification(TestExport):
  395. def prepare_data(self, collaborative=False):
  396. self.project = prepare_project(ProjectType.IMAGE_CLASSIFICATION, collaborative_annotation=collaborative)
  397. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  398. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  399. self.category1 = mommy.make("ExportedCategory", example=self.example1, user=self.project.admin)
  400. self.category2 = mommy.make("ExportedCategory", example=self.example1, user=self.project.annotator)
  401. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  402. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  403. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  404. self.data1 = self.data_to_filename(self.example1)
  405. self.data2 = self.data_to_filename(self.example2)
  406. def test_unconfirmed_and_non_collaborative(self):
  407. self.prepare_data()
  408. datasets = self.export_dataset()
  409. expected_datasets = {
  410. self.project.admin.username: [
  411. {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]},
  412. {**self.data2, "label": [], "Comments": []},
  413. ],
  414. self.project.approver.username: [
  415. {**self.data1, "label": [], "Comments": []},
  416. {**self.data2, "label": [], "Comments": []},
  417. ],
  418. self.project.annotator.username: [
  419. {**self.data1, "label": [self.category2.to_string()], "Comments": [self.comment2.to_string()]},
  420. {**self.data2, "label": [], "Comments": []},
  421. ],
  422. }
  423. for username, dataset in expected_datasets.items():
  424. self.assertEqual(datasets[username], dataset)
  425. def test_unconfirmed_and_collaborative(self):
  426. self.prepare_data(collaborative=True)
  427. dataset = self.export_dataset()
  428. expected_dataset = [
  429. {
  430. **self.data1,
  431. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  432. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  433. },
  434. {**self.data2, "label": [], "Comments": []},
  435. ]
  436. self.assertEqual(dataset, expected_dataset)
  437. def test_confirmed_and_non_collaborative(self):
  438. self.prepare_data()
  439. datasets = self.export_dataset(confirmed_only=True)
  440. expected_datasets = {
  441. self.project.admin.username: [
  442. {**self.data1, "label": [self.category1.to_string()], "Comments": [self.comment1.to_string()]}
  443. ]
  444. }
  445. for username, dataset in expected_datasets.items():
  446. self.assertEqual(datasets[username], dataset)
  447. def test_confirmed_and_collaborative(self):
  448. self.prepare_data(collaborative=True)
  449. dataset = self.export_dataset(confirmed_only=True)
  450. expected_dataset = [
  451. {
  452. **self.data1,
  453. "label": sorted([self.category1.to_string(), self.category2.to_string()]),
  454. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  455. }
  456. ]
  457. self.assertEqual(dataset, expected_dataset)
  458. class TestExportBoundingBox(TestExport):
  459. def prepare_data(self, collaborative=False):
  460. self.project = prepare_project(ProjectType.BOUNDING_BOX, collaborative_annotation=collaborative)
  461. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  462. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  463. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  464. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  465. self.bbox1 = mommy.make(
  466. "ExportedBoundingBox", example=self.example1, user=self.project.admin, x=0, y=0, width=10, height=10
  467. )
  468. self.bbox2 = mommy.make(
  469. "ExportedBoundingBox", example=self.example1, user=self.project.annotator, x=10, y=10, width=20, height=20
  470. )
  471. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  472. self.data1 = self.data_to_filename(self.example1)
  473. self.data2 = self.data_to_filename(self.example2)
  474. def test_unconfirmed_and_non_collaborative(self):
  475. self.prepare_data()
  476. datasets = self.export_dataset()
  477. expected_datasets = {
  478. self.project.admin.username: [
  479. {
  480. **self.data1,
  481. "bbox": [self.bbox1.to_dict()],
  482. "Comments": [self.comment1.to_dict()],
  483. },
  484. {**self.data2, "bbox": [], "Comments": []},
  485. ],
  486. self.project.approver.username: [
  487. {**self.data1, "bbox": [], "Comments": []},
  488. {**self.data2, "bbox": [], "Comments": []},
  489. ],
  490. self.project.annotator.username: [
  491. {**self.data1, "bbox": [self.bbox2.to_dict()], "Comments": [self.comment2.to_dict()]},
  492. {**self.data2, "bbox": [], "Comments": []},
  493. ],
  494. }
  495. for username, dataset in expected_datasets.items():
  496. self.assertEqual(datasets[username], dataset)
  497. def test_unconfirmed_and_collaborative(self):
  498. self.prepare_data(collaborative=True)
  499. dataset = self.export_dataset()
  500. expected_dataset = [
  501. {
  502. **self.data1,
  503. "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
  504. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  505. },
  506. {**self.data2, "bbox": [], "Comments": []},
  507. ]
  508. self.assertEqual(dataset, expected_dataset)
  509. def test_confirmed_and_non_collaborative(self):
  510. self.prepare_data()
  511. datasets = self.export_dataset(confirmed_only=True)
  512. expected_datasets = {
  513. self.project.admin.username: [
  514. {**self.data1, "bbox": [self.bbox1.to_dict()], "Comments": [self.comment1.to_dict()]}
  515. ]
  516. }
  517. for username, dataset in expected_datasets.items():
  518. self.assertEqual(datasets[username], dataset)
  519. def test_confirmed_and_collaborative(self):
  520. self.prepare_data(collaborative=True)
  521. dataset = self.export_dataset(confirmed_only=True)
  522. expected_dataset = [
  523. {
  524. **self.data1,
  525. "bbox": [self.bbox1.to_dict(), self.bbox2.to_dict()],
  526. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  527. }
  528. ]
  529. self.assertEqual(dataset, expected_dataset)
  530. class TestExportSegmentation(TestExport):
  531. def prepare_data(self, collaborative=False):
  532. self.project = prepare_project(ProjectType.SEGMENTATION, collaborative_annotation=collaborative)
  533. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  534. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  535. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  536. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  537. self.seg1 = mommy.make("ExportedSegmentation", example=self.example1, user=self.project.admin, points=[0, 1])
  538. self.seg2 = mommy.make(
  539. "ExportedSegmentation", example=self.example1, user=self.project.annotator, points=[1, 2]
  540. )
  541. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  542. self.data1 = self.data_to_filename(self.example1)
  543. self.data2 = self.data_to_filename(self.example2)
  544. self.column = "segmentation"
  545. def test_unconfirmed_and_non_collaborative(self):
  546. self.prepare_data()
  547. datasets = self.export_dataset()
  548. expected_datasets = {
  549. self.project.admin.username: [
  550. {**self.data1, self.column: [self.seg1.to_dict()], "Comments": [self.comment1.to_dict()]},
  551. {**self.data2, self.column: [], "Comments": []},
  552. ],
  553. self.project.approver.username: [
  554. {**self.data1, self.column: [], "Comments": []},
  555. {**self.data2, self.column: [], "Comments": []},
  556. ],
  557. self.project.annotator.username: [
  558. {**self.data1, self.column: [self.seg2.to_dict()], "Comments": [self.comment2.to_dict()]},
  559. {**self.data2, self.column: [], "Comments": []},
  560. ],
  561. }
  562. for username, dataset in expected_datasets.items():
  563. self.assertEqual(datasets[username], dataset)
  564. def test_unconfirmed_and_collaborative(self):
  565. self.prepare_data(collaborative=True)
  566. dataset = self.export_dataset()
  567. expected_dataset = [
  568. {
  569. **self.data1,
  570. self.column: [self.seg1.to_dict(), self.seg2.to_dict()],
  571. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  572. },
  573. {**self.data2, self.column: [], "Comments": []},
  574. ]
  575. self.assertEqual(dataset, expected_dataset)
  576. def test_confirmed_and_non_collaborative(self):
  577. self.prepare_data()
  578. datasets = self.export_dataset(confirmed_only=True)
  579. expected_datasets = {
  580. self.project.admin.username: [
  581. {**self.data1, self.column: [self.seg1.to_dict()], "Comments": [self.comment1.to_dict()]}
  582. ]
  583. }
  584. for username, dataset in expected_datasets.items():
  585. self.assertEqual(datasets[username], dataset)
  586. def test_confirmed_and_collaborative(self):
  587. self.prepare_data(collaborative=True)
  588. dataset = self.export_dataset(confirmed_only=True)
  589. expected_dataset = [
  590. {
  591. **self.data1,
  592. self.column: [self.seg1.to_dict(), self.seg2.to_dict()],
  593. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  594. }
  595. ]
  596. self.assertEqual(dataset, expected_dataset)
  597. class TestExportImageCaptioning(TestExport):
  598. def prepare_data(self, collaborative=False):
  599. self.project = prepare_project(ProjectType.IMAGE_CAPTIONING, collaborative_annotation=collaborative)
  600. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="confirmed")
  601. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  602. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  603. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  604. self.text1 = mommy.make("TextLabel", example=self.example1, user=self.project.admin)
  605. self.text2 = mommy.make("TextLabel", example=self.example1, user=self.project.annotator)
  606. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  607. self.data1 = self.data_to_filename(self.example1)
  608. self.data2 = self.data_to_filename(self.example2)
  609. self.column = "label"
  610. def test_unconfirmed_and_non_collaborative(self):
  611. self.prepare_data()
  612. datasets = self.export_dataset()
  613. expected_datasets = {
  614. self.project.admin.username: [
  615. {**self.data1, self.column: [self.text1.text], "Comments": [self.comment1.to_string()]},
  616. {**self.data2, self.column: [], "Comments": []},
  617. ],
  618. self.project.approver.username: [
  619. {**self.data1, self.column: [], "Comments": []},
  620. {**self.data2, self.column: [], "Comments": []},
  621. ],
  622. self.project.annotator.username: [
  623. {**self.data1, self.column: [self.text2.text], "Comments": [self.comment2.to_string()]},
  624. {**self.data2, self.column: [], "Comments": []},
  625. ],
  626. }
  627. for username, dataset in expected_datasets.items():
  628. self.assertEqual(datasets[username], dataset)
  629. def test_unconfirmed_and_collaborative(self):
  630. self.prepare_data(collaborative=True)
  631. dataset = self.export_dataset()
  632. expected_dataset = [
  633. {
  634. **self.data1,
  635. self.column: sorted([self.text1.text, self.text2.text]),
  636. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  637. },
  638. {**self.data2, self.column: [], "Comments": []},
  639. ]
  640. self.assertEqual(dataset, expected_dataset)
  641. def test_confirmed_and_non_collaborative(self):
  642. self.prepare_data()
  643. datasets = self.export_dataset(confirmed_only=True)
  644. expected_datasets = {
  645. self.project.admin.username: [
  646. {**self.data1, self.column: [self.text1.text], "Comments": [self.comment1.to_string()]},
  647. ],
  648. self.project.approver.username: [],
  649. self.project.annotator.username: [],
  650. }
  651. for username, dataset in datasets.items():
  652. self.assertEqual(dataset, expected_datasets[username])
  653. def test_confirmed_and_collaborative(self):
  654. self.prepare_data(collaborative=True)
  655. dataset = self.export_dataset(confirmed_only=True)
  656. expected_dataset = [
  657. {
  658. **self.data1,
  659. self.column: sorted([self.text1.text, self.text2.text]),
  660. "Comments": sorted([self.comment1.to_string(), self.comment2.to_string()]),
  661. }
  662. ]
  663. self.assertEqual(dataset, expected_dataset)
  664. class TestExportRelation(TestExport):
  665. def prepare_data(self, collaborative=False):
  666. self.project = prepare_project(
  667. ProjectType.SEQUENCE_LABELING, use_relation=True, collaborative_annotation=collaborative
  668. )
  669. self.example1 = mommy.make("ExportedExample", project=self.project.item, text="example")
  670. self.example2 = mommy.make("ExportedExample", project=self.project.item, text="unconfirmed")
  671. self.span1 = mommy.make(
  672. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=0, end_offset=1
  673. )
  674. self.span2 = mommy.make(
  675. "ExportedSpan", example=self.example1, user=self.project.admin, start_offset=1, end_offset=2
  676. )
  677. self.span3 = mommy.make(
  678. "ExportedSpan", example=self.example1, user=self.project.annotator, start_offset=2, end_offset=3
  679. )
  680. self.relation = mommy.make(
  681. "ExportedRelation", from_id=self.span1, to_id=self.span2, example=self.example1, user=self.project.admin
  682. )
  683. self.comment1 = mommy.make("ExportedComment", example=self.example1, user=self.project.admin)
  684. self.comment2 = mommy.make("ExportedComment", example=self.example1, user=self.project.annotator)
  685. mommy.make("ExampleState", example=self.example1, confirmed_by=self.project.admin)
  686. self.data1 = self.data_to_text(self.example1)
  687. self.data2 = self.data_to_text(self.example2)
  688. def test_unconfirmed_and_non_collaborative(self):
  689. self.prepare_data()
  690. datasets = self.export_dataset()
  691. expected_datasets = {
  692. self.project.admin.username: [
  693. {
  694. **self.data1,
  695. "entities": [self.span1.to_dict(), self.span2.to_dict()],
  696. "relations": [self.relation.to_dict()],
  697. "Comments": [self.comment1.to_dict()],
  698. },
  699. {**self.data2, "entities": [], "relations": [], "Comments": []},
  700. ],
  701. self.project.annotator.username: [
  702. {
  703. **self.data1,
  704. "entities": [self.span3.to_dict()],
  705. "relations": [],
  706. "Comments": [self.comment2.to_dict()],
  707. },
  708. {**self.data2, "entities": [], "relations": [], "Comments": []},
  709. ],
  710. self.project.approver.username: [
  711. {**self.data1, "entities": [], "relations": [], "Comments": []},
  712. {**self.data2, "entities": [], "relations": [], "Comments": []},
  713. ],
  714. }
  715. for username, dataset in expected_datasets.items():
  716. self.assertEqual(datasets[username], dataset)
  717. def test_unconfirmed_and_collaborative(self):
  718. self.prepare_data(collaborative=True)
  719. dataset = self.export_dataset()
  720. expected_dataset = [
  721. {
  722. **self.data1,
  723. "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
  724. "relations": [self.relation.to_dict()],
  725. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  726. },
  727. {**self.data2, "entities": [], "relations": [], "Comments": []},
  728. ]
  729. self.assertEqual(dataset, expected_dataset)
  730. def test_confirmed_and_non_collaborative(self):
  731. self.prepare_data()
  732. datasets = self.export_dataset(confirmed_only=True)
  733. expected_datasets = {
  734. self.project.admin.username: [
  735. {
  736. **self.data1,
  737. "entities": [self.span1.to_dict(), self.span2.to_dict()],
  738. "relations": [self.relation.to_dict()],
  739. "Comments": [self.comment1.to_dict()],
  740. },
  741. ],
  742. self.project.annotator.username: [],
  743. self.project.approver.username: [],
  744. }
  745. for username, dataset in datasets.items():
  746. self.assertEqual(dataset, expected_datasets[username])
  747. def test_confirmed_and_collaborative(self):
  748. self.prepare_data(collaborative=True)
  749. dataset = self.export_dataset(confirmed_only=True)
  750. expected_dataset = [
  751. {
  752. **self.data1,
  753. "entities": [self.span1.to_dict(), self.span2.to_dict(), self.span3.to_dict()],
  754. "relations": [self.relation.to_dict()],
  755. "Comments": [self.comment1.to_dict(), self.comment2.to_dict()],
  756. }
  757. ]
  758. self.assertEqual(dataset, expected_dataset)