From 541af14d1319176beafd11df60b4246967bba166 Mon Sep 17 00:00:00 2001 From: Hironsan Date: Wed, 6 Mar 2019 17:34:46 +0900 Subject: [PATCH] Add test case(jsonl without label) to file handler --- app/server/api.py | 12 ++-- ...ssification.jsonl => classification.jsonl} | 0 .../data/{example.valid.2.csv => example.csv} | 2 +- app/server/tests/data/example.jsonl | 21 +------ app/server/tests/data/example.labeling.jsonl | 3 - app/server/tests/data/example.valid.1.csv | 4 -- .../{example.valid.conll => labeling.conll} | 0 ...e.invalid.conll => labeling.invalid.conll} | 0 app/server/tests/data/labeling.jsonl | 3 + .../{example.seq2seq.jsonl => seq2seq.jsonl} | 0 app/server/tests/test_api.py | 62 ++++++++++++------- 11 files changed, 53 insertions(+), 54 deletions(-) rename app/server/tests/data/{example.classification.jsonl => classification.jsonl} (100%) rename app/server/tests/data/{example.valid.2.csv => example.csv} (52%) delete mode 100644 app/server/tests/data/example.labeling.jsonl delete mode 100644 app/server/tests/data/example.valid.1.csv rename app/server/tests/data/{example.valid.conll => labeling.conll} (100%) rename app/server/tests/data/{example.invalid.conll => labeling.invalid.conll} (100%) create mode 100644 app/server/tests/data/labeling.jsonl rename app/server/tests/data/{example.seq2seq.jsonl => seq2seq.jsonl} (100%) diff --git a/app/server/api.py b/app/server/api.py index 2c412d59..631fec9c 100644 --- a/app/server/api.py +++ b/app/server/api.py @@ -2,7 +2,6 @@ import csv import io import json from collections import Counter -from itertools import chain from django.db import transaction from django.http import HttpResponse @@ -432,8 +431,7 @@ class JsonHandler(FileHandler): def parse(self, file): for i, line in enumerate(file, start=1): try: - j = json.loads(line) - yield j + yield json.loads(line) except json.decoder.JSONDecodeError: raise FileParseException(line_num=i, line=line) @@ -464,7 +462,7 @@ class JsonClassificationHandler(JsonHandler): def handle_uploaded_file(self, file, user): for data in self.parse(file): doc = self.save_doc(data) - for label in data['labels']: + for label in data.get('labels', []): label = self.save_label({'text': label}) self.save_annotation({'label': label.id}, doc, user) @@ -474,7 +472,7 @@ class JsonLabelingHandler(JsonHandler): The format is as follows: ``` - {"text": "Python is awesome!", "entities": [[0, 6, "Product"],]} + {"text": "Python is awesome!", "labels": [[0, 6, "Product"],]} ... ``` """ @@ -484,7 +482,7 @@ class JsonLabelingHandler(JsonHandler): def handle_uploaded_file(self, file, user): for data in self.parse(file): doc = self.save_doc(data) - for start_offset, end_offset, label in data['entities']: + for start_offset, end_offset, label in data.get('labels', []): label = self.save_label({'text': label}) data = {'label': label.id, 'start_offset': start_offset, @@ -507,5 +505,5 @@ class JsonSeq2seqHandler(JsonHandler): def handle_uploaded_file(self, file, user): for data in self.parse(file): doc = self.save_doc(data) - for label in data['labels']: + for label in data.get('labels', []): self.save_annotation({'text': label}, doc, user) diff --git a/app/server/tests/data/example.classification.jsonl b/app/server/tests/data/classification.jsonl similarity index 100% rename from app/server/tests/data/example.classification.jsonl rename to app/server/tests/data/classification.jsonl diff --git a/app/server/tests/data/example.valid.2.csv b/app/server/tests/data/example.csv similarity index 52% rename from app/server/tests/data/example.valid.2.csv rename to app/server/tests/data/example.csv index 206730c3..ac64a3e3 100644 --- a/app/server/tests/data/example.valid.2.csv +++ b/app/server/tests/data/example.csv @@ -1,4 +1,4 @@ -text, label +text,label AAA,Positive BBB,Positive CCC,Negative \ No newline at end of file diff --git a/app/server/tests/data/example.jsonl b/app/server/tests/data/example.jsonl index c386876b..9bd22c86 100644 --- a/app/server/tests/data/example.jsonl +++ b/app/server/tests/data/example.jsonl @@ -1,18 +1,3 @@ -{"id": 5, "label": "2", "text": "\u30a2\u30f3\u30d1\u30b5\u30f3\u30c9", "manual": true, "prob": 0.5, "manual": true, "prob": 0.5} -{"id": 10, "label": "2", "text": "\u8a00\u8a9e", "manual": true, "prob": 0.5} -{"id": 11, "label": "1", "text": "\u65e5\u672c\u8a9e", "manual": true, "prob": 0.5} -{"id": 12, "label": "1", "text": "\u5730\u7406\u5b66", "manual": true, "prob": 0.5} -{"id": 23, "label": "2", "text": "\u56fd\u306e\u4e00\u89a7", "manual": false, "prob": 0.5} -{"id": 31, "label": "2", "text": "\u30d1\u30ea", "manual": false, "prob": 0.5} -{"id": 32, "label": "3", "text": "\u30e8\u30fc\u30ed\u30c3\u30d1", "manual": false, "prob": 0.5} -{"id": 42, "label": "2", "text": "\u751f\u7269", "manual": true, "prob": 0.5} -{"id": 43, "label": "3", "text": "\u30b3\u30b1\u690d\u7269", "manual": false, "prob": 0.5} -{"id": 47, "label": "3", "text": "\u793e\u4f1a\u5b66", "manual": true, "prob": 0.5} -{"id": 111, "label": "1", "text": "\u65e5\u672c\u8a9e", "manual": true, "prob": 0.5} -{"id": 112, "label": "1", "text": "\u5730\u7406\u5b66", "manual": true, "prob": 0.5} -{"id": 123, "label": "2", "text": "\u56fd\u306e\u4e00\u89a7", "manual": false, "prob": 0.5} -{"id": 131, "label": "2", "text": "\u30d1\u30ea", "manual": false, "prob": 0.5} -{"id": 132, "label": "3", "text": "\u30e8\u30fc\u30ed\u30c3\u30d1", "manual": true, "prob": 0.5} -{"id": 142, "label": "2", "text": "\u751f\u7269", "manual": true, "prob": 0.5} -{"id": 143, "label": "1", "text": "\u30b3\u30b1\u690d\u7269", "manual": false, "prob": 0.5} -{"id": 147, "label": "3", "text": "\u793e\u4f1a\u5b66", "manual": true, "prob": 0.5} \ No newline at end of file +{"text": "example"} +{"text": "example"} +{"text": "example"} diff --git a/app/server/tests/data/example.labeling.jsonl b/app/server/tests/data/example.labeling.jsonl deleted file mode 100644 index 6591c7bf..00000000 --- a/app/server/tests/data/example.labeling.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"text": "example", "entities": [[0, 1, "LOC"], [0, 2, "ORG"]]} -{"text": "example", "entities": [[0, 1, "LOC"]]} -{"text": "example", "entities": [[0, 1, "PER"]]} diff --git a/app/server/tests/data/example.valid.1.csv b/app/server/tests/data/example.valid.1.csv deleted file mode 100644 index 9b34b36b..00000000 --- a/app/server/tests/data/example.valid.1.csv +++ /dev/null @@ -1,4 +0,0 @@ -text -AAA -BBB -CCC \ No newline at end of file diff --git a/app/server/tests/data/example.valid.conll b/app/server/tests/data/labeling.conll similarity index 100% rename from app/server/tests/data/example.valid.conll rename to app/server/tests/data/labeling.conll diff --git a/app/server/tests/data/example.invalid.conll b/app/server/tests/data/labeling.invalid.conll similarity index 100% rename from app/server/tests/data/example.invalid.conll rename to app/server/tests/data/labeling.invalid.conll diff --git a/app/server/tests/data/labeling.jsonl b/app/server/tests/data/labeling.jsonl new file mode 100644 index 00000000..513ac848 --- /dev/null +++ b/app/server/tests/data/labeling.jsonl @@ -0,0 +1,3 @@ +{"text": "example", "labels": [[0, 1, "LOC"], [0, 2, "ORG"]]} +{"text": "example", "labels": [[0, 1, "LOC"]]} +{"text": "example", "labels": [[0, 1, "PER"]]} diff --git a/app/server/tests/data/example.seq2seq.jsonl b/app/server/tests/data/seq2seq.jsonl similarity index 100% rename from app/server/tests/data/example.seq2seq.jsonl rename to app/server/tests/data/seq2seq.jsonl diff --git a/app/server/tests/test_api.py b/app/server/tests/test_api.py index f9b6b75a..35c298f5 100644 --- a/app/server/tests/test_api.py +++ b/app/server/tests/test_api.py @@ -829,25 +829,25 @@ class TestUploader(APITestCase): def test_can_upload_conll_format_file(self): self.upload_test_helper(url=self.labeling_url, - filename='example.valid.conll', + filename='labeling.conll', format='conll', expected_status=status.HTTP_201_CREATED) def test_cannot_upload_wrong_conll_format_file(self): self.upload_test_helper(url=self.labeling_url, - filename='example.invalid.conll', + filename='labeling.invalid.conll', format='conll', expected_status=status.HTTP_400_BAD_REQUEST) def test_can_upload_classification_csv(self): self.upload_test_helper(url=self.classification_url, - filename='example.valid.2.csv', + filename='example.csv', format='csv', expected_status=status.HTTP_201_CREATED) def test_can_upload_seq2seq_csv(self): self.upload_test_helper(url=self.seq2seq_url, - filename='example.valid.2.csv', + filename='example.csv', format='csv', expected_status=status.HTTP_201_CREATED) @@ -865,19 +865,19 @@ class TestUploader(APITestCase): def test_can_upload_classification_jsonl(self): self.upload_test_helper(url=self.classification_url, - filename='example.classification.jsonl', + filename='classification.jsonl', format='json', expected_status=status.HTTP_201_CREATED) def test_can_upload_labeling_jsonl(self): self.upload_test_helper(url=self.labeling_url, - filename='example.labeling.jsonl', + filename='labeling.jsonl', format='json', expected_status=status.HTTP_201_CREATED) def test_can_upload_seq2seq_jsonl(self): self.upload_test_helper(url=self.seq2seq_url, - filename='example.seq2seq.jsonl', + filename='seq2seq.jsonl', format='json', expected_status=status.HTTP_201_CREATED) @@ -904,54 +904,74 @@ class TestFileHandler(APITestCase): with open(os.path.join(DATA_DIR, filename), mode='rb') as f: handler.handle_uploaded_file(f, self.super_user) - def test_conll_handler(self): - self.handler_test_helper(filename='example.valid.conll', + def test_give_valid_data_to_conll_handler(self): + self.handler_test_helper(filename='labeling.conll', handler=CoNLLHandler(self.project)) self.assertEqual(Document.objects.count(), 3) self.assertEqual(Label.objects.count(), 3) # LOC, PER, O self.assertEqual(SequenceAnnotation.objects.count(), 20) # num of annotation line - def test_conll_invalid_handler(self): + def test_give_invalid_data_to_conll_handler(self): with self.assertRaises(FileParseException): - self.handler_test_helper(filename='example.invalid.conll', + self.handler_test_helper(filename='labeling.invalid.conll', handler=CoNLLHandler(self.project)) self.assertEqual(Document.objects.count(), 0) self.assertEqual(Label.objects.count(), 0) self.assertEqual(SequenceAnnotation.objects.count(), 0) - def test_csv_classification_handler(self): - self.handler_test_helper(filename='example.valid.2.csv', + def test_give_valid_data_to_csv_classification_handler(self): + self.handler_test_helper(filename='example.csv', handler=CSVClassificationHandler(self.project)) self.assertEqual(Document.objects.count(), 3) self.assertEqual(Label.objects.count(), 2) self.assertEqual(DocumentAnnotation.objects.count(), 3) - def test_csv_seq2seq_handler(self): - self.handler_test_helper(filename='example.valid.2.csv', + def test_give_valid_data_to_csv_seq2seq_handler(self): + self.handler_test_helper(filename='example.csv', handler=CSVSeq2seqHandler(self.project)) self.assertEqual(Document.objects.count(), 3) self.assertEqual(Seq2seqAnnotation.objects.count(), 3) - def test_json_classification_handler(self): - self.handler_test_helper(filename='example.classification.jsonl', + def test_give_valid_data_to_json_classification_handler(self): + self.handler_test_helper(filename='classification.jsonl', handler=JsonClassificationHandler(self.project)) self.assertEqual(Document.objects.count(), 3) self.assertEqual(Label.objects.count(), 2) self.assertEqual(DocumentAnnotation.objects.count(), 4) - def test_json_labeling_handler(self): - self.handler_test_helper(filename='example.labeling.jsonl', + def test_give_valid_data_to_json_labeling_handler(self): + self.handler_test_helper(filename='labeling.jsonl', handler=JsonLabelingHandler(self.project)) self.assertEqual(Document.objects.count(), 3) self.assertEqual(Label.objects.count(), 3) self.assertEqual(SequenceAnnotation.objects.count(), 4) - def test_json_seq2seq_handler(self): - self.handler_test_helper(filename='example.seq2seq.jsonl', + def test_give_valid_data_to_json_seq2seq_handler(self): + self.handler_test_helper(filename='seq2seq.jsonl', handler=JsonSeq2seqHandler(self.project)) self.assertEqual(Document.objects.count(), 3) self.assertEqual(Seq2seqAnnotation.objects.count(), 4) + def test_give_data_without_label_to_json_classification_handler(self): + self.handler_test_helper(filename='example.jsonl', + handler=JsonClassificationHandler(self.project)) + self.assertEqual(Document.objects.count(), 3) + self.assertEqual(Label.objects.count(), 0) + self.assertEqual(DocumentAnnotation.objects.count(), 0) + + def test_give_data_without_label_to_json_labeling_handler(self): + self.handler_test_helper(filename='example.jsonl', + handler=JsonLabelingHandler(self.project)) + self.assertEqual(Document.objects.count(), 3) + self.assertEqual(Label.objects.count(), 0) + self.assertEqual(SequenceAnnotation.objects.count(), 0) + + def test_give_data_without_label_to_json_seq2seq_handler(self): + self.handler_test_helper(filename='example.jsonl', + handler=JsonSeq2seqHandler(self.project)) + self.assertEqual(Document.objects.count(), 3) + self.assertEqual(Seq2seqAnnotation.objects.count(), 0) + class TestDownloader(APITestCase):