Browse Source

Add test case (JSONL without labels) to file handler

pull/110/head
Hironsan 5 years ago
parent
commit
541af14d13
11 changed files with 53 additions and 54 deletions
  1. 12
      app/server/api.py
  2. 0
      app/server/tests/data/classification.jsonl
  3. 2
      app/server/tests/data/example.csv
  4. 21
      app/server/tests/data/example.jsonl
  5. 3
      app/server/tests/data/example.labeling.jsonl
  6. 4
      app/server/tests/data/example.valid.1.csv
  7. 0
      app/server/tests/data/labeling.conll
  8. 0
      app/server/tests/data/labeling.invalid.conll
  9. 3
      app/server/tests/data/labeling.jsonl
  10. 0
      app/server/tests/data/seq2seq.jsonl
  11. 62
      app/server/tests/test_api.py

12
app/server/api.py

@ -2,7 +2,6 @@ import csv
import io
import json
from collections import Counter
from itertools import chain
from django.db import transaction
from django.http import HttpResponse
@ -432,8 +431,7 @@ class JsonHandler(FileHandler):
def parse(self, file):
for i, line in enumerate(file, start=1):
try:
j = json.loads(line)
yield j
yield json.loads(line)
except json.decoder.JSONDecodeError:
raise FileParseException(line_num=i, line=line)
@ -464,7 +462,7 @@ class JsonClassificationHandler(JsonHandler):
def handle_uploaded_file(self, file, user):
for data in self.parse(file):
doc = self.save_doc(data)
for label in data['labels']:
for label in data.get('labels', []):
label = self.save_label({'text': label})
self.save_annotation({'label': label.id}, doc, user)
@ -474,7 +472,7 @@ class JsonLabelingHandler(JsonHandler):
The format is as follows:
```
{"text": "Python is awesome!", "entities": [[0, 6, "Product"],]}
{"text": "Python is awesome!", "labels": [[0, 6, "Product"]]}
...
```
"""
@ -484,7 +482,7 @@ class JsonLabelingHandler(JsonHandler):
def handle_uploaded_file(self, file, user):
for data in self.parse(file):
doc = self.save_doc(data)
for start_offset, end_offset, label in data['entities']:
for start_offset, end_offset, label in data.get('labels', []):
label = self.save_label({'text': label})
data = {'label': label.id,
'start_offset': start_offset,
@ -507,5 +505,5 @@ class JsonSeq2seqHandler(JsonHandler):
def handle_uploaded_file(self, file, user):
for data in self.parse(file):
doc = self.save_doc(data)
for label in data['labels']:
for label in data.get('labels', []):
self.save_annotation({'text': label}, doc, user)

app/server/tests/data/example.classification.jsonl → app/server/tests/data/classification.jsonl

app/server/tests/data/example.valid.2.csv → app/server/tests/data/example.csv

@ -1,4 +1,4 @@
text, label
text,label
AAA,Positive
BBB,Positive
CCC,Negative

21
app/server/tests/data/example.jsonl

@ -1,18 +1,3 @@
{"id": 5, "label": "2", "text": "\u30a2\u30f3\u30d1\u30b5\u30f3\u30c9", "manual": true, "prob": 0.5, "manual": true, "prob": 0.5}
{"id": 10, "label": "2", "text": "\u8a00\u8a9e", "manual": true, "prob": 0.5}
{"id": 11, "label": "1", "text": "\u65e5\u672c\u8a9e", "manual": true, "prob": 0.5}
{"id": 12, "label": "1", "text": "\u5730\u7406\u5b66", "manual": true, "prob": 0.5}
{"id": 23, "label": "2", "text": "\u56fd\u306e\u4e00\u89a7", "manual": false, "prob": 0.5}
{"id": 31, "label": "2", "text": "\u30d1\u30ea", "manual": false, "prob": 0.5}
{"id": 32, "label": "3", "text": "\u30e8\u30fc\u30ed\u30c3\u30d1", "manual": false, "prob": 0.5}
{"id": 42, "label": "2", "text": "\u751f\u7269", "manual": true, "prob": 0.5}
{"id": 43, "label": "3", "text": "\u30b3\u30b1\u690d\u7269", "manual": false, "prob": 0.5}
{"id": 47, "label": "3", "text": "\u793e\u4f1a\u5b66", "manual": true, "prob": 0.5}
{"id": 111, "label": "1", "text": "\u65e5\u672c\u8a9e", "manual": true, "prob": 0.5}
{"id": 112, "label": "1", "text": "\u5730\u7406\u5b66", "manual": true, "prob": 0.5}
{"id": 123, "label": "2", "text": "\u56fd\u306e\u4e00\u89a7", "manual": false, "prob": 0.5}
{"id": 131, "label": "2", "text": "\u30d1\u30ea", "manual": false, "prob": 0.5}
{"id": 132, "label": "3", "text": "\u30e8\u30fc\u30ed\u30c3\u30d1", "manual": true, "prob": 0.5}
{"id": 142, "label": "2", "text": "\u751f\u7269", "manual": true, "prob": 0.5}
{"id": 143, "label": "1", "text": "\u30b3\u30b1\u690d\u7269", "manual": false, "prob": 0.5}
{"id": 147, "label": "3", "text": "\u793e\u4f1a\u5b66", "manual": true, "prob": 0.5}
{"text": "example"}
{"text": "example"}
{"text": "example"}

3
app/server/tests/data/example.labeling.jsonl

@ -1,3 +0,0 @@
{"text": "example", "entities": [[0, 1, "LOC"], [0, 2, "ORG"]]}
{"text": "example", "entities": [[0, 1, "LOC"]]}
{"text": "example", "entities": [[0, 1, "PER"]]}

4
app/server/tests/data/example.valid.1.csv

@ -1,4 +0,0 @@
text
AAA
BBB
CCC

app/server/tests/data/example.valid.conll → app/server/tests/data/labeling.conll

app/server/tests/data/example.invalid.conll → app/server/tests/data/labeling.invalid.conll

3
app/server/tests/data/labeling.jsonl

@ -0,0 +1,3 @@
{"text": "example", "labels": [[0, 1, "LOC"], [0, 2, "ORG"]]}
{"text": "example", "labels": [[0, 1, "LOC"]]}
{"text": "example", "labels": [[0, 1, "PER"]]}

app/server/tests/data/example.seq2seq.jsonl → app/server/tests/data/seq2seq.jsonl

62
app/server/tests/test_api.py

@ -829,25 +829,25 @@ class TestUploader(APITestCase):
def test_can_upload_conll_format_file(self):
self.upload_test_helper(url=self.labeling_url,
filename='example.valid.conll',
filename='labeling.conll',
format='conll',
expected_status=status.HTTP_201_CREATED)
def test_cannot_upload_wrong_conll_format_file(self):
self.upload_test_helper(url=self.labeling_url,
filename='example.invalid.conll',
filename='labeling.invalid.conll',
format='conll',
expected_status=status.HTTP_400_BAD_REQUEST)
def test_can_upload_classification_csv(self):
self.upload_test_helper(url=self.classification_url,
filename='example.valid.2.csv',
filename='example.csv',
format='csv',
expected_status=status.HTTP_201_CREATED)
def test_can_upload_seq2seq_csv(self):
self.upload_test_helper(url=self.seq2seq_url,
filename='example.valid.2.csv',
filename='example.csv',
format='csv',
expected_status=status.HTTP_201_CREATED)
@ -865,19 +865,19 @@ class TestUploader(APITestCase):
def test_can_upload_classification_jsonl(self):
self.upload_test_helper(url=self.classification_url,
filename='example.classification.jsonl',
filename='classification.jsonl',
format='json',
expected_status=status.HTTP_201_CREATED)
def test_can_upload_labeling_jsonl(self):
self.upload_test_helper(url=self.labeling_url,
filename='example.labeling.jsonl',
filename='labeling.jsonl',
format='json',
expected_status=status.HTTP_201_CREATED)
def test_can_upload_seq2seq_jsonl(self):
self.upload_test_helper(url=self.seq2seq_url,
filename='example.seq2seq.jsonl',
filename='seq2seq.jsonl',
format='json',
expected_status=status.HTTP_201_CREATED)
@ -904,54 +904,74 @@ class TestFileHandler(APITestCase):
with open(os.path.join(DATA_DIR, filename), mode='rb') as f:
handler.handle_uploaded_file(f, self.super_user)
def test_conll_handler(self):
self.handler_test_helper(filename='example.valid.conll',
def test_give_valid_data_to_conll_handler(self):
self.handler_test_helper(filename='labeling.conll',
handler=CoNLLHandler(self.project))
self.assertEqual(Document.objects.count(), 3)
self.assertEqual(Label.objects.count(), 3) # LOC, PER, O
self.assertEqual(SequenceAnnotation.objects.count(), 20) # num of annotation line
def test_conll_invalid_handler(self):
def test_give_invalid_data_to_conll_handler(self):
with self.assertRaises(FileParseException):
self.handler_test_helper(filename='example.invalid.conll',
self.handler_test_helper(filename='labeling.invalid.conll',
handler=CoNLLHandler(self.project))
self.assertEqual(Document.objects.count(), 0)
self.assertEqual(Label.objects.count(), 0)
self.assertEqual(SequenceAnnotation.objects.count(), 0)
def test_csv_classification_handler(self):
self.handler_test_helper(filename='example.valid.2.csv',
def test_give_valid_data_to_csv_classification_handler(self):
self.handler_test_helper(filename='example.csv',
handler=CSVClassificationHandler(self.project))
self.assertEqual(Document.objects.count(), 3)
self.assertEqual(Label.objects.count(), 2)
self.assertEqual(DocumentAnnotation.objects.count(), 3)
def test_csv_seq2seq_handler(self):
self.handler_test_helper(filename='example.valid.2.csv',
def test_give_valid_data_to_csv_seq2seq_handler(self):
self.handler_test_helper(filename='example.csv',
handler=CSVSeq2seqHandler(self.project))
self.assertEqual(Document.objects.count(), 3)
self.assertEqual(Seq2seqAnnotation.objects.count(), 3)
def test_json_classification_handler(self):
self.handler_test_helper(filename='example.classification.jsonl',
def test_give_valid_data_to_json_classification_handler(self):
self.handler_test_helper(filename='classification.jsonl',
handler=JsonClassificationHandler(self.project))
self.assertEqual(Document.objects.count(), 3)
self.assertEqual(Label.objects.count(), 2)
self.assertEqual(DocumentAnnotation.objects.count(), 4)
def test_json_labeling_handler(self):
self.handler_test_helper(filename='example.labeling.jsonl',
def test_give_valid_data_to_json_labeling_handler(self):
self.handler_test_helper(filename='labeling.jsonl',
handler=JsonLabelingHandler(self.project))
self.assertEqual(Document.objects.count(), 3)
self.assertEqual(Label.objects.count(), 3)
self.assertEqual(SequenceAnnotation.objects.count(), 4)
def test_json_seq2seq_handler(self):
self.handler_test_helper(filename='example.seq2seq.jsonl',
def test_give_valid_data_to_json_seq2seq_handler(self):
self.handler_test_helper(filename='seq2seq.jsonl',
handler=JsonSeq2seqHandler(self.project))
self.assertEqual(Document.objects.count(), 3)
self.assertEqual(Seq2seqAnnotation.objects.count(), 4)
def test_give_data_without_label_to_json_classification_handler(self):
self.handler_test_helper(filename='example.jsonl',
handler=JsonClassificationHandler(self.project))
self.assertEqual(Document.objects.count(), 3)
self.assertEqual(Label.objects.count(), 0)
self.assertEqual(DocumentAnnotation.objects.count(), 0)
def test_give_data_without_label_to_json_labeling_handler(self):
self.handler_test_helper(filename='example.jsonl',
handler=JsonLabelingHandler(self.project))
self.assertEqual(Document.objects.count(), 3)
self.assertEqual(Label.objects.count(), 0)
self.assertEqual(SequenceAnnotation.objects.count(), 0)
def test_give_data_without_label_to_json_seq2seq_handler(self):
self.handler_test_helper(filename='example.jsonl',
handler=JsonSeq2seqHandler(self.project))
self.assertEqual(Document.objects.count(), 3)
self.assertEqual(Seq2seqAnnotation.objects.count(), 0)
class TestDownloader(APITestCase):

Loading…
Cancel
Save