Browse Source

Merge pull request #217 from erikamenezes/feature/json_text_labels_export_1

Enhancement/Alignment for import data format and export data format
pull/239/head
Hiroki Nakayama 5 years ago
committed by GitHub
parent
commit
4232830a96
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 51 additions and 3 deletions
  1. 11
      app/server/api.py
  2. 15
      app/server/static/components/download_sequence_labeling.vue
  3. 3
      app/server/static/components/examples/download_sequence_labeling.json1l
  4. 5
      app/server/tests/test_api.py
  5. 20
      app/server/utils.py

11
app/server/api.py

@ -290,13 +290,20 @@ class TextDownloadAPI(APIView):
project = get_object_or_404(Project, pk=self.kwargs['project_id'])
documents = project.documents.all()
painter = self.select_painter(format)
data = painter.paint(documents)
# json1 format prints text labels while json format prints annotations with label ids
# json1 format - "labels": [[0, 15, "PERSON"], ..]
# json format - "annotations": [{"label": 5, "start_offset": 0, "end_offset": 2, "user": 1},..]
if format == "json1":
labels = project.labels.all()
data = JSONPainter.paint_labels(documents, labels)
else:
data = painter.paint(documents)
return Response(data)
def select_painter(self, format):
    """Return the painter used to render a download in the given format.

    'csv' maps to CSVPainter; 'json' and 'json1' both map to JSONPainter.

    Raises:
        ValidationError: if ``format`` is not one of the values above.
    """
    if format == 'csv':
        return CSVPainter()
    elif format in ('json', 'json1'):
        return JSONPainter()
    else:
        raise ValidationError('format {} is invalid.'.format(format))

15
app/server/static/components/download_sequence_labeling.vue

@ -12,11 +12,26 @@ block select-format-area
)
| JSONL
label.radio
input(
type="radio"
name="format"
value="json1"
v-bind:checked="format == 'json1'"
v-model="format"
)
| JSON(Text-Labels)
block example-format-area
pre.code-block(v-show="format == 'json'")
code.json
include ./examples/download_sequence_labeling.jsonl
| ...
pre.code-block(v-show="format == 'json1'")
code.json
include ./examples/download_sequence_labeling.json1l
| ...
</template>
<script>

3
app/server/static/components/examples/download_sequence_labeling.json1l

@ -0,0 +1,3 @@
{"id": 1, "text": "EU rejects ...", "labels": [[0,2,"ORG"], [11,17, "MISC"], [34,41,"ORG"]]}
{"id": 2, "text": "Peter Blackburn", "labels": [[0, 15, "PERSON"]]}
{"id": 3, "text": "President Obama", "labels": [[10, 15, "PERSON"]]}

5
app/server/tests/test_api.py

@ -1013,6 +1013,11 @@ class TestDownloader(APITestCase):
format='json',
expected_status=status.HTTP_200_OK)
def test_can_download_labelling_json1(self):
    # Request the labeling download in the json1 format and expect HTTP 200.
    request_kwargs = {
        'url': self.labeling_url,
        'format': 'json1',
        'expected_status': status.HTTP_200_OK,
    }
    self.download_test_helper(**request_kwargs)
def test_can_download_plain_text(self):
self.download_test_helper(url=self.classification_url,
format='plain',

20
app/server/utils.py

@ -352,6 +352,7 @@ class JSONParser(FileParser):
data = []
try:
j = json.loads(line)
#j = json.loads(line.decode('utf-8'))
j['meta'] = json.dumps(j.get('meta', {}))
data.append(j)
except json.decoder.JSONDecodeError:
@ -378,7 +379,6 @@ class JSONLRenderer(JSONRenderer):
ensure_ascii=self.ensure_ascii,
allow_nan=not self.strict) + '\n'
class JSONPainter(object):
def paint(self, documents):
@ -393,6 +393,24 @@ class JSONPainter(object):
data.append(d)
return data
@staticmethod
def paint_labels(documents, labels):
    """Serialize documents with label text in place of label ids.

    Each serialized document loses its 'annotations' entry and gains a
    'labels' entry: a list of ``[start_offset, end_offset, label_text]``
    triples, one per annotation. The 'meta' entry is decoded from its
    JSON string form.

    Args:
        documents: documents passed to DocumentSerializer (many=True).
        labels: labels passed to LabelSerializer (many=True); used to
            resolve each annotation's label id to the label's text.

    Returns:
        A list of the rewritten serialized documents.

    Raises:
        KeyError: if an annotation references a label id that is not
            present in ``labels``.
    """
    serializer_labels = LabelSerializer(labels, many=True)
    serializer = DocumentSerializer(documents, many=True)
    # Index label text by id once, instead of scanning every label for
    # every annotation.
    label_text_by_id = {x['id']: x['text'] for x in serializer_labels.data}
    data = []
    for d in serializer.data:
        d['labels'] = [
            [a['start_offset'], a['end_offset'], label_text_by_id[a['label']]]
            for a in d.pop('annotations')
        ]
        d['meta'] = json.loads(d['meta'])
        data.append(d)
    return data
class CSVPainter(JSONPainter):

Loading…
Cancel
Save