diff --git a/app/server/models.py b/app/server/models.py
index 004adc23..6eec7fb7 100644
--- a/app/server/models.py
+++ b/app/server/models.py
@@ -184,6 +184,49 @@ class Document(models.Model):
                    for a in annotations]
         return dataset
 
+    def to_json(self):
+        """Return this document's export data (see make_dataset_json)."""
+        return self.make_dataset_json()
+
+    def make_dataset_json(self):
+        """Build the export dict for this document's project type.
+
+        Returns None when the project matches none of the known types.
+        """
+        if self.project.is_type_of(Project.DOCUMENT_CLASSIFICATION):
+            return self.make_dataset_for_classification_json()
+        elif self.project.is_type_of(Project.SEQUENCE_LABELING):
+            return self.make_dataset_for_sequence_labeling_json()
+        elif self.project.is_type_of(Project.Seq2seq):
+            return self.make_dataset_for_seq2seq_json()
+
+    def make_dataset_for_classification_json(self):
+        """Return id, text, label texts and the first annotation's username."""
+        annotations = self.get_annotations()
+        labels = [a.label.text for a in annotations]
+        # No annotations: export a null username instead of raising IndexError.
+        username = annotations[0].user.username if annotations else None
+        dataset = {'doc_id': self.id, 'text': self.text, 'labels': labels, 'username': username}
+        return dataset
+
+    def make_dataset_for_sequence_labeling_json(self):
+        """Return id, text, (start, end, label) entities and the first annotation's username."""
+        annotations = self.get_annotations()
+        entities = [(a.start_offset, a.end_offset, a.label.text) for a in annotations]
+        # No annotations: export a null username instead of raising IndexError.
+        username = annotations[0].user.username if annotations else None
+        dataset = {'doc_id': self.id, 'text': self.text, 'entities': entities, 'username': username}
+        return dataset
+
+    def make_dataset_for_seq2seq_json(self):
+        """Return id, text, annotation texts and the first annotation's username."""
+        annotations = self.get_annotations()
+        sentences = [a.text for a in annotations]
+        # No annotations: export a null username instead of raising IndexError.
+        username = annotations[0].user.username if annotations else None
+        dataset = {'doc_id': self.id, 'text': self.text, 'sentences': sentences, 'username': username}
+        return dataset
+
     def __str__(self):
         return self.text[:50]
 
diff --git a/app/server/views.py b/app/server/views.py
index 4b266eea..1ecd4c59 100644
--- a/app/server/views.py
+++ b/app/server/views.py
@@ -104,7 +104,7 @@ class DataDownloadFile(SuperUserMixin, LoginRequiredMixin, View):
                 response = self.get_json(filename, docs)
             return response
         except:
-            return HttpResponseRedirect(reverse('download', args=[project.id]))
+            return HttpResponseRedirect(reverse('upload', args=[project.id]))
 
     def get_csv(self, filename, docs):
         response = HttpResponse(content_type='text/csv')
@@ -115,7 +115,13 @@ class DataDownloadFile(SuperUserMixin, LoginRequiredMixin, View):
         return response
 
     def get_json(self, filename, docs):
-        pass
+        """Return docs as an attachment response with one JSON object per line."""
+        response = HttpResponse(content_type='text/json')
+        response['Content-Disposition'] = 'attachment; filename="{}.json"'.format(filename)
+        for d in docs:
+            dump = json.dumps(d.to_json(), ensure_ascii=False)
+            response.write(dump + '\n')  # one JSON object per line
+        return response
 
 
 class DemoTextClassification(TemplateView):