|
|
@ -184,6 +184,38 @@ class Document(models.Model): |
|
|
|
for a in annotations] |
|
|
|
return dataset |
|
|
|
|
|
|
|
def to_json(self):
    """Return this document's JSON-ready export record.

    Thin entry point: hands off to ``make_dataset_json`` and passes its
    result back unchanged.
    """
    exported = self.make_dataset_json()
    return exported
|
|
|
|
|
|
|
def make_dataset_json(self):
    """Build the JSON export record that matches the project's type.

    The project type is checked in a fixed order (document classification,
    sequence labeling, seq2seq) and the record from the first matching
    builder is returned. When no type matches, ``None`` is returned.
    """
    project = self.project
    if project.is_type_of(Project.DOCUMENT_CLASSIFICATION):
        return self.make_dataset_for_classification_json()
    if project.is_type_of(Project.SEQUENCE_LABELING):
        return self.make_dataset_for_sequence_labeling_json()
    if project.is_type_of(Project.Seq2seq):
        return self.make_dataset_for_seq2seq_json()
    # No known project type matched; mirror the original fall-through.
    return None
|
|
|
|
|
|
|
def make_dataset_for_classification_json(self):
    """Build the JSON export record for a document-classification project.

    Returns:
        dict with the keys ``doc_id`` (``self.id``), ``text``
        (``self.text``), ``labels`` (the label text of every annotation)
        and ``username`` (the username of the first annotation's user, or
        ``None`` when the document has no annotations).
    """
    # Materialize once: the annotations are both iterated and indexed below.
    annotations = list(self.get_annotations())
    labels = [a.label.text for a in annotations]
    # An unannotated document used to raise IndexError on annotations[0];
    # export it without a username instead of aborting the export.
    # NOTE(review): only the first annotation's user is reported, even if
    # several users annotated this document -- confirm that is intended.
    username = annotations[0].user.username if annotations else None
    return {
        'doc_id': self.id,
        'text': self.text,
        'labels': labels,
        'username': username,
    }
|
|
|
|
|
|
|
def make_dataset_for_sequence_labeling_json(self):
    """Build the JSON export record for a sequence-labeling project.

    Returns:
        dict with the keys ``doc_id`` (``self.id``), ``text``
        (``self.text``), ``entities`` (one ``(start_offset, end_offset,
        label text)`` tuple per annotation) and ``username`` (the username
        of the first annotation's user, or ``None`` when the document has
        no annotations).
    """
    # Materialize once: the annotations are both iterated and indexed below.
    annotations = list(self.get_annotations())
    entities = [
        (a.start_offset, a.end_offset, a.label.text) for a in annotations
    ]
    # An unannotated document used to raise IndexError on annotations[0];
    # export it without a username instead of aborting the export.
    # NOTE(review): only the first annotation's user is reported, even if
    # several users annotated this document -- confirm that is intended.
    username = annotations[0].user.username if annotations else None
    return {
        'doc_id': self.id,
        'text': self.text,
        'entities': entities,
        'username': username,
    }
|
|
|
|
|
|
|
def make_dataset_for_seq2seq_json(self):
    """Build the JSON export record for a seq2seq project.

    Returns:
        dict with the keys ``doc_id`` (``self.id``), ``text``
        (``self.text``), ``sentences`` (the ``text`` of every annotation)
        and ``username`` (the username of the first annotation's user, or
        ``None`` when the document has no annotations).
    """
    # Materialize once: the annotations are both iterated and indexed below.
    annotations = list(self.get_annotations())
    sentences = [a.text for a in annotations]
    # An unannotated document used to raise IndexError on annotations[0];
    # export it without a username instead of aborting the export.
    # NOTE(review): only the first annotation's user is reported, even if
    # several users annotated this document -- confirm that is intended.
    username = annotations[0].user.username if annotations else None
    return {
        'doc_id': self.id,
        'text': self.text,
        'sentences': sentences,
        'username': username,
    }
|
|
|
|
|
|
|
def __str__(self):
    """Return a short preview: at most the first 50 characters of the text."""
    preview_length = 50
    return self.text[:preview_length]
|
|
|
|
|
|
|