From 7a4d0c5c7b3f32091333cf07ecad139d58587649 Mon Sep 17 00:00:00 2001 From: Hironsan Date: Fri, 25 Feb 2022 11:00:32 +0900 Subject: [PATCH] Add export catalog for relation extraction --- backend/data_export/pipeline/catalog.py | 6 +++ backend/data_export/pipeline/examples.py | 52 ++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/backend/data_export/pipeline/catalog.py b/backend/data_export/pipeline/catalog.py index 58751145..3eb74b43 100644 --- a/backend/data_export/pipeline/catalog.py +++ b/backend/data_export/pipeline/catalog.py @@ -50,6 +50,11 @@ class IntentAndSlot(Format): extension = "jsonl" +class JSONLRelation(Format): + name = "JSONL(relation)" + extension = "jsonl" + + class OptionDelimiter(BaseModel): delimiter: Literal[",", "\t", ";", "|", " "] = "," @@ -79,6 +84,7 @@ Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.Category_J # Sequence Labeling Options.register(SEQUENCE_LABELING, JSONL, OptionNone, examples.Offset_JSONL) +Options.register(SEQUENCE_LABELING, JSONLRelation, OptionNone, examples.ENTITY_AND_RELATION_JSONL) # Sequence to sequence Options.register(SEQ2SEQ, CSV, OptionDelimiter, examples.Text_CSV) diff --git a/backend/data_export/pipeline/examples.py b/backend/data_export/pipeline/examples.py index e72fc09b..f6ba84ed 100644 --- a/backend/data_export/pipeline/examples.py +++ b/backend/data_export/pipeline/examples.py @@ -54,6 +54,58 @@ Offset_JSONL = """ {"text": "President Obama", "label": [ [10, 15, "PERSON"] ]} """ +ENTITY_AND_RELATION_JSONL = """ +{ + "text": "Google was founded on September 4, 1998, by Larry Page and Sergey Brin.", + "entities": [ + { + "id": 0, + "start_offset": 0, + "end_offset": 6, + "label": "ORG" + }, + { + "id": 1, + "start_offset": 22, + "end_offset": 39, + "label": "DATE" + }, + { + "id": 2, + "start_offset": 44, + "end_offset": 54, + "label": "PERSON" + }, + { + "id": 3, + "start_offset": 59, + "end_offset": 70, + "label": "PERSON" + } + ], + "relations": [ + { + "id": 0, + "from_id": 0, + "to_id": 1, + "type": "foundedAt" + }, + { + "id": 1, + "from_id": 0, + "to_id": 2, + "type": "foundedBy" + }, + { + "id": 2, + "from_id": 0, + "to_id": 3, + "type": "foundedBy" + } + ] +} +""" + CategoryImageClassification = """ [ {