Browse Source

Add export catalog for relation extraction

pull/1703/head
Hironsan 3 years ago
parent
commit
7a4d0c5c7b
2 changed files with 58 additions and 0 deletions
  1. 6
      backend/data_export/pipeline/catalog.py
  2. 52
      backend/data_export/pipeline/examples.py

6
backend/data_export/pipeline/catalog.py

@ -50,6 +50,11 @@ class IntentAndSlot(Format):
extension = "jsonl"
# Export format descriptor for the relation-aware JSONL variant.
# It uses the same "jsonl" file extension as the other JSONL-based formats shown
# in this diff, and is told apart by its display name "JSONL(relation)".
class JSONLRelation(Format):
# Name under which this format is listed.
name = "JSONL(relation)"
# File extension associated with this format.
extension = "jsonl"
class OptionDelimiter(BaseModel):
delimiter: Literal[",", "\t", ";", "|", " "] = ","
@ -79,6 +84,7 @@ Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.Category_J
# Sequence Labeling
# Each call passes a task constant, a format class, an option model and an example string.
# Plain JSONL export, illustrated by examples.Offset_JSONL.
Options.register(SEQUENCE_LABELING, JSONL, OptionNone, examples.Offset_JSONL)
# JSONL(relation) export, illustrated by examples.ENTITY_AND_RELATION_JSONL.
Options.register(SEQUENCE_LABELING, JSONLRelation, OptionNone, examples.ENTITY_AND_RELATION_JSONL)
# Sequence to sequence
Options.register(SEQ2SEQ, CSV, OptionDelimiter, examples.Text_CSV)

52
backend/data_export/pipeline/examples.py

@ -54,6 +54,58 @@ Offset_JSONL = """
{"text": "President Obama", "label": [ [10, 15, "PERSON"] ]}
"""
# Sample record for the relation export format: one object with "text",
# "entities" and "relations".
# - Each entity has an "id", a "label" and a "start_offset"/"end_offset" pair.
#   In this sample the pair slices the text with an exclusive end, e.g. 0-6 is
#   "Google" and 59-70 is "Sergey Brin".
# - Each relation has an "id", a "type" and "from_id"/"to_id" values. Here those
#   values match entity ids (0 -> 1, 0 -> 2, 0 -> 3).
# NOTE(review): the sample is pretty-printed across several lines; whether the
# actual export writes one object per line is not visible here -- confirm.
ENTITY_AND_RELATION_JSONL = """
{
"text": "Google was founded on September 4, 1998, by Larry Page and Sergey Brin.",
"entities": [
{
"id": 0,
"start_offset": 0,
"end_offset": 6,
"label": "ORG"
},
{
"id": 1,
"start_offset": 22,
"end_offset": 39,
"label": "DATE"
},
{
"id": 2,
"start_offset": 44,
"end_offset": 54,
"label": "PERSON"
},
{
"id": 3,
"start_offset": 59,
"end_offset": 70,
"label": "PERSON"
}
],
"relations": [
{
"id": 0,
"from_id": 0,
"to_id": 1,
"type": "foundedAt"
},
{
"id": 1,
"from_id": 0,
"to_id": 2,
"type": "foundedBy"
},
{
"id": 2,
"from_id": 0,
"to_id": 3,
"type": "foundedBy"
}
]
}
"""
CategoryImageClassification = """
[
{

Loading…
Cancel
Save