diff --git a/backend/data_export/pipeline/catalog.py b/backend/data_export/pipeline/catalog.py
index 8ec81bb3..de384081 100644
--- a/backend/data_export/pipeline/catalog.py
+++ b/backend/data_export/pipeline/catalog.py
@@ -79,25 +79,25 @@ class Options:
 
 
 # Text Classification
-Options.register(DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, examples.Category_CSV)
-Options.register(DOCUMENT_CLASSIFICATION, FastText, OptionNone, examples.Category_fastText)
-Options.register(DOCUMENT_CLASSIFICATION, JSON, OptionNone, examples.Category_JSON)
-Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.Category_JSONL)
+Options.register(DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, examples.CATEGORY_CSV)
+Options.register(DOCUMENT_CLASSIFICATION, FastText, OptionNone, examples.CATEGORY_FASTTEXT)
+Options.register(DOCUMENT_CLASSIFICATION, JSON, OptionNone, examples.CATEGORY_JSON)
+Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.CATEGORY_JSONL)
 
 # Sequence Labeling
-Options.register(SEQUENCE_LABELING, JSONL, OptionNone, examples.Offset_JSONL)
+Options.register(SEQUENCE_LABELING, JSONL, OptionNone, examples.SPAN_JSONL)
 Options.register(SEQUENCE_LABELING, JSONLRelation, OptionNone, examples.ENTITY_AND_RELATION_JSONL)
 
 # Sequence to sequence
-Options.register(SEQ2SEQ, CSV, OptionDelimiter, examples.Text_CSV)
-Options.register(SEQ2SEQ, JSON, OptionNone, examples.Text_JSON)
-Options.register(SEQ2SEQ, JSONL, OptionNone, examples.Text_JSONL)
+Options.register(SEQ2SEQ, CSV, OptionDelimiter, examples.TEXT_CSV)
+Options.register(SEQ2SEQ, JSON, OptionNone, examples.TEXT_JSON)
+Options.register(SEQ2SEQ, JSONL, OptionNone, examples.TEXT_JSONL)
 
 # Intent detection and slot filling
 Options.register(INTENT_DETECTION_AND_SLOT_FILLING, IntentAndSlot, OptionNone, examples.INTENT_JSONL)
 
 # Image Classification
-Options.register(IMAGE_CLASSIFICATION, JSONL, OptionNone, examples.CategoryImageClassification)
+Options.register(IMAGE_CLASSIFICATION, JSONL, OptionNone, examples.CATEGORY_IMAGE_CLASSIFICATION)
 
 # Speech to Text
-Options.register(SPEECH2TEXT, JSONL, OptionNone, examples.Speech2Text)
+Options.register(SPEECH2TEXT, JSONL, OptionNone, examples.SPEECH_TO_TEXT)
diff --git a/backend/data_export/pipeline/examples.py b/backend/data_export/pipeline/examples.py
index f6ba84ed..8c75f2d8 100644
--- a/backend/data_export/pipeline/examples.py
+++ b/backend/data_export/pipeline/examples.py
@@ -1,17 +1,17 @@
-Category_CSV = """
+CATEGORY_CSV = """
 text,label
 "Terrible customer service.","negative"
 "Really great transaction.","positive"
 "Great price.","positive"
 """
 
-Category_fastText = """
+CATEGORY_FASTTEXT = """
 __label__negative Terrible customer service.
 __label__positive Really great transaction.
 __label__positive Great price.
 """
 
-Category_JSON = """
+CATEGORY_JSON = """
 [
     {
         "text": "Terrible customer service.",
@@ -20,20 +20,20 @@ Category_JSON = """
 ]
 """
 
-Category_JSONL = """
+CATEGORY_JSONL = """
 {"text": "Terrible customer service.", "label": ["negative"]}
 {"text": "Really great transaction.", "label": ["positive"]}
 {"text": "Great price.", "label": ["positive"]}
 """
 
-Text_CSV = """
+TEXT_CSV = """
 text,label
 "Hello!","こんにちは!"
 "Good morning.","おはようございます。"
 "See you.","さようなら。"
 """
 
-Text_JSON = """
+TEXT_JSON = """
 [
     {
         "text": "Hello!",
@@ -42,13 +42,13 @@ Text_JSON = """
 ]
 """
 
-Text_JSONL = """
+TEXT_JSONL = """
 {"text": "Hello!", "label": ["こんにちは!"]}
 {"text": "Good morning.", "label": ["おはようございます。"]}
 {"text": "See you.", "label": ["さようなら。"]}
 """
 
-Offset_JSONL = """
+SPAN_JSONL = """
 {"text": "EU rejects German call to boycott British lamb.", "label": [ [0, 2, "ORG"], [11, 17, "MISC"], ... ]}
 {"text": "Peter Blackburn", "label": [ [0, 15, "PERSON"] ]}
 {"text": "President Obama", "label": [ [10, 15, "PERSON"] ]}
@@ -106,7 +106,7 @@ ENTITY_AND_RELATION_JSONL = """
 }
 """
 
-CategoryImageClassification = """
+CATEGORY_IMAGE_CLASSIFICATION = """
 [
     {
         "filename": "20210514.png",
@@ -115,7 +115,7 @@ CategoryImageClassification = """
 ]
 """
 
-Speech2Text = """
+SPEECH_TO_TEXT = """
 [
     {
         "filename": "20210514.mp3",