You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

107 lines
3.3 KiB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple, Type

from projects.models import (
    BOUNDING_BOX,
    DOCUMENT_CLASSIFICATION,
    IMAGE_CAPTIONING,
    IMAGE_CLASSIFICATION,
    INTENT_DETECTION_AND_SLOT_FILLING,
    SEGMENTATION,
    SEQ2SEQ,
    SEQUENCE_LABELING,
    SPEECH2TEXT,
)
  15. EXAMPLE_DIR = Path(__file__).parent.resolve() / "examples"
  16. class Format:
  17. name = ""
  18. @classmethod
  19. def dict(cls):
  20. return {
  21. "name": cls.name,
  22. }
  23. class CSV(Format):
  24. name = "CSV"
  25. class FastText(Format):
  26. name = "fastText"
  27. class JSON(Format):
  28. name = "JSON"
  29. class JSONL(Format):
  30. name = "JSONL"
  31. class Options:
  32. options: Dict[str, List] = defaultdict(list)
  33. @classmethod
  34. def filter_by_task(cls, task_name: str, use_relation: bool = False):
  35. options = cls.options[task_name]
  36. return [
  37. {**file_format.dict(), "example": example}
  38. for file_format, example, use_rel in options
  39. if use_rel == use_relation
  40. ]
  41. @classmethod
  42. def register(cls, task: str, file_format: Type[Format], file: Path, use_relation: bool = False):
  43. example = cls.load_example(file)
  44. cls.options[task].append((file_format, example, use_relation))
  45. @staticmethod
  46. def load_example(file):
  47. with open(file, encoding="utf-8") as f:
  48. return f.read()
  49. # Text Classification
  50. TEXT_CLASSIFICATION_DIR = EXAMPLE_DIR / "text_classification"
  51. Options.register(DOCUMENT_CLASSIFICATION, CSV, TEXT_CLASSIFICATION_DIR / "example.csv")
  52. Options.register(DOCUMENT_CLASSIFICATION, FastText, TEXT_CLASSIFICATION_DIR / "example.txt")
  53. Options.register(DOCUMENT_CLASSIFICATION, JSON, TEXT_CLASSIFICATION_DIR / "example.json")
  54. Options.register(DOCUMENT_CLASSIFICATION, JSONL, TEXT_CLASSIFICATION_DIR / "example.jsonl")
  55. # Sequence Labeling
  56. SEQUENCE_LABELING_DIR = EXAMPLE_DIR / "sequence_labeling"
  57. RELATION_EXTRACTION_DIR = EXAMPLE_DIR / "relation_extraction"
  58. Options.register(SEQUENCE_LABELING, JSONL, SEQUENCE_LABELING_DIR / "example.jsonl")
  59. Options.register(SEQUENCE_LABELING, JSONL, RELATION_EXTRACTION_DIR / "example.jsonl", True)
  60. # Sequence to sequence
  61. SEQ2SEQ_DIR = EXAMPLE_DIR / "sequence_to_sequence"
  62. Options.register(SEQ2SEQ, CSV, SEQ2SEQ_DIR / "example.csv")
  63. Options.register(SEQ2SEQ, JSON, SEQ2SEQ_DIR / "example.json")
  64. Options.register(SEQ2SEQ, JSONL, SEQ2SEQ_DIR / "example.jsonl")
  65. # Intent detection and slot filling
  66. INTENT_DETECTION_DIR = EXAMPLE_DIR / "intent_detection"
  67. Options.register(INTENT_DETECTION_AND_SLOT_FILLING, JSONL, INTENT_DETECTION_DIR / "example.jsonl")
  68. # Image Classification
  69. IMAGE_CLASSIFICATION_DIR = EXAMPLE_DIR / "image_classification"
  70. Options.register(IMAGE_CLASSIFICATION, JSONL, IMAGE_CLASSIFICATION_DIR / "example.jsonl")
  71. BOUNDING_BOX_DIR = EXAMPLE_DIR / "bounding_box"
  72. Options.register(BOUNDING_BOX, JSONL, BOUNDING_BOX_DIR / "example.jsonl")
  73. SEGMENTATION_DIR = EXAMPLE_DIR / "segmentation"
  74. Options.register(SEGMENTATION, JSONL, SEGMENTATION_DIR / "example.jsonl")
  75. IMAGE_CAPTIONING_DIR = EXAMPLE_DIR / "image_captioning"
  76. Options.register(IMAGE_CAPTIONING, JSONL, IMAGE_CAPTIONING_DIR / "example.jsonl")
  77. # Speech to Text
  78. SPEECH2TEXT_DIR = EXAMPLE_DIR / "speech_to_text"
  79. Options.register(SPEECH2TEXT, JSONL, SPEECH2TEXT_DIR / "example.jsonl")