You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

97 lines
3.2 KiB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Type

from projects.models import ProjectType
  5. EXAMPLE_DIR = Path(__file__).parent.resolve() / "examples"
  6. class Format:
  7. name = ""
  8. @classmethod
  9. def dict(cls):
  10. return {
  11. "name": cls.name,
  12. }
  13. class CSV(Format):
  14. name = "CSV"
  15. class FastText(Format):
  16. name = "fastText"
  17. class JSON(Format):
  18. name = "JSON"
  19. class JSONL(Format):
  20. name = "JSONL"
  21. class Options:
  22. options: Dict[str, List] = defaultdict(list)
  23. @classmethod
  24. def filter_by_task(cls, task_name: str, use_relation: bool = False):
  25. options = cls.options[task_name]
  26. return [
  27. {**file_format.dict(), "example": example}
  28. for file_format, example, use_rel in options
  29. if use_rel == use_relation
  30. ]
  31. @classmethod
  32. def register(cls, task: str, file_format: Type[Format], file: Path, use_relation: bool = False):
  33. example = cls.load_example(file)
  34. cls.options[task].append((file_format, example, use_relation))
  35. @staticmethod
  36. def load_example(file):
  37. with open(file, encoding="utf-8") as f:
  38. return f.read()
  39. # Text Classification
  40. TEXT_CLASSIFICATION_DIR = EXAMPLE_DIR / "text_classification"
  41. Options.register(ProjectType.DOCUMENT_CLASSIFICATION, CSV, TEXT_CLASSIFICATION_DIR / "example.csv")
  42. Options.register(ProjectType.DOCUMENT_CLASSIFICATION, FastText, TEXT_CLASSIFICATION_DIR / "example.txt")
  43. Options.register(ProjectType.DOCUMENT_CLASSIFICATION, JSON, TEXT_CLASSIFICATION_DIR / "example.json")
  44. Options.register(ProjectType.DOCUMENT_CLASSIFICATION, JSONL, TEXT_CLASSIFICATION_DIR / "example.jsonl")
  45. # Sequence Labeling
  46. SEQUENCE_LABELING_DIR = EXAMPLE_DIR / "sequence_labeling"
  47. RELATION_EXTRACTION_DIR = EXAMPLE_DIR / "relation_extraction"
  48. Options.register(ProjectType.SEQUENCE_LABELING, JSONL, SEQUENCE_LABELING_DIR / "example.jsonl")
  49. Options.register(ProjectType.SEQUENCE_LABELING, JSONL, RELATION_EXTRACTION_DIR / "example.jsonl", True)
  50. # Sequence to sequence
  51. SEQ2SEQ_DIR = EXAMPLE_DIR / "sequence_to_sequence"
  52. Options.register(ProjectType.SEQ2SEQ, CSV, SEQ2SEQ_DIR / "example.csv")
  53. Options.register(ProjectType.SEQ2SEQ, JSON, SEQ2SEQ_DIR / "example.json")
  54. Options.register(ProjectType.SEQ2SEQ, JSONL, SEQ2SEQ_DIR / "example.jsonl")
  55. # Intent detection and slot filling
  56. INTENT_DETECTION_DIR = EXAMPLE_DIR / "intent_detection"
  57. Options.register(ProjectType.INTENT_DETECTION_AND_SLOT_FILLING, JSONL, INTENT_DETECTION_DIR / "example.jsonl")
  58. # Image Classification
  59. IMAGE_CLASSIFICATION_DIR = EXAMPLE_DIR / "image_classification"
  60. Options.register(ProjectType.IMAGE_CLASSIFICATION, JSONL, IMAGE_CLASSIFICATION_DIR / "example.jsonl")
  61. BOUNDING_BOX_DIR = EXAMPLE_DIR / "bounding_box"
  62. Options.register(ProjectType.BOUNDING_BOX, JSONL, BOUNDING_BOX_DIR / "example.jsonl")
  63. SEGMENTATION_DIR = EXAMPLE_DIR / "segmentation"
  64. Options.register(ProjectType.SEGMENTATION, JSONL, SEGMENTATION_DIR / "example.jsonl")
  65. IMAGE_CAPTIONING_DIR = EXAMPLE_DIR / "image_captioning"
  66. Options.register(ProjectType.IMAGE_CAPTIONING, JSONL, IMAGE_CAPTIONING_DIR / "example.jsonl")
  67. # Speech to Text
  68. SPEECH2TEXT_DIR = EXAMPLE_DIR / "speech_to_text"
  69. Options.register(ProjectType.SPEECH2TEXT, JSONL, SPEECH2TEXT_DIR / "example.jsonl")