You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

103 lines
2.6 KiB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
  1. from collections import defaultdict
  2. from typing import Dict, List, Type
  3. from pydantic import BaseModel
  4. from typing_extensions import Literal
  5. from . import examples
  6. from projects.models import (
  7. DOCUMENT_CLASSIFICATION,
  8. IMAGE_CLASSIFICATION,
  9. INTENT_DETECTION_AND_SLOT_FILLING,
  10. SEQ2SEQ,
  11. SEQUENCE_LABELING,
  12. SPEECH2TEXT,
  13. )
  14. class Format:
  15. name = ""
  16. @classmethod
  17. def dict(cls):
  18. return {
  19. "name": cls.name,
  20. }
  21. class CSV(Format):
  22. name = "CSV"
  23. extension = "csv"
  24. class FastText(Format):
  25. name = "fastText"
  26. extension = "txt"
  27. class JSON(Format):
  28. name = "JSON"
  29. extension = "json"
  30. class JSONL(Format):
  31. name = "JSONL"
  32. extension = "jsonl"
  33. class IntentAndSlot(Format):
  34. name = "JSONL(intent and slot)"
  35. extension = "jsonl"
  36. class JSONLRelation(Format):
  37. name = "JSONL(relation)"
  38. extension = "jsonl"
  39. class OptionDelimiter(BaseModel):
  40. delimiter: Literal[",", "\t", ";", "|", " "] = ","
  41. class OptionNone(BaseModel):
  42. pass
  43. class Options:
  44. options: Dict[str, List] = defaultdict(list)
  45. @classmethod
  46. def filter_by_task(cls, task_name: str):
  47. options = cls.options[task_name]
  48. return [
  49. {**file_format.dict(), **option.schema(), "example": example} for file_format, option, example in options
  50. ]
  51. @classmethod
  52. def register(cls, task: str, file_format: Type[Format], option: Type[BaseModel], example: str):
  53. cls.options[task].append((file_format, option, example))
  54. # Text Classification
  55. Options.register(DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, examples.Category_CSV)
  56. Options.register(DOCUMENT_CLASSIFICATION, FastText, OptionNone, examples.Category_fastText)
  57. Options.register(DOCUMENT_CLASSIFICATION, JSON, OptionNone, examples.Category_JSON)
  58. Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.Category_JSONL)
  59. # Sequence Labeling
  60. Options.register(SEQUENCE_LABELING, JSONL, OptionNone, examples.Offset_JSONL)
  61. Options.register(SEQUENCE_LABELING, JSONLRelation, OptionNone, examples.ENTITY_AND_RELATION_JSONL)
  62. # Sequence to sequence
  63. Options.register(SEQ2SEQ, CSV, OptionDelimiter, examples.Text_CSV)
  64. Options.register(SEQ2SEQ, JSON, OptionNone, examples.Text_JSON)
  65. Options.register(SEQ2SEQ, JSONL, OptionNone, examples.Text_JSONL)
  66. # Intent detection and slot filling
  67. Options.register(INTENT_DETECTION_AND_SLOT_FILLING, IntentAndSlot, OptionNone, examples.INTENT_JSONL)
  68. # Image Classification
  69. Options.register(IMAGE_CLASSIFICATION, JSONL, OptionNone, examples.CategoryImageClassification)
  70. # Speech to Text
  71. Options.register(SPEECH2TEXT, JSONL, OptionNone, examples.Speech2Text)