You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

95 lines
2.4 KiB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
  1. from collections import defaultdict
  2. from typing import Dict, List, Type
  3. from pydantic import BaseModel
  4. from typing_extensions import Literal
  5. from . import examples
  6. from projects.models import (
  7. DOCUMENT_CLASSIFICATION,
  8. IMAGE_CLASSIFICATION,
  9. INTENT_DETECTION_AND_SLOT_FILLING,
  10. SEQ2SEQ,
  11. SEQUENCE_LABELING,
  12. SPEECH2TEXT,
  13. )
  class Format:
      """Base class for a data file format descriptor.

      Subclasses override ``name`` as a class attribute. ``dict`` is a
      classmethod, so descriptors are queried on the class itself and never
      need to be instantiated.
      """

      # Display name of the format; each subclass sets its own value.
      name = ""

      @classmethod
      def dict(cls):
          """Return the format's metadata as a plain dictionary.

          Returns:
              dict: ``{"name": cls.name}`` for the class the method is
              called on, so subclasses report their own name.
          """
          return {
              "name": cls.name,
          }
  class CSV(Format):
      """Descriptor for the CSV format (``.csv`` files)."""

      name = "CSV"
      extension = "csv"
  class FastText(Format):
      """Descriptor for the fastText format, which uses the ``txt`` extension."""

      name = "fastText"
      extension = "txt"
  class JSON(Format):
      """Descriptor for the JSON format (``.json`` files)."""

      name = "JSON"
      extension = "json"
  class JSONL(Format):
      """Descriptor for the JSONL format (``.jsonl`` files)."""

      name = "JSONL"
      extension = "jsonl"
  class IntentAndSlot(Format):
      """Descriptor for the intent-and-slot JSONL variant.

      Shares the ``jsonl`` extension with ``JSONL`` but is listed under its
      own display name.
      """

      name = "JSONL(intent and slot)"
      extension = "jsonl"
  # Option model for formats that take a field delimiter.
  # NOTE: documented with comments rather than a docstring on purpose: pydantic
  # copies a model's docstring into ``schema()`` as "description", and the
  # schema of this class is merged into the output of Options.filter_by_task.
  class OptionDelimiter(BaseModel):
      # Allowed delimiters: comma, tab, semicolon, pipe or a single space.
      # Defaults to a comma.
      delimiter: Literal[",", "\t", ";", "|", " "] = ","
  # Option model with no fields, for formats that take no options.
  # NOTE: kept docstring-free on purpose: pydantic copies a model's docstring
  # into ``schema()`` as "description", which would alter the schema output.
  class OptionNone(BaseModel):
      pass
  class Options:
      """Registry of the (format, option, example) triples available per task.

      All state lives on the class; both methods are classmethods, so the
      registry is used without creating instances.
      """

      # Maps a task name to the list of (format, option, example) tuples
      # registered for it, in registration order.
      options: Dict[str, List] = defaultdict(list)

      @classmethod
      def filter_by_task(cls, task_name: str):
          """Describe every format registered for ``task_name``.

          Args:
              task_name: Task name that was used as ``task`` in ``register``.

          Returns:
              list: One dictionary per registration, in registration order,
              built by merging ``format.dict()``, ``option.schema()`` and an
              ``"example"`` entry. Empty if nothing is registered for the task.
          """
          # ``options`` is a defaultdict: indexing it with an unknown task would
          # silently insert an empty entry into the shared registry. ``get``
          # performs a read-only lookup instead.
          registered = cls.options.get(task_name, [])
          return [
              {**fmt.dict(), **option.schema(), "example": example}
              for fmt, option, example in registered
          ]

      @classmethod
      def register(cls, task: str, format: "Type[Format]", option: "Type[BaseModel]", example: str):
          """Add a format to the list of those available for ``task``.

          Args:
              task: Task name to register the format under.
              format: Format descriptor class; its ``dict()`` is called by
                  ``filter_by_task``.
              option: Option model class; its ``schema()`` is called by
                  ``filter_by_task``.
              example: Example content returned alongside the format.
          """
          cls.options[task].append((format, option, example))
  # Populate the registry at import time. Formats are returned by
  # Options.filter_by_task in the order they are registered here.

  # Text Classification
  Options.register(DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, examples.Category_CSV)
  Options.register(DOCUMENT_CLASSIFICATION, FastText, OptionNone, examples.Category_fastText)
  Options.register(DOCUMENT_CLASSIFICATION, JSON, OptionNone, examples.Category_JSON)
  Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.Category_JSONL)

  # Sequence Labeling
  Options.register(SEQUENCE_LABELING, JSONL, OptionNone, examples.Offset_JSONL)

  # Sequence to sequence
  Options.register(SEQ2SEQ, CSV, OptionDelimiter, examples.Text_CSV)
  Options.register(SEQ2SEQ, JSON, OptionNone, examples.Text_JSON)
  Options.register(SEQ2SEQ, JSONL, OptionNone, examples.Text_JSONL)

  # Intent detection and slot filling
  Options.register(
      INTENT_DETECTION_AND_SLOT_FILLING,
      IntentAndSlot,
      OptionNone,
      examples.INTENT_JSONL,
  )

  # Image Classification
  Options.register(IMAGE_CLASSIFICATION, JSONL, OptionNone, examples.CategoryImageClassification)

  # Speech to Text
  Options.register(SPEECH2TEXT, JSONL, OptionNone, examples.Speech2Text)