You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

100 lines
2.6 KiB

  1. from collections import defaultdict
  2. from typing import Dict, List, Type
  3. from pydantic import BaseModel
  4. from typing_extensions import Literal
  5. from api.models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION,
  6. INTENT_DETECTION_AND_SLOT_FILLING, SEQ2SEQ,
  7. SEQUENCE_LABELING, SPEECH2TEXT)
  8. from . import examples
  9. class Format:
  10. name = ''
  11. @classmethod
  12. def dict(cls):
  13. return {
  14. 'name': cls.name,
  15. }
  16. class CSV(Format):
  17. name = 'CSV'
  18. extension = 'csv'
  19. class FastText(Format):
  20. name = 'fastText'
  21. extension = 'txt'
  22. class JSON(Format):
  23. name = 'JSON'
  24. extension = 'json'
  25. class JSONL(Format):
  26. name = 'JSONL'
  27. extension = 'jsonl'
  28. class IntentAndSlot(Format):
  29. name = 'JSONL(intent and slot)'
  30. extension = 'jsonl'
  31. class OptionDelimiter(BaseModel):
  32. delimiter: Literal[',', '\t', ';', '|', ' '] = ','
  33. class OptionNone(BaseModel):
  34. pass
  35. class Options:
  36. options: Dict[str, List] = defaultdict(list)
  37. @classmethod
  38. def filter_by_task(cls, task_name: str):
  39. options = cls.options[task_name]
  40. return [
  41. {
  42. **format.dict(),
  43. **option.schema(),
  44. 'example': example
  45. } for format, option, example in options
  46. ]
  47. @classmethod
  48. def register(cls,
  49. task: str,
  50. format: Type[Format],
  51. option: Type[BaseModel],
  52. example: str):
  53. cls.options[task].append((format, option, example))
  54. # Text Classification
  55. Options.register(DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, examples.Category_CSV)
  56. Options.register(DOCUMENT_CLASSIFICATION, FastText, OptionNone, examples.Category_fastText)
  57. Options.register(DOCUMENT_CLASSIFICATION, JSON, OptionNone, examples.Category_JSON)
  58. Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.Category_JSONL)
  59. # Sequence Labeling
  60. Options.register(SEQUENCE_LABELING, JSONL, OptionNone, examples.Offset_JSONL)
  61. # Sequence to sequence
  62. Options.register(SEQ2SEQ, CSV, OptionDelimiter, examples.Text_CSV)
  63. Options.register(SEQ2SEQ, JSON, OptionNone, examples.Text_JSON)
  64. Options.register(SEQ2SEQ, JSONL, OptionNone, examples.Text_JSONL)
  65. # Intent detection and slot filling
  66. Options.register(INTENT_DETECTION_AND_SLOT_FILLING, IntentAndSlot, OptionNone, examples.INTENT_JSONL)
  67. # Image Classification
  68. Options.register(IMAGE_CLASSIFICATION, JSONL, OptionNone, examples.CategoryImageClassification)
  69. # Speech to Text
  70. Options.register(SPEECH2TEXT, JSONL, OptionNone, examples.Speech2Text)