You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

91 lines
2.3 KiB

  1. from collections import defaultdict
  2. from typing import Dict, List, Type
  3. from pydantic import BaseModel
  4. from typing_extensions import Literal
  5. from ...models import (DOCUMENT_CLASSIFICATION, IMAGE_CLASSIFICATION, SEQ2SEQ,
  6. SEQUENCE_LABELING, SPEECH2TEXT)
  7. from . import examples
  8. class Format:
  9. name = ''
  10. @classmethod
  11. def dict(cls):
  12. return {
  13. 'name': cls.name,
  14. }
  15. class CSV(Format):
  16. name = 'CSV'
  17. extension = 'csv'
  18. class FastText(Format):
  19. name = 'fastText'
  20. extension = 'txt'
  21. class JSON(Format):
  22. name = 'JSON'
  23. extension = 'json'
  24. class JSONL(Format):
  25. name = 'JSONL'
  26. extension = 'jsonl'
# Option model with a single `delimiter` field.
# NOTE: documented with `#` comments on purpose -- Options.filter_by_task merges
# this model's schema() into its output, and a class docstring would presumably
# be emitted there as a `description` entry (confirm against the pydantic
# version in use).
class OptionDelimiter(BaseModel):
    # Allowed values: comma, tab, semicolon, pipe, or a single space.
    # Defaults to comma.
    delimiter: Literal[',', '\t', ';', '|', ' '] = ','
# Option model that declares no fields; registered below for formats that take
# no extra options.
# NOTE: documented with `#` comments on purpose -- a class docstring would
# presumably show up in schema() output (confirm against the pydantic version
# in use).
class OptionNone(BaseModel):
    pass
  31. class Options:
  32. options: Dict[str, List] = defaultdict(list)
  33. @classmethod
  34. def filter_by_task(cls, task_name: str):
  35. options = cls.options[task_name]
  36. return [
  37. {
  38. **format.dict(),
  39. **option.schema(),
  40. 'example': example
  41. } for format, option, example in options
  42. ]
  43. @classmethod
  44. def register(cls,
  45. task: str,
  46. format: Type[Format],
  47. option: Type[BaseModel],
  48. example: str):
  49. cls.options[task].append((format, option, example))
  50. # Text Classification
  51. Options.register(DOCUMENT_CLASSIFICATION, CSV, OptionDelimiter, examples.Category_CSV)
  52. Options.register(DOCUMENT_CLASSIFICATION, FastText, OptionNone, examples.Category_fastText)
  53. Options.register(DOCUMENT_CLASSIFICATION, JSON, OptionNone, examples.Category_JSON)
  54. Options.register(DOCUMENT_CLASSIFICATION, JSONL, OptionNone, examples.Category_JSONL)
  55. # Sequence Labeling
  56. Options.register(SEQUENCE_LABELING, JSONL, OptionNone, examples.Offset_JSONL)
  57. # Sequence to sequence
  58. Options.register(SEQ2SEQ, CSV, OptionDelimiter, examples.Text_CSV)
  59. Options.register(SEQ2SEQ, JSON, OptionNone, examples.Text_JSON)
  60. Options.register(SEQ2SEQ, JSONL, OptionNone, examples.Text_JSONL)
  61. # Image Classification
  62. Options.register(IMAGE_CLASSIFICATION, JSONL, OptionNone, examples.CategoryImageClassification)
  63. # Speech to Text
  64. Options.register(SPEECH2TEXT, JSONL, OptionNone, examples.Speech2Text)