import abc
import collections.abc
from typing import Any, Dict, Iterator, List, Optional, Type

from .cleaners import Cleaner
from .data import BaseData
from .exceptions import FileParseException
from .labels import Label

DEFAULT_TEXT_COLUMN = "text"
DEFAULT_LABEL_COLUMN = "label"


class Record:
    """A single parsed item: its data, its labels, metadata and source line."""

    def __init__(
        self,
        data: Type[BaseData],
        label: Optional[List[Label]] = None,
        meta: Optional[Dict[Any, Any]] = None,
        line_num: int = -1,
    ):
        """Stores the record's components.

        Args:
            data: The data object this record wraps.
                NOTE(review): annotated as ``Type[BaseData]`` but used like an
                instance below (``.filename``, ``.create(...)``) -- confirm.
            label: Labels attached to the data. ``None`` becomes a new empty list.
            meta: Metadata passed to ``data.create``. ``None`` becomes a new
                empty dict.
            line_num: Line number reported in parse errors raised by ``clean``.
        """
        # Fresh containers per instance; never share a mutable default.
        if label is None:
            label = []
        if meta is None:
            meta = {}
        self._data = data
        self._label = label
        self._meta = meta
        self._line_num = line_num

    def __str__(self):
        return f"{self._data}\t{self._label}"

    def clean(self, cleaner: Cleaner):
        """Replaces the labels with the cleaner's output.

        The cleaned labels are stored before the check, so they are kept even
        when the exception is raised.

        Raises:
            FileParseException: If the number of cleaned labels differs from
                the number of entries in ``self.label``.
        """
        label = cleaner.clean(self._label)
        # NOTE(review): the comparison uses the ``label`` property (serialized,
        # name-filtered view), not ``self._label`` -- confirm this is intended.
        changed = len(label) != len(self.label)
        self._label = label
        if changed:
            raise FileParseException(filename=self._data.filename, line_num=self._line_num, message=cleaner.message)

    @property
    def data(self):
        """The wrapped data object."""
        return self._data

    def create_data(self, project):
        """Delegates to ``data.create`` with the project and this record's metadata."""
        return self._data.create(project, self._meta)

    def create_label(self, project):
        """Calls ``create(project)`` on every label and returns the results."""
        return [label.create(project) for label in self._label]

    def create_annotation(self, user, example, mapping):
        """Calls ``create_annotation`` on every label and returns the results."""
        return [label.create_annotation(user, example, mapping) for label in self._label]

    @property
    def label(self):
        """``dict()`` of each label that reports a name and whose name is truthy."""
        return [label.dict() for label in self._label if label.has_name() and label.name]


class BaseReader(collections.abc.Iterable):
    """Interface for readers that parse files and iterate over ``Record`` objects."""

    @abc.abstractmethod
    def __iter__(self) -> Iterator[Record]:
        """Creates an iterator for elements of this dataset.

        Returns:
            An iterator over the `Record` elements of this dataset.
        """
        raise NotImplementedError("Please implement this method in the subclass.")

    @property
    @abc.abstractmethod
    def errors(self) -> List[FileParseException]:
        """Returns the errors collected by the reader."""
        raise NotImplementedError("Please implement this method in the subclass.")


class Parser(abc.ABC):
    """The abstract file parser."""

    @abc.abstractmethod
    def parse(self, filename: str) -> Iterator[Dict[Any, Any]]:
        """Parses the file and returns an iterator of row dictionaries."""
        raise NotImplementedError("Please implement this method in the subclass.")

    @property
    def errors(self) -> List[FileParseException]:
        """Returns parsing errors; the base implementation reports none."""
        return []


class Builder(abc.ABC):
    """The abstract Record builder."""

    @abc.abstractmethod
    def build(self, row: Dict[Any, Any], filename: str, line_num: int) -> Record:
        """Builds the record from the dictionary."""
        raise NotImplementedError("Please implement this method in the subclass.")


class Reader(BaseReader):
    """Reads files with a ``Parser`` and turns each row into a ``Record`` with a ``Builder``."""

    def __init__(self, filenames: List[str], parser: Parser, builder: Builder):
        """Stores the collaborators and starts with an empty builder-error list.

        Args:
            filenames: Files to parse, in iteration order.
            parser: Produces row dictionaries for each file.
            builder: Converts a row dictionary into a ``Record``.
        """
        self.filenames = filenames
        self.parser = parser
        self.builder = builder
        self._errors: List[FileParseException] = []

    def __iter__(self) -> Iterator[Record]:
        """Yields one ``Record`` per row that the builder accepts.

        Rows are numbered from 1 within each file. A row whose build raises
        ``FileParseException`` is skipped and the exception is appended to the
        reader's error list, which is not cleared between iterations.
        """
        for filename in self.filenames:
            rows = self.parser.parse(filename)
            for line_num, row in enumerate(rows, start=1):
                # Only guard the build step: the yield stays outside the try so
                # exceptions coming from the consumer are never recorded as
                # builder errors.
                try:
                    record = self.builder.build(row, filename, line_num)
                except FileParseException as e:
                    self._errors.append(e)
                else:
                    yield record

    @property
    def errors(self) -> List[FileParseException]:
        """Aggregates parser and builder errors."""
        errors = self.parser.errors + self._errors
        return errors