|
|
import abc import collections.abc import dataclasses from typing import Any, Dict, Iterator, List, Type
from .cleaners import Cleaner from .data import BaseData from .exceptions import FileParseException from .labels import Label
# Default column names.
# NOTE(review): neither constant is referenced in the visible part of this
# file; presumably they are imported by parser/builder modules — confirm.
DEFAULT_TEXT_COLUMN = "text"
DEFAULT_LABEL_COLUMN = "label"
class Record:
    """One parsed example: a data object, its labels, metadata and a line number.

    The state is held in private attributes; `data` and `label` are exposed
    as read-only properties.
    """

    def __init__(
        self,
        data: Type[BaseData],
        label: List[Label] = None,
        meta: Dict[Any, Any] = None,
        line_num: int = -1,
    ):
        """Stores the pieces that make up one example.

        Args:
            data: The data object of the example.
            label: Labels attached to the example. A new empty list is used
                when omitted.
            meta: Metadata later handed to `data.create`. A new empty dict is
                used when omitted.
            line_num: Line number reported when `clean` raises.
        """
        # NOTE(review): `data` is annotated as Type[BaseData] but is used like
        # an instance below (`.filename`, `.create(...)`) — confirm.
        self._data = data
        # Fallback containers are created per call so records never share one.
        self._label = [] if label is None else label
        self._meta = {} if meta is None else meta
        self._line_num = line_num

    def __str__(self):
        """Returns the data and the label list separated by a tab."""
        return f"{self._data}\t{self._label}"

    def clean(self, cleaner: Cleaner):
        """Replaces the labels with the cleaner's output and reports a mismatch.

        The cleaned labels are always stored. Their count is compared with the
        length of the `label` property read before the replacement; that
        property leaves out labels that have no name.

        Args:
            cleaner: Object providing `clean(labels)` and a `message`.

        Raises:
            FileParseException: If the two counts differ. It carries the
                data's filename, this record's line number and
                `cleaner.message`.
        """
        cleaned = cleaner.clean(self._label)
        cleaned_count = len(cleaned)
        # Read the property before `_label` is overwritten so that it is
        # evaluated against the old label list.
        previous_count = len(self.label)
        self._label = cleaned
        if cleaned_count == previous_count:
            return
        raise FileParseException(
            filename=self._data.filename,
            line_num=self._line_num,
            message=cleaner.message,
        )

    @property
    def data(self):
        """The data object given to the constructor."""
        return self._data

    def create_data(self, project):
        """Delegates to `data.create` with the project and this record's metadata."""
        return self._data.create(project, self._meta)

    def create_label(self, project):
        """Calls `create(project)` on every label and returns the results as a list."""
        return [item.create(project) for item in self._label]

    def create_annotation(self, user, example, mapping):
        """Calls `create_annotation(user, example, mapping)` on every label.

        Returns:
            A list with one result per label, in label order.
        """
        return [item.create_annotation(user, example, mapping) for item in self._label]

    @property
    def label(self):
        """The `dict()` form of every label that reports a name and whose name is truthy."""
        named = (item for item in self._label if item.has_name() and item.name)
        return [item.dict() for item in named]
class BaseReader(collections.abc.Iterable):
    """Abstract reader: parses files and is iterated to obtain `Record` objects.

    Subclasses must provide both `__iter__` and the `errors` property.
    """

    @abc.abstractmethod
    def __iter__(self) -> Iterator[Record]:
        """Iterates over the elements of this dataset.

        Returns:
            An iterator of `Record` objects.
        """
        raise NotImplementedError("Please implement this method in the subclass.")

    @property
    @abc.abstractmethod
    def errors(self):
        """Errors reported by the reader; must be provided by the subclass."""
        raise NotImplementedError("Please implement this method in the subclass.")
class Parser(abc.ABC):
    """Abstract base class for file parsers."""

    @abc.abstractmethod
    def parse(self, filename: str) -> Iterator[Dict[Any, Any]]:
        """Parses the given file into dictionaries.

        Args:
            filename: The file to parse.

        Returns:
            An iterator of dictionaries.
        """
        raise NotImplementedError("Please implement this method in the subclass.")

    @property
    def errors(self) -> List[FileParseException]:
        """Parsing errors; this base implementation always reports none."""
        # A new list is returned on every access.
        return []
@dataclasses.dataclass
class FileName:
    """Bundle of the three names that identify one input file."""

    # Path that `Reader.__iter__` hands to `Parser.parse`.
    full_path: str
    # NOTE(review): presumably the name assigned to the stored file — confirm.
    generated_name: str
    # NOTE(review): presumably the name the file had when uploaded — confirm.
    upload_name: str
class Builder(abc.ABC):
    """Abstract base class for objects that turn a parsed row into a `Record`."""

    @abc.abstractmethod
    def build(self, row: Dict[Any, Any], filename: FileName, line_num: int) -> Record:
        """Builds a record from one parsed row.

        Args:
            row: The dictionary to build the record from.
            filename: The `FileName` of the file the row belongs to.
            line_num: The line number associated with the row.

        Returns:
            The `Record` built from `row`.
        """
        raise NotImplementedError("Please implement this method in the subclass.")
class Reader(BaseReader):
    """Reader that parses each file with a `Parser` and builds records with a `Builder`.

    Rows whose build fails with `FileParseException` are skipped; the
    exceptions are collected and exposed through `errors`.
    """

    def __init__(self, filenames: List[FileName], parser: Parser, builder: Builder):
        """Stores the collaborators and starts with an empty builder-error list.

        Args:
            filenames: The files to read, in iteration order.
            parser: Called with each file's `full_path` to produce rows.
            builder: Called with every row to produce a `Record`.
        """
        self.filenames = filenames
        self.parser = parser
        self.builder = builder
        self._errors: List[FileParseException] = []

    def __iter__(self) -> Iterator[Record]:
        """Yields one `Record` per row that builds successfully.

        Rows are numbered from 1 and the numbering restarts for every file.
        A `FileParseException` raised by `builder.build` is appended to the
        internal error list and the row is skipped.

        NOTE: the error list is never cleared here, so iterating a second
        time appends to the same list.

        Yields:
            The records built from the parsed rows, file by file.
        """
        for filename in self.filenames:
            rows = self.parser.parse(filename.full_path)
            for line_num, row in enumerate(rows, start=1):
                # Only the build call is guarded. Yielding inside the `try`
                # would also catch a FileParseException thrown into the
                # generator at the suspension point and misreport it as a
                # builder error.
                try:
                    record = self.builder.build(row, filename, line_num)
                except FileParseException as e:
                    self._errors.append(e)
                else:
                    yield record

    @property
    def errors(self) -> List[FileParseException]:
        """Aggregates parser and builder errors.

        Returns:
            A new list holding the parser's errors followed by the builder
            errors collected during iteration.
        """
        return self.parser.errors + self._errors
|