You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

24 lines
763 B

  1. from typing import Any, Dict, Iterator, List
  2. import pandas as pd
  3. from django.db.models.query import QuerySet
  4. from .labels import Labels
  5. from data_export.models import ExportedExample
  6. class Dataset:
  7. def __init__(self, examples: QuerySet[ExportedExample], labels: List[Labels], is_text_project=True):
  8. self.examples = examples
  9. self.labels = labels
  10. self.is_text_project = is_text_project
  11. def __iter__(self) -> Iterator[Dict[str, Any]]:
  12. for example in self.examples:
  13. data = example.to_dict(self.is_text_project)
  14. for labels in self.labels:
  15. data.update(**labels.find_by(example.id))
  16. yield data
  17. def to_dataframe(self) -> pd.DataFrame:
  18. return pd.DataFrame(self)