You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

30 lines
968 B

  1. from typing import Any, Dict, Iterator
  2. import pandas as pd
  3. from django.db.models.query import QuerySet
  4. from .labels import Labels
  5. from examples.models import Example
  6. def filter_examples(examples: QuerySet[Example], is_collaborative=False, confirmed_only=False, user=None):
  7. if is_collaborative and confirmed_only:
  8. return examples.exclude(states=None)
  9. elif not is_collaborative and confirmed_only:
  10. assert user is not None
  11. return examples.filter(states__confirmed_by=user)
  12. else:
  13. return examples
  14. class Dataset:
  15. def __init__(self, examples: QuerySet[Example], labels: Labels):
  16. self.examples = examples
  17. self.labels = labels
  18. def __iter__(self) -> Iterator[Dict[str, Any]]:
  19. for example in self.examples:
  20. yield {"id": example.id, "data": example.text, **example.meta, **self.labels.find_by(example.id)}
  21. def to_dataframe(self) -> pd.DataFrame:
  22. return pd.DataFrame(self)