class Dataset:
    """Iterable, export-oriented view over examples and their labels.

    Iterating a ``Dataset`` yields one flat dictionary per example that
    merges, in this order: the example's ``id`` and ``text`` (under the keys
    ``"id"`` and ``"data"``), the example's metadata, and the mapping returned
    by the label collection for that example.
    """

    # The Django/project types are written as quoted forward references so
    # they are not evaluated when the class body is executed.
    def __init__(
        self,
        examples: "QuerySet[Example]",
        user,
        label_collection_class: "Type[Labels]",
        confirmed_only=False,
    ):
        """Store the examples to export and build their label collection.

        Args:
            examples: Examples to export. Must support ``.filter()`` when
                ``confirmed_only`` is true.
            user: Forwarded to the ``states__confirmed_by`` filter and to
                ``label_collection_class``.
            label_collection_class: Called as
                ``label_collection_class(examples, user)``; the resulting
                object must provide ``find_by(example_id)``.
            confirmed_only: When true, restrict ``examples`` with
                ``states__confirmed_by=user`` before anything else uses them.
        """
        if confirmed_only:
            examples = examples.filter(states__confirmed_by=user)
        self.examples = examples
        # Built from the (possibly filtered) examples, not the raw argument.
        self.labels = label_collection_class(examples, user)

    def __iter__(self) -> Iterator[Dict[str, Any]]:
        """Yield one merged ``dict`` per example.

        Later sources win on key collisions: metadata keys can override
        ``"id"``/``"data"``, and label keys can override both.
        """
        for example in self.examples:
            # An example without metadata (``None``) contributes no extra
            # keys instead of breaking the whole export on ``**None``.
            meta = example.meta or {}
            yield {
                "id": example.id,
                "data": example.text,
                **meta,
                **self.labels.find_by(example.id),
            }

    def to_pandas(self) -> pd.DataFrame:
        """Return all rows produced by iterating this dataset as a DataFrame."""
        return pd.DataFrame(self)