mirror of https://github.com/doccano/doccano.git
python, datasets, active-learning, text-annotation, dataset, natural-language-processing, data-labeling, machine-learning, annotation-tool
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
30 lines
968 B
30 lines
968 B
from typing import Any, Dict, Iterator, List
|
|
|
|
import pandas as pd
|
|
from django.db.models.query import QuerySet
|
|
|
|
from .comments import Comments
|
|
from .labels import Labels
|
|
from data_export.models import ExportedExample
|
|
|
|
|
|
class Dataset:
    """Iterable that joins exported examples with their labels and comments.

    Iterating over an instance yields one dictionary per example.
    ``to_dataframe`` gathers those dictionaries into a pandas ``DataFrame``.
    """

    def __init__(
        self, examples: QuerySet[ExportedExample], labels: List[Labels], comments: List[Comments], is_text_project=True
    ):
        """Keep references to the sources consulted while iterating.

        Args:
            examples: Examples to export; each one is asked for ``to_dict``
                and ``id`` during iteration.
            labels: Collections queried with ``find_by(example.id)``.
            comments: Collections queried with ``find_by(example.id)``.
            is_text_project: Flag passed through unchanged to
                ``example.to_dict``.
        """
        self.is_text_project = is_text_project
        self.examples = examples
        self.labels = labels
        self.comments = comments

    def __iter__(self) -> Iterator[Dict[str, Any]]:
        """Yield one merged record per example, in the order of ``examples``."""
        for item in self.examples:
            yield self._merge_record(item)

    def _merge_record(self, example) -> Dict[str, Any]:
        """Build the record for a single example.

        Starts from ``example.to_dict`` and then folds in the result of every
        ``find_by`` lookup. Labels are applied before comments, so on a key
        collision the entry merged last wins.
        """
        record = example.to_dict(self.is_text_project)
        for label_source in self.labels:
            found = label_source.find_by(example.id)
            record.update(**found)
        for comment_source in self.comments:
            found = comment_source.find_by(example.id)
            record.update(**found)
        return record

    def to_dataframe(self) -> pd.DataFrame:
        """Return every record produced by iterating this dataset as a DataFrame."""
        frame = pd.DataFrame(data=self)
        return frame
|