diff --git a/backend/examples/assignment/__init__.py b/backend/examples/assignment/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/examples/assignment/strategies.py b/backend/examples/assignment/strategies.py
index 36b52df8..eb3e8095 100644
--- a/backend/examples/assignment/strategies.py
+++ b/backend/examples/assignment/strategies.py
@@ -1,5 +1,6 @@
 import abc
 import dataclasses
+import random
 from typing import List
 
 import numpy as np
@@ -17,7 +18,7 @@ class BaseStrategy(abc.ABC):
         ...
 
 
-class WeightedRandomStrategy:
+class WeightedRandomStrategy(BaseStrategy):
     def __init__(self, dataset_size: int, weights: List[int]):
         assert sum(weights) == 100
         self.dataset_size = dataset_size
@@ -27,3 +28,37 @@ class WeightedRandomStrategy:
         proba = np.array(self.weights) / 100
         assignees = np.random.choice(range(len(self.weights)), size=self.dataset_size, p=proba)
         return [Assignment(user=user, example=example) for example, user in enumerate(assignees)]
+
+
+class SamplingWithoutReplacementStrategy(BaseStrategy):
+    """Assign each user a random, duplicate-free share of the dataset.
+
+    ``weights[i]`` is the percentage of the dataset given to user ``i``.
+    Shares are sampled independently per user, so one example can go to
+    several users but never twice to the same user.
+    """
+
+    def __init__(self, dataset_size: int, weights: List[int]):
+        """Store the dataset size and the per-user percentages.
+
+        Raises:
+            ValueError: If any single weight lies outside 0..100.
+        """
+        assert 0 <= sum(weights) <= 100 * len(weights)
+        # The sum check alone lets through e.g. [150, 0], which would ask
+        # random.sample for more examples than exist (or a negative count).
+        if any(not 0 <= weight <= 100 for weight in weights):
+            raise ValueError("Each weight must be between 0 and 100.")
+        self.dataset_size = dataset_size
+        self.weights = weights
+
+    def assign(self) -> List[Assignment]:
+        """Return one Assignment per (user, sampled example index) pair."""
+        assignments = []
+        for user, weight in enumerate(self.weights):
+            # Multiply before dividing: int(100 * 0.29) truncates to 28,
+            # whereas 100 * 29 // 100 gives the intended 29.
+            count = int(self.dataset_size * weight // 100)
+            examples = random.sample(range(self.dataset_size), count)
+            assignments.extend(Assignment(user=user, example=example) for example in examples)
+        return assignments