Browse Source

Add sampling without replacement strategy

pull/2261/head
Hironsan 1 year ago
parent
commit
e104cd3c5e
2 changed files with 18 additions and 1 deletions
  1. 0
      backend/examples/assignment/__init__.py
  2. 19
      backend/examples/assignment/strategies.py

0
backend/examples/assignment/__init__.py

19
backend/examples/assignment/strategies.py

@ -1,5 +1,6 @@
import abc
import dataclasses
import random
from typing import List
import numpy as np
@ -17,7 +18,7 @@ class BaseStrategy(abc.ABC):
...
class WeightedRandomStrategy:
class WeightedRandomStrategy(BaseStrategy):
def __init__(self, dataset_size: int, weights: List[int]):
assert sum(weights) == 100
self.dataset_size = dataset_size
@ -27,3 +28,19 @@ class WeightedRandomStrategy:
proba = np.array(self.weights) / 100
assignees = np.random.choice(range(len(self.weights)), size=self.dataset_size, p=proba)
return [Assignment(user=user, example=example) for example, user in enumerate(assignees)]
class SamplingWithoutReplacementStrategy(BaseStrategy):
    """Assign every user a random, duplicate-free share of the dataset.

    Each user ``i`` receives ``weights[i]`` percent of the examples, drawn
    without replacement, so a single user never gets the same example twice.
    Users are sampled independently of one another, so the same example may
    be assigned to several users.
    """

    def __init__(self, dataset_size: int, weights: List[int]):
        """Store the dataset size and the per-user percentages.

        Args:
            dataset_size: Number of examples available for assignment.
            weights: Percentage (0-100) of the dataset for each user; the
                list index is used as the user identifier.

        Raises:
            ValueError: If any weight lies outside the range 0-100.
        """
        # Validate each weight rather than only their sum: a single weight
        # above 100 (or below 0) would otherwise slip through and only fail
        # later inside random.sample. Raising instead of asserting keeps the
        # check active when Python runs with -O.
        if not all(0 <= weight <= 100 for weight in weights):
            raise ValueError("Each weight must be between 0 and 100.")
        self.dataset_size = dataset_size
        self.weights = weights

    def assign(self) -> List[Assignment]:
        """Return the assignments for all users.

        Returns:
            A list of ``Assignment`` objects, grouped by user in the order of
            ``weights``. User ``i`` gets ``dataset_size * weights[i] // 100``
            distinct example indices.
        """
        assignments = []
        for user, weight in enumerate(self.weights):
            # Integer arithmetic avoids float rounding: 100 * (29 / 100)
            # evaluates to 28.999999999999996 and would truncate to 28.
            count = int(self.dataset_size * weight // 100)
            examples = random.sample(range(self.dataset_size), count)
            assignments.extend(Assignment(user=user, example=example) for example in examples)
        return assignments
Loading…
Cancel
Save