|
|
@ -1,5 +1,6 @@ |
|
|
|
import abc |
|
|
|
import dataclasses |
|
|
|
import random |
|
|
|
from typing import List |
|
|
|
|
|
|
|
import numpy as np |
|
|
@ -17,7 +18,7 @@ class BaseStrategy(abc.ABC): |
|
|
|
... |
|
|
|
|
|
|
|
|
|
|
|
class WeightedRandomStrategy: |
|
|
|
class WeightedRandomStrategy(BaseStrategy): |
|
|
|
def __init__(self, dataset_size: int, weights: List[int]): |
|
|
|
assert sum(weights) == 100 |
|
|
|
self.dataset_size = dataset_size |
|
|
@ -27,3 +28,19 @@ class WeightedRandomStrategy: |
|
|
|
proba = np.array(self.weights) / 100 |
|
|
|
assignees = np.random.choice(range(len(self.weights)), size=self.dataset_size, p=proba) |
|
|
|
return [Assignment(user=user, example=example) for example, user in enumerate(assignees)] |
|
|
|
|
|
|
|
|
|
|
|
class SamplingWithoutReplacementStrategy(BaseStrategy):
    """Give every user a random, duplicate-free share of the dataset.

    ``weights[i]`` is the percentage of the dataset handed to user ``i``.
    Each user's examples are drawn without replacement, but users are
    sampled independently of one another, so the same example can be
    assigned to more than one user.
    """

    def __init__(self, dataset_size: int, weights: List[int]):
        """Store the dataset size and the per-user percentages.

        Args:
            dataset_size: Number of examples; examples are identified by
                their index in ``range(dataset_size)``.
            weights: One percentage per user; the position in the list is
                used as the user identifier in the resulting assignments.

        Raises:
            AssertionError: If the sum of the weights is negative or larger
                than ``100 * len(weights)`` (only while assertions are
                enabled).
        """
        assert 0 <= sum(weights) <= 100 * len(weights)
        self.dataset_size = dataset_size
        self.weights = weights

    def assign(self) -> List[Assignment]:
        """Sample the examples for every user.

        Returns:
            A flat list of assignments, grouped by user in the order of
            ``weights``. User ``i`` receives
            ``int(dataset_size * weights[i] / 100)`` distinct examples.

        Raises:
            ValueError: Raised by ``random.sample`` when a weight yields a
                count that is negative or larger than ``dataset_size``.
        """
        assignments = []
        for user, weight in enumerate(self.weights):
            # Multiply before dividing: turning the weight into a fraction
            # first (e.g. 29 / 100) and then scaling it loses an example to
            # binary rounding (0.29 * 100 == 28.999999999999996 -> 28).
            count = int(self.dataset_size * weight / 100)
            examples = random.sample(range(self.dataset_size), count)
            assignments.extend(Assignment(user=user, example=example) for example in examples)
        return assignments