-
-
Notifications
You must be signed in to change notification settings - Fork 50.4k
Expand file tree
/
Copy pathreservoir_sampling.py
More file actions
37 lines (28 loc) · 832 Bytes
/
reservoir_sampling.py
File metadata and controls
37 lines (28 loc) · 832 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import random
from typing import List, Iterator, TypeVar
T = TypeVar("T")
def reservoir_sampling(stream: Iterator[T], k: int) -> List[T]:
"""
Randomly select k items from a stream of unknown length using reservoir sampling.
Args:
stream (Iterator[T]): Input data stream.
k (int): Number of items to sample.
Returns:
List[T]: List of k randomly sampled items.
Examples:
>>> stream = iter(range(1, 11))
>>> len(reservoir_sampling(stream, 5))
5
"""
reservoir: List[T] = []
for i, item in enumerate(stream):
if i < k:
reservoir.append(item)
else:
j = random.randint(0, i)
if j < k:
reservoir[j] = item
return reservoir
if __name__ == "__main__":
import doctest
doctest.testmod()