-
Notifications
You must be signed in to change notification settings - Fork 0
/
distribution.py
42 lines (31 loc) · 1.06 KB
/
distribution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import bisect
import itertools
import random
def zipf(n):
    """
    Returns the theoretical Zipf distribution over `n` ranked elements.

    The element of rank `k` (1-based) is given probability `1 / (k * H_n)`,
    where `H_n = 1 + 1/2 + ... + 1/n` is the n-th harmonic number. The
    result is a list of `n` floats that already sums to 1 (up to
    floating-point rounding). A non-positive `n` yields an empty list.
    """
    # Normalization constant: the n-th harmonic number.
    harmonic = sum(1 / rank for rank in range(1, n + 1))

    probabilities = []
    for rank in range(1, n + 1):
        probabilities.append(1 / (rank * harmonic))
    return probabilities
def sample_distr(distr, N):
    """
    Draws N samples of data following the given distribution and returns
    the sampled (empirical) distribution.

    **Note**: a distribution is the list of the normalized probabilities for
    each element in the universe. The probabilities are assumed to be
    non-negative, so that their running sum is non-decreasing.

    Parameters:
        distr: sequence of probabilities, one per element of the universe.
        N: number of samples to draw.

    Returns:
        A list with one entry per element of `distr`, holding the fraction
        of the `N` samples that landed on that element. An empty `distr`
        gives `[]`; a non-positive `N` gives a list of zeros, since no
        sample is drawn and there is nothing to normalize.
    """
    # `len()` rather than truthiness so array-like inputs are accepted too.
    size = len(distr)
    if size == 0:
        return []
    if N <= 0:
        return [0.0] * size

    # Build the cumulative distribution.
    cumulative_distr = list(itertools.accumulate(distr))
    last = size - 1

    # Randomly draw `N` samples by inverting the cumulative distribution.
    counts = [0] * size
    for _ in range(N):
        x = random.random()
        # First index whose cumulative probability is >= x.
        i = bisect.bisect_left(cumulative_distr, x)
        # Rounding in the running sum can leave the final cumulative value
        # slightly below 1; a draw above it belongs to the last element
        # rather than being silently discarded.
        counts[min(i, last)] += 1

    # Normalize the sampled distribution.
    return [count / N for count in counts]