forked from splatlab/iceberghashtable
-
Notifications
You must be signed in to change notification settings - Fork 0
/
compute_cdf.py
54 lines (37 loc) · 1.1 KB
/
compute_cdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import sys
import numpy as np
import matplotlib.pyplot as plt
# Percentile ranks (in percent) printed for the input samples, in this order.
PERCENTILES = (50, 95, 99, 99.9, 99.99)

# Choose how many bins you want here
num_bins = 100


def load_samples(path):
    """Load the numeric samples stored in the text file *path*.

    ``ndmin=1`` keeps a file that holds a single value from collapsing into
    a 0-d array, so callers can always rely on ``.size`` and on iteration.
    """
    return np.loadtxt(path, ndmin=1)


def latency_percentiles(data, ranks=PERCENTILES):
    """Return the percentile of *data* at every rank in *ranks*, in order.

    Each rank is evaluated with its own ``np.percentile`` call so the
    returned scalars print exactly as they would when computed one by one.
    """
    return [np.percentile(data, rank) for rank in ranks]


def compute_cdf(data, bins=num_bins):
    """Return ``(upper_edges, cdf)`` for *data* over log-spaced bins.

    *bins* is the number of histogram bins, so ``bins + 1`` edges are
    generated between the smallest and the largest sample.  ``cdf[i]`` is
    the fraction of samples that are <= ``upper_edges[i]``.

    Raises:
        ValueError: if any sample is <= 0, because the bin edges are spaced
            logarithmically.
    """
    lo = np.min(data)
    hi = np.max(data)
    if lo <= 0:
        raise ValueError("log-spaced CDF bins need strictly positive samples")

    edges = np.logspace(np.log10(lo), np.log10(hi), bins + 1)
    # 10 ** log10(x) is not guaranteed to round back to x; pin the outer
    # edges so the smallest and largest samples always land inside a bin.
    edges[0] = lo
    edges[-1] = hi

    counts, bin_edges = np.histogram(data, bins=edges)
    cdf = np.cumsum(counts / counts.sum())
    return bin_edges[1:], cdf


def write_cdf(path, data, bins=num_bins):
    """Write the CDF of *data* to ``path + '_cdf.csv'``.

    Each output row is "<upper bin edge> <cumulative fraction>", preceded by
    a header line naming the two columns.
    """
    upper_edges, cdf = compute_cdf(data, bins)
    rows = np.column_stack((upper_edges, cdf))
    np.savetxt(path + '_cdf.csv', rows, fmt='%f %f', header='Latency_nsec CDF')


# NOTE: the commented-out plotting experiments (CDF line plot, scatter plot)
# that used to sit below the CDF code were never executed and were removed;
# recover them from version control if they are needed again.


def main(argv=None):
    """Print the configured percentiles of the samples in ``argv[1]``.

    Passing ``--cdf`` after the file name additionally writes the CDF table
    next to the input file.  Returns a process exit status: 0 on success,
    1 when the file holds no samples, 2 on a usage error.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else "compute_cdf.py"
        print("usage: %s <samples-file> [--cdf]" % prog, file=sys.stderr)
        return 2

    path = argv[1]
    data = load_samples(path)
    if data.size == 0:
        print("%s: no samples found" % path, file=sys.stderr)
        return 1

    for value in latency_percentiles(data):
        print(value)

    # The CDF export is opt-in: by default only the percentiles are printed.
    if "--cdf" in argv[2:]:
        write_cdf(path, data)
    return 0


if __name__ == "__main__":
    sys.exit(main())