-
Notifications
You must be signed in to change notification settings - Fork 0
/
storage.py
166 lines (129 loc) · 4.95 KB
/
storage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import math
import distribution as distr
import matplotlib.pyplot as plt
# Length of a Findex block.
L_b = 16
# Length of a Findex UID.
L_uid = 32
# AEAD encryption overhead.
L_enc = 28
# Size of the `K_wi`.
L_kwi = 16
# Size of a keyword hash.
L_hash = 32
def compute_actual_chain_table_storage(indexed_values, B):
"""
Computes the Chain Table storage for the given volumes and `B` parameter.
The `indexed_values` parameter lists the sizes of all value stored in all chains.
"""
line_size = L_uid + L_enc + math.ceil(B/8) + B * (1 + L_b)
n_lines = 0
for v_i in indexed_values:
n_blocks = 0
for v_ij in v_i:
n_blocks += math.ceil(v_ij/L_b)
n_lines += math.ceil(n_blocks/B)
return line_size * n_lines
def compute_ideal_chain_table_storage(indexed_values):
"""
Computes the ideal Chain Table storage given the indexed values.
The `indexed_values` parameter lists the sizes of all value stored in all chains.
"""
return sum([L_uid + L_enc + sum([v_ij for v_ij in v_i]) for v_i in indexed_values])
def plot_padding_cost(n, max_B):
"""
Plots the normalized padding for the given `n` and different indexed
volumes `V`. The repartition of the indexed volume among the indexed
keywords follows the Zipf distribution. Each indexed value fits in a single
block.
"""
zipf = distr.zipf(n)
x = [i + 1 for i in range(max_B)]
legend = []
for i in range(4):
legend.append(f"V = {10 ** i} * n")
# Indexed volume.
V = 10 ** i * n
# Each indexed value fits in a single block.
volumes = [[L_b for _ in range(math.ceil(V * f))] for f in zipf]
size_opt = compute_ideal_chain_table_storage(volumes)
y = []
for B in range(1, max_B + 1):
size_B = compute_actual_chain_table_storage(volumes, B)
y.append((size_B - size_opt) / size_B)
plt.plot(x, y)
plt.title(
f"Portion of the Chain Table used for padding given B for indexed values following a Zipf distribution (n = {n})")
plt.ylabel("padding")
plt.xlabel("B")
plt.legend(legend)
plt.show()
def get_first_secure_keyword(distr, V, B, s):
"""
Computes the indice of the first keyword in the distribution that has been
drawn the same number of times as (s-1) others.
"""
i = 0
for (i, f) in enumerate(distr[:len(distr) - (s - 1)]):
if math.ceil(V * f / B) == math.ceil(V * distr[i + (s - 1)] / B):
break
return i
def plot_storage(distr, V, B, s):
"""
Plots the given distribution and its padded version. Normalizes them for
comparison purpose.
Ars:
- `V` (int) : number of indexed values
- `B` (int) : padding number
- `distr` (List(float)) : distribution
"""
# Padded volume per keyword.
line_size = L_uid + L_enc + math.ceil(B/8) + B * (1 + L_b)
padded_storage = [line_size * math.ceil(V * f / B) for f in distr]
k_safe = get_first_secure_keyword(distr, V, B, s) + 1
plt.vlines(k_safe, 0, 1)
x = [(i + 1) for i in range(len(distr))]
plt.plot(x, [f / distr[0] for f in distr], "x")
plt.plot(x, [v_wi / padded_storage[0] for v_wi in padded_storage], "x")
plt.title(
f"Comparison between theoretical and padded Chain Table storages (B={B}, V={V})")
plt.ylabel(f"normalized storage")
plt.xlabel(f"keyword")
plt.legend([f"first secure keyword (k={k_safe}, s={s})",
"theoretical storage", f"padded storage (B={B}, V={V})"])
plt.show()
def plot_first_secure_index_given_B(distr, V, s):
"""
Plots the evolution of the first safe keyword given B.
"""
B_range = [B for B in range(1, 101)]
k_safe_B = [get_first_secure_keyword(distr, V, B, s) for B in B_range]
plt.title(f"Secure portion of the index given B (s={s}, n={n}, V={V})")
plt.ylabel(f"secure portion of the index ((n - k)/n)")
plt.xlabel(f"B")
plt.plot(B_range, [(len(distr) - y) / len(distr) for y in k_safe_B], "x")
plt.show()
def plot_first_secure_index_given_V(distr, B, s):
"""
Plots the evolution of the first safe keyword given V.
"""
V_range = [len(distr) * i for i in range(1, 1001)]
k_safe_V = [get_first_secure_keyword(distr, V, B, s) for V in V_range]
plt.title(f"Secure portion of the index given V (s={s}, n={n}, B={B})")
plt.ylabel(f"secure portion of the index ((n - k)/n)")
plt.xlabel(f"V/n")
plt.plot([x / len(distr) for x in V_range],
[(len(distr) - y) / len(distr) for y in k_safe_V],
"x")
plt.show()
if __name__ == "__main__":
# Number of keywords indexed.
n = 1_000
# The number of values indexed per keyword follows the Zipf distribution.
zipf = distr.zipf(n)
plot_storage(zipf, 10 * n, 6, 2)
plot_first_secure_index_given_B(zipf, 10 * n, 2)
plot_first_secure_index_given_V(zipf, 6, 2)
# Maximum value for B
max_B = 100
plot_padding_cost(n, max_B)