-
Notifications
You must be signed in to change notification settings - Fork 1
/
pNormDiffusion.jl
168 lines (146 loc) · 5.22 KB
/
pNormDiffusion.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
using Random
include("struct.jl")
include("utils.jl")
"""
Computes node embeddings from p-norm flow diffusion on an undirected unweighted
graph G. Extentions to weighted graphs are straightforward.
Inputs:
G - Adjacency list representation of graph.
Node indices must start from 1 and end with n, where n is the
number of nodes in G.
seedset - Dictionary specifying seed node(s) and seed mass as
(key, value) pairs.
p - Specifies the p-norm in primal p-norm flow objective.
mu - Smoothing parameter that smoothes the p-norm objective.
Setting mu=0 (default) suffices for almost all practical
purposes.
max_iters - Maximum number of passes for Random Permutation Coordinate
Minimization. A single pass goes over all nodes that violate
KKT conditions.
epsilon - Tolerance on the maximum excess mass on nodes (which is
equivalent to the maximum primal infeasibility).
Diffusion process terminates whenever the excess mass is no
greater than epsilon on all nodes.
cm_tol - Tolerance in the approximate coordinate minimization step
(required only for p > 2).
Approximate coordinate minimization in this diffusion setting
is equivalent to performing inexact line-seach on coordinate
descent stepsizes.
Returns:
x - Node embeddings.
Applying sweepcut on x produces the final output cluster.
"""
function pnormdiffusion(G::AdjacencyList, seedset::Dict{Int,T} where T<:Real;
p::Real, mu::Real=0, max_iters::Int=50, epsilon::Float64=1.0e-3,
cm_tol::Float64=1.0e-2)
for (seed_node, seed_mass) in seedset
if seed_mass > 0 && G.degree[seed_node] == 0
error("Positive initial mass on node that has degree 0.")
end
end
mass = zeros(Float64, G.nv)
for (v,m) in seedset
mass[v] = Float64(m)
end
x = zeros(Float64, G.nv)
if p == 2
l2opt!(x, mass, G.adjlist, G.degree, max_iters, epsilon)
elseif p > 2
lpopt!(x, mass, G.adjlist, G.degree, max_iters, epsilon, p, mu, cm_tol)
else
error("p should be >= 2.")
end
return x
end
function l2opt!(x::Vector{Float64}, mass::Vector{Float64},
adjlist::Vector{Vector{Int}}, degree::Vector{Int}, max_iters::Int,
epsilon::Float64)
S = findall(!iszero, mass)
for _ in 1:max_iters
T = [v for v in S if mass[v] > degree[v] + epsilon]
if isempty(T)
break
end
for v in shuffle!(T)
push = (mass[v] - degree[v])/degree[v]
x[v] += push
mass[v] = degree[v]
for u in adjlist[v]
if mass[u] == 0
push!(S, u)
end
mass[u] += push
end
end
end
end
function lpopt!(x::Vector{Float64}, mass::Vector{Float64},
adjlist::Vector{Vector{Int}}, degree::Vector{Int}, max_iters::Int,
epsilon::Float64, p::Real, mu::Real, cm_tol::Float64)
q = p/(p - 1)
mass0 = copy(mass)
S = findall(!iszero, mass)
for _ in 1:max_iters
T = [v for v in S if mass[v] > degree[v] + epsilon]
if isempty(T)
break
end
for v in shuffle!(T)
x_v_prev = x[v]
push_node_v!(x, mass, v, adjlist, degree, mass0, q, mu, cm_tol)
for u in adjlist[v]
if mass[u] == 0
push!(S, u)
end
update_mass_u!(x, mass, u, v, x_v_prev, q, mu)
end
end
end
end
"""
Pushes out the excess mass on node v to its neighbors.
This is done by simply increasing x[v], the incumbent node embedding for node v.
"""
function push_node_v!(x::Vector{Float64}, mass::Vector{Float64}, v::Int,
adjlist::Vector{Vector{Int}}, degree::Vector{Int},
mass0::Vector{Float64}, q::Float64, mu::Real, cm_tol::Float64)
L = x[v]
U = L + 1
while compute_mass_v(x, v, U, adjlist, mass0, q, mu) > degree[v]
L = U
U *= 2
end
tol = max(cm_tol, 2*eps(U));
while abs(U - L) > tol
M = (L + U)/2
if compute_mass_v(x, v, M, adjlist, mass0, q, mu) > degree[v]
L = M
else
U = M
end
end
x[v] = (L + U)/2
mass[v] = compute_mass_v(x, v, x[v], adjlist, mass0, q, mu)
end
function update_mass_u!(x::Vector{Float64}, mass::Vector{Float64}, u::Int,
v::Int, x_v_prev::Float64, q::Float64, mu::Real)
mass[u] += flow_uv(x[u], x_v_prev, q, mu) - flow_uv(x[u], x[v], q, mu)
end
"""
Computes the net mass on node v (i.e. initial mass plus all incoming flows).
"""
function compute_mass_v(x, v, x_v, adjlist, mass0, q, mu)
mass_v = mass0[v]
for u in adjlist[v]
mass_v += flow_uv(x[u], x_v, q, mu)
end
return mass_v
end
"""
Computes the amount of flow from node u to node v, given node embeddings x_u for
node u and x_v for node v.
"""
function flow_uv(x_u, x_v, q, mu)
return mu > 0 ? ((x_u - x_v)^2 + mu^2)^(q/2 - 1) * (x_u - x_v) :
abs(x_u - x_v)^(q - 1) * sign(x_u - x_v)
end