Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve kkt creation on GPUs #299

Merged
merged 44 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
e4c6273
add wrapper to cuDSS solver
frapac Dec 6, 2023
a7b5298
initial implementation
frapac Jan 5, 2024
e6b9438
debug HybridCondensedKKTSystem
frapac Jan 9, 2024
aa724a2
undocumented cholesky solver added
sshin23 Jan 12, 2024
f139290
Merge remote-tracking branch 'origin/master' into ss/undocumented_cho…
sshin23 Jan 12, 2024
8a97f98
add support for GPU
frapac Jan 12, 2024
bd804e2
deactivate iterative refinement for HybridKKTSystem
frapac Jan 16, 2024
1050bfe
Merge remote-tracking branch 'origin/fp/cudss' into fp/hybrid
frapac Jan 18, 2024
35460e5
update CUDSS interface
frapac Jan 18, 2024
69929fa
clean testing scripts
frapac Jan 18, 2024
486dc01
Merge remote-tracking branch 'origin/ss/undocumented_cholesky' into f…
frapac Jan 24, 2024
8bf2cf8
implement AMD ordering for CUDSS
frapac Jan 24, 2024
cf76e23
CUDSS: add inertia for LDL factorization
frapac Jan 26, 2024
8ebd4b2
remove permutation in cuDSS
frapac Feb 5, 2024
d4eb2e3
cudss: add safeguard for inertia
frapac Feb 5, 2024
5ac1dbb
Fix MOI interface
frapac Feb 13, 2024
730922d
metis
sshin23 Feb 29, 2024
f8bf4ed
metis
sshin23 Feb 29, 2024
4ee36bf
merged cudss
sshin23 Feb 29, 2024
c47ea79
option fixes
sshin23 Mar 1, 2024
1a59f01
1e-8 updates
sshin23 Mar 1, 2024
908a3c3
option fix
sshin23 Mar 1, 2024
ea9efeb
add wrapper to CHOLMOD
frapac Mar 1, 2024
ff3f088
Merge branch 'fp/cholmod' into fp/hybrid
sshin23 Mar 1, 2024
e6b4c81
added cholmod and ldl
sshin23 Mar 1, 2024
f9611b8
option issue fixed
sshin23 Mar 1, 2024
9426066
option fix
sshin23 Mar 1, 2024
485163f
cudss test changed
sshin23 Mar 1, 2024
91986dc
addressed Francois' comments
sshin23 Mar 2, 2024
7a66bf6
addressed Alexis' comments
sshin23 Mar 2, 2024
770de36
renamed options cudss
sshin23 Mar 2, 2024
c207e1f
added a few comments
sshin23 Mar 2, 2024
acb3d48
improved initialization
sshin23 Mar 3, 2024
6854eee
bug fixes
sshin23 Mar 4, 2024
7ad3d21
minor edits
sshin23 Mar 4, 2024
a792a87
merged with master
sshin23 Mar 4, 2024
eb8f023
found a bug in scaling
sshin23 Mar 4, 2024
6e0e8cd
scaling improved
sshin23 Mar 4, 2024
0158c19
sort error fix
sshin23 Mar 5, 2024
c3a795b
coo to csc bug fix
sshin23 Mar 5, 2024
96b62a8
addressed Francois' comments
sshin23 Mar 5, 2024
be5f77b
addressed Francois' comments
sshin23 Mar 5, 2024
ffad56f
name change and bug fix
sshin23 Mar 6, 2024
2058da4
bug fix
sshin23 Mar 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion lib/MadNLPGPU/src/cusolverrf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ function MadNLP.factorize!(M::CuCholeskySolver)
CUSOLVER.spcholesky_factorise(M.inner, M.fullp, eltype(M.fullp.nzVal) == Float32 ? 1e-6 : 1e-12)
M.singularity = false
catch e
println(e)
M.singularity = true
end

Expand Down
146 changes: 125 additions & 21 deletions lib/MadNLPGPU/src/interface.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,86 @@
# GPU specialization of `coo_to_csc`: convert a COO matrix stored in CuArrays
# to CUSPARSE CSC format entirely on the device. Returns the CSC matrix and a
# map from each COO entry to its (deduplicated) CSC nonzero slot.
# NOTE(review): this span was reconstructed from a diff view that interleaved
# the old CPU-roundtrip implementation with the new GPU one; the stale deleted
# lines and embedded review-UI noise have been removed.
function MadNLP.coo_to_csc(coo::MadNLP.SparseMatrixCOO{T,I,VT,VI}) where {T,I, VT <: CuArray, VI <: CuArray}
    # Pair each coordinate with its original position so duplicate entries can
    # be mapped back to their CSC slot after sorting.
    coord = map(
        (i,j,k)->((i,j),k),
        coo.I, coo.J, 1:length(coo.I)
    )
    if length(coord) > 0
        # Column-major order: sort by (col, row).
        sort!(coord, lt = (((i, j), k), ((n, m), l)) -> (j,i) < (m,n))
    end

    # mapptr[p]:mapptr[p+1]-1 spans the duplicates of the p-th unique (i,j).
    mapptr = getptr(CUDABackend(), coord)

    colptr = similar(coo.I, size(coo,2)+1)
    # NOTE(review): if `coord` is empty, the kernels below launch with an empty
    # ndrange and `colptr` stays uninitialized — TODO confirm empty matrices
    # cannot reach this path.

    # One representative entry per unique coordinate.
    coord_csc = coord[@view(mapptr[1:end-1])]

    ker_colptr!(CUDABackend())(colptr, coord_csc, ndrange = length(coord_csc))
    rowval = map(x -> x[1][1], coord_csc)
    nzval = similar(rowval, T)

    csc = CUDA.CUSPARSE.CuSparseMatrixCSC(colptr, rowval, nzval, size(coo))

    # cscmap[k] is the CSC nonzero index of the k-th COO entry.
    cscmap = similar(coo.I, Int)
    ker_index(CUDABackend())(cscmap, mapptr, coord, ndrange = length(mapptr)-1)

    synchronize(CUDABackend())

    return csc, cscmap
end
# Build the CSC column-pointer vector from the column-sorted, deduplicated
# coordinate list. `coord` holds ((row, col), k) tuples sorted by (col, row);
# one kernel instance handles the column transition at each entry.
# (Embedded review-UI noise lines removed from this block.)
@kernel function ker_colptr!(colptr, @Const(coord))
    index = @index(Global)

    @inbounds begin
        if index == 1
            # All columns up to (and including) the first nonzero column
            # start at nonzero index 1.
            ((i2,j2),k2) = coord[index]
            colptr[1:j2] .= 1
        else
            ((i1,j1),k1) = coord[index-1]
            ((i2,j2),k2) = coord[index]
            if j1 != j2
                # Column changed between consecutive entries: every column in
                # (j1, j2] begins at this entry.
                colptr[j1+1:j2] .= index
            end
            if index == length(coord)
                # Trailing empty columns point one past the last nonzero.
                colptr[j2+1:end] .= index+1
            end
        end
    end
end

# Fill `cscmap`: for every original COO position k, record the index of the
# unique CSC nonzero it was merged into. `mapptr[index]:mapptr[index+1]-1`
# spans the group of duplicate coordinates collapsed into CSC slot `index`.
# (Embedded review-UI noise lines removed from this block.)
@kernel function ker_index(cscmap, @Const(mapptr), @Const(coord))
    index = @index(Global)
    @inbounds for l in mapptr[index]:mapptr[index+1]-1
        ((i,j),k) = coord[l]
        cscmap[k] = index
    end
end

"""
    getptr(backend, array; cmp = isequal)

Return the indices delimiting runs of consecutive entries of `array` whose
first components compare equal under `cmp`. The result always contains `1`
and `length(array) + 1`, so consecutive pairs bound each run.
"""
function getptr(backend, array; cmp = isequal)
    # Mark run boundaries in a device-side flag vector, then gather them.
    boundary = similar(array, Bool, length(array) + 1)
    kergetptr(backend)(cmp, boundary, array; ndrange = length(array) + 1)
    synchronize(backend)
    return findall(identity, boundary)
end
# Flag the start of each run of consecutive entries whose first components
# are equal under `cmp`. `bitarray` has length(array)+1 slots; the first and
# the one-past-the-end slots are always set so that `findall` in `getptr`
# yields closed run boundaries. (Review-UI noise removed; redundant if/else
# collapsed into a direct boolean assignment.)
@kernel function kergetptr(@Const(cmp), bitarray, @Const(array))
    I = @index(Global)
    @inbounds if I == 1 || I == length(array) + 1
        # Sentinels: the boundary list always starts at 1 and ends at n+1.
        bitarray[I] = true
    else
        i0, j0 = array[I-1]
        i1, j1 = array[I]
        # A new run starts wherever the first components differ.
        bitarray[I] = !cmp(i0, i1)
    end
end

function CUSPARSE.CuSparseMatrixCSC{Tv,Ti}(A::MadNLP.SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
Expand Down Expand Up @@ -222,13 +296,27 @@ end
end
end

# Accumulate the max absolute Jacobian value per constraint row.
# `inds` holds (row, position) pairs sorted by row; `ptr` delimits each group
# of entries sharing a row, so each kernel instance owns exactly one row and
# no two instances write the same `con_scale` slot.
# (Embedded review-UI noise lines removed from this block.)
@kernel function ker_set_con_scale_sparse!(@Const(ptr), @Const(inds), con_scale, @Const(jac_I), @Const(jac_buffer))
    index = @index(Global)

    @inbounds begin
        rng = ptr[index]:ptr[index+1]-1

        for k in rng
            (row, i) = inds[k]
            con_scale[row] = max(con_scale[row], abs(jac_buffer[i]))
        end
    end
end
# GPU specialization of `_set_con_scale_sparse!`: set each constraint scaling
# to the maximum absolute Jacobian entry in its row, entirely on-device.
# NOTE(review): reconstructed from a diff view — the old CPU round-trip lines
# interleaved with this body have been removed.
function MadNLP._set_con_scale_sparse!(con_scale::VT, jac_I, jac_buffer) where {T, VT <: CuVector{T}}
    if length(jac_I) > 0
        # Pair each row index with its position, then sort so equal rows are
        # contiguous; `getptr` then delimits each group sharing a row.
        inds = sort!(map((i,j)->(i,j), jac_I, 1:length(jac_I)))
        ptr = getptr(CUDABackend(), inds)
        # One kernel instance per row group, so writes to con_scale never race.
        ker_set_con_scale_sparse!(CUDABackend())(ptr, inds, con_scale, jac_I, jac_buffer; ndrange=length(ptr)-1)
        synchronize(CUDABackend())
    end
end


function MadNLP._sym_length(Jt::CUDA.CUSPARSE.CuSparseMatrixCSC)
return mapreduce(
(x,y) -> begin
Expand Down Expand Up @@ -258,17 +346,33 @@ end
end
end

# For each column i of Jt, emit every upper-triangular pair of its nonzeros
# into the symbolic buffers. `offsets` is the cumulative pair count of the
# preceding columns, so each column writes into a disjoint slice of
# `sym`/`sym2`. (Embedded review-UI noise lines removed from this block.)
@kernel function ker_build_condensed_aug_symbolic_jt(@Const(colptr), @Const(rowval), @Const(offsets), sym, sym2)
    i = @index(Global)
    @inbounds begin
        # Start position for this column: 0 for the first column, otherwise
        # the cumulative count of pairs produced by columns 1..i-1.
        cnt = if i == 1
            0
        else
            offsets[i-1]
        end
        for j in colptr[i]:colptr[i+1]-1
            c1 = rowval[j]
            for k in j:colptr[i+1]-1
                c2 = rowval[k]
                cnt += 1
                sym[cnt] = (i, j, k)
                sym2[cnt] = (c2, c1)
            end
        end
    end
end

# GPU specialization: build the symbolic structure of Jt'*D*Jt column by
# column on-device instead of round-tripping through the CPU.
# NOTE(review): reconstructed from a diff view — the old CPU round-trip lines
# interleaved with this body have been removed.
function MadNLP._build_condensed_aug_symbolic_jt(Jt::CUDA.CUSPARSE.CuSparseMatrixCSC{Tv,Ti}, sym, sym2) where {Tv,Ti}
    # Column i with n_i = colPtr[i+1]-colPtr[i] nonzeros contributes
    # n_i*(n_i+1)/2 upper-triangular pairs; computed as ((j-i)^2+(j-i))/2.
    _offsets = map((i,j) -> div((j-i)^2 + (j-i), 2), @view(Jt.colPtr[1:end-1]), @view(Jt.colPtr[2:end]))
    # Inclusive prefix sum: offsets[i-1] is where column i starts writing.
    offsets = cumsum(_offsets)

    ker_build_condensed_aug_symbolic_jt(CUDABackend())(Jt.colPtr, Jt.rowVal, offsets, sym, sym2; ndrange = size(Jt,2))
    synchronize(CUDABackend())
end

function MadNLP._first_and_last_col(sym2::CuVector,ptr2)
Expand Down Expand Up @@ -305,9 +409,9 @@ end

(~, prevcol) = sym2[i-1]
(row, col) = sym2[i]

g = guide[i]
for j in prevcol+1:col
colptr[j] = guide[i]
colptr[j] = g
end
end
end
Expand Down
1 change: 1 addition & 0 deletions src/IPM/solver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T
if (solver.cnt.k!=0 && !solver.opt.jacobian_constant)
eval_jac_wrapper!(solver, solver.kkt, solver.x)
end

jtprod!(solver.jacl, solver.kkt, solver.y)
sd = get_sd(solver.y,solver.zl_r,solver.zu_r,T(solver.opt.s_max))
sc = get_sc(solver.zl_r,solver.zu_r,T(solver.opt.s_max))
Expand Down
7 changes: 2 additions & 5 deletions src/KKT/sparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -508,18 +508,15 @@ function create_kkt_system(
jac_sparsity_I,
jac,
)

jt_csc, jt_csc_map = coo_to_csc(jt_coo)
hess_com, hess_csc_map = coo_to_csc(hess_raw)

aug_com, dptr, hptr, jptr = build_condensed_aug_symbolic(
hess_com,
jt_csc
)

)
_linear_solver = linear_solver(aug_com; opt = opt_linear_solver)

ext = get_sparse_condensed_ext(VT, hess_com, jptr, jt_csc_map, hess_csc_map)

return SparseCondensedKKTSystem(
hess, hess_raw, hess_com, hess_csc_map,
jac, jt_coo, jt_csc, jt_csc_map,
Expand Down
Loading