Skip to content

Commit

Permalink
[MadNLPGPU] Bug fix for empty Hessian (MadNLP#326)
Browse files Browse the repository at this point in the history
* fix for empty hessian/jacobian

* changed everything to @nl

* lp fix

* lp excluded from cholesky

* changed lp to lp_examodels_issue75

* added a few empty checks

* synchronize bug fix

* empty case handling improved

* get_diagonal improved

* interface empty case handling improved

* another attempt

* always check synchronize out of the empty check

* several more changes

* debugging

* debugging

* undo unnecessary changes
  • Loading branch information
sshin23 authored Apr 26, 2024
1 parent 3fbe691 commit 6c76830
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 44 deletions.
3 changes: 1 addition & 2 deletions lib/MadNLPGPU/src/cudss.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,13 @@ function MadNLP.factorize!(M::CUDSSSolver)
# copyto!(M.full.nzVal, M.tril_to_full_view)
CUDSS.cudss_set(M.inner.matrix, SparseArrays.nonzeros(M.tril))
CUDSS.cudss("factorization", M.inner, M.x_gpu, M.b_gpu)

synchronize(CUDABackend())

return M
end

function MadNLP.solve!(M::CUDSSSolver{T}, x) where T
# Run the cuDSS "solve" phase using the factorization stored in M.inner.
# On entry `x` holds the right-hand side; on exit it holds the solution.
CUDSS.cudss("solve", M.inner, M.x_gpu, x)
# NOTE(review): cuDSS launches asynchronously; synchronize before copying
# the device solution back so copyto! does not race the solve kernel
# (this synchronize was the "synchronize bug fix" of this commit).
synchronize(CUDABackend())
copyto!(x, M.x_gpu)
return x
end
Expand Down
92 changes: 56 additions & 36 deletions lib/MadNLPGPU/src/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@ function MadNLP.coo_to_csc(coo::MadNLP.SparseMatrixCOO{T,I,VT,VI}) where {T,I, V

coord_csc = coord[@view(mapptr[1:end-1])]

if length(coord) > 0
if length(coord_csc) > 0
_set_coo_to_colptr_kernel!(CUDABackend())(colptr, coord_csc, ndrange = length(coord_csc))
else
fill!(colptr, one(Int))
end

rowval = map(x -> x[1][1], coord_csc)
nzval = similar(rowval, T)

csc = CUSPARSE.CuSparseMatrixCSC(colptr, rowval, nzval, size(coo))


cscmap = similar(coo.I, Int)
if length(coord) > 0
if length(mapptr) > 1
_set_coo_to_csc_map_kernel!(CUDABackend())(cscmap, mapptr, coord, ndrange = length(mapptr)-1)
end

Expand Down Expand Up @@ -73,16 +76,16 @@ function MadNLP.build_condensed_aug_coord!(kkt::MadNLP.AbstractCondensedKKTSyste
fill!(kkt.aug_com.nzVal, zero(T))
if length(kkt.hptr) > 0
_transfer_kernel!(CUDABackend())(kkt.aug_com.nzVal, kkt.hptr, kkt.hess_com.nzVal; ndrange = length(kkt.hptr))
synchronize(CUDABackend())
end
synchronize(CUDABackend())
if length(kkt.dptr) > 0
_transfer_kernel!(CUDABackend())(kkt.aug_com.nzVal, kkt.dptr, kkt.pr_diag; ndrange = length(kkt.dptr))
synchronize(CUDABackend())
end
synchronize(CUDABackend())
if length(kkt.ext.jptrptr) > 1 # otherwise error is thrown
_jtsj!(CUDABackend())(kkt.aug_com.nzVal, kkt.jptr, kkt.ext.jptrptr, kkt.jt_csc.nzVal, kkt.diag_buffer; ndrange = length(kkt.ext.jptrptr)-1)
synchronize(CUDABackend())
end
synchronize(CUDABackend())
end


Expand All @@ -92,12 +95,12 @@ function MadNLP.get_sparse_condensed_ext(
) where {T, VT <: CuVector{T}}

hess_com_ptr = map((i,j)->(i,j), hess_map, 1:length(hess_map))
if length(hess_map) > 0 # otherwise error is thrown
if length(hess_com_ptr) > 0 # otherwise error is thrown
sort!(hess_com_ptr)
end

jt_csc_ptr = map((i,j)->(i,j), jt_map, 1:length(jt_map))
if length(jt_map) > 0 # otherwise error is thrown
if length(jt_csc_ptr) > 0 # otherwise error is thrown
sort!(jt_csc_ptr)
end

Expand Down Expand Up @@ -143,12 +146,14 @@ function MadNLP.mul!(
MadNLP.mul!(wx, kkt.hess_com , xx, alpha, beta)
MadNLP.mul!(wx, kkt.hess_com', xx, alpha, one(T))
MadNLP.mul!(wx, kkt.jt_csc, xz, alpha, beta)
diag_operation(CUDABackend())(
wx, kkt.hess_com.nzVal, xx, alpha,
kkt.ext.diag_map_to,
kkt.ext.diag_map_fr;
ndrange = length(kkt.ext.diag_map_to)
)
if !isempty(kkt.ext.diag_map_to)
diag_operation(CUDABackend())(
wx, kkt.hess_com.nzVal, xx, alpha,
kkt.ext.diag_map_to,
kkt.ext.diag_map_fr;
ndrange = length(kkt.ext.diag_map_to)
)
end
synchronize(CUDABackend())

MadNLP.mul!(wz, kkt.jt_csc', xx, alpha, one(T))
Expand All @@ -171,12 +176,14 @@ function MadNLP.mul_hess_blk!(

MadNLP.mul!(wxx, kkt.hess_com , tx, one(T), zero(T))
MadNLP.mul!(wxx, kkt.hess_com', tx, one(T), one(T))
diag_operation(CUDABackend())(
wxx, kkt.hess_com.nzVal, tx, one(T),
kkt.ext.diag_map_to,
kkt.ext.diag_map_fr;
ndrange = length(kkt.ext.diag_map_to)
)
if !isempty(kkt.ext.diag_map_to)
diag_operation(CUDABackend())(
wxx, kkt.hess_com.nzVal, tx, one(T),
kkt.ext.diag_map_to,
kkt.ext.diag_map_fr;
ndrange = length(kkt.ext.diag_map_to)
)
end
synchronize(CUDABackend())

fill!(@view(wx[n+1:end]), 0)
Expand All @@ -187,10 +194,19 @@ end
"""
    get_diagonal_mapping(colptr, rowval)

Given the CSC structure (`colptr`, `rowval`) of a square sparse matrix whose
per-column entries start with the diagonal when it is present, return a pair
`(rows, ptrs)` where `rows[k]` is the column/row index of the k-th structural
diagonal entry and `ptrs[k]` is its position in the nonzero vector.

Returns two empty vectors when the matrix has no nonzeros or no diagonal
entries (empty-Hessian / LP case handled by MadNLP#326).
"""
function get_diagonal_mapping(colptr, rowval)

nnz = length(rowval)
if nnz == 0
# No structural nonzeros at all: nothing can lie on the diagonal.
return similar(colptr, 0), similar(colptr, 0)
end
# Indices of non-empty columns whose first-entry pointer is in range.
inds1 = findall(map((x,y)-> ((x <= nnz) && (x != y)), @view(colptr[1:end-1]), @view(colptr[2:end])))
if length(inds1) == 0
# Bug fix: `rows` and `ptrs` are not defined yet at this point, so the
# original `similar(rows, 0), similar(ptrs, 0)` threw an UndefVarError.
# Build the empty results from `colptr`, which is always in scope.
return similar(colptr, 0), similar(colptr, 0)
end
ptrs = colptr[inds1]          # pointer to the first entry of each non-empty column
rows = rowval[ptrs]           # row index of that first entry
# Keep only columns whose first entry sits exactly on the diagonal.
inds2 = findall(inds1 .== rows)
if length(inds2) == 0
return similar(rows, 0), similar(ptrs, 0)
end

return rows[inds2], ptrs[inds2]
end
Expand Down Expand Up @@ -223,12 +239,15 @@ function MadNLP.compress_jacobian!(kkt::MadNLP.SparseCondensedKKTSystem{T, VT, M
end

function MadNLP._set_con_scale_sparse!(con_scale::VT, jac_I, jac_buffer) where {T, VT <: CuVector{T}}
if length(jac_I) > 0
inds = sort!(map((i,j)->(i,j), jac_I, 1:length(jac_I)))
ptr = MadNLP.getptr(inds; by = ((x1,x2),(y1,y2))->x1 != y1)
inds = map((i,j)->(i,j), jac_I, 1:length(jac_I))
if !isempty(inds)
sort!(inds)
end
ptr = MadNLP.getptr(inds; by = ((x1,x2),(y1,y2))->x1 != y1)
if length(ptr) > 1
_set_con_scale_sparse_kernel!(CUDABackend())(con_scale, ptr, inds, jac_I, jac_buffer; ndrange=length(ptr)-1)
synchronize(CUDABackend())
end
synchronize(CUDABackend())
end

function MadNLP._sym_length(Jt::CUSPARSE.CuSparseMatrixCSC)
Expand All @@ -243,23 +262,24 @@ function MadNLP._sym_length(Jt::CUSPARSE.CuSparseMatrixCSC)
)
end


function MadNLP._build_condensed_aug_symbolic_hess(H::CUSPARSE.CuSparseMatrixCSC{Tv,Ti}, sym, sym2) where {Tv,Ti}
if size(H,2) > 0
_build_condensed_aug_symbolic_hess_kernel!(CUDABackend())(
sym, sym2, H.colPtr, H.rowVal;
ndrange = size(H,2)
)
synchronize(CUDABackend())
end
synchronize(CUDABackend())
end

function MadNLP._build_condensed_aug_symbolic_jt(Jt::CUSPARSE.CuSparseMatrixCSC{Tv,Ti}, sym, sym2) where {Tv,Ti}
if size(Jt,2) > 0
_offsets = map((i,j) -> div((j-i)^2 + (j-i), 2), @view(Jt.colPtr[1:end-1]) , @view(Jt.colPtr[2:end]))
offsets = cumsum(_offsets)
_build_condensed_aug_symbolic_jt_kernel!(CUDABackend())(sym, sym2, Jt.colPtr, Jt.rowVal, offsets; ndrange = size(Jt,2))
synchronize(CUDABackend())
end
synchronize(CUDABackend())
end

function MadNLP._first_and_last_col(sym2::CuVector,ptr2)
Expand All @@ -273,17 +293,15 @@ end
MadNLP.nzval(H::CUSPARSE.CuSparseMatrixCSC) = H.nzVal

function MadNLP._set_colptr!(colptr::CuVector, ptr2, sym2, guide)
if length(ptr2) == 1 # otherwise error is thrown
return
if length(ptr2) > 1 # otherwise error is thrown
_set_colptr_kernel!(CUDABackend())(
colptr,
sym2,
ptr2,
guide;
ndrange = length(ptr2)-1
)
end

_set_colptr_kernel!(CUDABackend())(
colptr,
sym2,
ptr2,
guide;
ndrange = length(ptr2)-1
)
synchronize(CUDABackend())
return
end
Expand Down Expand Up @@ -312,7 +330,9 @@ end


function MadNLP.force_lower_triangular!(I::CuVector{T},J) where T
_force_lower_triangular!(CUDABackend())(I,J; ndrange=length(I))
if !isempty(I)
_force_lower_triangular!(CUDABackend())(I,J; ndrange=length(I))
end
synchronize(CUDABackend())
end

Expand Down
2 changes: 1 addition & 1 deletion lib/MadNLPGPU/src/kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ function MadNLP._set_diag!(A::CuMatrix, inds, a)
A, inds, a;
ndrange = length(inds)
)
synchronize(CUDABackend())
end
synchronize(CUDABackend())
end

@kernel function _set_diag_kernel!(
Expand Down
1 change: 1 addition & 0 deletions lib/MadNLPGPU/src/lapackgpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ function _copyto!(y, x::CUSPARSE.CuSparseMatrixCSC{T}) where T
n = size(y,2)
fill!(y, zero(T))
kernel_copyto!(CUDABackend())(y, x.colPtr, x.rowVal, x.nzVal, ndrange=n)
synchronize(CUDABackend())
end
@kernel function kernel_copyto!(y, @Const(colptr), @Const(rowval), @Const(nzval))
col = @index(Global)
Expand Down
2 changes: 1 addition & 1 deletion lib/MadNLPGPU/test/madnlpgpu_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ testset = [
lapack_algorithm=MadNLP.CHOLESKY,
print_level=MadNLP.ERROR
),
["infeasible", "lootsma", "eigmina"], # KKT system not PD
["infeasible", "lootsma", "eigmina", "lp_examodels_issue75"], # KKT system not PD
],
]

Expand Down
23 changes: 22 additions & 1 deletion lib/MadNLPTests/src/MadNLPTests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ end

function test_madnlp(name,optimizer_constructor::Function,exclude; Arr = Array)
@testset "$name" begin
for f in [infeasible,unbounded,lootsma,eigmina]
for f in [infeasible,unbounded,lootsma,eigmina,lp_examodels_issue75]
!(string(f) in exclude) && f(optimizer_constructor; Arr = Arr)
end
end
Expand Down Expand Up @@ -331,6 +331,27 @@ function eigmina(optimizer_constructor::Function; Arr = Array)
end
end

# Regression test for MadNLP#326 (ExaModels issue 75): a small linear program
# written with @NL* macros so it goes through the nonlinear interface, which
# produces a Hessian with no entries and exercises the empty-Hessian paths.
# `Arr` selects the storage backend (e.g. a GPU array type); defaults to Array.
function lp_examodels_issue75(optimizer_constructor::Function; Arr = Array)
@testset "lp_examodels_issue75" begin

m = Model()
@variable(m, x >= 0)
@variable(m, 0 <= y <= 3)
# @NLobjective/@NLconstraint are used deliberately (not @objective/@constraint)
# so the linear model is treated as an NLP with an empty Hessian.
@NLobjective(m, Min, 12x + 20y)
@NLconstraint(m, c1, 6x + 8y >= 100)
@NLconstraint(m, c2, 7x + 12y >= 120)

# Wrap the JuMP model so its internal vectors live in the requested
# array type `Arr` (CPU Array or a GPU array).
nlp = SparseWrapperModel(
Arr,
NLPModelsJuMP.MathOptNLPModel(m)
)
optimizer = optimizer_constructor()
result = MadNLP.madnlp(nlp; optimizer.options...)

# The LP is feasible and bounded, so the solver must report success.
@test result.status == MadNLP.SOLVE_SUCCEEDED
end
end

include("Instances/dummy_qp.jl")
include("Instances/hs15.jl")
include("Instances/nls.jl")
Expand Down
2 changes: 0 additions & 2 deletions src/KKT/sparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,6 @@ nzval(H) = H.nzval
similar(nzval(H), Ti, size(H,1)+1),
one(Tv)
)
rowval = Ti[]

n = size(H,2)

Expand All @@ -637,7 +636,6 @@ nzval(H) = H.nzval
1:size(H,2)
)


_build_condensed_aug_symbolic_hess(
H,
@view(sym[n+1:n+nnz(H)]),
Expand Down
2 changes: 1 addition & 1 deletion test/madnlp_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ testset = [
linear_solver=MadNLP.LapackCPUSolver,
lapack_algorithm=MadNLP.CHOLESKY,
print_level=MadNLP.ERROR),
["infeasible", "lootsma", "eigmina"]
["infeasible", "lootsma", "eigmina", "lp_examodels_issue75"]
],
[
"Option: RELAX_BOUND",
Expand Down

0 comments on commit 6c76830

Please sign in to comment.