diff --git a/lib/MadNLPGPU/src/cudss.jl b/lib/MadNLPGPU/src/cudss.jl index 535fbae6..f5f6c12b 100644 --- a/lib/MadNLPGPU/src/cudss.jl +++ b/lib/MadNLPGPU/src/cudss.jl @@ -80,14 +80,13 @@ function MadNLP.factorize!(M::CUDSSSolver) # copyto!(M.full.nzVal, M.tril_to_full_view) CUDSS.cudss_set(M.inner.matrix, SparseArrays.nonzeros(M.tril)) CUDSS.cudss("factorization", M.inner, M.x_gpu, M.b_gpu) - synchronize(CUDABackend()) - return M end function MadNLP.solve!(M::CUDSSSolver{T}, x) where T CUDSS.cudss("solve", M.inner, M.x_gpu, x) + synchronize(CUDABackend()) copyto!(x, M.x_gpu) return x end diff --git a/lib/MadNLPGPU/src/interface.jl b/lib/MadNLPGPU/src/interface.jl index 038ee14b..ba238f78 100644 --- a/lib/MadNLPGPU/src/interface.jl +++ b/lib/MadNLPGPU/src/interface.jl @@ -13,9 +13,12 @@ function MadNLP.coo_to_csc(coo::MadNLP.SparseMatrixCOO{T,I,VT,VI}) where {T,I, V coord_csc = coord[@view(mapptr[1:end-1])] - if length(coord) > 0 + if length(coord_csc) > 0 _set_coo_to_colptr_kernel!(CUDABackend())(colptr, coord_csc, ndrange = length(coord_csc)) + else + fill!(colptr, one(Int)) end + rowval = map(x -> x[1][1], coord_csc) nzval = similar(rowval, T) @@ -23,7 +26,7 @@ function MadNLP.coo_to_csc(coo::MadNLP.SparseMatrixCOO{T,I,VT,VI}) where {T,I, V cscmap = similar(coo.I, Int) - if length(coord) > 0 + if length(mapptr) > 1 _set_coo_to_csc_map_kernel!(CUDABackend())(cscmap, mapptr, coord, ndrange = length(mapptr)-1) end @@ -73,16 +76,16 @@ function MadNLP.build_condensed_aug_coord!(kkt::MadNLP.AbstractCondensedKKTSyste fill!(kkt.aug_com.nzVal, zero(T)) if length(kkt.hptr) > 0 _transfer_kernel!(CUDABackend())(kkt.aug_com.nzVal, kkt.hptr, kkt.hess_com.nzVal; ndrange = length(kkt.hptr)) - synchronize(CUDABackend()) end + synchronize(CUDABackend()) if length(kkt.dptr) > 0 _transfer_kernel!(CUDABackend())(kkt.aug_com.nzVal, kkt.dptr, kkt.pr_diag; ndrange = length(kkt.dptr)) - synchronize(CUDABackend()) end + synchronize(CUDABackend()) if length(kkt.ext.jptrptr) > 1 # otherwise 
error is thrown _jtsj!(CUDABackend())(kkt.aug_com.nzVal, kkt.jptr, kkt.ext.jptrptr, kkt.jt_csc.nzVal, kkt.diag_buffer; ndrange = length(kkt.ext.jptrptr)-1) - synchronize(CUDABackend()) end + synchronize(CUDABackend()) end @@ -92,12 +95,12 @@ function MadNLP.get_sparse_condensed_ext( ) where {T, VT <: CuVector{T}} hess_com_ptr = map((i,j)->(i,j), hess_map, 1:length(hess_map)) - if length(hess_map) > 0 # otherwise error is thrown + if length(hess_com_ptr) > 0 # otherwise error is thrown sort!(hess_com_ptr) end jt_csc_ptr = map((i,j)->(i,j), jt_map, 1:length(jt_map)) - if length(jt_map) > 0 # otherwise error is thrown + if length(jt_csc_ptr) > 0 # otherwise error is thrown sort!(jt_csc_ptr) end @@ -143,12 +146,14 @@ function MadNLP.mul!( MadNLP.mul!(wx, kkt.hess_com , xx, alpha, beta) MadNLP.mul!(wx, kkt.hess_com', xx, alpha, one(T)) MadNLP.mul!(wx, kkt.jt_csc, xz, alpha, beta) - diag_operation(CUDABackend())( - wx, kkt.hess_com.nzVal, xx, alpha, - kkt.ext.diag_map_to, - kkt.ext.diag_map_fr; - ndrange = length(kkt.ext.diag_map_to) - ) + if !isempty(kkt.ext.diag_map_to) + diag_operation(CUDABackend())( + wx, kkt.hess_com.nzVal, xx, alpha, + kkt.ext.diag_map_to, + kkt.ext.diag_map_fr; + ndrange = length(kkt.ext.diag_map_to) + ) + end synchronize(CUDABackend()) MadNLP.mul!(wz, kkt.jt_csc', xx, alpha, one(T)) @@ -171,12 +176,14 @@ function MadNLP.mul_hess_blk!( MadNLP.mul!(wxx, kkt.hess_com , tx, one(T), zero(T)) MadNLP.mul!(wxx, kkt.hess_com', tx, one(T), one(T)) - diag_operation(CUDABackend())( - wxx, kkt.hess_com.nzVal, tx, one(T), - kkt.ext.diag_map_to, - kkt.ext.diag_map_fr; - ndrange = length(kkt.ext.diag_map_to) - ) + if !isempty(kkt.ext.diag_map_to) + diag_operation(CUDABackend())( + wxx, kkt.hess_com.nzVal, tx, one(T), + kkt.ext.diag_map_to, + kkt.ext.diag_map_fr; + ndrange = length(kkt.ext.diag_map_to) + ) + end synchronize(CUDABackend()) fill!(@view(wx[n+1:end]), 0) @@ -187,10 +194,19 @@ end function get_diagonal_mapping(colptr, rowval) nnz = length(rowval) + 
if nnz == 0 + return similar(rowval, 0), similar(colptr, 0) + end inds1 = findall(map((x,y)-> ((x <= nnz) && (x != y)), @view(colptr[1:end-1]), @view(colptr[2:end]))) + if length(inds1) == 0 + return similar(rowval, 0), similar(colptr, 0) + end ptrs = colptr[inds1] rows = rowval[ptrs] inds2 = findall(inds1 .== rows) + if length(inds2) == 0 + return similar(rows, 0), similar(ptrs, 0) + end return rows[inds2], ptrs[inds2] end @@ -223,12 +239,15 @@ function MadNLP.compress_jacobian!(kkt::MadNLP.SparseCondensedKKTSystem{T, VT, M end function MadNLP._set_con_scale_sparse!(con_scale::VT, jac_I, jac_buffer) where {T, VT <: CuVector{T}} - if length(jac_I) > 0 - inds = sort!(map((i,j)->(i,j), jac_I, 1:length(jac_I))) - ptr = MadNLP.getptr(inds; by = ((x1,x2),(y1,y2))->x1 != y1) + inds = map((i,j)->(i,j), jac_I, 1:length(jac_I)) + if !isempty(inds) + sort!(inds) + end + ptr = MadNLP.getptr(inds; by = ((x1,x2),(y1,y2))->x1 != y1) + if length(ptr) > 1 _set_con_scale_sparse_kernel!(CUDABackend())(con_scale, ptr, inds, jac_I, jac_buffer; ndrange=length(ptr)-1) - synchronize(CUDABackend()) end + synchronize(CUDABackend()) end function MadNLP._sym_length(Jt::CUSPARSE.CuSparseMatrixCSC) @@ -243,14 +262,15 @@ function MadNLP._sym_length(Jt::CUSPARSE.CuSparseMatrixCSC) ) end + function MadNLP._build_condensed_aug_symbolic_hess(H::CUSPARSE.CuSparseMatrixCSC{Tv,Ti}, sym, sym2) where {Tv,Ti} if size(H,2) > 0 _build_condensed_aug_symbolic_hess_kernel!(CUDABackend())( sym, sym2, H.colPtr, H.rowVal; ndrange = size(H,2) ) - synchronize(CUDABackend()) end + synchronize(CUDABackend()) end function MadNLP._build_condensed_aug_symbolic_jt(Jt::CUSPARSE.CuSparseMatrixCSC{Tv,Ti}, sym, sym2) where {Tv,Ti} @@ -258,8 +278,8 @@ function MadNLP._build_condensed_aug_symbolic_jt(Jt::CUSPARSE.CuSparseMatrixCSC{ _offsets = map((i,j) -> div((j-i)^2 + (j-i), 2), @view(Jt.colPtr[1:end-1]) , @view(Jt.colPtr[2:end])) offsets = cumsum(_offsets) _build_condensed_aug_symbolic_jt_kernel!(CUDABackend())(sym, sym2, 
Jt.colPtr, Jt.rowVal, offsets; ndrange = size(Jt,2)) - synchronize(CUDABackend()) end + synchronize(CUDABackend()) end function MadNLP._first_and_last_col(sym2::CuVector,ptr2) @@ -273,17 +293,15 @@ end MadNLP.nzval(H::CUSPARSE.CuSparseMatrixCSC) = H.nzVal function MadNLP._set_colptr!(colptr::CuVector, ptr2, sym2, guide) - if length(ptr2) == 1 # otherwise error is thrown - return + if length(ptr2) > 1 # otherwise error is thrown + _set_colptr_kernel!(CUDABackend())( + colptr, + sym2, + ptr2, + guide; + ndrange = length(ptr2)-1 + ) end - - _set_colptr_kernel!(CUDABackend())( - colptr, - sym2, - ptr2, - guide; - ndrange = length(ptr2)-1 - ) synchronize(CUDABackend()) return end @@ -312,7 +330,9 @@ end function MadNLP.force_lower_triangular!(I::CuVector{T},J) where T - _force_lower_triangular!(CUDABackend())(I,J; ndrange=length(I)) + if !isempty(I) + _force_lower_triangular!(CUDABackend())(I,J; ndrange=length(I)) + end synchronize(CUDABackend()) end diff --git a/lib/MadNLPGPU/src/kernels.jl b/lib/MadNLPGPU/src/kernels.jl index 306abb41..fcb9e602 100644 --- a/lib/MadNLPGPU/src/kernels.jl +++ b/lib/MadNLPGPU/src/kernels.jl @@ -166,8 +166,8 @@ function MadNLP._set_diag!(A::CuMatrix, inds, a) A, inds, a; ndrange = length(inds) ) - synchronize(CUDABackend()) end + synchronize(CUDABackend()) end @kernel function _set_diag_kernel!( diff --git a/lib/MadNLPGPU/src/lapackgpu.jl b/lib/MadNLPGPU/src/lapackgpu.jl index dcb0546c..5ca8e506 100644 --- a/lib/MadNLPGPU/src/lapackgpu.jl +++ b/lib/MadNLPGPU/src/lapackgpu.jl @@ -221,6 +221,7 @@ function _copyto!(y, x::CUSPARSE.CuSparseMatrixCSC{T}) where T n = size(y,2) fill!(y, zero(T)) kernel_copyto!(CUDABackend())(y, x.colPtr, x.rowVal, x.nzVal, ndrange=n) + synchronize(CUDABackend()) end @kernel function kernel_copyto!(y, @Const(colptr), @Const(rowval), @Const(nzval)) col = @index(Global) diff --git a/lib/MadNLPGPU/test/madnlpgpu_test.jl b/lib/MadNLPGPU/test/madnlpgpu_test.jl index b6643503..3ebbd06e 100644 --- 
a/lib/MadNLPGPU/test/madnlpgpu_test.jl +++ b/lib/MadNLPGPU/test/madnlpgpu_test.jl @@ -84,7 +84,7 @@ testset = [ lapack_algorithm=MadNLP.CHOLESKY, print_level=MadNLP.ERROR ), - ["infeasible", "lootsma", "eigmina"], # KKT system not PD + ["infeasible", "lootsma", "eigmina", "lp_examodels_issue75"], # KKT system not PD ], ] diff --git a/lib/MadNLPTests/src/MadNLPTests.jl b/lib/MadNLPTests/src/MadNLPTests.jl index 5062cd24..7bf26e43 100644 --- a/lib/MadNLPTests/src/MadNLPTests.jl +++ b/lib/MadNLPTests/src/MadNLPTests.jl @@ -111,7 +111,7 @@ end function test_madnlp(name,optimizer_constructor::Function,exclude; Arr = Array) @testset "$name" begin - for f in [infeasible,unbounded,lootsma,eigmina] + for f in [infeasible,unbounded,lootsma,eigmina,lp_examodels_issue75] !(string(f) in exclude) && f(optimizer_constructor; Arr = Arr) end end @@ -331,6 +331,27 @@ function eigmina(optimizer_constructor::Function; Arr = Array) end end +function lp_examodels_issue75(optimizer_constructor::Function; Arr = Array) + @testset "lp_examodels_issue75" begin + + m = Model() + @variable(m, x >= 0) + @variable(m, 0 <= y <= 3) + @NLobjective(m, Min, 12x + 20y) + @NLconstraint(m, c1, 6x + 8y >= 100) + @NLconstraint(m, c2, 7x + 12y >= 120) + + nlp = SparseWrapperModel( + Arr, + NLPModelsJuMP.MathOptNLPModel(m) + ) + optimizer = optimizer_constructor() + result = MadNLP.madnlp(nlp; optimizer.options...) 
+ + @test result.status == MadNLP.SOLVE_SUCCEEDED + end +end + include("Instances/dummy_qp.jl") include("Instances/hs15.jl") include("Instances/nls.jl") diff --git a/src/KKT/sparse.jl b/src/KKT/sparse.jl index d85101ba..655f2c41 100644 --- a/src/KKT/sparse.jl +++ b/src/KKT/sparse.jl @@ -622,7 +622,6 @@ nzval(H) = H.nzval similar(nzval(H), Ti, size(H,1)+1), one(Tv) ) - rowval = Ti[] n = size(H,2) @@ -637,7 +636,6 @@ nzval(H) = H.nzval 1:size(H,2) ) - _build_condensed_aug_symbolic_hess( H, @view(sym[n+1:n+nnz(H)]), diff --git a/test/madnlp_test.jl b/test/madnlp_test.jl index 2ed6e760..979e5dc6 100644 --- a/test/madnlp_test.jl +++ b/test/madnlp_test.jl @@ -40,7 +40,7 @@ testset = [ linear_solver=MadNLP.LapackCPUSolver, lapack_algorithm=MadNLP.CHOLESKY, print_level=MadNLP.ERROR), - ["infeasible", "lootsma", "eigmina"] + ["infeasible", "lootsma", "eigmina", "lp_examodels_issue75"] ], [ "Option: RELAX_BOUND",