From 2026064a89a5bfa567c981593dd661e206e76bc6 Mon Sep 17 00:00:00 2001 From: PetersBas <1.bas.peters@gmail.com> Date: Fri, 28 Jan 2022 12:54:40 -0800 Subject: [PATCH] improve parallel version, simplify some code, some memory reduction and speedup --- examples/projection_intersection_2D.jl | 32 +- examples/projection_intersection_3D.jl | 24 +- src/CDS_MVp.jl | 64 ++- src/CDS_MVp_MT.jl | 2 +- src/CDS_MVp_MT_subfunc.jl | 15 +- src/CDS_scaled_add!.jl | 4 +- src/PARSDMM.jl | 200 ++++----- src/PARSDMM_initialize.jl | 564 ++++++++++++------------- src/adapt_rho_gamma.jl | 210 ++++----- src/adapt_rho_gamma_parallel.jl | 190 ++++----- src/cg.jl | 16 +- src/compute_relative_feasibility.jl | 22 +- src/projectors/project_l1_Duchi!.jl | 10 +- src/rhs_compose.jl | 29 +- src/update_y_l.jl | 56 +-- src/update_y_l_parallel.jl | 49 +-- test/runtests.jl | 2 +- 17 files changed, 781 insertions(+), 708 deletions(-) diff --git a/examples/projection_intersection_2D.jl b/examples/projection_intersection_2D.jl index d47c47a..36119c4 100755 --- a/examples/projection_intersection_2D.jl +++ b/examples/projection_intersection_2D.jl @@ -36,14 +36,14 @@ elseif options.FL==Float32 end #load image to project -file = matopen(joinpath(dirname(pathof(SetIntersectionProjection)), "../examples/Data/compass_velocity.mat")) -m = read(file, "Data");close(file) -m = m[1:341,200:600] -m = permutedims(m,[2,1]) +file = matopen(joinpath(dirname(pathof(SetIntersectionProjection)), "../examples/Data/compass_velocity.mat")); +m = read(file, "Data");close(file); +m = m[1:341,200:600]; +m = permutedims(m,[2,1]); #set up computational grid (25 and 6 m are the original distances between grid points) comp_grid = compgrid((TF(25.0), TF(6.0)),(size(m,1), size(m,2))) -m = convert(Vector{TF},vec(m)) +m = convert(Vector{TF},vec(m)); #define axis limits and colorbar limits xmax = comp_grid.d[1]*comp_grid.n[1] @@ -55,13 +55,13 @@ vma = 4500 constraint = Vector{SetIntersectionProjection.set_definitions}() #bounds: -m_min = 1500.0 +m_min = 1480.0 m_max = 4500.0 set_type = "bounds" TD_OP = "identity" app_mode = ("matrix","") custom_TD_OP = ([],false) -push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)) +push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)); #slope constraints (vertical) m_min = 0.0 @@ -70,11 +70,11 @@ set_type = "bounds" TD_OP = "D_z" app_mode = ("matrix","") custom_TD_OP = ([],false) -push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)) +push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)); options.parallel = false -(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL) -(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options) +(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL); +(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options); println("") println("PARSDMM serial (bounds and bounds on D_z):") @@ -103,14 +103,17 @@ tight_layout() #tight_layout(pad=0.0, w_pad=0.0, h_pad=1.0) savefig("PARSDMM_logs.png",bbox_inches="tight") +#print timings in terminal +log_PARSDMM.timing + println("") println("PARSDMM parallel (bounds and bounds on D_z):") options.parallel = true -(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL) -(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options) +(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL); 
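#------------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the patch): log_PARSDMM.timing
# holds the TimerOutputs.TimerOutput used by the @timeit sections inside PARSDMM
# ("form rhs for linear system", "argmin x", "argmin y and l update",
# "adjust rho and gamma", "Q-update"); the two "#print timings in terminal" lines
# added above display it after a run. A minimal usage sketch, assuming
# TimerOutputs.jl is available in the environment:
#------------------------------------------------------------------------------
using TimerOutputs                 # brings print_timer into scope
print_timer(log_PARSDMM.timing)    # pretty-printed table of PARSDMM's timed sections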
+(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options); @time (x2,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); -#@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); +@time (x2,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); #@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); #plot @@ -119,6 +122,9 @@ subplot(2,1,1);imshow(permutedims(reshape(m,(comp_grid.n[1],comp_grid.n[2])),[2, subplot(2,1,2);imshow(permutedims(reshape(x2,(comp_grid.n[1],comp_grid.n[2])),[2,1]),cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, zmax, 0]); title("Projection (bounds and bounds on D_z)") savefig("projected_model_ParallelPARSDMM.png",bbox_inches="tight") +#print timings in terminal +log_PARSDMM.timing + # #use multilevel-serial (2-levels) # options.parallel = false diff --git a/examples/projection_intersection_3D.jl b/examples/projection_intersection_3D.jl index bbfa714..8e10726 100644 --- a/examples/projection_intersection_3D.jl +++ b/examples/projection_intersection_3D.jl @@ -3,7 +3,7 @@ # Bas Peters, 2017 using Distributed -using LinearAlgebra +@everywhere using LinearAlgebra @everywhere using SetIntersectionProjection using HDF5 using PyPlot @@ -17,7 +17,7 @@ end if ~isfile("overthrust_3D_true_model.h5") run(`wget ftp://slim.gatech.edu/data/SoftwareRelease/WaveformInversion.jl/3DFWI/overthrust_3D_true_model.h5`) end -n, d, o, m = read(h5open("overthrust_3D_true_model.h5","r"), "n", "d", "o", "m") +n, d, o, m = read(h5open("overthrust_3D_true_model.h5","r"), "n", "d", "o", "m"); m .= 1000.0 ./ sqrt.(m); m = m[50:200,50:200,:]; @@ -36,7 +36,7 @@ options.Blas_active = true options.maxit = 500 set_zero_subnormals(true) -BLAS.set_num_threads(4) +@everywhere BLAS.set_num_threads(4) #select working precision if options.FL==Float64 @@ -68,7 +68,7 @@ set_type = "bounds" TD_OP = "identity" app_mode = ("matrix","") custom_TD_OP = ([],false) -push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)) +push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)); #slope constraints (vertical) m_min = 0.0 @@ -77,11 +77,11 @@ set_type = "bounds" TD_OP = "D_z" app_mode = ("matrix","") custom_TD_OP = ([],false) -push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)) +push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)); options.parallel = false -(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL) -(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options) +(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL); +(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options); println("") println("PARSDMM serial (bounds and bounds on D_z):") @@ -133,12 +133,12 @@ println("PARSDMM multilevel-serial (bounds and bounds on D_z):") println("") println("PARSDMM parallel (bounds and bounds on D_z):") options.parallel = true -(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL) -(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options) +(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL); +(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options); -@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options) -@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options) -@time 
(x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options) +@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); +@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); +@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); #use multilevel-parallel (2-levels) options.parallel = true diff --git a/src/CDS_MVp.jl b/src/CDS_MVp.jl index 9299417..4f5c190 100755 --- a/src/CDS_MVp.jl +++ b/src/CDS_MVp.jl @@ -1,4 +1,4 @@ -export CDS_MVp +export CDS_MVp, CDS_MVp2, CDS_MVp3, CDS_MVp4 """ compute single-thread matrix vector product with vector x, output is vector y: y=A*x @@ -7,15 +7,15 @@ R is a tall matrix N by ndiagonals, corresponding to a square matrix A offsets indicate offset of diagonal compared to the main diagonal in A (which is 0) """ function CDS_MVp( - N::Integer, - ndiags::Integer, - R::Array{TF,2}, - offset::Vector{TI}, - x::Vector{TF}, - y::Vector{TF}) where {TF<:Real,TI<:Integer} + N ::Integer, + ndiags ::Integer, + R ::Array{TF,2}, + offset ::Vector{TI}, + x ::Vector{TF}, + y ::Vector{TF}) where {TF<:Real,TI<:Integer} for i = 1 : ndiags - d = offset[i] + d = offset[i] r0 = max(1, 1-d) r1 = min(N, N-d) c0 = max(1, 1+d) @@ -26,3 +26,51 @@ function CDS_MVp( end return y end + +# #the versions below is insignificantly faster than the original version above +# function CDS_MVp( +# N::Integer, +# ndiags::Integer, +# R::Array{TF,2}, +# offset::Vector{TI}, +# x::Vector{TF}, +# y::Vector{TF}) where {TF<:Real,TI<:Integer} + +# for i = 1 : ndiags +# d = offset[i] +# r0 = max(1, 1-d) +# r1 = min(N, N-d) +# c0 = max(1, 1+d) +# #r0_plus_c0 = r0 + c0 +# c = deepcopy(c0) +# for r = r0 : r1 +# @inbounds y[r] = y[r] + R[r,i] * x[c]#original +# c += 1 +# end +# end +# return y +# end + +# function CDS_MVp( +# N ::Integer, +# ndiags ::Integer, +# R ::Array{TF,2}, +# offset ::Vector{TI}, +# x ::Vector{TF}, +# y ::Vector{TF}) where {TF<:Real,TI<:Integer} + +# for i = 1 : ndiags +# d = offset[i] +# r0 = max(1, 1-d) +# r1 = min(N, N-d) +# c0 = max(1, 1+d) +# # for r = r0 : r1 +# # c = r - r0 + c0 #original +# # @inbounds y[r] = y[r] + R[r,i] * x[c]#original +# # end +# @inbounds y[r0 : r1] .= y[r0 : r1] .+ R[r0:r1,i].*x[r0 - r0 + c0:r1 - r0 + c0] +# end + +# return y +# end + diff --git a/src/CDS_MVp_MT.jl b/src/CDS_MVp_MT.jl index 654de77..53e4e35 100644 --- a/src/CDS_MVp_MT.jl +++ b/src/CDS_MVp_MT.jl @@ -15,7 +15,7 @@ function CDS_MVp_MT( y ::Vector{TF}) where {TF<:Real,TI<:Integer} for i = 1 : ndiags - d = offset[i] + d = offset[i] r0 = max(1, 1-d) r1 = min(N, N-d) c0 = max(1, 1+d) diff --git a/src/CDS_MVp_MT_subfunc.jl b/src/CDS_MVp_MT_subfunc.jl index 5b0cdc1..84da27a 100644 --- a/src/CDS_MVp_MT_subfunc.jl +++ b/src/CDS_MVp_MT_subfunc.jl @@ -4,15 +4,14 @@ wrapper around Julia threads to compute a multi-threaded matrix vector product i This is a subfunction of CDS_MVp_MT.jl """ function CDS_MVp_MT_subfunc( - R::Array{TF,2}, - x::Vector{TF}, - y::Vector{TF}, - r0::Int, - c0::Int, - r1::Int, - i::Int) where {TF<:Real} + R ::Array{TF,2}, + x ::Vector{TF}, + y ::Vector{TF}, + r0 ::Int, + c0 ::Int, + r1 ::Int, + i ::Int) where {TF<:Real} - #s=rind-1 @Threads.threads for r = r0 : r1 c = r - r0 + c0 #original @inbounds y[r] = y[r] + R[r,i] * x[c]#original diff --git a/src/CDS_scaled_add!.jl b/src/CDS_scaled_add!.jl index 70477ba..d0f60f6 100644 --- a/src/CDS_scaled_add!.jl +++ b/src/CDS_scaled_add!.jl @@ -3,7 +3,7 @@ export CDS_scaled_add! 
""" Computes A = A + alpha * B for A and B in the compressed diagonal storage format (CDS/DIA) - TO DO: make this function multi-threaded per column of A and B + TO DO: make this function multi-threaded per column of A and B, or use BLAS functions """ function CDS_scaled_add!( A ::Array{TF,2}, @@ -16,7 +16,7 @@ function CDS_scaled_add!( for k=1:length(B_offsets) A_update_col = findall((in)(B_offsets[k]),A_offsets) if isempty(A_update_col) == true - error("attempted to update a diagonal in A in CDS storage that does not excist. A and B need to have the same nonzero diagonals") + error("attempted to update a diagonal in A in CDS storage that does not exist. A and B need to have the same nonzero diagonals") end A[:,A_update_col] .= A[:,A_update_col] .+ alpha .* B[:,k]; end diff --git a/src/PARSDMM.jl b/src/PARSDMM.jl index d944f25..12d1a2e 100755 --- a/src/PARSDMM.jl +++ b/src/PARSDMM.jl @@ -95,57 +95,58 @@ x_solve_tol_ref = TF(1.0) #scalar required to determine tolerance for x-minimiza end #end initialization timer for i=1:maxit #main loop + #form right hand side for x-minimization @timeit to "form rhs for linear system" begin - rhs = rhs_compose(rhs,l,y,rho,TD_OP,p,Blas_active,parallel) + rhs = rhs_compose(rhs,l,y,rho,TD_OP,p,Blas_active,parallel) end #end timer for rhs # x-minimization @timeit to "argmin x" begin copy!(x_old,x); (x,iter,relres,x_solve_tol_ref) = argmin_x(Q,rhs,x,x_solve_tol_ref,i,log_PARSDMM,Q_offsets,Ax_out) - log_PARSDMM.cg_it[i] = iter;#cg_log.iters; - log_PARSDMM.cg_relres[i] = relres;#cg_log[:resnorm][end]; + log_PARSDMM.cg_it[i] = iter + log_PARSDMM.cg_relres[i] = relres end #end timer for argmin x # y-minimization & l-update @timeit to "argmin y and l update" begin if parallel==true - [ @spawnat pid update_y_l_parallel(x,i,Blas_active, - y[:L],y_old[:L],l[:L],l_old[:L],rho[:L],gamma[:L],prox[:L],TD_OP[:L],P_sub[:L], - x_hat[:L],r_pri[:L],s[:L],set_feas[:L],feasibility_only) for pid in y.pids] - - # [@spawnat pid update_y_l_parallel_exp(x,i,Blas_active, - # y[:L][1],y_old[:L][1],l[:L][1],l_old[:L][1],rho[:L][1],gamma[:L][1],prox[:L][1],TD_OP[:L][1],P_sub[:L], - # x_hat[:L][1],r_pri[:L][1],s[:L][1],set_feas[:L]) for pid in y.pids] - #logging distributed quantities - @sync for ii=1:p - @async log_PARSDMM.r_pri[i,ii]=@fetchfrom r_pri.pids[ii] norm(r_pri[:L][1]) - end - if mod(i,10)==0 #log every 10 it, or whatever number is suitable - for ii=1:pp - log_PARSDMM.set_feasibility[counter,ii]=@fetchfrom set_feas.pids[ii] set_feas[:L][1] - end - counter+=1 + + #using spawnat + [ @sync @spawnat pid update_y_l_parallel(x,i,Blas_active, + y[:L],y_old[:L],l[:L],l_old[:L],rho[:L],gamma[:L],prox[:L],TD_OP[:L],P_sub[:L], + x_hat[:L],r_pri[:L],s[:L],set_feas[:L],feasibility_only) for pid in y.pids] + + #logging distributed quantities + for ii=1:p + log_PARSDMM.r_pri[i,ii] = norm(r_pri[ii]) + #log_PARSDMM.r_dual[i,ii] = norm( rho[ii]*(TD_OP[ii]'*(y[ii]-y_old[ii])) ) + end + if mod(i,10)==0 #log every 10 it, or whatever number is suitable + for ii=1:pp + log_PARSDMM.set_feasibility[counter,ii] = set_feas[ii] end - else - (y,l,r_pri,s,log_PARSDMM,counter,y_old,l_old)=update_y_l(x,p,i,Blas_active,y,y_old,l,l_old,rho,gamma,prox,TD_OP,log_PARSDMM,P_sub,counter,x_hat,r_pri,s,feasibility_only); + counter+=1 + end + else #not distributed + (y,l,r_pri,s,log_PARSDMM,counter,y_old,l_old) = update_y_l(x,p,i,Blas_active,y,y_old,l,l_old,rho,gamma,prox,TD_OP,log_PARSDMM,P_sub,counter,x_hat,r_pri,s,feasibility_only); log_PARSDMM.r_dual_total[i] = sum(log_PARSDMM.r_dual[i,:]); end #some more logging - 
log_PARSDMM.r_pri_total[i] = sum(log_PARSDMM.r_pri[i,:]); + log_PARSDMM.r_pri_total[i] = sum(log_PARSDMM.r_pri[i,:]); if options.Minkowski == false - log_PARSDMM.obj[i] = TF(0.5) .* norm(x .- m)^2 + log_PARSDMM.obj[i] = TF(0.5) .* norm(x .- m)^2 else - log_PARSDMM.obj[i] = TF(0.5) .* norm(TD_OP[end]*x .- m)^2 + log_PARSDMM.obj[i] = TF(0.5) .* norm(TD_OP[end]*x .- m)^2 end - log_PARSDMM.evol_x[i] = norm(x_old .- x) ./ norm(x); - log_PARSDMM.rho[i,:] = rho; - log_PARSDMM.gamma[i,:] = gamma; + log_PARSDMM.evol_x[i] = norm(x_old .- x) ./ norm(x); + log_PARSDMM.rho[i,:] = rho; + log_PARSDMM.gamma[i,:] = gamma; - end #end timer for argmin y and l-update + end #end timer for argmin y and l-update # Stopping conditions @timeit to "stopping conditions check" begin @@ -160,7 +161,7 @@ for i=1:maxit #main loop # adjust penalty parameters rho and relaxation parameters gamma @timeit to "adjust rho and gamma" begin - if i==1 + if i==1 #only at the first iteration if parallel==true [@spawnat pid l_hat[:L][1] = l_old[:L][1] .+ rho[:L][1] .* ( -s[:L][1] .+ y_old[:L][1] ) for pid in l_hat.pids] [@spawnat pid copy!(l_hat_0[:L][1],l_hat[:L][1] ) for pid in l_hat.pids] @@ -178,76 +179,77 @@ for i=1:maxit #main loop end end - if (adjust_rho == true || adjust_gamma == true) && mod(i,rho_update_frequency)==TF(0) - if parallel==true - [ @spawnat pid adapt_rho_gamma_parallel(gamma[:L],rho[:L],adjust_gamma,adjust_rho,y[:L], - y_old[:L],s[:L],s_0[:L],l[:L],l_hat_0[:L],l_0[:L],l_old[:L],y_0[:L],l_hat[:L],d_l_hat[:L],d_H_hat[:L],d_l[:L],d_G_hat[:L]) for pid in y.pids] - #[ @spawnat pid adapt_rho_gamma_parallel_exp(gamma[:L][1],rho[:L][1],adjust_gamma,adjust_rho,y[:L][1], - #y_old[:L][1],s[:L][1],s_0[:L][1],l[:L][1],l_hat_0[:L][1],l_0[:L][1],l_old[:L][1],y_0[:L][1],l_hat[:L][1],d_l_hat[:L][1],d_H_hat[:L][1],d_l[:L][1],d_G_hat[:L][1]) for pid in y.pids] - else - (rho,gamma,l_hat,d_l_hat,d_H_hat,d_l,d_G_hat)=adapt_rho_gamma(i,gamma,rho,adjust_gamma,adjust_rho,y,y_old,s,s_0,l,l_hat_0,l_0,l_old,y_0,p,l_hat,d_l_hat,d_H_hat,d_l,d_G_hat); - end - if i>1 - if parallel==true - [@spawnat pid copy!(l_hat_0[:L][1],l_hat[:L][1] ) for pid in l_hat.pids] - [@spawnat pid copy!(y_0[:L][1] , y[:L][1] ) for pid in y.pids] - [@spawnat pid copy!(s_0[:L][1] , s[:L][1] ) for pid in s.pids] - [@spawnat pid copy!(l_0[:L][1] , l[:L][1] ) for pid in l.pids] - else - for ii=1:p - copy!(l_hat_0[ii],l_hat[ii]) - copy!(y_0[ii],y[ii]) - copy!(s_0[ii],s[ii]) - copy!(l_0[ii],l[ii]) - end - end #end if parallel - end - end #end adjust rho and gamma - - #adjust rho to set-feasibility estimates - if parallel==true - rho = convert(Vector{TF},rho); #gather rho - end - if adjust_feasibility_rho == true && mod(i,10)==TF(0) #&& norm(rho-log_PARSDMM.rho[i,:])<(10*eps(TF))#only update rho if it is not already updated - ## adjust rho feasibility - #if primal residual for a set is much larger than for the other sets - #and the feasibility error is also much larger, increase rho to lower - #primal residual and (hopefully) feasibility error - (max_set_feas,max_set_feas_ind) = findmax(log_PARSDMM.set_feasibility[counter-1,:]) - sort_feas = sort(log_PARSDMM.set_feasibility[counter-1,:]); - if i>10 - rho[max_set_feas_ind] = TF(2.0) .* rho[max_set_feas_ind] + if (adjust_rho == true || adjust_gamma == true) && mod(i,rho_update_frequency)==0#TF(0) + if parallel==true + #using spawnat + [@sync @spawnat pid adapt_rho_gamma_parallel(gamma[:L],rho[:L],adjust_gamma,adjust_rho,y[:L], + 
y_old[:L],s[:L],s_0[:L],l[:L],l_hat_0[:L],l_0[:L],l_old[:L],y_0[:L],l_hat[:L],d_l_hat[:L],d_H_hat[:L],d_l[:L],d_G_hat[:L]) for pid in y.pids] + + else + (rho,gamma,l_hat,d_l_hat,d_H_hat,d_l,d_G_hat) = adapt_rho_gamma(i,gamma,rho,adjust_gamma,adjust_rho,y,y_old,s,s_0,l,l_hat_0,l_0,l_old,y_0,p,l_hat,d_l_hat,d_H_hat,d_l,d_G_hat); + end + + if i>1 #after the first iteration + if parallel==true + [@spawnat pid copy!(l_hat_0[:L][1],l_hat[:L][1] ) for pid in l_hat.pids] + [@spawnat pid copy!(y_0[:L][1] , y[:L][1] ) for pid in y.pids] + [@spawnat pid copy!(s_0[:L][1] , s[:L][1] ) for pid in s.pids] + [@spawnat pid copy!(l_0[:L][1] , l[:L][1] ) for pid in l.pids] + else + for ii=1:p + copy!(l_hat_0[ii],l_hat[ii]) + copy!(y_0[ii],y[ii]) + copy!(s_0[ii],s[ii]) + copy!(l_0[ii],l[ii]) end - end #end adjust_feasibility_rho - - #enforce max and min values for rho, to prevent the condition number of Q -> inf - rho = max.(min.(rho,TF(1e4)),TF(1e-2)) #hardcoded bounds - end #end timer for rho and gamma update - - @timeit to "Q-update" begin - ind_updated = findall(rho .!= log_PARSDMM.rho[i,:]) # locate changed rho index - ind_updated = convert(Array{TI,1},ind_updated) - - #re-assemble total transform domain operator as a matrix - if isempty(findall((in)(ind_updated),p))==false - if parallel==true && feasibility_only==false - prox = convert(Vector{Any},prox); #gather rho - prox[p] = input -> prox_l2s!(input,rho[p],m) - prox = distribute(prox) - elseif feasibility_only==false - prox[p] = input -> prox_l2s!(input,rho[p],m) - end - end - Q = Q_update!(Q,AtA,set_Prop,rho,ind_updated,log_PARSDMM,i,Q_offsets) - if parallel==true - rho = distribute(rho) #distribute again (gather -> process -> distribute is a ugly hack, fix this later) - end - end #end Q-update timer - - if i==maxit - println("PARSDMM reached maxit") - (TD_OP,AtA,log_PARSDMM) = output_check_PARSDMM(x,TD_OP,AtA,log_PARSDMM,i,counter) - end + end #end if parallel + end + end #end adjust rho and gamma + + #adjust rho to set-feasibility estimates + if parallel==true + rho = convert(Vector{TF},rho); #gather rho + end + if adjust_feasibility_rho == true && mod(i,10)==TF(0) #&& norm(rho-log_PARSDMM.rho[i,:])<(10*eps(TF))#only update rho if it is not already updated + ## adjust rho feasibility + #if primal residual for a set is much larger than for the other sets + #and the feasibility error is also much larger, increase rho to lower + #primal residual and (hopefully) feasibility error + (max_set_feas,max_set_feas_ind) = findmax(log_PARSDMM.set_feasibility[counter-1,:]) + sort_feas = sort(log_PARSDMM.set_feasibility[counter-1,:]); + if i>10 + rho[max_set_feas_ind] = TF(2.0) .* rho[max_set_feas_ind] + end + end #end adjust_feasibility_rho + + #enforce max and min values for rho, to prevent the condition number of Q -> inf + rho = max.(min.(rho,TF(1e4)),TF(1e-2)) #hardcoded bounds + end #end timer for rho and gamma update + + @timeit to "Q-update" begin + ind_updated = findall(rho .!= log_PARSDMM.rho[i,:]) # locate changed rho index + ind_updated = convert(Array{TI,1},ind_updated) + + #re-assemble total transform domain operator as a matrix + if isempty(findall((in)(ind_updated),p))==false + if parallel==true && feasibility_only==false + prox = convert(Vector{Any},prox); #gather rho + prox[p] = input -> prox_l2s!(input,rho[p],m) + prox = distribute(prox) + elseif feasibility_only==false + prox[p] = input -> prox_l2s!(input,rho[p],m) + end + end + Q = Q_update!(Q,AtA,set_Prop,rho,ind_updated,log_PARSDMM,i,Q_offsets) + if parallel==true + rho = distribute(rho) 
#distribute again (gather -> process -> distribute is a ugly hack, fix this later) + end +end #end Q-update timer + + if i==maxit + println("PARSDMM reached maxit") + (TD_OP,AtA,log_PARSDMM) = output_check_PARSDMM(x,TD_OP,AtA,log_PARSDMM,i,counter) + end end #end main loop @@ -258,7 +260,7 @@ end #end funtion # output checks function output_check_PARSDMM(x,TD_OP,AtA,log_PARSDMM,i,counter) if isreal(x)==false - println("warning: Result of PARSDMM is not real") + @warn "Result of PARSDMM is not real" end log_PARSDMM.obj = log_PARSDMM.obj[1:i] log_PARSDMM.evol_x = log_PARSDMM.evol_x[1:i] diff --git a/src/PARSDMM_initialize.jl b/src/PARSDMM_initialize.jl index b2808f6..28efcc4 100755 --- a/src/PARSDMM_initialize.jl +++ b/src/PARSDMM_initialize.jl @@ -27,288 +27,288 @@ function PARSDMM_initialize( feasibility_only=false ::Bool ) where {TF<:Real,TI<:Integer} - ind_ref = maxit - if options.Minkowski == false - N = length(x) - elseif options.Minkowski == true && zero_ini_guess==true - N = length(x)*2 - elseif options.Minkowski == true && zero_ini_guess==false - @assert length(x)==(2*length(m)) - N = length(x) - end - # const N = size(TD_OP[1],2) #this line will give really weird errors later on for some reason.. - - - # if typeof(AtA[1])==SparseMatrixCSC{TF,TI} - # push!(AtA,speye(TF,N)); - # elseif typeof(AtA[1])==Array{TF,2} - # push!(AtA,ones(TF,N,1)) - # end - # push!(TD_OP,speye(TF,N)); - # - # push!(set_Prop.AtA_offsets,[0]) - # push!(set_Prop.banded,true) - # push!(set_Prop.AtA_diag,true) - # push!(set_Prop.dense,false) - - p = length(TD_OP); #number of terms in in the sum of functions of the projeciton problem - pp = p-1; - if feasibility_only==true; pp=p; end; - - rho = Vector{TF}(undef,p) - if length(rho_ini)==1 - fill!(rho,rho_ini[1]) - else - copy!(rho,rho_ini) - end - prox = copy(P_sub) - if feasibility_only==false - #define prox for all terms in the sum (projectors onto sets) - #add prox for the data fidelity term 0.5||m-x||_2^2 - m_orig = deepcopy(m) ::Vector{TF} - prox_data = input -> prox_l2s!(input,rho[end],m_orig) - push!(prox,prox_data); - end - if parallel==true - prox = distribute(prox) - P_sub = distribute(P_sub) - rho = distribute(rho) - end - - if parallel==true && nworkers()= 1 #at least one operator is a JOLI operator -> wrap all others as a JOLI matrix as well - Q_offsets = [] - if typeof(AtA[1]) <: joAbstractLinearOperator - Q = rho[1]*AtA[1] - else - Q = rho[1]*joMatrix(AtA[1]) - end - for i=2:p - if typeof(AtA[i]) <: joAbstractLinearOperator - Q = Q + rho[i]*AtA[i] - else - Q = Q + rho[i]*joMatrix(AtA[i]) - end - end - else #n_CDS_matrices == p should hold #all matrics in AtA are in compressed diagonal storage format - all_offsets=zeros(TI,999,99) - for i=1:length(AtA) #find all unique offset - all_offsets[1:length(set_Prop.AtA_offsets[i]),i]=set_Prop.AtA_offsets[i] - end - Q_offsets=convert(Vector{TI},unique(all_offsets)) - Q=zeros(TF,size(AtA[1],1),length(Q_offsets)) - for i=1:length(AtA) - for j=1:length(set_Prop.AtA_offsets[i]) - #Q_current_col = findin(Q_offsets,set_Prop.AtA_offsets[i][j]) - Q_current_col = findall((in)(set_Prop.AtA_offsets[i][j]),Q_offsets) - Q[:,Q_current_col] .= Q[:,Q_current_col] .+ rho[i] .* AtA[i][:,j] - end - end - end - - # log_PARSDMM = log_type_PARSDMM(zeros(maxit,pp),zeros(maxit,p),zeros(maxit,p),zeros(maxit),zeros(maxit),zeros(maxit), - # zeros(maxit),zeros(maxit,p),zeros(maxit,p),zeros(maxit),zeros(maxit),TF(0),TF(0),TF(0),TF(0),TF(0),TF(0),TF(0)); - log_PARSDMM = 
log_type_PARSDMM(zeros(maxit,pp),zeros(maxit,p),zeros(maxit,p),zeros(maxit),zeros(maxit),zeros(maxit), - zeros(maxit),zeros(maxit,p),zeros(maxit,p),zeros(maxit),zeros(maxit),TimerOutput()); - - log_PARSDMM.set_feasibility[1,:]=feasibility_initial - - if parallel==true - - gamma = distribute(gamma) - - #y = distribute(y) - y_0 = distribute(y_0) - y_old = distribute(y_old) - - #l = distribute(l) - l_0 = distribute(l_0) - l_old = distribute(l_old) - l_hat_0 = distribute(l_hat_0) - l_hat = distribute(l_hat) - - x_hat = distribute(x_hat) - s_0 = distribute(s_0) - s = distribute(s) - r_pri = distribute(r_pri) - - d_l_hat = distribute(d_l_hat) - d_H_hat = distribute(d_H_hat) - d_l = distribute(d_l) - d_G_hat = distribute(d_G_hat) - end - - #fill distributed vectors with zeros (from 1 entry to N entries, because this is faster than first fill and then dist. Should be able to do in one go ideally) - if parallel == true - [@spawnat pid y_0[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid y_old[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid l_0[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid l_old[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid l_hat_0[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid l_hat[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid x_hat[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid s_0[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid s[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid r_pri[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid d_l_hat[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid d_H_hat[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid d_l[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - [@spawnat pid d_G_hat[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] - end - - #distribute y and l if they are not already distributed - if (parallel==true) && (typeof(l)<:DistributedArrays.DArray) == false - l=distribute(l) - end - if parallel==true && (typeof(y)<:DistributedArrays.DArray) == false - y=distribute(y) - end - if parallel==true - [ @spawnat pid adjust_gamma for pid in y.pids ] - [ @spawnat pid adjust_rho for pid in y.pids ] - [ @spawnat pid adjust_feasibility_rho for pid in y.pids ] - end - if parallel == true - [@spawnat pid comp_grid for pid in y.pids] - end - - if parallel==true - set_feas=zeros(TF,length(TD_OP)) #initialize a vector only required for parallel stuff - set_feas=distribute(set_feas) - else - set_feas=[] - end - - if zero_ini_guess==true - if parallel==false - for i=1:length(l); fill!(l[i],TF(0.0)); end - for i=1:length(y); fill!(y[i],TF(0.0)); end - else - [@spawnat pid fill!(l[:L][1],TF(0.0)) for pid in l.pids] - [@spawnat pid fill!(y[:L][1],TF(0.0)) for pid in y.pids] - end - fill!(x,TF(0.0)) - end + ind_ref = maxit + if options.Minkowski == false + N = length(x) + elseif options.Minkowski == true && zero_ini_guess==true + N = length(x)*2 + elseif options.Minkowski == true && zero_ini_guess==false + @assert length(x)==(2*length(m)) + N = length(x) + end + # const N = size(TD_OP[1],2) #this line will give really weird errors later on for some reason.. 
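#------------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the patch): the AtA blocks and the
# system matrix Q assembled further down in this function are kept in compressed
# diagonal storage (CDS/DIA): a tall N-by-ndiags array R plus an offset vector, where
# column i of R stores the diagonal of A with offset d = offset[i], i.e.
# R[r,i] = A[r,r+d]. That is the layout CDS_MVp (changed earlier in this patch)
# multiplies with: y[r] += R[r,i]*x[r+d]. A tiny, self-contained check of the
# convention against a dense matrix (values below are made up for illustration):
#------------------------------------------------------------------------------
using SetIntersectionProjection           # for CDS_MVp
A_dense = [2.0 1.0 0.0;
           0.0 2.0 1.0;
           0.0 0.0 2.0]                   # nonzero diagonals have offsets 0 and +1
offset  = [0, 1]
R       = [2.0 1.0;
           2.0 1.0;
           2.0 0.0]                       # R[r,1] = A[r,r]; R[r,2] = A[r,r+1]; out-of-range entries unused
x_test  = [1.0, 2.0, 3.0]
y_test  = zeros(3)
CDS_MVp(3, 2, R, offset, x_test, y_test)  # y_test should equal A_dense*x_test = [4.0, 7.0, 6.0]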
+ + + # if typeof(AtA[1])==SparseMatrixCSC{TF,TI} + # push!(AtA,speye(TF,N)); + # elseif typeof(AtA[1])==Array{TF,2} + # push!(AtA,ones(TF,N,1)) + # end + # push!(TD_OP,speye(TF,N)); + # + # push!(set_Prop.AtA_offsets,[0]) + # push!(set_Prop.banded,true) + # push!(set_Prop.AtA_diag,true) + # push!(set_Prop.dense,false) + + p = length(TD_OP); #number of terms in in the sum of functions of the projeciton problem + pp = p-1; + if feasibility_only==true; pp=p; end; + + rho = Vector{TF}(undef,p) + if length(rho_ini)==1 + fill!(rho,rho_ini[1]) + else + copy!(rho,rho_ini) + end + prox = copy(P_sub) + if feasibility_only==false + #define prox for all terms in the sum (projectors onto sets) + #add prox for the data fidelity term 0.5||m-x||_2^2 + m_orig = deepcopy(m) ::Vector{TF} + prox_data = input -> prox_l2s!(input,rho[end],m_orig) + push!(prox,prox_data); + end + if parallel==true + prox = distribute(prox) + P_sub = distribute(P_sub) + rho = distribute(rho) + end + + if parallel==true && nworkers() compute_relative_feasibility(m,feasibility_initial,TD_OP,P_sub) + #[@spawnat pid f_compute_rel_feas for pid in 2:nworkers()] + #feasibility_initial = pmap(f_compute_rel_feas, feasibility_initial,TD_OP,P_sub; distributed=true, batch_size=1, on_error=nothing, retry_delays=[], retry_check=nothing) + feasibility_initial = pmap((feasibility_initial,TD_OP,P_sub) -> compute_relative_feasibility(m,feasibility_initial,TD_OP,P_sub) , feasibility_initial,TD_OP,P_sub; distributed=true, batch_size=1, on_error=nothing, retry_delays=[], retry_check=nothing) + else + for ii = 1:length(P_sub) + feasibility_initial[ii] = norm(P_sub[ii](TD_OP[ii]*m) .- TD_OP[ii]*m) ./ (norm(TD_OP[ii]*m)+(100*eps(TF))); + end + end + if maximum(feasibility_initial)= 1 #at least one operator is a JOLI operator -> wrap all others as a JOLI matrix as well + Q_offsets = [] + if typeof(AtA[1]) <: joAbstractLinearOperator + Q = rho[1]*AtA[1] + else + Q = rho[1]*joMatrix(AtA[1]) + end + for i=2:p + if typeof(AtA[i]) <: joAbstractLinearOperator + Q = Q + rho[i]*AtA[i] + else + Q = Q + rho[i]*joMatrix(AtA[i]) + end + end + else #n_CDS_matrices == p should hold #all matrics in AtA are in compressed diagonal storage format + all_offsets=zeros(TI,999,99) + for i=1:length(AtA) #find all unique offset + all_offsets[1:length(set_Prop.AtA_offsets[i]),i]=set_Prop.AtA_offsets[i] + end + Q_offsets=convert(Vector{TI},unique(all_offsets)) + Q=zeros(TF,size(AtA[1],1),length(Q_offsets)) + for i=1:length(AtA) + for j=1:length(set_Prop.AtA_offsets[i]) + #Q_current_col = findin(Q_offsets,set_Prop.AtA_offsets[i][j]) + Q_current_col = findall((in)(set_Prop.AtA_offsets[i][j]),Q_offsets) + Q[:,Q_current_col] .= Q[:,Q_current_col] .+ rho[i] .* AtA[i][:,j] + end + end + end + + #initialize logs + log_PARSDMM = log_type_PARSDMM(zeros(maxit,pp),zeros(maxit,p),zeros(maxit,p),zeros(maxit),zeros(maxit),zeros(maxit), + zeros(maxit),zeros(maxit,p),zeros(maxit,p),zeros(maxit),zeros(maxit),TimerOutput()); + + log_PARSDMM.set_feasibility[1,:] = feasibility_initial + + if parallel == true + + gamma = distribute(gamma) + + #y = distribute(y) + y_0 = distribute(y_0) + y_old = distribute(y_old) + + #l = distribute(l) + l_0 = distribute(l_0) + l_old = distribute(l_old) + l_hat_0 = distribute(l_hat_0) + l_hat = distribute(l_hat) + + x_hat = distribute(x_hat) + s_0 = distribute(s_0) + s = distribute(s) + r_pri = distribute(r_pri) + + d_l_hat = distribute(d_l_hat) + d_H_hat = distribute(d_H_hat) + d_l = distribute(d_l) + d_G_hat = distribute(d_G_hat) + end + + #fill distributed vectors 
with zeros (from 1 entry to N entries, because this is faster than first fill and then dist. Should be able to do in one go ideally) + if parallel == true + [@spawnat pid y_0[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid y_old[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid l_0[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid l_old[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid l_hat_0[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid l_hat[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid x_hat[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid s_0[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid s[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid r_pri[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid d_l_hat[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid d_H_hat[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid d_l[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + [@spawnat pid d_G_hat[:L][1] = zeros(TF,size(TD_OP[:L][1],1)) for pid in y_0.pids] + end + + #distribute y and l if they are not already distributed + if (parallel==true) && (typeof(l)<:DistributedArrays.DArray) == false + l = distribute(l) + end + if parallel==true && (typeof(y)<:DistributedArrays.DArray) == false + y = distribute(y) + end + if parallel==true + [ @spawnat pid adjust_gamma for pid in y.pids ] + [ @spawnat pid adjust_rho for pid in y.pids ] + [ @spawnat pid adjust_feasibility_rho for pid in y.pids ] + end + if parallel == true + [@spawnat pid comp_grid for pid in y.pids] + end + + if parallel == true + set_feas = zeros(TF,length(TD_OP)) #initialize a vector only required for parallel stuff + set_feas = distribute(set_feas) + else + set_feas = [] + end + + if zero_ini_guess == true + if parallel==false + for i=1:length(l); fill!(l[i],TF(0.0)); end + for i=1:length(y); fill!(y[i],TF(0.0)); end + else + [@spawnat pid fill!(l[:L][1],TF(0.0)) for pid in l.pids] + [@spawnat pid fill!(y[:L][1],TF(0.0)) for pid in y.pids] + end + fill!(x,TF(0.0)) + end + return ind_ref,N,TD_OP,AtA,p,rho_update_frequency,adjust_gamma,adjust_rho,adjust_feasibility_rho,gamma_ini,rho,gamma,y,y_0,y_old, l,l_0,l_old,l_hat_0,x_0,x_old,r_dual,rhs,s,s_0,Q,prox,log_PARSDMM,l_hat,x_hat,r_pri, d_l_hat,d_H_hat,d_l,d_G_hat,P_sub,Q_offsets,stop,feasibility_initial,set_feas,Ax_out diff --git a/src/adapt_rho_gamma.jl b/src/adapt_rho_gamma.jl index 5f31d3a..aa3cb8c 100644 --- a/src/adapt_rho_gamma.jl +++ b/src/adapt_rho_gamma.jl @@ -6,127 +6,127 @@ Updates relaxation and augmented-Lagrangian penalty parameter. To be used in serial version of PARSDMM. 
""" function adapt_rho_gamma( - i ::Integer, - gamma ::Vector{TF}, - rho ::Vector{TF}, - adjust_gamma ::Bool, - adjust_rho ::Bool, - y ::Vector{Vector{TF}}, - y_old ::Vector{Vector{TF}}, - s ::Vector{Vector{TF}}, - s_0 ::Vector{Vector{TF}}, - l ::Vector{Vector{TF}}, - l_hat_0 ::Vector{Vector{TF}}, - l_0 ::Vector{Vector{TF}}, - l_old ::Vector{Vector{TF}}, - y_0 ::Vector{Vector{TF}}, - p ::Integer, - l_hat ::Vector{Vector{TF}}, - d_l_hat ::Vector{Vector{TF}}, - d_H_hat ::Vector{Vector{TF}}, - d_l ::Vector{Vector{TF}}, - d_G_hat ::Vector{Vector{TF}} - ) where {TF<:Real} + i ::Integer, + gamma ::Vector{TF}, + rho ::Vector{TF}, + adjust_gamma ::Bool, + adjust_rho ::Bool, + y ::Vector{Vector{TF}}, + y_old ::Vector{Vector{TF}}, + s ::Vector{Vector{TF}}, + s_0 ::Vector{Vector{TF}}, + l ::Vector{Vector{TF}}, + l_hat_0 ::Vector{Vector{TF}}, + l_0 ::Vector{Vector{TF}}, + l_old ::Vector{Vector{TF}}, + y_0 ::Vector{Vector{TF}}, + p ::Integer, + l_hat ::Vector{Vector{TF}}, + d_l_hat ::Vector{Vector{TF}}, + d_H_hat ::Vector{Vector{TF}}, + d_l ::Vector{Vector{TF}}, + d_G_hat ::Vector{Vector{TF}} + ) where {TF<:Real} - if TF==Float64 - safeguard = 1e-10 - elseif TF==Float32 - safeguard = 1f-6 - end + if TF==Float64 + safeguard = 1e-10 + elseif TF==Float32 + safeguard = 1f-6 + end eps_correlation = TF(0.3) #hardcoded and suggested value by the paper based on numerical evidence #Threads.@threads for ii = 1:p for ii = 1:p - l_hat[ii] .= l_old[ii] .+ rho[ii] .* ( -s[ii] .+ y_old[ii] ) - d_l_hat[ii] .= l_hat[ii] .- l_hat_0[ii] - d_H_hat[ii] .= s[ii] .- s_0[ii] + l_hat[ii] .= l_old[ii] .+ rho[ii] .* ( -s[ii] .+ y_old[ii] ) + d_l_hat[ii] .= l_hat[ii] .- l_hat_0[ii] + d_H_hat[ii] .= s[ii] .- s_0[ii] - d_dHh_dlh = dot(d_H_hat[ii],d_l_hat[ii]) - n_d_H_hat = norm(d_H_hat[ii]) - n_d_l_hat = norm(d_l_hat[ii]) - d_l[ii] .= l[ii] .- l_0[ii] - n_d_l = norm(d_l[ii]) - d_G_hat[ii].= -(y[ii] .- y_0[ii]) - n_d_G_hat = norm(d_G_hat[ii]) - d_dGh_dl = dot(d_G_hat[ii],d_l[ii]) + d_dHh_dlh = dot(d_H_hat[ii],d_l_hat[ii]) + n_d_H_hat = norm(d_H_hat[ii]) + n_d_l_hat = norm(d_l_hat[ii]) + d_l[ii] .= l[ii] .- l_0[ii] + n_d_l = norm(d_l[ii]) + d_G_hat[ii].= -(y[ii] .- y_0[ii]) + n_d_G_hat = norm(d_G_hat[ii]) + d_dGh_dl = dot(d_G_hat[ii],d_l[ii]) - alpha_reliable = false; - if (n_d_H_hat*n_d_l_hat) > safeguard && (n_d_H_hat.^2) > safeguard && d_dHh_dlh>safeguard - alpha_reliable = true - alpha_correlation = d_dHh_dlh./( n_d_H_hat*n_d_l_hat ); - end + alpha_reliable = false; + if (n_d_H_hat*n_d_l_hat) > safeguard && (n_d_H_hat.^2) > safeguard && d_dHh_dlh>safeguard + alpha_reliable = true + alpha_correlation = d_dHh_dlh./( n_d_H_hat*n_d_l_hat ); + end - beta_reliable = false; - if (n_d_G_hat*n_d_l) > safeguard && (n_d_G_hat^2) > safeguard && d_dGh_dl > safeguard - beta_reliable = true - beta_correlation = d_dGh_dl ./ ( n_d_G_hat*n_d_l ); - end + beta_reliable = false; + if (n_d_G_hat*n_d_l) > safeguard && (n_d_G_hat^2) > safeguard && d_dGh_dl > safeguard + beta_reliable = true + beta_correlation = d_dGh_dl ./ ( n_d_G_hat*n_d_l ); + end - alpha_comp = false - if (alpha_reliable==true) && (alpha_correlation > eps_correlation) - alpha_comp = true - alpha_hat_MG = d_dHh_dlh ./ (n_d_H_hat.^2); - alpha_hat_SD = (n_d_l_hat^2) ./ d_dHh_dlh; - if (TF(2.0)*alpha_hat_MG) > alpha_hat_SD - alpha_hat = alpha_hat_MG; - else - alpha_hat = alpha_hat_SD - alpha_hat_MG/TF(2.0); - end + alpha_comp = false + if (alpha_reliable==true) && (alpha_correlation > eps_correlation) + alpha_comp = true + alpha_hat_MG = d_dHh_dlh ./ (n_d_H_hat.^2); + alpha_hat_SD = 
(n_d_l_hat^2) ./ d_dHh_dlh; + if (TF(2.0)*alpha_hat_MG) > alpha_hat_SD + alpha_hat = alpha_hat_MG; + else + alpha_hat = alpha_hat_SD - alpha_hat_MG/TF(2.0); end + end - beta_comp = false - if (beta_reliable==1) && (beta_correlation > eps_correlation) - beta_comp = true - beta_hat_MG = d_dGh_dl ./ (n_d_G_hat^2); - beta_hat_SD = (n_d_l^2) ./ d_dGh_dl; - if (TF(2.0)*beta_hat_MG) > beta_hat_SD - beta_hat = beta_hat_MG; - else - beta_hat = beta_hat_SD - beta_hat_MG/TF(2.0); - end + beta_comp = false + if (beta_reliable==1) && (beta_correlation > eps_correlation) + beta_comp = true + beta_hat_MG = d_dGh_dl ./ (n_d_G_hat^2); + beta_hat_SD = (n_d_l^2) ./ d_dGh_dl; + if (TF(2.0)*beta_hat_MG) > beta_hat_SD + beta_hat = beta_hat_MG; + else + beta_hat = beta_hat_SD - beta_hat_MG/TF(2.0); end + end - #update rho and or gamma - if adjust_rho == true && adjust_gamma == false - if alpha_comp == true && beta_comp == true - rho[ii] = sqrt(alpha_hat*beta_hat); - elseif alpha_comp == true && beta_comp == false - rho[ii] = alpha_hat; - elseif alpha_comp == false && beta_comp == true - rho[ii] = beta_hat; - else - #rho = rho; #do nothing - end - elseif adjust_rho == true && adjust_gamma == true - if alpha_comp == true && beta_comp == true - rho[ii] = sqrt(alpha_hat*beta_hat); - gamma[ii]=TF(1.0)+(( TF(2.0)*sqrt(alpha_hat*beta_hat) ) ./ ( alpha_hat+beta_hat )); - elseif alpha_comp == true && beta_comp == false - rho[ii] = alpha_hat; - gamma[ii]=TF(1.9); - elseif alpha_comp == false && beta_comp == true - rho[ii] = beta_hat; - gamma[ii]=TF(1.1); - else - #rho = rho; #do nothing - gamma[ii]=TF(1.5); - end - elseif adjust_rho == false && adjust_gamma == true - if alpha_comp == true && beta_comp == true - gamma[ii]=TF(1.0)+(( TF(2.0)*sqrt(alpha_hat*beta_hat) ) ./ ( alpha_hat+beta_hat )); - elseif alpha_comp == true && beta_comp == false - gamma[ii]=TF(1.9); - elseif alpha_comp == false && beta_comp == true - gamma[ii]=TF(1.1); - else - gamma[ii]=TF(1.5); - end - end #end compute new rho and gamma - end #end for loop + #update rho and or gamma + if adjust_rho == true && adjust_gamma == false + if alpha_comp == true && beta_comp == true + rho[ii] = sqrt(alpha_hat*beta_hat); + elseif alpha_comp == true && beta_comp == false + rho[ii] = alpha_hat; + elseif alpha_comp == false && beta_comp == true + rho[ii] = beta_hat; + else + #rho = rho; #do nothing + end + elseif adjust_rho == true && adjust_gamma == true + if alpha_comp == true && beta_comp == true + rho[ii] = sqrt(alpha_hat*beta_hat); + gamma[ii]=TF(1.0)+(( TF(2.0)*sqrt(alpha_hat*beta_hat) ) ./ ( alpha_hat+beta_hat )); + elseif alpha_comp == true && beta_comp == false + rho[ii] = alpha_hat; + gamma[ii]=TF(1.9); + elseif alpha_comp == false && beta_comp == true + rho[ii] = beta_hat; + gamma[ii]=TF(1.1); + else + #rho = rho; #do nothing + gamma[ii]=TF(1.5); + end + elseif adjust_rho == false && adjust_gamma == true + if alpha_comp == true && beta_comp == true + gamma[ii]=TF(1.0)+(( TF(2.0)*sqrt(alpha_hat*beta_hat) ) ./ ( alpha_hat+beta_hat )); + elseif alpha_comp == true && beta_comp == false + gamma[ii]=TF(1.9); + elseif alpha_comp == false && beta_comp == true + gamma[ii]=TF(1.1); + else + gamma[ii]=TF(1.5); + end + end #end compute new rho and gamma + end #end for loop -return rho,gamma,l_hat,d_l_hat,d_H_hat,d_l ,d_G_hat +return rho ,gamma, l_hat, d_l_hat, d_H_hat, d_l, d_G_hat end #function adapt_rho_gamma diff --git a/src/adapt_rho_gamma_parallel.jl b/src/adapt_rho_gamma_parallel.jl index f351435..b73cb99 100644 --- a/src/adapt_rho_gamma_parallel.jl +++ 
b/src/adapt_rho_gamma_parallel.jl @@ -26,104 +26,104 @@ function adapt_rho_gamma_parallel( d_G_hat ::Vector{Vector{TF}} ) where {TF<:Real} -eps_correlation = TF(0.3) #hardcoded and suggested value by the paper based on numerical evidence - - - if TF==Float64 - safeguard = 1e-10 - elseif TF==Float32 - safeguard = 1f-6 - end - - #standard loop-fusion - @. l_hat[1] = l_old[1] + rho[1]* ( -s[1] + y_old[1] ) - # @. d_l_hat[1] = l_hat[1] - l_hat_0[1] - # @. d_H_hat[1] = s[1] - s_0[1] - # @. d_l[1] = l[1] - l_0[1] - # @. d_G_hat[1] = -(y[1] - y_0[1]) - - #wrap multi-theading inside a function - a_is_b_min_c_MT!(d_l_hat[1],l_hat[1],l_hat_0[1]) - a_is_b_min_c_MT!(d_H_hat[1],s[1],s_0[1]) - a_is_b_min_c_MT!(d_l[1],l[1],l_0[1]) - a_is_b_min_c_MT!(d_G_hat[1],y_0[1],y[1]) - - - d_dHh_dlh = dot(d_H_hat[1],d_l_hat[1]) - d_dGh_dl = dot(d_G_hat[1],d_l[1]) - - n_d_H_hat = norm(d_H_hat[1]) - n_d_l_hat = norm(d_l_hat[1]) - n_d_l = norm(d_l[1]) - n_d_G_hat = norm(d_G_hat[1]) - - alpha_comp = false - #print(alpha_comp) - if (n_d_H_hat*n_d_l_hat) > safeguard && (n_d_H_hat.^2) > safeguard && d_dHh_dlh>safeguard - alpha_correlation = d_dHh_dlh./( n_d_H_hat*n_d_l_hat ) - if alpha_correlation > eps_correlation - alpha_comp = true - alpha_hat_MG = d_dHh_dlh./(n_d_H_hat.^2); - alpha_hat_SD = (n_d_l_hat^2)./d_dHh_dlh; - if (TF(2.0)*alpha_hat_MG) > alpha_hat_SD - alpha_hat = alpha_hat_MG; - else - alpha_hat = alpha_hat_SD - alpha_hat_MG/TF(2.0); - end - end - end - - beta_comp = false - if (n_d_G_hat*n_d_l) > safeguard && (n_d_G_hat^2) > safeguard && d_dGh_dl > safeguard - beta_correlation = d_dGh_dl./( n_d_G_hat*n_d_l ) - if beta_correlation > eps_correlation - beta_comp = true - beta_hat_MG = d_dGh_dl./(n_d_G_hat^2); - beta_hat_SD = (n_d_l^2)./d_dGh_dl; - if (TF(2.0)*beta_hat_MG) > beta_hat_SD - beta_hat = beta_hat_MG; - else - beta_hat = beta_hat_SD - beta_hat_MG/TF(2.0); - end - end + eps_correlation = TF(0.3) #hardcoded and suggested value by the paper based on numerical evidence + + + if TF==Float64 + safeguard = 1e-10 + elseif TF==Float32 + safeguard = 1f-6 + end + + #standard loop-fusion + @. l_hat[1] = l_old[1] + rho[1]* ( -s[1] + y_old[1] ) + # @. d_l_hat[1] = l_hat[1] - l_hat_0[1] + # @. d_H_hat[1] = s[1] - s_0[1] + # @. d_l[1] = l[1] - l_0[1] + # @. 
d_G_hat[1] = -(y[1] - y_0[1]) + + #wrap multi-theading inside a function + a_is_b_min_c_MT!(d_l_hat[1],l_hat[1],l_hat_0[1]) + a_is_b_min_c_MT!(d_H_hat[1],s[1],s_0[1]) + a_is_b_min_c_MT!(d_l[1],l[1],l_0[1]) + a_is_b_min_c_MT!(d_G_hat[1],y_0[1],y[1]) + + + d_dHh_dlh = dot(d_H_hat[1],d_l_hat[1]) + d_dGh_dl = dot(d_G_hat[1],d_l[1]) + + n_d_H_hat = norm(d_H_hat[1]) + n_d_l_hat = norm(d_l_hat[1]) + n_d_l = norm(d_l[1]) + n_d_G_hat = norm(d_G_hat[1]) + + alpha_comp = false + #print(alpha_comp) + if (n_d_H_hat*n_d_l_hat) > safeguard && (n_d_H_hat.^2) > safeguard && d_dHh_dlh>safeguard + alpha_correlation = d_dHh_dlh./( n_d_H_hat*n_d_l_hat ) + if alpha_correlation > eps_correlation + alpha_comp = true + alpha_hat_MG = d_dHh_dlh./(n_d_H_hat.^2); + alpha_hat_SD = (n_d_l_hat^2)./d_dHh_dlh; + if (TF(2.0)*alpha_hat_MG) > alpha_hat_SD + alpha_hat = alpha_hat_MG; + else + alpha_hat = alpha_hat_SD - alpha_hat_MG/TF(2.0); end - - #update rho and or gamma - if adjust_rho == true && adjust_gamma == true - if alpha_comp == true && beta_comp == true - rho[1] = sqrt(alpha_hat*beta_hat); - gamma[1]=TF(1.0)+(( TF(2.0)*sqrt(alpha_hat*beta_hat) )./( alpha_hat+beta_hat )); - elseif alpha_comp == true && beta_comp == false - rho[1] = alpha_hat; - gamma[1]=TF(1.9); - elseif alpha_comp == false && beta_comp == true - rho[1] = beta_hat; - gamma[1]=TF(1.1); + end + end + + beta_comp = false + if (n_d_G_hat*n_d_l) > safeguard && (n_d_G_hat^2) > safeguard && d_dGh_dl > safeguard + beta_correlation = d_dGh_dl./( n_d_G_hat*n_d_l ) + if beta_correlation > eps_correlation + beta_comp = true + beta_hat_MG = d_dGh_dl./(n_d_G_hat^2); + beta_hat_SD = (n_d_l^2)./d_dGh_dl; + if (TF(2.0)*beta_hat_MG) > beta_hat_SD + beta_hat = beta_hat_MG; else - #rho = rho; #do nothing - gamma[1]=TF(1.5); + beta_hat = beta_hat_SD - beta_hat_MG/TF(2.0); end - elseif adjust_rho == true && adjust_gamma == false - if alpha_comp == true && beta_comp == true - rho[1] = sqrt(alpha_hat*beta_hat); - elseif alpha_comp == true && beta_comp == false - rho[1] = alpha_hat; - elseif alpha_comp == false && beta_comp == true - rho[1] = beta_hat; - else - #rho = rho; #do nothing - end - elseif adjust_rho == false && adjust_gamma == true - if alpha_comp == true && beta_comp == true - gamma[1]=TF(1.0)+(( TF(2.0)*sqrt(alpha_hat*beta_hat) )./( alpha_hat+beta_hat )); - elseif alpha_comp == true && beta_comp == false - gamma[1]=TF(1.9); - elseif alpha_comp == false && beta_comp == true - gamma[1]=TF(1.1); - else - gamma[1]=TF(1.5); - end - end #end compute new rho and gamma + end + end + + #update rho and or gamma + if adjust_rho == true && adjust_gamma == true + if alpha_comp == true && beta_comp == true + rho[1] = sqrt(alpha_hat*beta_hat); + gamma[1]=TF(1.0)+(( TF(2.0)*sqrt(alpha_hat*beta_hat) )./( alpha_hat+beta_hat )); + elseif alpha_comp == true && beta_comp == false + rho[1] = alpha_hat; + gamma[1]=TF(1.9); + elseif alpha_comp == false && beta_comp == true + rho[1] = beta_hat; + gamma[1]=TF(1.1); + else + #rho = rho; #do nothing + gamma[1]=TF(1.5); + end + elseif adjust_rho == true && adjust_gamma == false + if alpha_comp == true && beta_comp == true + rho[1] = sqrt(alpha_hat*beta_hat); + elseif alpha_comp == true && beta_comp == false + rho[1] = alpha_hat; + elseif alpha_comp == false && beta_comp == true + rho[1] = beta_hat; + else + #rho = rho; #do nothing + end + elseif adjust_rho == false && adjust_gamma == true + if alpha_comp == true && beta_comp == true + gamma[1]=TF(1.0)+(( TF(2.0)*sqrt(alpha_hat*beta_hat) )./( alpha_hat+beta_hat )); + elseif alpha_comp == 
true && beta_comp == false + gamma[1]=TF(1.9); + elseif alpha_comp == false && beta_comp == true + gamma[1]=TF(1.1); + else + gamma[1]=TF(1.5); + end + end #end compute new rho and gamma diff --git a/src/cg.jl b/src/cg.jl index ff0767c..eb92023 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -85,16 +85,20 @@ function cg(A::Function,b::Vector{TF}; tol::Real=1e-2,maxIter::Integer=100,M::Fu Ap = A(p)::Vector{TF} gamma = dot(r,z) + #gamma = BLAS.dot(n, r,1,z,1)# alpha = gamma / dot(p,Ap) + #alpha = gamma / BLAS.dot(n, p,1,Ap,1)#dot(p,Ap) + if alpha==Inf || alpha<0 flag = -2; break end BLAS.axpy!(n,alpha,p,1,x,1) # x = alpha*p+x - if storeInterm; X[:,iter] = x; end + #if storeInterm; X[:,iter] = x; end BLAS.axpy!(n,-alpha,Ap,1,r,1) # r -= alpha*Ap - resvec[iter] = norm(r)/nr0 + #resvec[iter] = BLAS.nrm2(n, r, 1) / nr0# + resvec[iter] = norm(r)/nr0 if out==2 println(iter,resvec[iter]) end @@ -103,10 +107,12 @@ function cg(A::Function,b::Vector{TF}; tol::Real=1e-2,maxIter::Integer=100,M::Fu end z = M(r)::Vector{TF} - beta = dot(z,r) / gamma + #beta = BLAS.dot(n, z,1,r,1) / gamma + beta = dot(z,r) / gamma # the following two lines are equivalent to p = z + beta*p - p = BLAS.scal!(n,beta,p,1)::Vector{TF} - p = BLAS.axpy!(n,TF(1.0),z,1,p,1)::Vector{TF} + #p = BLAS.scal!(n,beta,p,1)::Vector{TF} + #p = BLAS.axpy!(n,TF(1.0),z,1,p,1)::Vector{TF} + p = BLAS.axpby!(TF(1.0), z, beta, p)::Vector{TF} # Overwrite Y with X*a + Y*b, where a and b are scalars. Return Y end if out>=0 diff --git a/src/compute_relative_feasibility.jl b/src/compute_relative_feasibility.jl index 98101aa..45b8282 100644 --- a/src/compute_relative_feasibility.jl +++ b/src/compute_relative_feasibility.jl @@ -6,13 +6,19 @@ r_feas = ||P(A*x)-A*x||/||A*x|| Helper function that is intended for the parallel version only, where just a single operator is sent to a worker where this script runs """ -function compute_relative_feasibility(m::Vector{TF}, feasibility_initial::Vector{TF}, - TD_OP::Vector{Union{SparseMatrixCSC{TF,TI},JOLI.joAbstractLinearOperator{TF,TF}}}, - P_sub) where {TF<:Real,TI<:Integer} +# function compute_relative_feasibility(m::Vector{TF}, feasibility_initial::Vector{TF}, +# TD_OP::Vector{Union{SparseMatrixCSC{TF,TI},JOLI.joAbstractLinearOperator{TF,TF}}}, +# P_sub) where {TF<:Real,TI<:Integer} +function compute_relative_feasibility(m::Vector{TF},feasibility_initial::TF, + TD_OP::Union{SparseMatrixCSC{TF,TI},JOLI.joAbstractLinearOperator{TF,TF}}, + P_sub) where {TF<:Real,TI<:Integer} - #the two lines below don't work in this case, because P_sub mutates the input - # pm = TD_OP[1]*m - # feasibility_initial[1]=norm(P_sub[1](pm) .- pm) ./ (norm(pm)+(100*eps(TF))) - feasibility_initial[1]=norm(P_sub[1](TD_OP[1]*m) .- TD_OP[1]*m) ./ (norm(TD_OP[1]*m)+(100*eps(TF))) -#gc() check if we need this in julia >0.7 + #note that P_sub mutates the input in-place + s_temp = TD_OP*m + norm(P_sub(deepcopy(s_temp)) .- s_temp) + # feasibility_initial[1]=norm(P_sub[1](pm) .- pm) ./ (norm(pm)+(100*eps(TF))) + feasibility_initial = norm(P_sub(deepcopy(s_temp)) .- s_temp) ./ (norm(s_temp)+(100*eps(TF))) + s_temp = [] + #gc()# check if we need this in julia >0.7 + return feasibility_initial end diff --git a/src/projectors/project_l1_Duchi!.jl b/src/projectors/project_l1_Duchi!.jl index 509a31c..13a385d 100644 --- a/src/projectors/project_l1_Duchi!.jl +++ b/src/projectors/project_l1_Duchi!.jl @@ -1,4 +1,4 @@ -export project_l1_Duchi!, sa_old, sa_new! +export project_l1_Duchi! 
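#------------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the patch): the cg.jl hunk earlier
# in this patch replaces the two-call update
#     p = BLAS.scal!(n, beta, p, 1);  p = BLAS.axpy!(n, 1.0, z, 1, p, 1)
# with a single in-place BLAS.axpby!(1.0, z, beta, p), i.e. p .= z .+ beta .* p,
# which is exactly the p-update conjugate gradient needs and touches p only once.
# A small equivalence check (hypothetical values, Float32 to match options.FL==Float32):
#------------------------------------------------------------------------------
using LinearAlgebra
z     = rand(Float32, 5)
p     = rand(Float32, 5)
beta  = 0.7f0
p_ref = z .+ beta .* p                    # reference: p = z + beta*p
BLAS.axpby!(1.0f0, z, beta, p)            # overwrites p in place with 1*z + beta*p
p ≈ p_ref                                 # should be true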
""" project_l1_Duchi!(v::Union{Vector{TF},Vector{Complex{TF}}}, b::TF) @@ -29,7 +29,13 @@ function project_l1_Duchi!(v::Union{Vector{TF},Vector{Complex{TF}}}, b::TF) wher #use RadixSort for Float32 (short keywords) copyto!(u, v) u .= abs.(u) - sort!(u, rev=true, alg=RadixSort) + u = convert(Vector{TF},u) + if TF==Float32 + sort!(u, rev=true, alg=RadixSort) + else + u = sort!(u, rev=true, alg=QuickSort) + end + # if TF==Float32 # u = sort!(abs.(u), rev=true, alg=RadixSort) diff --git a/src/rhs_compose.jl b/src/rhs_compose.jl index 3f0677c..2cb02c5 100644 --- a/src/rhs_compose.jl +++ b/src/rhs_compose.jl @@ -4,25 +4,30 @@ export rhs_compose form the right-hand-side for the linear system in PARSDMM for the x-minimization """ function rhs_compose( - rhs ::Vector{TF}, - l ::Union{ Vector{Vector{TF}},DistributedArrays.DArray{Array{TF,1},1,Array{Array{TF,1},1}} }, - y ::Union{ Vector{Vector{TF}},DistributedArrays.DArray{Array{TF,1},1,Array{Array{TF,1},1}} }, - rho ::Union{ Vector{TF}, DistributedArrays.DArray{TF,1,Array{TF,1}} }, - TD_OP ::Union{Vector{Union{SparseMatrixCSC{TF,TI},JOLI.joAbstractLinearOperator{TF,TF}}},DistributedArrays.DArray{Union{JOLI.joAbstractLinearOperator{TF,TF}, SparseMatrixCSC{TF,TI}},1,Array{Union{JOLI.joAbstractLinearOperator{TF,TF}, SparseMatrixCSC{TF,TI}},1}} }, - p ::Integer, - Blas_active ::Bool, - parallel ::Bool - ) where {TF<:Real,TI<:Integer} + rhs ::Vector{TF}, + l ::Union{ Vector{Vector{TF}},DistributedArrays.DArray{Array{TF,1},1,Array{Array{TF,1},1}} }, + y ::Union{ Vector{Vector{TF}},DistributedArrays.DArray{Array{TF,1},1,Array{Array{TF,1},1}} }, + rho ::Union{ Vector{TF}, DistributedArrays.DArray{TF,1,Array{TF,1}} }, + TD_OP ::Union{Vector{Union{SparseMatrixCSC{TF,TI},JOLI.joAbstractLinearOperator{TF,TF}}},DistributedArrays.DArray{Union{JOLI.joAbstractLinearOperator{TF,TF}, SparseMatrixCSC{TF,TI}},1,Array{Union{JOLI.joAbstractLinearOperator{TF,TF}, SparseMatrixCSC{TF,TI}},1}} }, + p ::Integer, + Blas_active ::Bool, + parallel ::Bool + ) where {TF<:Real,TI<:Integer} -if parallel==true +if parallel == true rhs = @distributed (+) for ii = 1:p TD_OP[ii]'*(rho[ii] .* y[ii] .+ l[ii]) end + #mul!(rhs,TD_OP',rho .* y .+ l) #not possible + #rhs = TD_OP'*(rho .* y .+ l) possible but slow else #serial fill!(rhs,TF(0)); if Blas_active #serial with explicit BLAS calls + temp_array = zeros(TF,length(rhs)) for ii=1:p - BLAS.axpy!(TF(1.0), TD_OP[ii]'*(rho[ii] .* y[ii] .+ l[ii]), rhs) + mul!(temp_array,TD_OP[ii]',rho[ii] .* y[ii] .+ l[ii]) + #BLAS.axpy!(TF(1.0), TD_OP[ii]'*(rho[ii] .* y[ii] .+ l[ii]), rhs) + BLAS.axpy!(TF(1.0), temp_array, rhs) end else for ii=1:p #serial with loop fusion only @@ -32,4 +37,4 @@ else #serial end #end creating new rhs return rhs -end #function +end #function \ No newline at end of file diff --git a/src/update_y_l.jl b/src/update_y_l.jl index 86636c4..7ab6b28 100644 --- a/src/update_y_l.jl +++ b/src/update_y_l.jl @@ -39,7 +39,9 @@ for ii=1:p copy!(y_old[ii],y[ii]); copy!(l_old[ii],l[ii]); - s[ii] = TD_OP[ii]*x; + + mul!(s[ii],TD_OP[ii],x) #s[ii] = TD_OP[ii]*x; + if Blas_active #&& i>5 if gamma[ii]==1 #without relaxation copyto!(y[ii],s[ii]) @@ -55,49 +57,49 @@ for ii=1:p copyto!(y[ii],x_hat[ii]) #y[ii] = prox[ii]( BLAS.axpy!(-rho1[ii],l[ii],y[ii])) BLAS.axpy!(-rho1[ii],l[ii],y[ii]) - y[ii] = prox[ii](y[ii]) - r_pri[ii] .= -s[ii] .+ y[ii]; + y[ii] = prox[ii](y[ii]) + r_pri[ii] .= -s[ii] .+ y[ii]; BLAS.axpy!(rho[ii],y[ii] .- x_hat[ii],l[ii]); end - else + else #without explicit BLAS calls if gamma[ii]==1 #without relaxation #y[ii] = prox[ii]( 
s[ii].-l[ii].*rho1[ii] ); - y[ii] .= s[ii] .- l[ii].*rho1[ii] - y[ii] = prox[ii](y[ii]); + y[ii] .= s[ii] .- l[ii].*rho1[ii] + y[ii] = prox[ii](y[ii]); r_pri[ii] .= -s[ii] .+ y[ii]; l[ii] .= l[ii] .+ rho[ii] .* r_pri[ii]; else #relaxed iterations x_hat[ii] .= gamma[ii].*s[ii] .+ ( TF(1.0) .- gamma[ii] ) .* y[ii]; #y[ii] = prox[ii]( x_hat[ii].-l[ii].*rho1[ii] ); y[ii] .= x_hat[ii] .- l[ii] .* rho1[ii] - y[ii] = prox[ii]( y[ii] ); + y[ii] = prox[ii]( y[ii] ); r_pri[ii] .= -s[ii] .+ y[ii]; l[ii] .= l[ii] .+ rho[ii] .* ( -x_hat[ii] .+ y[ii] ); end end #end blas -log_PARSDMM.r_pri[i,ii] = norm(r_pri[ii])#./(norm(y[ii])+(100*eps(TF))); #add 1-14, because y may be 0 for certain initializations of x,y,l -if Blas_active - log_PARSDMM.r_dual[i,ii] = norm( BLAS.scal!(lx,rho[ii],TD_OP[ii]'*(y[ii] .- y_old[ii]),1 )) -else - log_PARSDMM.r_dual[i,ii] = norm( rho[ii] .* (TD_OP[ii]'*(y[ii] .- y_old[ii])) ) -end -#log feasibility -if feasibility_only==false - if mod(i,10)==0 && ii
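#------------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the patch): several hunks in this
# patch (rhs_compose.jl and update_y_l.jl) replace out-of-place products such as
# s[ii] = TD_OP[ii]*x with the in-place mul!(s[ii], TD_OP[ii], x), so the preallocated
# output vector is reused on every PARSDMM iteration instead of being reallocated;
# this is part of the "memory reduction" mentioned in the commit message. A minimal,
# self-contained sketch of the pattern with hypothetical stand-in names:
#------------------------------------------------------------------------------
using LinearAlgebra, SparseArrays
A_op  = sprand(1_000, 1_000, 0.01)   # stand-in for one TD_OP block
x_in  = rand(1_000)
s_out = zeros(1_000)                 # preallocated once, outside the iteration loop
for it = 1:5
    mul!(s_out, A_op, x_in)          # overwrites s_out with A_op*x_in, no fresh allocation per iteration
end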