
Commit

improve parallel version, simplify some code, some memory reduction and speedup
PetersBas committed Jan 28, 2022
1 parent 6005194 commit 2026064
Showing 17 changed files with 781 additions and 708 deletions.
32 changes: 19 additions & 13 deletions examples/projection_intersection_2D.jl
@@ -36,14 +36,14 @@ elseif options.FL==Float32
end

#load image to project
file = matopen(joinpath(dirname(pathof(SetIntersectionProjection)), "../examples/Data/compass_velocity.mat"))
m = read(file, "Data");close(file)
m = m[1:341,200:600]
m = permutedims(m,[2,1])
file = matopen(joinpath(dirname(pathof(SetIntersectionProjection)), "../examples/Data/compass_velocity.mat"));
m = read(file, "Data");close(file);
m = m[1:341,200:600];
m = permutedims(m,[2,1]);

#set up computational grid (25 and 6 m are the original distances between grid points)
comp_grid = compgrid((TF(25.0), TF(6.0)),(size(m,1), size(m,2)))
m = convert(Vector{TF},vec(m))
m = convert(Vector{TF},vec(m));

#define axis limits and colorbar limits
xmax = comp_grid.d[1]*comp_grid.n[1]
@@ -55,13 +55,13 @@ vma = 4500
constraint = Vector{SetIntersectionProjection.set_definitions}()

#bounds:
m_min = 1500.0
m_min = 1480.0
m_max = 4500.0
set_type = "bounds"
TD_OP = "identity"
app_mode = ("matrix","")
custom_TD_OP = ([],false)
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP))
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP));

#slope constraints (vertical)
m_min = 0.0
@@ -70,11 +70,11 @@ set_type = "bounds"
TD_OP = "D_z"
app_mode = ("matrix","")
custom_TD_OP = ([],false)
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP))
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP));

options.parallel = false
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL)
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options)
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL);
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options);

println("")
println("PARSDMM serial (bounds and bounds on D_z):")
@@ -103,14 +103,17 @@ tight_layout()
#tight_layout(pad=0.0, w_pad=0.0, h_pad=1.0)
savefig("PARSDMM_logs.png",bbox_inches="tight")

#print timings in terminal
log_PARSDMM.timing

println("")
println("PARSDMM parallel (bounds and bounds on D_z):")
options.parallel = true
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL)
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options)
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL);
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options);

@time (x2,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
#@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
@time (x2,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
#@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);

#plot
@@ -119,6 +122,9 @@ subplot(2,1,1);imshow(permutedims(reshape(m,(comp_grid.n[1],comp_grid.n[2])),[2,
subplot(2,1,2);imshow(permutedims(reshape(x2,(comp_grid.n[1],comp_grid.n[2])),[2,1]),cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, zmax, 0]); title("Projection (bounds and bounds on D_z)")
savefig("projected_model_ParallelPARSDMM.png",bbox_inches="tight")

#print timings in terminal
log_PARSDMM.timing

# #use multilevel-serial (2-levels)
# options.parallel = false

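The pattern in this example is worth spelling out: the serial and parallel runs differ only in the options.parallel flag, and setup_constraints plus PARSDMM_precompute_distribute are re-run so that the transform-domain operators and solver variables (TD_OP, AtA, l, y) can be distributed over the workers before PARSDMM is called. A minimal sketch of that toggle, reusing the variable names from the script above (so it is not self-contained on its own):

# serial projection
options.parallel = false
(P_sub, TD_OP, set_Prop) = setup_constraints(constraint, comp_grid, options.FL);
(TD_OP, AtA, l, y)       = PARSDMM_precompute_distribute(TD_OP, set_Prop, comp_grid, options);
@time (x1, log1) = PARSDMM(m, AtA, TD_OP, set_Prop, P_sub, comp_grid, options);

# parallel projection (requires worker processes with the package loaded; see the 3D example below)
options.parallel = true
(P_sub, TD_OP, set_Prop) = setup_constraints(constraint, comp_grid, options.FL);
(TD_OP, AtA, l, y)       = PARSDMM_precompute_distribute(TD_OP, set_Prop, comp_grid, options);
@time (x2, log2) = PARSDMM(m, AtA, TD_OP, set_Prop, P_sub, comp_grid, options);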
24 changes: 12 additions & 12 deletions examples/projection_intersection_3D.jl
@@ -3,7 +3,7 @@
# Bas Peters, 2017

using Distributed
using LinearAlgebra
@everywhere using LinearAlgebra
@everywhere using SetIntersectionProjection
using HDF5
using PyPlot
@@ -17,7 +17,7 @@ end
if ~isfile("overthrust_3D_true_model.h5")
run(`wget ftp://slim.gatech.edu/data/SoftwareRelease/WaveformInversion.jl/3DFWI/overthrust_3D_true_model.h5`)
end
n, d, o, m = read(h5open("overthrust_3D_true_model.h5","r"), "n", "d", "o", "m")
n, d, o, m = read(h5open("overthrust_3D_true_model.h5","r"), "n", "d", "o", "m");

m .= 1000.0 ./ sqrt.(m);
m = m[50:200,50:200,:];
@@ -36,7 +36,7 @@ options.Blas_active = true
options.maxit = 500

set_zero_subnormals(true)
BLAS.set_num_threads(4)
@everywhere BLAS.set_num_threads(4)

#select working precision
if options.FL==Float64
@@ -68,7 +68,7 @@ set_type = "bounds"
TD_OP = "identity"
app_mode = ("matrix","")
custom_TD_OP = ([],false)
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP))
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP));

#slope constraints (vertical)
m_min = 0.0
@@ -77,11 +77,11 @@ set_type = "bounds"
TD_OP = "D_z"
app_mode = ("matrix","")
custom_TD_OP = ([],false)
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP))
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP));

options.parallel = false
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL)
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options)
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL);
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options);

println("")
println("PARSDMM serial (bounds and bounds on D_z):")
@@ -133,12 +133,12 @@ println("PARSDMM multilevel-serial (bounds and bounds on D_z):")
println("")
println("PARSDMM parallel (bounds and bounds on D_z):")
options.parallel = true
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL)
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options)
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL);
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options);

@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options)
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options)
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options)
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);

#use multilevel-parallel (2-levels)
options.parallel = true
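One point the updated 3D example makes explicit is that the parallel version needs its dependencies and BLAS settings on every worker process, hence the @everywhere in front of using LinearAlgebra, using SetIntersectionProjection and BLAS.set_num_threads. A minimal sketch of that setup (the worker count is an illustrative assumption, not taken from the script):

using Distributed
addprocs(2)                              # illustrative; choose a worker count that fits your machine
@everywhere using LinearAlgebra
@everywhere using SetIntersectionProjection
@everywhere BLAS.set_num_threads(4)      # BLAS threads must be set on every worker, as in the script
set_zero_subnormals(true)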
64 changes: 56 additions & 8 deletions src/CDS_MVp.jl
@@ -1,4 +1,4 @@
export CDS_MVp
export CDS_MVp, CDS_MVp2, CDS_MVp3, CDS_MVp4

"""
compute single-thread matrix vector product with vector x, output is vector y: y=A*x
@@ -7,15 +7,15 @@ R is a tall matrix N by ndiagonals, corresponding to a square matrix A
offsets indicate offset of diagonal compared to the main diagonal in A (which is 0)
"""
function CDS_MVp(
N::Integer,
ndiags::Integer,
R::Array{TF,2},
offset::Vector{TI},
x::Vector{TF},
y::Vector{TF}) where {TF<:Real,TI<:Integer}
N ::Integer,
ndiags ::Integer,
R ::Array{TF,2},
offset ::Vector{TI},
x ::Vector{TF},
y ::Vector{TF}) where {TF<:Real,TI<:Integer}

for i = 1 : ndiags
d = offset[i]
d = offset[i]
r0 = max(1, 1-d)
r1 = min(N, N-d)
c0 = max(1, 1+d)
@@ -26,3 +26,51 @@ function CDS_MVp(
end
return y
end

# #the versions below is insignificantly faster than the original version above
# function CDS_MVp(
# N::Integer,
# ndiags::Integer,
# R::Array{TF,2},
# offset::Vector{TI},
# x::Vector{TF},
# y::Vector{TF}) where {TF<:Real,TI<:Integer}

# for i = 1 : ndiags
# d = offset[i]
# r0 = max(1, 1-d)
# r1 = min(N, N-d)
# c0 = max(1, 1+d)
# #r0_plus_c0 = r0 + c0
# c = deepcopy(c0)
# for r = r0 : r1
# @inbounds y[r] = y[r] + R[r,i] * x[c]#original
# c += 1
# end
# end
# return y
# end

# function CDS_MVp(
# N ::Integer,
# ndiags ::Integer,
# R ::Array{TF,2},
# offset ::Vector{TI},
# x ::Vector{TF},
# y ::Vector{TF}) where {TF<:Real,TI<:Integer}

# for i = 1 : ndiags
# d = offset[i]
# r0 = max(1, 1-d)
# r1 = min(N, N-d)
# c0 = max(1, 1+d)
# # for r = r0 : r1
# # c = r - r0 + c0 #original
# # @inbounds y[r] = y[r] + R[r,i] * x[c]#original
# # end
# @inbounds y[r0 : r1] .= y[r0 : r1] .+ R[r0:r1,i].*x[r0 - r0 + c0:r1 - r0 + c0]
# end

# return y
# end
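To make the CDS (compressed diagonal storage, also called DIA) convention in the docstring of CDS_MVp concrete: column i of R holds the diagonal whose offset from the main diagonal is offset[i], stored by row, so R[r,i] is the entry A[r, r+offset[i]]. A small worked sketch for a 4-by-4 tridiagonal matrix (values chosen purely for illustration; CDS_MVp is exported by the package, see the export line above):

using SetIntersectionProjection

N       = 4
offsets = [-1, 0, 1]                 # subdiagonal, main diagonal, superdiagonal
R       = zeros(Float64, N, length(offsets))
R[2:4, 1] .= -1.0                    # A[r, r-1] entries (rows 2:4 exist for offset -1)
R[:,   2] .=  2.0                    # A[r, r]   entries
R[1:3, 3] .= -1.0                    # A[r, r+1] entries (rows 1:3 exist for offset +1)

x = ones(Float64, N)
y = zeros(Float64, N)                # CDS_MVp accumulates into y, so start from zeros
CDS_MVp(N, length(offsets), R, offsets, x, y)
# y is now A*x = [1.0, 0.0, 0.0, 1.0] for the tridiagonal A with 2 on the diagonal and -1 next to it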

2 changes: 1 addition & 1 deletion src/CDS_MVp_MT.jl
@@ -15,7 +15,7 @@ function CDS_MVp_MT(
y ::Vector{TF}) where {TF<:Real,TI<:Integer}

for i = 1 : ndiags
d = offset[i]
d = offset[i]
r0 = max(1, 1-d)
r1 = min(N, N-d)
c0 = max(1, 1+d)
15 changes: 7 additions & 8 deletions src/CDS_MVp_MT_subfunc.jl
@@ -4,15 +4,14 @@ wrapper around Julia threads to compute a multi-threaded matrix vector product i
This is a subfunction of CDS_MVp_MT.jl
"""
function CDS_MVp_MT_subfunc(
R::Array{TF,2},
x::Vector{TF},
y::Vector{TF},
r0::Int,
c0::Int,
r1::Int,
i::Int) where {TF<:Real}
R ::Array{TF,2},
x ::Vector{TF},
y ::Vector{TF},
r0 ::Int,
c0 ::Int,
r1 ::Int,
i ::Int) where {TF<:Real}

#s=rind-1
@Threads.threads for r = r0 : r1
c = r - r0 + c0 #original
@inbounds y[r] = y[r] + R[r,i] * x[c]#original
4 changes: 2 additions & 2 deletions src/CDS_scaled_add!.jl
@@ -3,7 +3,7 @@ export CDS_scaled_add!

"""
Computes A = A + alpha * B for A and B in the compressed diagonal storage format (CDS/DIA)
TO DO: make this function multi-threaded per column of A and B
TO DO: make this function multi-threaded per column of A and B, or use BLAS functions
"""
function CDS_scaled_add!(
A ::Array{TF,2},
@@ -16,7 +16,7 @@ function CDS_scaled_add!(
for k=1:length(B_offsets)
A_update_col = findall((in)(B_offsets[k]),A_offsets)
if isempty(A_update_col) == true
error("attempted to update a diagonal in A in CDS storage that does not excist. A and B need to have the same nonzero diagonals")
error("attempted to update a diagonal in A in CDS storage that does not exist. A and B need to have the same nonzero diagonals")
end
A[:,A_update_col] .= A[:,A_update_col] .+ alpha .* B[:,k];
end
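The docstring above describes CDS_scaled_add! as A = A + alpha*B for matrices stored in CDS/DIA format, where each diagonal of B is added to the column of A that stores the same offset. A standalone sketch of that update rule on small illustrative arrays (it re-implements the visible loop rather than calling the function, since only part of its argument list is shown here):

A_offsets = [-1, 0, 1];  B_offsets = [0, 1]      # B's diagonals must all be present in A
A = ones(4, 3);          B = 2.0 .* ones(4, 2)
alpha = 0.5

for k = 1:length(B_offsets)
    col = findall(in(B_offsets[k]), A_offsets)   # column of A holding the same diagonal as B[:,k]
    isempty(col) && error("A and B need to have the same nonzero diagonals")
    A[:, col] .= A[:, col] .+ alpha .* B[:, k]
end
# A[:,2] and A[:,3] are now 2.0 (= 1 + 0.5*2); A[:,1], the subdiagonal of A, is untouched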
