diff --git a/Project.toml b/Project.toml index 4f25a47..9fa1c24 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FinEtoolsHeatDiff" uuid = "972d1c22-8bdd-11e9-11cf-cdcb7577b041" authors = ["Petr Krysl "] -version = "3.0.5" +version = "3.0.6" [deps] Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" diff --git a/examples/steady_state/3-d/mgather1.jl b/examples/steady_state/3-d/mgather1.jl new file mode 100644 index 0000000..6910b2e --- /dev/null +++ b/examples/steady_state/3-d/mgather1.jl @@ -0,0 +1,118 @@ +module mgather1 +using Test +using Random + +function test() + N = 10000 # Number of nodes in the mesh + nen = 20 # Number of nodes per element + nloops = 2 * N + all_indexes = [randperm(N)[1:nen] for _ in 1:nloops] + buffnen3 = rand(nen, 3) + buff3nen = rand(3, nen) + dataN3 = rand(N, 3) + data3N = Matrix(transpose(dataN3)) + + t1 = @elapsed for l = 1:nloops + indexes = view(all_indexes, l)[1] + @inbounds for i in 1:nen + ii = indexes[i] + for j in 1:3 + buffnen3[i, j] = dataN3[ii, j] + end + end + end + + t2 = @elapsed for l = 1:nloops + indexes = view(all_indexes, l)[1] + @inbounds for j in 1:3 + # alternative to the loop below + # buffnen3[:, j] .= dataN3[indexes, j] # SLOW + @inbounds for i in 1:nen + ii = indexes[i] + buffnen3[i, j] = dataN3[ii, j] + end + end + end + + t3 = @elapsed for l = 1:nloops + indexes = view(all_indexes, l)[1] + @inbounds for i in 1:nen + ii = indexes[i] + for j in 1:3 + buff3nen[j, i] = dataN3[ii, j] + end + end + end + + t4 = @elapsed for l = 1:nloops + indexes = view(all_indexes, l)[1] + @inbounds for j in 1:3 + for i in 1:nen + ii = indexes[i] + buff3nen[j, i] = dataN3[ii, j] + end + end + end + + + t5 = @elapsed for l = 1:nloops + indexes = view(all_indexes, l)[1] + @inbounds for i in 1:nen + ii = indexes[i] + for j in 1:3 + buffnen3[i, j] = data3N[j, ii] + end + end + end + + t6 = @elapsed for l = 1:nloops + indexes = view(all_indexes, l)[1] + @inbounds for j in 1:3 + for i in 1:nen + ii = indexes[i] + buffnen3[i, j] = data3N[j, ii] + end + end + end + + t7 = @elapsed for l = 1:nloops + indexes = view(all_indexes, l)[1] + @inbounds for i in 1:nen + ii = indexes[i] + for j in 1:3 + buff3nen[j, i] = data3N[j, ii] + end + end + end + + t8 = @elapsed for l = 1:nloops + indexes = view(all_indexes, l)[1] + @inbounds for j in 1:3 + for i in 1:nen + ii = indexes[i] + buff3nen[j, i] = data3N[j, ii] + end + end + end + + [t1, t2, t3, t4, t5, t6, t7, t8] ./ nloops .* 1e6 # In microseconds +end +end +using Main.mgather1 +ts = [0.0 for i in 1:8] +ntries = 10 +for i in 1:ntries + @info "Try $i" + ts .+= mgather1.test() +end +ts ./= ntries +ts = Float32.(ts) + +println("Mesh data N x 3, Element buffer nen x 3, Loop i, j: Time $(ts[1]) [mus]") +println("Mesh data N x 3, Element buffer nen x 3, Loop j, i: Time $(ts[2]) [mus]") +println("Mesh data N x 3, Element buffer 3 x nen, Loop i, j: Time $(ts[3]) [mus]") +println("Mesh data N x 3, Element buffer 3 x nen, Loop j, i: Time $(ts[4]) [mus]") +println("Mesh data 3 x N, Element buffer nen x 3, Loop i, j: Time $(ts[5]) [mus]") +println("Mesh data 3 x N, Element buffer nen x 3, Loop j, i: Time $(ts[6]) [mus]") +println("Mesh data 3 x N, Element buffer 3 x nen, Loop i, j: Time $(ts[7]) [mus]") +println("Mesh data 3 x N, Element buffer 3 x nen, Loop j, i: Time $(ts[8]) [mus]") diff --git a/examples/steady_state/3-d/mwe_tasks.jl b/examples/steady_state/3-d/mwe_tasks.jl index c17b0c2..ddb40d0 100644 --- a/examples/steady_state/3-d/mwe_tasks.jl +++ b/examples/steady_state/3-d/mwe_tasks.jl @@ -7,17 +7,21 @@ function work(r) end s end -function test() - nchunks = 5 +function test(nchunks = 4) + @info "nthreads = $(Threads.nthreads())" + @info "maxthreadid = $(Threads.maxthreadid())" + @info "nthreadpools = $(Threads.nthreadpools())" + @info "threadpool.(1:Threads.nthreads()) = $(threadpool.(1:Threads.nthreads()))" + @info "Threads.threadpoolsize.((:default, :interactive)) = $(Threads.threadpoolsize.((:default, :interactive)))" N = 100000000 chunks = [(((i-1)*N+1:i*N), i) for i = 1:nchunks] s = Float64[] + start = time() Threads.@sync begin for ch in chunks - @info "$(ch[2]): Started $(time() - start)" Threads.@spawn let r = $ch[1], i = $ch[2] - @info "$(i): Spawned $(time() - start)" + @info "Chunk $(i), thread $(threadid()), $(Threads.threadpool(threadid())): Spawned $(time() - start)" push!(s, work(r)) @info "$(i): Finished $(time() - start)" end @@ -28,8 +32,9 @@ function test() end end using Main.mwe_tasks; +mwe_tasks.test() ts = [] -for n = 1:50 +for n = 1:20 push!(ts, @elapsed mwe_tasks.test()) end @show extrema(ts) diff --git a/examples/steady_state/3-d/mwe_tasks_2.jl b/examples/steady_state/3-d/mwe_tasks_2.jl new file mode 100644 index 0000000..f6789c9 --- /dev/null +++ b/examples/steady_state/3-d/mwe_tasks_2.jl @@ -0,0 +1,42 @@ +module mwe_tasks_2 +using Base.Threads +function work(r) + s = 0.0 + for j in r + s = s + exp(-(j - minimum(r))^2 / (maximum(r) - minimum(r))^2) + end + s +end +function test(nchunks = 2) + # @info "nchunks = $(nchunks)" + # @info "nthreads.((:default, :interactive)) = $(Threads.nthreads.((:default, :interactive)))" + # @info "maxthreadid = $(Threads.maxthreadid())" + # @info "threadpool.(1:Threads.maxthreadid()) = $(threadpool.(1:Threads.maxthreadid()))" + # @info "Threads.threadpoolsize.((:default, :interactive)) = $(Threads.threadpoolsize.((:default, :interactive)))" + N = 200000000 + chincr = N / nchunks + @assert nchunks * chincr == N + chunks = [(((i-1)*chincr+1:i*chincr), i) for i = 1:nchunks] + + s = Float64[] + start = time() + Threads.@sync begin + for ch in chunks + Threads.@spawn let r = $ch[1], i = $ch[2] + # @info "Chunk $(i), thread $(threadid()), $(Threads.threadpool(threadid())): Spawned $(time() - start)" + push!(s, work(r)) + # @info "$(i): Finished $(time() - start)" + end + end + end + # @info "Finished $(time() - start)" + # @show s +end +end +using Main.mwe_tasks_2; +mwe_tasks_2.test() +ts = [] +for n = 1:10 + push!(ts, @elapsed mwe_tasks_2.test()) +end +@show extrema(ts)