From c265c4ea5f67ee6c1b284512df483f6db48cb76a Mon Sep 17 00:00:00 2001 From: PharmCat Date: Fri, 23 Dec 2022 00:57:59 +0300 Subject: [PATCH 1/2] minor optimiztions --- src/fit.jl | 7 ++- src/gmat.jl | 6 +++ src/linearalgebra.jl | 124 +++++++++++++++++++++++++++++++++++++------ src/reml.jl | 45 +++++++++------- src/rmat.jl | 4 ++ src/sweep.jl | 18 ++++--- 6 files changed, 159 insertions(+), 45 deletions(-) diff --git a/src/fit.jl b/src/fit.jl index 789523c0..23a9765d 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -59,9 +59,10 @@ Fit LMM model. * `io` - output IO * `time_limit` - time limit = 120 sec * `iterations` - maximum iterations = 300 -* `refitinit` - true/false - if true - use last values for initial condition +* `refitinit` - true/false - if `true` - use last values for initial condition (`false` by default) * `optmethod` - Optimization method. Look at Optim.jl documentation. (Newton by default) * `singtol` - singular tolerance = 1e-8 +* `maxthreads` - maximum threads = num_cores() """ function fit!(lmm::LMM{T}; kwargs...) where T @@ -85,6 +86,7 @@ function fit!(lmm::LMM{T}; kwargs...) where T :refitinit ∈ kwkeys ? refitinit = kwargs[:refitinit] : refitinit = false :optmethod ∈ kwkeys ? optmethod = kwargs[:optmethod] : optmethod = :default :singtol ∈ kwkeys ? singtol = kwargs[:singtol] : singtol = 1e-8 + :maxthreads ∈ kwkeys ? maxthreads = kwargs[:maxthreads] : maxthreads = num_cores() # If model was fitted, previous results can be used if `refitinit` == true # Before fitting clear log @@ -159,7 +161,8 @@ function fit!(lmm::LMM{T}; kwargs...) where T varlinkrvecapply!(θ, lmm.covstr.ct; varlinkf = varlinkf, rholinkf = rholinkf) # Twice differentiable object - vloptf(x) = optfunc(lmm, lmm.dv, varlinkvecapply(x, lmm.covstr.ct; varlinkf = varlinkf, rholinkf = rholinkf))[1] + vloptf(x) = optfunc(lmm, lmm.dv, varlinkvecapply(x, lmm.covstr.ct; varlinkf = varlinkf, rholinkf = rholinkf); syrkblas = false, maxthreads = maxthreads)[1] + #vloptfd(x) = optfunc(lmm, lmm.dv, varlinkvecapply(x, lmm.covstr.ct; varlinkf = varlinkf, rholinkf = rholinkf); maxthreads = maxthreads)[1] gcfg = ForwardDiff.GradientConfig(vloptf, θ, chunk) hcfg = ForwardDiff.HessianConfig(vloptf, θ, chunk) diff --git a/src/gmat.jl b/src/gmat.jl index 6571c4dd..c1a7e973 100644 --- a/src/gmat.jl +++ b/src/gmat.jl @@ -248,6 +248,12 @@ function tpnum(m, n, s) end b -= s - n end +#= +function tpnum(m, n, s) + div(m*(2s - 1 - m), 2) - s + n +end +=# + ################################################################################ diff --git a/src/linearalgebra.jl b/src/linearalgebra.jl index 02fb437a..11841b63 100644 --- a/src/linearalgebra.jl +++ b/src/linearalgebra.jl @@ -26,6 +26,23 @@ end Change θ (only upper triangle). B is symmetric. """ +function mulαβαtinc!(θ::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix) + axb = axes(B, 1) + sa = size(A, 1) + for j ∈ axb + for i ∈ axb + Bij = B[i, j] + for n ∈ 1:sa + Anj = A[n, j] + for m ∈ 1:n + @inbounds θ[m, n] += A[m, i] * Bij * Anj + end + end + end + end + θ +end +#= function mulαβαtinc!(θ::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix) axb = axes(B, 1) sa = size(A, 1) @@ -40,12 +57,30 @@ function mulαβαtinc!(θ::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix end θ end - +=# """ θ + A * B * A' * alpha Change θ (only upper triangle). B is symmetric. """ +function mulαβαtinc!(θ::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix, alpha) + if !(size(B, 1) == size(B, 2) == size(A, 2)) || !(size(A, 1) == size(θ, 1) == size(θ, 2)) throw(ArgumentError("Wrong dimentions!")) end + axb = axes(B, 1) + sa = size(A, 1) + for j ∈ axb + for i ∈ axb + @inbounds Bij = B[i, j] + for n ∈ 1:sa + @inbounds Anj = A[n, j] + for m ∈ 1:n + @inbounds θ[m, n] += A[m, i] * Bij * Anj * alpha + end + end + end + end + θ +end +#= function mulαβαtinc!(θ::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix, alpha) if !(size(B, 1) == size(B, 2) == size(A, 2)) || !(size(A, 1) == size(θ, 1) == size(θ, 2)) throw(ArgumentError("Wrong dimentions!")) end axb = axes(B, 1) @@ -61,26 +96,42 @@ function mulαβαtinc!(θ::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix end θ end - +=# """ θ + A * B * (a - b) * alpha Change θ (only upper triangle). B is symmetric. """ +function mulαβαtinc!(θ::AbstractVector{T}, A::AbstractMatrix, B::AbstractMatrix, a::AbstractVector, b::AbstractVector, alpha) where T + if !(size(B, 2) == length(a) == length(b)) || size(B, 1) != size(A, 2) || size(A, 1) != length(θ) throw(ArgumentError("Wrong dimentions.")) end + axb = axes(B, 1) + sa = size(A, 1) + for i ∈ axb + abi = a[i] - b[i] + for j ∈ axb + for m ∈ 1:sa + @inbounds θ[m] += A[m, j] * B[j, i] * abi * alpha + end + end + end + θ +end +#= function mulαβαtinc!(θ::AbstractVector, A::AbstractMatrix, B::AbstractMatrix, a::AbstractVector, b::AbstractVector, alpha) if !(size(B, 2) == length(a) == length(b)) || size(B, 1) != size(A, 2) || size(A, 1) != length(θ) throw(ArgumentError("Wrong dimentions.")) end axb = axes(B, 1) sa = size(A, 1) for m ∈ 1:sa for j ∈ axb + Amj = A[m, j] for i ∈ axb - @inbounds θ[m] += A[m, j] * B[j, i] * (a[i] - b[i]) * alpha + @inbounds θ[m] += Amj * B[j, i] * (a[i] - b[i]) * alpha end end end θ end - +=# """ mulθ₃(y, X, β, V::AbstractMatrix{T})::T where T @@ -95,40 +146,78 @@ function mulθ₃(y, X, β, V::AbstractArray{T}) where T # check for optimizatio if q == 1 cs = zero(T) - #=@turbo=# @inbounds for m in 1:p + for m in 1:p + @inbounds cs += X[1, m] * β[m] + end + return -V[1, 1] * (y[1] - cs)^2 + end + c = zeros(T, q) + @simd for m = 1:p + βm = β[m] + @simd for n = 1:q + @inbounds c[n] += X[n, m] * βm + end + end + @simd for m = 2:q + ycm = y[m] - c[m] + @simd for n = 1:m-1 + @inbounds θ -= V[n, m] * (y[n] - c[n]) * ycm * 2 + end + end + @simd for m = 1:q + @inbounds θ -= V[m, m] * (y[m] - c[m]) ^ 2 + end + return θ +end +#= +function mulθ₃(y, X, β, V::AbstractArray{T}) where T # check for optimization + q = size(V, 1) + p = size(X, 2) + θ = zero(T) + + if q == 1 + cs = zero(T) + @inbounds for m in 1:p cs += X[1, m] * β[m] end return -V[1, 1] * (y[1] - cs)^2 end c = zeros(T, q) - #=@turbo=# @inbounds for n = 1:q, m = 1:p - c[n] += X[n, m] * β[m] + for n = 1:q + for m = 1:p + c[n] += X[n, m] * β[m] + end end - @simd for n = 1:q + @simd for n = 1:q-1 + ycn = y[n] - c[n] @simd for m = n+1:q - @inbounds θ -= V[n, m] * (y[n] - c[n]) * (y[m] - c[m]) * 2 + @inbounds θ -= V[n, m] * ycn * (y[m] - c[m]) * 2 end end - #=@turbo=# @inbounds for m = 1:q + @inbounds for m = 1:q θ -= V[m, m] * (y[m] - c[m]) ^ 2 end return θ end - +=# """ θ + A' * b Change θ. """ -function mulαtβinc!(θ::AbstractVector, A::AbstractMatrix, b::AbstractVector) +function mulαtβinc!(θ::AbstractVector{T}, A::AbstractMatrix, b::AbstractVector) where T q = size(A, 1) if q != length(b) throw(DimensionMismatch("size(A, 1) should be equal length(b)")) end p = size(A, 2) - #=@turbo=# @inbounds for m in 1:q, n in 1:p - θ[n] += b[m] * A[m, n] + for n in 1:p + θn = zero(T) + for m in 1:q + @inbounds θn += b[m] * A[m, n] + end + @inbounds θ[n] += θn end θ end @@ -137,9 +226,10 @@ end @inline function tmul_unsafe(rz, θ::AbstractVector{T}) where T vec = zeros(T, size(rz, 1)) - #=@turbo=# for r ∈ axes(rz, 1) - for i ∈ axes(rz, 2) - @inbounds vec[r] += rz[r, i] * θ[i] + for i ∈ axes(rz, 2) + θi = θ[i] + for r ∈ axes(rz, 1) + @inbounds vec[r] += rz[r, i] * θi end end vec diff --git a/src/reml.jl b/src/reml.jl index 9e6f0548..ab7be2c0 100644 --- a/src/reml.jl +++ b/src/reml.jl @@ -34,7 +34,7 @@ end # REML without provided β ################################################################################ -function reml_sweep_β(lmm, data, θ::Vector{T}; syrkblas::Bool = false) where T # Main optimization way - make gradient / hessian analytical / semi-analytical functions +function reml_sweep_β(lmm, data, θ::Vector{T}; syrkblas::Bool = false, maxthreads::Int = 16) where T # Main optimization way - make gradient / hessian analytical / semi-analytical functions n = length(lmm.covstr.vcovblock) N = length(lmm.data.yv) c = (N - lmm.rankx)*log(2π) @@ -48,7 +48,7 @@ function reml_sweep_β(lmm, data, θ::Vector{T}; syrkblas::Bool = false) where T gvec = gmatvec(θ, lmm.covstr) rθ = θ[lmm.covstr.tr[end]] # R part of θ noerror = true - ncore = min(num_cores(), n, 16) + ncore = min(num_cores(), n, maxthreads) accθ₁ = zeros(T, ncore) accθ₂ = Vector{Matrix{T}}(undef, ncore) accβm = Vector{Vector{T}}(undef, ncore) @@ -71,14 +71,16 @@ function reml_sweep_β(lmm, data, θ::Vector{T}; syrkblas::Bool = false) where T i = offset + j q = length(lmm.covstr.vcovblock[i]) qswm = q + lmm.rankx - Vp = Matrix{T}(undef, qswm, qswm) + Vp = zeros(T, qswm, qswm) + #Vp = Matrix{T}(undef, qswm, qswm) + #fillzeroutri!(Vp) #Vp = view(Vpt[t], qswm, qswm) V = view(Vp, 1:q, 1:q) Vx = view(Vp, 1:q, q+1:qswm) Vc = view(Vp, q+1:qswm, q+1:qswm) - fillzeroutri!(V) + #fillzeroutri!(V) copyto!(Vx, data.xv[i]) - fillzeroutri!(Vc) + #fillzeroutri!(Vc) #------------------------------------------------------------------- # Make V matrix vmatrix!(V, gvec, rθ, lmm, i) @@ -91,7 +93,8 @@ function reml_sweep_β(lmm, data, θ::Vector{T}; syrkblas::Bool = false) where T #----------------------------------------------------------------------- if ne == false erroracc[t] = false end accθ₁[t] += swr - subutri!(accθ₂[t], Vc) + #subutri!(accθ₂[t], Vc) + accθ₂[t] .-= Vc mulαtβinc!(accβm[t], Vx, data.yv[i]) end #----------------------------------------------------------------------- @@ -126,8 +129,8 @@ end # REML with provided β ################################################################################ -function core_sweep_β(lmm, data, θ::Vector{T}, β, n) where T - ncore = min(num_cores(), n, 16) +function core_sweep_β(lmm, data, θ::Vector{T}, β, n; maxthreads::Int = 16) where T + ncore = min(num_cores(), n, maxthreads) accθ₁ = zeros(T, ncore) accθ₂ = Vector{Matrix{T}}(undef, ncore) accθ₃ = zeros(T, ncore) @@ -142,31 +145,33 @@ function core_sweep_β(lmm, data, θ::Vector{T}, β, n) where T i = offset + j q = length(lmm.covstr.vcovblock[i]) qswm = q + lmm.rankx - Vp = Matrix{T}(undef, qswm, qswm) + Vp = zeros(T, qswm, qswm) + #Vp = Matrix{T}(undef, qswm, qswm) V = view(Vp, 1:q, 1:q) Vx = view(Vp, 1:q, q+1:qswm) Vc = view(Vp, q+1:qswm, q+1:qswm) - fillzeroutri!(V) + #fillzeroutri!(V) copyto!(Vx, data.xv[i]) - fillzeroutri!(Vc) + #fillzeroutri!(Vc) vmatrix!(V, gvec, rθ, lmm, i) #----------------------------------------------------------------------- swm, swr, ne = sweepb!(Vector{T}(undef, qswm), Vp, 1:q; logdet = true) #----------------------------------------------------------------------- if ne == false erroracc[t] = false end accθ₁[t] += swr - subutri!(accθ₂[t], Vc) + #subutri!(accθ₂[t], Vc) + accθ₂[t] .-= Vc accθ₃[t] += mulθ₃(data.yv[i], data.xv[i], β, V) end end sum(accθ₁), sum(accθ₂), sum(accθ₃), all(erroracc) end -function reml_sweep_β(lmm, data, θ::Vector{T}, β) where T +function reml_sweep_β(lmm, data, θ::Vector{T}, β; kwargs...) where T n = length(lmm.covstr.vcovblock) N = length(lmm.data.yv) c = (N - lmm.rankx)*log(2π) - θ₁, θ₂, θ₃, noerror = core_sweep_β(lmm, data, θ::Vector{T}, β, n) + θ₁, θ₂, θ₃, noerror = core_sweep_β(lmm, data, θ::Vector{T}, β, n; kwargs...) θs₂ = Symmetric(θ₂) logdetθ₂ = logdet(θs₂) return θ₁ + logdetθ₂ + θ₃ + c, θs₂, θ₃, noerror #REML, iC, θ₃ @@ -174,21 +179,21 @@ end ################################################################################ # REML AI-like / scoring part ################################################################################ -function sweep_ai(lmm, data, θ, β) +function sweep_ai(lmm, data, θ, β; kwargs...) n = length(lmm.covstr.vcovblock) - θ₁, θ₂, θ₃, noerror = core_sweep_β(lmm, data, θ, β, n) + θ₁, θ₂, θ₃, noerror = core_sweep_β(lmm, data, θ, β, n; kwargs...) return θ₃ end -function sweep_score(lmm, data, θ, β) +function sweep_score(lmm, data, θ, β; kwargs...) n = length(lmm.covstr.vcovblock) - θ₁, θ₂, θ₃, noerror = core_sweep_β(lmm, data, θ, β, n) + θ₁, θ₂, θ₃, noerror = core_sweep_β(lmm, data, θ, β, n; kwargs...) return -θ₁ + θ₃ end ################################################################################ # variance-covariance matrix of β ################################################################################ -function sweep_β_cov(lmm, data, θ, β) +function sweep_β_cov(lmm, data, θ, β; kwargs...) n = length(lmm.covstr.vcovblock) - θ₁, θ₂, θ₃, noerror = core_sweep_β(lmm, data, θ, β, n) + θ₁, θ₂, θ₃, noerror = core_sweep_β(lmm, data, θ, β, n; kwargs...) return Symmetric(θ₂) end diff --git a/src/rmat.jl b/src/rmat.jl index ad06ed15..d4222888 100644 --- a/src/rmat.jl +++ b/src/rmat.jl @@ -3,11 +3,14 @@ ################################################################################ ################################################################################ +#= function rmat_base_inc_b!(mx, θ, zrv, covstr) rmat!(mx, θ, zrv, covstr.repeated.covtype.s) end +=# ################################################################################ ################################################################################ +#= function rmat_base_inc!(mx, θ, covstr, block, sblock) zblock = view(covstr.rz, block, :) @simd for i ∈ axes(sblock[end], 1) @@ -15,6 +18,7 @@ function rmat_base_inc!(mx, θ, covstr, block, sblock) end mx end +=# @noinline function rmat_base_inc!(mx, θ, covstr, bi) en = covstr.rn + 1 block = covstr.vcovblock[bi] diff --git a/src/sweep.jl b/src/sweep.jl index 4ac56e38..3e614053 100644 --- a/src/sweep.jl +++ b/src/sweep.jl @@ -4,11 +4,17 @@ function nsyrk!(α, x, A) p = checksquare(A) - for i in 1:p, j in i:p - @inbounds A[i,j] += α * x[i] * x[j] + for j in 1:p + xj = x[j] + for i in 1:j + @inbounds A[i, j] += α * x[i] * xj + end end A end +function nsyrk!(α, x, A::AbstractArray{T}) where T <: AbstractFloat + BLAS.syrk!('U', 'N', α, x, one(T), A) +end #= function nsyrk!(α::T, x::AbstractArray{<:T}, A::StridedMatrix{T}) where {T<:Union{LinearAlgebra.BlasFloat, LinearAlgebra.BlasComplex}} nt = LinearAlgebra.BLAS.get_num_threads() @@ -34,11 +40,11 @@ function sweepb!(akk::AbstractArray{T, 1}, A::AbstractArray{T, 2}, k::Integer, i #Rank-k update of the symmetric matrix C as alpha*A*transpose(A) + beta*C #or alpha*transpose(A)*A + beta*C according to trans. #Only the uplo triangle of C is used. Returns C. - if syrkblas - BLAS.syrk!('U', 'N', -d, akk, one(T), A) - else + #if syrkblas + # BLAS.syrk!('U', 'N', -d, akk, one(T), A) + #else nsyrk!(-d, akk, A) - end + #end rmul!(akk, d * (-one(T)) ^ inv) @simd for i in 1:(k-1) From 6ae8d1401b9a0a5d2a162c393dba72d8fd885527 Mon Sep 17 00:00:00 2001 From: PharmCat Date: Fri, 23 Dec 2022 00:59:36 +0300 Subject: [PATCH 2/2] update --- Project.toml | 2 +- change.log | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index fb1427c9..c840e747 100644 --- a/Project.toml +++ b/Project.toml @@ -3,7 +3,7 @@ uuid = "a1dec852-9fe5-11e9-361f-8d9fde67cfa2" keywords = ["lenearmodel", "mixedmodel"] desc = "Mixed-effects models with flexible covariance structure." authors = ["Vladimir Arnautov "] -version = "0.14.1" +version = "0.14.2" [deps] DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" diff --git a/change.log b/change.log index be96ee67..da98d265 100644 --- a/change.log +++ b/change.log @@ -1,3 +1,7 @@ +v0.14.2 + * minor optimizations and changes + * potential bug fix + v0.14.1 * docs * test