diff --git a/README.md b/README.md index 818bd802..97114ff2 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This program comes with absolutely no warranty. No liability is accepted for any | Status | Cover | Build | Docs | |--------|-------|-------|------| -|[![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)|[![codecov](https://codecov.io/gh/PharmCat/Metida.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/PharmCat/Metida.jl)| | [![Latest docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://pharmcat.github.io/Metida.jl/dev/)| +|[![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)|[![codecov](https://codecov.io/gh/PharmCat/Metida.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/PharmCat/Metida.jl)|![Tier 1](https://github.com/PharmCat/Metida.jl/workflows/Tier%201/badge.svg) | [![Latest docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://pharmcat.github.io/Metida.jl/dev/)| Metida.jl is a experimental Julia package for fitting mixed-effects models with flexible covariance structure. At this moment package is in development stage. diff --git a/change.log b/change.log index d3ed50b6..00f639a6 100644 --- a/change.log +++ b/change.log @@ -1,3 +1,19 @@ +- 0.1.13 + + * output fix + * prepare to MetidaNLopt, MetidaCu + * docs + * logs + * code cosmetic + * fix export + * coverage + +- 0.1.12 + +- 0.1.11 + +- 0.1.10 + - 0.1.9 Experimental part completed diff --git a/docs/make.jl b/docs/make.jl index a98a481e..c61f2b5a 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,6 +11,10 @@ makedocs( "Examples" => "examples.md", "Details" => "details.md", "Options" => "options.md", + "NLopt" => "nlopt.md", + "CUDA" => "cuda.md", + "Benchmark" => "bench.md" + "Validation" => "validation.md", "API" => "api.md", "Citation & Reference" => "ref.md", ], diff --git a/docs/src/api.md b/docs/src/api.md index 69463a4b..eaf2a4c6 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -1,6 +1,51 @@ ## API -### Metida.lmm +### Metida.LMM ```@docs -Metida.lmm +Metida.LMM +``` + +### Metida.fit! +```@docs +Metida.fit! +``` + +### Metida.@covstr +```@docs +Metida.@covstr +``` + +### Metida.VarEffect +```@docs +Metida.VarEffect +``` + +### Metida.ScaledIdentity +```@docs +Metida.ScaledIdentity +``` + +### Metida.Diag +```@docs +Metida.Diag +``` + +### Metida.Autoregressive +```@docs +Metida.Autoregressive +``` + +### Metida.HeterogeneousAutoregressive +```@docs +Metida.HeterogeneousAutoregressive +``` + +### Metida.CompoundSymmetry +```@docs +Metida.CompoundSymmetry +``` + +### Metida.HeterogeneousCompoundSymmetry +```@docs +Metida.HeterogeneousCompoundSymmetry ``` diff --git a/docs/src/bench.md b/docs/src/bench.md new file mode 100644 index 00000000..ae9722af --- /dev/null +++ b/docs/src/bench.md @@ -0,0 +1 @@ +## Benchmark diff --git a/docs/src/cuda.md b/docs/src/cuda.md new file mode 100644 index 00000000..ac2ae008 --- /dev/null +++ b/docs/src/cuda.md @@ -0,0 +1,23 @@ +## CUDA + +Use CuBLAS & CuSOLVER for REML calculation. Optimization with NLopt.jl. + +Install: + +``` +using Pkg +Pkg.add("MetidaCu") +``` + +Using: + +``` +using Metida, MetidaCu, StatsBase, StatsModels, CSV, DataFrames +df = CSV.File(dirname(pathof(Metida))*"\\..\\test\\csv\\df0.csv") |> DataFrame +lmm = LMM(@formula(var ~ sequence + period + formulation), df; +random = VarEffect(@covstr(formulation), CSH), +repeated = VarEffect(@covstr(formulation), VC), +subject = :subject) + +fit!(lmm; solver = :cuda) +``` diff --git a/docs/src/nlopt.md b/docs/src/nlopt.md new file mode 100644 index 00000000..a43b8c59 --- /dev/null +++ b/docs/src/nlopt.md @@ -0,0 +1,23 @@ +## NLopt + +Optimization with NLopt.jl. + +Install: + +``` +using Pkg +Pkg.add("MetidaNLopt") +``` + +Using: + +``` +using Metida, MetidaNLopt, StatsBase, StatsModels, CSV, DataFrames +df = CSV.File(dirname(pathof(Metida))*"\\..\\test\\csv\\df0.csv") |> DataFrame +lmm = LMM(@formula(var ~ sequence + period + formulation), df; +random = VarEffect(@covstr(formulation), CSH), +repeated = VarEffect(@covstr(formulation), VC), +subject = :subject) + +fit!(lmm; solver = :nlopt) +``` diff --git a/docs/src/validation.md b/docs/src/validation.md new file mode 100644 index 00000000..a9daa2c7 --- /dev/null +++ b/docs/src/validation.md @@ -0,0 +1,23 @@ +## Validation + + + +### Reference dataset + +* Schütz, H., Labes, D., Tomashevskiy, M. et al. Reference Datasets for Studies in a Replicate Design Intended for Average Bioequivalence with Expanding Limits. AAPS J 22, 44 (2020). https://doi.org/10.1208/s12248-020-0427-6 + +* Gregory Belenky, Nancy J. Wesensten, David R. Thorne, Maria L. Thomas, Helen C. Sing, Daniel P. Redmond, Michael B. Russo and Thomas J. Balkin (2003) Patterns of performance degradation and restoration during sleep restriction and subsequent recovery: a sleep dose-response study. Journal of Sleep Research 12, 1–12. + +* O.L. Davies and P.L. Goldsmith (eds), Statistical Methods in Research and Production, 4th ed., Oliver and Boyd, (1972), section 6.6 + +* O.L. Davies and P.L. Goldsmith (eds), Statistical Methods in Research and Production, 4th ed., Oliver and Boyd, (1972), section 6.5 + +https://vincentarelbundock.github.io/Rdatasets/datasets.html + + +Stock, J.H. and Watson, M.W. (2007). Introduction to Econometrics, 2nd ed. Boston: Addison Wesley. +Anglin, P.M. and R. Gencay (1996) “Semiparametric estimation of a hedonic price function”, Journal of Applied Econometrics, 11(6), 633-648. +Anglin, P., and Gencay, R. (1996). Semiparametric Estimation of a Hedonic Price Function. Journal of Applied Econometrics, 11, 633–648. + +Verbeek, M. (2004). A Guide to Modern Econometrics, 2nd ed. Chichester, UK: John Wiley. +O'Brien, R. G., and Kaiser, M. K. (1985) MANOVA method for analyzing repeated measures designs: An extensive primer. Psychological Bulletin 97, 316–333, Table 7. diff --git a/src/Metida.jl b/src/Metida.jl index 0956aa79..d31aa2fc 100644 --- a/src/Metida.jl +++ b/src/Metida.jl @@ -4,7 +4,7 @@ __precompile__() module Metida -using Distributions, LinearAlgebra, StatsBase, StatsModels, Tables, ForwardDiff, CategoricalArrays#, BlockDiagonals, BlockArrays#, SweepOperator +using Distributions, LinearAlgebra, StatsBase, StatsModels, Tables, ForwardDiff, CategoricalArrays using Optim, LineSearches #using TimerOutputs @@ -14,7 +14,14 @@ import LinearAlgebra:checksquare import StatsBase: fit, fit!, coef import Base:show -export @formula, @covstr, VC, VarianceComponents, CSH, HeterogeneousCompoundSymmetry, SI, ScaledIdentity, fit!, LMM, VarEffect +export @formula, @covstr, +SI, ScaledIdentity, +DIAG, Diag, +AR, Autoregressive, +ARH, HeterogeneousAutoregressive, +CS, CompoundSymmetry, +CSH, HeterogeneousCompoundSymmetry, +fit!, LMM, VarEffect, theta, logreml, m2logreml, thetalength include("abstracttype.jl") include("sweep.jl") diff --git a/src/fit.jl b/src/fit.jl index 35beb691..3e829a1c 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -14,7 +14,18 @@ end fit_nlopt!(lmm::MetidaModel; kwargs...) = error("MetidaNLopt not found. \n - Run `using MetidaNLopt` before.") """ - fit!(lmm::LMM{T}) where T + fit!(lmm::LMM{T}; + solver::Symbol = :default, + verbose = :auto, + varlinkf = :exp, + rholinkf = :sigm, + aifirst::Bool = false, + g_tol::Float64 = 1e-12, + x_tol::Float64 = 1e-12, + f_tol::Float64 = 1e-12, + hcalck::Bool = true, + init = nothing, + io::IO = stdout) where T Fit LMM model. """ @@ -28,7 +39,11 @@ function fit!(lmm::LMM{T}; x_tol::Float64 = 1e-12, f_tol::Float64 = 1e-12, hcalck::Bool = true, - init = nothing) where T + init = nothing, + io::IO = stdout) where T + + if lmm.result.fit lmmlog!(io, lmm, verbose, LMMLogMsg(:INFO, "Refit model...")) end + if solver == :nlopt return fit_nlopt!(lmm; solver = :nlopt, verbose = verbose, varlinkf = varlinkf, rholinkf = rholinkf, aifirst = aifirst, g_tol = g_tol, x_tol = x_tol, f_tol = f_tol, hcalck = false, init = init) elseif solver == :cuda @@ -44,7 +59,7 @@ function fit!(lmm::LMM{T}; # Optimization function if lmm.blocksolve optfunc = reml_sweep_β_b - lmmlog!(lmm, verbose, LMMLogMsg(:INFO, "Solving by blocks...")) + lmmlog!(io, lmm, verbose, LMMLogMsg(:INFO, "Solving by blocks...")) else optfunc = reml_sweep_β end @@ -71,8 +86,13 @@ function fit!(lmm::LMM{T}; #ux = similar(θ) #lx .= 0.0 #ux .= Inf - if isa(init, Vector{T}) && length(θ) == length(init) - copyto!(θ, init) + if isa(init, Vector{T}) + if length(θ) == length(init) + copyto!(θ, init) + lmmlog!(io, lmm, verbose, LMMLogMsg(:INFO, "Using provided θ: "*string(θ))) + else + error("init length $(length(init)) != θ length $(length(init))") + end else initθ = sqrt(initvar(lmm.mf.data[lmm.mf.f.lhs.sym], lmm.mm.m)[1]/4) θ .= initθ @@ -83,7 +103,7 @@ function fit!(lmm::LMM{T}; #ux[i] = 1.0 end end - lmmlog!(lmm, verbose, LMMLogMsg(:INFO, "Initial θ: "*string(θ))) + lmmlog!(io, lmm, verbose, LMMLogMsg(:INFO, "Initial θ: "*string(θ))) end #dfc = TwiceDifferentiableConstraints(lx, ux) #Initial step with modified Newton method @@ -93,6 +113,7 @@ function fit!(lmm::LMM{T}; grf(x) = optfunc(lmm, x)[1] ai = ForwardDiff.hessian(aif, θ) gr = ForwardDiff.gradient(grf, θ) + initθ = copy(θ) try θ .-= (inv(ai) ./4 )*gr catch @@ -106,11 +127,11 @@ function fit!(lmm::LMM{T}; θ[i] = 0.0 end else - if θ[i] < 0.01 θ[i] = initθ / 2 end - if θ[i] > initθ*1.25 θ[i] = initθ*1.25 end + if θ[i] < 0.01 θ[i] = initθ[i] / 2 end + if θ[i] > initθ[i] * 1.25 θ[i] = initθ[i] * 1.25 end end end - lmmlog!(lmm, verbose, LMMLogMsg(:INFO, "First step with AI-like method, θ: "*string(θ))) + lmmlog!(io, lmm, verbose, LMMLogMsg(:INFO, "First step with AI-like method, θ: "*string(θ))) end varlinkrvecapply2!(θ, lmm.covstr.ct) @@ -136,21 +157,22 @@ function fit!(lmm::LMM{T}; end #Theta (θ) vector lmm.result.theta = varlinkvecapply2!(deepcopy(Optim.minimizer(lmm.result.optim)), lmm.covstr.ct) + lmmlog!(io, lmm, verbose, LMMLogMsg(:INFO, "Resulting θ: "*string(lmm.result.theta))) try if hcalck #Hessian lmm.result.h = ForwardDiff.hessian(x -> optfunc(lmm, x)[1], lmm.result.theta) #H positive definite check if !isposdef(lmm.result.h) - lmmlog!(lmm, verbose, LMMLogMsg(:WARN, "Hessian is not positive definite.")) + lmmlog!(io, lmm, verbose, LMMLogMsg(:WARN, "Hessian is not positive definite.")) end qrd = qr(lmm.result.h, Val(true)) for i = 1:length(lmm.result.theta) if abs(qrd.R[i,i]) < 1E-10 if lmm.covstr.ct[qrd.jpvt[i]] == :var - lmmlog!(lmm, verbose, LMMLogMsg(:WARN, "Variation QR.R diagonal value ($(qrd.jpvt[i])) is less than 1e-10.")) + lmmlog!(io, lmm, verbose, LMMLogMsg(:WARN, "Variation QR.R diagonal value ($(qrd.jpvt[i])) is less than 1e-10.")) elseif lmm.covstr.ct[qrd.jpvt[i]] == :rho - lmmlog!(lmm, verbose, LMMLogMsg(:WARN, "Rho QR.R diagonal value ($(qrd.jpvt[i])) is less than 1e-10.")) + lmmlog!(io, lmm, verbose, LMMLogMsg(:WARN, "Rho QR.R diagonal value ($(qrd.jpvt[i])) is less than 1e-10.")) end end end @@ -163,11 +185,15 @@ function fit!(lmm::LMM{T}; lmm.result.se = sqrt.(diag(lmm.result.c)) #Fit true lmm.result.fit = true + + lmmlog!(io, lmm, verbose, LMMLogMsg(:INFO, "Model fitted.")) catch #-2 LogREML, β, iC lmm.result.reml, lmm.result.beta, iC, θ₃ = optfunc(lmm, lmm.result.theta) #Fit false lmm.result.fit = false + + lmmlog!(io, lmm, verbose, LMMLogMsg(:ERROR, "Model not fitted!")) end lmm end diff --git a/src/lmm.jl b/src/lmm.jl index 1239b39d..c8ff744b 100644 --- a/src/lmm.jl +++ b/src/lmm.jl @@ -76,9 +76,13 @@ end Length of theta vector. """ -function thetalength(lmm::LMM) +function thetalength(lmm) lmm.covstr.tl end + +function theta(lmm::LMM) + copy(lmm.result.theta) +end ################################################################################ function lmmlog!(io, lmmlog::Vector{LMMLogMsg}, verbose, vmsg) if verbose == 1 @@ -118,7 +122,8 @@ function Base.show(io::IO, lmm::LMM) println(io, "") if lmm.result.fit print(io, "Status: ") - Optim.converged(lmm.result.optim) ? printstyled(io, "converged \n"; color = :green) : printstyled(io, "not converged \n"; color = :red) + printresult(io, lmm.result.optim) + #Optim.converged(lmm.result.optim) ? printstyled(io, "converged \n"; color = :green) : printstyled(io, "not converged \n"; color = :red) #if length(lmm.log) > 0 printstyled(io, "Warnings! See lmm.log \n"; color = :yellow) end println(io, "") println(io, " -2 logREML: ", round(lmm.result.reml, sigdigits = 6)) @@ -153,6 +158,16 @@ function Base.show(io::IO, lmm::LMM) end end +function printresult(io, res::T) where T <: Optim.MultivariateOptimizationResults + Optim.converged(res) ? printstyled(io, "converged \n"; color = :green) : printstyled(io, "not converged \n"; color = :red) +end +function printresult(io, res) + if res[3] == :FTOL_REACHED || res[3] == :XTOL_REACHED || res[3] == :SUCCESS + printstyled(io, "converged ($(string(res[3])))\n"; color = :green) + else + printstyled(io, "not converged ($(string(res[3])))\n"; color = :red) + end +end function Base.show(io::IO, lmmlog::LMMLogMsg) if lmmlog.type == :INFO @@ -164,7 +179,5 @@ function Base.show(io::IO, lmmlog::LMMLogMsg) elseif lmmlog.type == :ERROR printstyled(io, " ERROR : "; color = :red) println(io, lmmlog.msg) - else - end end diff --git a/src/varstruct.jl b/src/varstruct.jl index a101b51f..7e8d28b1 100644 --- a/src/varstruct.jl +++ b/src/varstruct.jl @@ -1,6 +1,11 @@ ################################################################################ # @covstr macro ################################################################################ +""" + @covstr(ex) + +Macros for random/repeated effect model. +""" macro covstr(ex) return :(@formula(nothing ~ $ex).rhs) end @@ -37,36 +42,55 @@ struct CovarianceType <: AbstractCovarianceType rho::Function #number of rho parameters for Z size 2 end ################################################################################ +""" + ScaledIdentity() + +Scaled identity covariance type. + +SI = ScaledIdentity() +""" function ScaledIdentity() CovarianceType(:SI, ffxone, ffxone, ffxzero) end const SI = ScaledIdentity() - +""" + Diag() +""" function Diag() CovarianceType(:DIAG, ffx, ffx, ffxzero) end const DIAG = Diag() - +""" + Autoregressive() +""" function Autoregressive() CovarianceType(:AR, x -> 2, ffxone, ffxone) end const AR = Autoregressive() - +""" + HeterogeneousAutoregressive() +""" function HeterogeneousAutoregressive() CovarianceType(:ARH, ffxpone, ffx, ffxone) end const ARH = HeterogeneousAutoregressive() - +""" + CompoundSymmetry() +""" function CompoundSymmetry() CovarianceType(:CS, x -> 2, ffxone, ffxone) end const CS = CompoundSymmetry() - +""" + HeterogeneousCompoundSymmetry() +""" function HeterogeneousCompoundSymmetry() CovarianceType(:CSH, ffxpone, ffx, ffxone) end const CSH = HeterogeneousCompoundSymmetry() - +""" + RZero() +""" function RZero() CovarianceType(:ZERO, x -> 0, x -> 0, x -> 0) end @@ -84,6 +108,8 @@ end ################################################################################ """ VarEffect(model, covtype::T, coding; fulldummy = true, subj = nothing) where T <: AbstractCovarianceType + +Random/repeated effect. """ struct VarEffect model::Union{Tuple{Vararg{AbstractTerm}}, Nothing, AbstractTerm} @@ -114,10 +140,8 @@ struct VarEffect function VarEffect(model, covtype::T; coding = nothing, fulldummy = true, subj = nothing) where T <: AbstractCovarianceType VarEffect(model, covtype, coding; fulldummy = fulldummy, subj = subj) end - - function VarEffect(model; coding = nothing) - VarEffect(model, DIAG, coding) + VarEffect(model, SI, coding) end function VarEffect(covtype::T; coding = nothing, subj = nothing) where T <: AbstractCovarianceType VarEffect(@covstr(1), covtype, coding; subj = subj) @@ -154,11 +178,10 @@ struct CovStructure{T} <: AbstractCovarianceStructure # range of each parameters in θ vector tr::Vector{UnitRange{UInt32}} # θ Parameter count - tl::UInt16 + tl::Int # Parameter type :var / :rho ct::Vector{Symbol} #-- - # function CovStructure(random, repeated, data, blocks) alleffl = length(random) + 1 # @@ -246,7 +269,6 @@ struct CovStructure{T} <: AbstractCovarianceStructure end end end - # new{eltype(z)}(random, repeated, schema, rcnames, block, z, sblock, zrndur, rz, q, t, tr, tl, ct) end @@ -344,7 +366,6 @@ function Base.show(io::IO, e::VarEffect) println(io, "Subject:", e.subj) end - function Base.show(io::IO, cs::CovStructure) println(io, "Covariance Structure:") for i = 1:length(cs.random) diff --git a/test/test.jl b/test/test.jl index c058a6e3..c2c181f4 100644 --- a/test/test.jl +++ b/test/test.jl @@ -6,11 +6,11 @@ path = dirname(@__FILE__) include("testdata.jl") @testset " Basic test " begin + io = IOBuffer(); lmm = Metida.LMM(@formula(var~sequence+period+formulation), df0; random = Metida.VarEffect(Metida.@covstr(formulation), Metida.DIAG), ) Metida.fit!(lmm) - io = IOBuffer(); Base.show(io, lmm) Base.show(io, lmm.data) Base.show(io, lmm.result) @@ -18,13 +18,24 @@ include("testdata.jl") Base.show(io, lmm.log) @test Metida.logreml(lmm) ≈ -12.564740317165533 atol=1E-6 @test Metida.m2logreml(lmm) ≈ 25.129480634331067 atol=1E-6 + @test Metida.thetalength(lmm) == 3 @test lmm.result.reml ≈ 25.129480634331063 atol=1E-6 #need chec lmm = Metida.LMM(@formula(var~sequence+period+formulation), df0; repeated = Metida.VarEffect(Metida.SI, subj = :subject), ) - Metida.fit!(lmm) + Metida.fit!(lmm; verbose = 2, io = io) @test Metida.m2logreml(lmm) ≈ 25.129480634331067 atol=1E-6 + + lmm = Metida.LMM(@formula(var~sequence+period+formulation), df0; + random = Metida.VarEffect(Metida.@covstr(formulation), Metida.DIAG), + subject = :subject) + + Metida.fit!(lmm; aifirst = true) + @test Metida.m2logreml(lmm) ≈ 16.241112644506067 atol=1E-6 + + Metida.fit!(lmm; aifirst = true, init = Metida.theta(lmm)) + @test Metida.m2logreml(lmm) ≈ 16.241112644506067 atol=1E-6 end @testset " Model: DIAG/subject + nothing " begin lmm = Metida.LMM(@formula(var~sequence+period+formulation), df0;