diff --git a/change.log b/change.log index f73ac165..c0e4a412 100644 --- a/change.log +++ b/change.log @@ -1,6 +1,7 @@ v0.10.0 * MetidaBase for abstract types - * Optimization with LoopVectorization + * contain DF closer to the truth + * minor changes v0.9.2 * z matrix for random effect diff --git a/src/anova.jl b/src/anova.jl index 34ac07da..994d3b5e 100644 --- a/src/anova.jl +++ b/src/anova.jl @@ -39,7 +39,7 @@ function anova(lmm::LMM{T}; ddf::Symbol = :satter) where T if ddf == :satter df[i] = dof_satter(lmm, L) elseif ddf == :contain - df[i] = dof_contain(lmm, i) + df[i] = dof_contain_f(lmm, i) elseif ddf == :residual df[i] = dof_residual(lmm) end diff --git a/src/dof_contain.jl b/src/dof_contain.jl index 00280cc2..b43df2fc 100644 --- a/src/dof_contain.jl +++ b/src/dof_contain.jl @@ -30,61 +30,72 @@ end function rankxz(lmm::LMM, i) rank(hcat(lmm.data.xv, zmatrix(lmm, i))) end -#= -function zmatrix(lmm, r) - l = zeros(Int, lmm.covstr.sn[r]) - si = 1 - rzm = Matrix{Int}(undef, 0, length(lmm.covstr.zrndur[r]) * lmm.covstr.sn[r]) - for b = 1:length(lmm.covstr.vcovblock) - zblock = view(lmm.covstr.z, lmm.covstr.vcovblock[b], lmm.covstr.zrndur[r]) - for s = 1:length(lmm.covstr.sblock[b][r]) - zi = view(zblock, lmm.covstr.sblock[b][r][s], :) - l[si] = 1 - rzm = vcat(rzm, kron(l', zi)) - l[si] = 0 - si +=1 - end - end - rzm -end -=# + """ - dof_contain(lmm) + dof_contain(lmm, i) !!! warning - Experimental - -Return the containment denominator degrees of freedom: rank(XZ) - rank(X) + Experimental! + Compute rank(XZi) for each random effect that syntactically contain factor assigned for β[i] element (Where Zi - Z matrix for random effect i). + Minimum returned. If no random effect found N - rank(XZ) returned. """ -function dof_contain(lmm) - rankxz(lmm) - lmm.rankx +function dof_contain(lmm, i) + ind = lmm.mm.assign[i] + sym = get_symb(lmm.mf.f.rhs.terms[ind]) + rr = Vector{Int}(undef, 0) + for r = 1:length(lmm.covstr.random) + if length(intersect(sym, get_symb(lmm.covstr.random[r].model))) > 0 + push!(rr, rankxz(lmm, r)) + end + end + if length(rr) > 0 + return minimum(rr) + else + return nobs(lmm) - rankxz(lmm) + end end -#function dof_contain(lmm, i) -# rank(hcat(lmm.data.xv, fullzmatrix(lmm))) - lmm.rankx -#end -#= -function dof_contain2(lmm) - tl = length(lmm.mf.f.rhs.terms) - df = Vector{Int}(undef, tl) - dfb = Vector{Int}(undef, coefn(lmm)) - cnt = 1 - for i = 1:tl - dfv=Vector{Int}(undef, 0) +function dof_contain(lmm) + dof = zeros(Int, length(lmm.mm.assign)) + rrt = zeros(Int, length(lmm.covstr.random)) + rz = 0 + for i = 1:length(lmm.mm.assign) + ind = lmm.mm.assign[i] + sym = get_symb(lmm.mf.f.rhs.terms[ind]) + rr = Vector{Int}(undef, 0) for r = 1:length(lmm.covstr.random) - if contain(lmm.mf.f.rhs.terms[i], lmm.covstr.random[r]) - push!(dfv, rank(hcat(lmm.data.xv, zmatrix(lmm, r)))) + if length(intersect(sym, get_symb(lmm.covstr.random[r].model))) > 0 + if rrt[r] == 0 rrt[r] = rankxz(lmm, r) end + push!(rr, rrt[r]) end end - if length(dfv) > 0 df[i] = minimum(dfv) else df[i] = nobs(lmm) - rank(hcat(lmm.data.xv, fullzmatrix(lmm))) end - for c = 1:termsize(lmm.mf.f.rhs.terms[i]) - dfb[cnt] = df[i] - cnt += 1 + if length(rr) > 0 + dof[i] = minimum(rr) + else + if rz == 0 rz = nobs(lmm) - rankxz(lmm) end + dof[i] = rz end end - dfb + dof end -=# -function dof_contain(lmm, i) +""" + dof_contain_f(lmm, i) + +!!! warning + Experimental +""" +function dof_contain_f(lmm, i) + sym = get_symb(lmm.mf.f.rhs.terms[i]) + rr = Vector{Int}(undef, 0) + for r = 1:length(lmm.covstr.random) + if length(intersect(sym, get_symb(lmm.covstr.random[r].model))) > 0 + push!(rr, rankxz(lmm, r)) + end + end + if length(rr) > 0 + return minimum(rr) + else + return nobs(lmm) - rankxz(lmm) + end end diff --git a/src/fit.jl b/src/fit.jl index cbf9fae4..3cccfd11 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -50,9 +50,8 @@ function fit!(lmm::LMM{T}; if lmm.result.fit lmmlog!(io, lmm, verbose, LMMLogMsg(:INFO, "Refit model...")) end lmm.result.fit = false - if solver != :default - return fit_nlopt!(lmm; solver = solver, verbose = verbose, varlinkf = varlinkf, rholinkf = rholinkf, aifirst = aifirst, g_tol = g_tol, x_tol = x_tol, f_tol = f_tol, hes = false, init = init, io = io) - end + solver == :default || return fit_nlopt!(lmm; solver = solver, verbose = verbose, varlinkf = varlinkf, rholinkf = rholinkf, aifirst = aifirst, g_tol = g_tol, x_tol = x_tol, f_tol = f_tol, hes = false, init = init, io = io) + if verbose == :auto verbose = 1 diff --git a/src/lmm.jl b/src/lmm.jl index 59d5e5ae..da0d1270 100644 --- a/src/lmm.jl +++ b/src/lmm.jl @@ -26,7 +26,7 @@ struct LMM{T} <: MetidaModel covstr::CovStructure{T} data::LMMData{T} nfixed::Int - rankx::UInt32 + rankx::Int result::ModelResult log::Vector{LMMLogMsg} @@ -53,7 +53,11 @@ struct LMM{T} <: MetidaModel end lmmdata = LMMData(mm.m, response(mf)) covstr = CovStructure(random, repeated, data) - new{eltype(mm.m)}(model, mf, mm, covstr, lmmdata, nfixed, rank(mm.m), ModelResult(), lmmlog) + rankx = rank(mm.m) + if rankx != size(mm.m, 2) + lmmlog!(lmmlog, 1, LMMLogMsg(:WARN, "Fixed-effect matrix not full-rank!")) + end + new{eltype(mm.m)}(model, mf, mm, covstr, lmmdata, nfixed, rankx, ModelResult(), lmmlog) end end """ diff --git a/src/statsbase.jl b/src/statsbase.jl index 27b50e1d..f6425ce1 100644 --- a/src/statsbase.jl +++ b/src/statsbase.jl @@ -35,7 +35,7 @@ function StatsBase.confint(lmm::LMM{T}; level::Real=0.95, ddf::Symbol = :satter) if ddf == :satter ddfv = dof_satter(lmm) elseif ddf == :contain - ddfv = fill!(Vector{Float64}(undef, coefn(lmm)), dof_contain(lmm)) + ddfv = dof_contain(lmm) elseif ddf == :residual ddfv = fill!(Vector{Float64}(undef, coefn(lmm)), dof_residual(lmm)) end @@ -78,7 +78,7 @@ end """ StatsBase.dof_residual(lmm::LMM) -DOF residuals. +DOF residuals: N - rank(X). """ function StatsBase.dof_residual(lmm::LMM) nobs(lmm) - lmm.rankx diff --git a/src/utils.jl b/src/utils.jl index 4f9eef4d..f4761656 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -130,6 +130,9 @@ end function logreml(lmm) -m2logreml(lmm)/2 end +function m2logreml(lmm, theta) + reml_sweep_β(lmm, LMMDataViews(lmm), theta)[1] +end ################################################################################ function optim_callback(os) @@ -195,3 +198,24 @@ function hessian(lmm) hessian(lmm, lmm.result.theta) end ################################################################################ + + +function get_symb(t::T; v = Vector{Symbol}(undef, 0)) where T <: Union{ConstantTerm, InterceptTerm, FunctionTerm} + v +end +function get_symb(t::T; v = Vector{Symbol}(undef, 0)) where T <: Union{Term, CategoricalTerm} + push!(v, t.sym) + v +end +function get_symb(t::T; v = Vector{Symbol}(undef, 0)) where T <: InteractionTerm + for i in t.terms + get_symb(i; v = v) + end + v +end +function get_symb(t::T; v = Vector{Symbol}(undef, 0)) where T <: Tuple{Vararg{AbstractTerm}} + for i in t + get_symb(i; v = v) + end + v +end diff --git a/test/test.jl b/test/test.jl index 4eb7f117..5e87d7a9 100644 --- a/test/test.jl +++ b/test/test.jl @@ -13,6 +13,7 @@ include("testdata.jl") lmm = Metida.LMM(@formula(var~sequence+period+formulation), df0; random = Metida.VarEffect(Metida.@covstr(formulation|nosubj), Metida.DIAG), ) + # Basic show (before fitting) Base.show(io, lmm) Metida.fit!(lmm) @@ -33,8 +34,15 @@ include("testdata.jl") ) Metida.fit!(lmm; aifirst = true) @test Metida.m2logreml(lmm) ≈ 16.241112644506067 atol=1E-6 + + anovatable = Metida.anova(lmm; ddf = :contain) # NOT VALIDATED + anovatable = Metida.anova(lmm; ddf = :residual) anovatable = Metida.anova(lmm) Base.show(io, anovatable) + + + ############################################################################ + ############################################################################ # API test ############################################################################ @@ -46,7 +54,7 @@ include("testdata.jl") @test aicc(lmm) ≈ 24.241112644506067 atol=1E-6 @test Metida.caic(lmm) ≈ 27.558878811225412 atol=1E-6 @test dof_residual(lmm) == 14 - @test Metida.dof_contain(lmm) == 6 + @test Metida.dof_satter(lmm, 6) ≈ 5.81896814947982 atol=1E-2 @test Metida.dof_satter(lmm)[end] ≈ 5.81896814947982 atol=1E-2 @test Metida.dof_satter(lmm, [0 0 0 0 0 1]) ≈ 5.81896814947982 atol=1E-2 @@ -69,7 +77,8 @@ include("testdata.jl") @test length(coefnames(lmm)) == 6 @test Metida.confint(lmm)[end][1] ≈ -0.7630380758015894 atol=1E-4 @test Metida.confint(lmm; ddf = :residual)[end][1] ≈ -0.6740837049617738 atol=1E-4 - Metida.confint(lmm; ddf = :contain)[end][1] + + Metida.confint(lmm; ddf = :contain)[end][1] #NOT VALIDATED @test size(crossmodelmatrix(lmm), 1) == 6 @test anovatable.pval[4] ≈ 0.7852154468081014 atol=1E-6 ############################################################################ @@ -112,6 +121,15 @@ include("testdata.jl") Metida.fit!(lmm; hes = false) @test Metida.m2logreml(lmm) ≈ 14.819463206995163 atol=1E-6 @test Metida.dof_satter(lmm, 6) ≈ 3.981102548214154 atol=1E-2 + + lmm = Metida.LMM(@formula(var~period*formulation), df0; + random = Metida.VarEffect(Metida.@covstr(formulation+sequence|nosubj), Metida.SI), + ) + Metida.fit!(lmm) + @test Metida.m2logreml(lmm, [0.222283, 0.444566]) ≈ Metida.m2logreml(lmm) atol=1E-6 + # EXPERIMENTAL + @test Metida.dof_contain(lmm, 1) == 12 + @test Metida.dof_contain(lmm, 5) == 8 end ################################################################################ # df0