diff --git a/change.log b/change.log index 0a6c76e0..f0668c95 100644 --- a/change.log +++ b/change.log @@ -2,6 +2,9 @@ v0.6.0 * rho link change * TOEPH * TOEPHP + * Custom covariance type - rename + * validation & documentation + v0.5.1 * dof_satter fix diff --git a/docs/src/faq.md b/docs/src/faq.md index aca144d2..97d762b6 100644 --- a/docs/src/faq.md +++ b/docs/src/faq.md @@ -2,8 +2,12 @@ * Q1: Why it working so slow? -Metida.jl work with small and medium datasets. Model fitting is based on variance-covariance matrix inversion at each iteration. That's why if you have big blocks it will work slow. If you have big blocks you can try to use MetidaCu.jl for optimization on CUDA GPU. You can use MetidaNLopt.jl for better performance, but you will not get Hessian matrix at the end of optimization. Also if you don't need specify repeated-measure (R) covariance part and use SI, DIAG, CS, CSH covariance types for random-effect part (G) you can use MixedModels.jl - it work much faster. +Metida.jl work with small and medium datasets. Model fitting is based on variance-covariance matrix inversion at each iteration. That's why if you have big blocks it will work slow. If you have big blocks you can try to use MetidaCu.jl for optimization on CUDA GPU. You can use MetidaNLopt.jl for better performance, but you will not get Hessian matrix at the end of optimization. Also if you don't need to specify repeated-measures (R) covariance part and use SI, DIAG, CS, CSH covariance types for random-effect part (G) you can use MixedModels.jl - it work much faster. * Q2: What blocks is? Blocks depend on subjects. If you have only one random effect block is equivalent to subject. If you have more than one random effect blocks will be made as non-crossing combination for all subject variables. + +* Q3: model not converging! + +Optimization of REML function can depend on many factors. In some cases covariance parameters can be correlated (ill-conditioned/singular covariance matrix). So hypersurface in the maximum area can be very flat, that why the result can be different for different starting values (or for different software even REML is near equal). Also, some models can not be fitted for specific data at all. If the model not fitted try to check how meaningful and reasonable is the model or try to guess more robust initial conditions. diff --git a/docs/src/instanduse.md b/docs/src/instanduse.md index 149e2f09..a85cabb6 100644 --- a/docs/src/instanduse.md +++ b/docs/src/instanduse.md @@ -110,13 +110,10 @@ using Metida, CSV, DataFrames # hide df0 = CSV.File(joinpath(dirname(pathof(Metida)), "..", "test", "csv", "df0.csv"); types = [String, String, String, String, Float64, Float64]) |> DataFrame -#Make struct for G and R matrix -ccsg = CustomCovarianceStruct((q,p) -> (q, 1), Metida.gmat_csh!) -ccsr = CustomCovarianceStruct((q,p) -> (q, 0), Metida.rmatp_diag!) -#Make type struct -CCTG = CustomCovarianceType(ccsg) -CCTR = CustomCovarianceType(ccsr) +#Make methods for G and R matrix and CovarianceType struct +CCTG = CovarianceType(CovmatMethod((q,p) -> (q, 1), Metida.gmat_csh!)) +CCTR = CovarianceType(CovmatMethod((q,p) -> (q, 0), Metida.rmatp_diag!)) #Make model lmm = LMM(@formula(var~sequence+period+formulation), df0; diff --git a/docs/src/validation.md b/docs/src/validation.md index 24b3a5e4..906c57a8 100644 --- a/docs/src/validation.md +++ b/docs/src/validation.md @@ -295,7 +295,3 @@ Full SPSS code provided in validation folder ([here](https://github.com/PharmCat SPSS [output](https://github.com/PharmCat/Metida.jl/blob/master/validation/RDS-OUTPUT.docx) in DOCX format. Validation dataset available [here](https://link.springer.com/article/10.1208%2Fs12248-020-0427-6), [12248_2020_427_MOESM2_ESM.xls](https://static-content.springer.com/esm/art%3A10.1208%2Fs12248-020-0427-6/MediaObjects/12248_2020_427_MOESM2_ESM.xls). - -## Discussion - -Optimization of REML function can depend on many factors. Most of all in some cases covariance parameters can be correlated (ill-conditioned/singular covariance matrix). So hypersurface in the maximum area can be very flat, that why the result can be different for different starting values (or for different software even REML is near equal). Also, some models can not be fitted for specific data at all. If the model not fitted try to check how meaningful and reasonable is the model or try to guess more robust initial conditions. diff --git a/src/Metida.jl b/src/Metida.jl index 491f69b7..80166d37 100644 --- a/src/Metida.jl +++ b/src/Metida.jl @@ -26,9 +26,10 @@ TOEP, Toeplitz, TOEPP, ToeplitzParameterized, TOEPH, HeterogeneousToeplitz, TOEPHP, HeterogeneousToeplitzParameterized, -CustomCovarianceStruct, CustomCovarianceType, +CovmatMethod, fit!, LMM, VarEffect, theta, logreml, m2logreml, thetalength, dof_satter, dof_contain, rankx, caic, -gmatrix, rmatrix, vmatrix! +gmatrix, rmatrix, vmatrix!, +AbstractCovarianceType, AbstractCovmatMethod, MetidaModel include("abstracttype.jl") include("sweep.jl") @@ -45,7 +46,7 @@ include("ml.jl") include("fit.jl") include("showutils.jl") include("statsbase.jl") -include("initg.jl") +#include("initg.jl") include("utils.jl") include("dof_satter.jl") include("dof_contain.jl") diff --git a/src/abstracttype.jl b/src/abstracttype.jl index 01785e86..b294bc6c 100644 --- a/src/abstracttype.jl +++ b/src/abstracttype.jl @@ -4,4 +4,6 @@ abstract type MetidaModel <: StatisticalModel end abstract type AbstractCovarianceStructure end +abstract type AbstractCovmatMethod end + abstract type AbstractCovarianceType end diff --git a/src/initg.jl b/src/initg.jl index 108ec746..2bff9c16 100644 --- a/src/initg.jl +++ b/src/initg.jl @@ -1,6 +1,7 @@ - +#= function initgstep(f, θ) g = ForwardDiff.gradient(f, θ) he = ForwardDiff.hessian(f, θ) θ .-= inv(he)*g ./2 end +=# diff --git a/src/varstruct.jl b/src/varstruct.jl index 1460d306..6da53255 100644 --- a/src/varstruct.jl +++ b/src/varstruct.jl @@ -236,7 +236,7 @@ function RZero() end """ - CustomCovarianceStruct(nparamf::Function, xmat!::Function) + CovmatMethod(nparamf::Function, xmat!::Function) * `nparamf` - function type (t, q) -> (a, b) where: @@ -274,17 +274,17 @@ function rmatp_diag!(mx, θ::Vector{T}, rz, p) where T end ``` """ -struct CustomCovarianceStruct +struct CovmatMethod <: AbstractCovmatMethod nparamf::Function xmat!::Function end """ - CustomCovarianceType(ccs::CustomCovarianceStruct) + CovarianceType(cm::AbstractCovmatMethod) Make custom covariance type with CustomCovarianceStruct. """ -CustomCovarianceType(ccs::CustomCovarianceStruct) = CovarianceType(:FUNC, ccs) +CovarianceType(cm::AbstractCovmatMethod) = CovarianceType(:FUNC, cm) function covstrparam(ct::CovarianceType, t::Int, q::Int)::Tuple{Int, Int} if ct.s == :SI diff --git a/test/test.jl b/test/test.jl index 4c705b2a..f18d8119 100644 --- a/test/test.jl +++ b/test/test.jl @@ -50,6 +50,7 @@ include("testdata.jl") @test isa(response(lmm), Vector) @test sum(Metida.hessian(lmm)) ≈ 1118.160713481362 atol=1E-2 @test Metida.nblocks(lmm) == 5 + @test length(coefnames(lmm)) == 6 #AI like algo Metida.fit!(lmm; aifirst = true, init = Metida.theta(lmm)) @test Metida.m2logreml(lmm) ≈ 16.241112644506067 atol=1E-6 @@ -136,10 +137,8 @@ end @test Metida.m2logreml(lmm) ≈ 10.065239006121315 atol=1E-6 end @testset " Model: Custom covariance type " begin - ccsg = Metida.CustomCovarianceStruct((q,p) -> (q, 1), Metida.gmat_csh!) - ccsr = Metida.CustomCovarianceStruct((q,p) -> (q, 0), Metida.rmatp_diag!) - CCTG = Metida.CustomCovarianceType(ccsg) - CCTR = Metida.CustomCovarianceType(ccsr) + CCTG = Metida.CovarianceType(Metida.CovmatMethod((q,p) -> (q, 1), Metida.gmat_csh!)) + CCTR = Metida.CovarianceType(Metida.CovmatMethod((q,p) -> (q, 0), Metida.rmatp_diag!)) lmm = Metida.LMM(@formula(var~sequence+period+formulation), df0; random = Metida.VarEffect(Metida.@covstr(formulation|subject), CCTG), repeated = Metida.VarEffect(Metida.@covstr(formulation|subject), CCTR), diff --git a/test/validation_s2.jl b/test/validation_s2.jl index e69de29b..a878c21b 100644 --- a/test/validation_s2.jl +++ b/test/validation_s2.jl @@ -0,0 +1,15 @@ + + +df = CSV.File(joinpath(path, "csv", "Penicillin.csv"); types = [String, Float64, String, String]) |> DataFrame + +@testset " Model 7: Penicillin.csv parameters " begin + lmm = Metida.LMM(@formula(diameter ~ 1), df; + random = [Metida.VarEffect(@covstr(1|plate), Metida.SI), Metida.VarEffect(@covstr(1|sample), Metida.SI)] + ) + Metida.fit!(lmm) + @test Metida.coef(lmm)[1] ≈ 22.9722 atol = 1E-4 + @test Metida.stderror(lmm)[1] ≈ 0.808573 atol = 1E-4 + @test Metida.theta(lmm)[1]^2 ≈ 0.716908 atol = 1E-4 + @test Metida.theta(lmm)[2]^2 ≈ 3.73092 atol = 1E-4 + @test Metida.theta(lmm)[3]^2 ≈ 0.302415 atol = 1E-4 +end diff --git a/test/validation_s3.jl b/test/validation_s3.jl index 2fd7a6c6..690351d8 100644 --- a/test/validation_s3.jl +++ b/test/validation_s3.jl @@ -28,8 +28,11 @@ remlsrc= [530.14451859,-30.67456491,425.44656047,314.22176883,-74.87997833, commentsc1 = [1,3,6,13,15,16,24,25] commentsc2 = [2,5,7,10,22,30] -c1 = "The final Hessian matrix is not positive definite although all convergence criteria are satisfied. The MIXED procedure continues despite this warning. Validity of subsequent results cannot be ascertained." -c2 = "Iteration was terminated but convergence has not been achieved. The MIXED procedure continues despite this warning. Subsequent results produced are based on the last iteration. Validity of the model fit is uncertain." +c1 = "The final Hessian matrix is not positive definite although all convergence criteria are satisfied. +The MIXED procedure continues despite this warning. Validity of subsequent results cannot be ascertained." +c2 = "Iteration was terminated but convergence has not been achieved. +The MIXED procedure continues despite this warning. Subsequent results produced are based on the last iteration. +Validity of the model fit is uncertain." #8, 9, 12 #15? #! 16, 27 @@ -49,7 +52,7 @@ REML G - SI, R - SI 30 - 32.418048 =# - +@testset " RDS Test " begin for i = 1:30 dfrds = CSV.File(joinpath(path, "csv", "berds", "rds"*string(i)*".csv"), types = Dict(:PK => Float64, :subject => String, :period => String, :sequence => String, :treatment => String )) |> DataFrame dropmissing!(dfrds) @@ -76,17 +79,18 @@ for i = 1:30 remlsc[i] = Metida.m2logreml(lmm) end end +end remlsrc[30] = 14.94403807 -dftable = DataFrame(a = remlsb, b = remlsrb, c = Vector{Any}(undef, 30), d = remlsc, e = remlsrc, f = Vector{Any}(undef, 30), g = fill!(Vector{String}(undef, 30),"")) +dftable = DataFrame(n = collect(1:30), a = remlsb, b = remlsrb, c = Vector{Any}(undef, 30), d = remlsc, e = remlsrc, f = Vector{Any}(undef, 30), g = fill!(Vector{String}(undef, 30),"")) for i = 1:30 - if isapprox(dftable[i, 1], dftable[i, 2]; atol=1E-6) dftable[i, 3] = "OK" else dftable[i, 3] = dftable[i, 1] - dftable[i, 2] end - if isapprox(dftable[i, 4], dftable[i, 5]; atol=1E-6) dftable[i, 6] = "OK" else dftable[i, 6] = dftable[i, 4] - dftable[i, 5] end + if isapprox(dftable.a[i], dftable.b[i]; atol=1E-6) dftable.c[i] = "OK" else dftable.c[i] = dftable.a[i] - dftable.b[i] end + if isapprox(dftable.d[i], dftable.e[i]; atol=1E-6) dftable.f[i] = "OK" else dftable.f[i] = dftable.d[i] - dftable.e[i] end if i in commentsc1 - dftable[i, 7] = "*" + dftable.g[i] = "*" end if i in commentsc2 - dftable[i, 7] = "**" + dftable.g[i] = "**" end end @@ -99,10 +103,13 @@ dfrds.lnpk = log.(dfrds.PK) fm2 = @formula(lnpk~sequence+period+treatment+(1|subject)) mm = fit(MixedModel, fm2, dfrds, REML=true) - -pretty_table(dftable, ["REML B Metida", "REML B SPSS", "REML B RESULT","REML C Metida", "REML C SPSS", "REML C RESULT", "Comment C"]) +println("") +pretty_table(dftable, ["RDS" "REML B" "REML B" "DIFF B" "REML C" "REML C" "DIFF C" "Comment C"; + " N " "Metida" " SPSS " " " "Metida" " SPSS " " " " "]) +println("") println("* - ", c1) +println("") println("** - ", c2) - -println("DataSet 27 MixedModels.jl result:") +println("") +println("DataSet 27 - MixedModels.jl result:") println(mm)