From 76213a8a0824b34a5188b7d6dbd7c67b45c6afbf Mon Sep 17 00:00:00 2001 From: Brandmaier Date: Fri, 17 Apr 2020 19:47:13 +0200 Subject: [PATCH 1/5] added missing argument to semtree.control --- R/semtree.control.R | 4 +++- man/semtree.control.Rd | 3 ++- vignettes/score-based-tests.Rmd | 13 +++++++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/R/semtree.control.R b/R/semtree.control.R index a5ad6ba..64dc1db 100644 --- a/R/semtree.control.R +++ b/R/semtree.control.R @@ -5,7 +5,7 @@ function(method="naive", min.N = 20, max.depth=NA, alpha=.05, alpha.invariance=N mtry=NA, report.level=0, exclude.code=NA, score.tests = list(nominal = 'LMuo', ordinal = 'maxLMo', metric = 'CvM'), information.matrix = "info", scaled_scores = TRUE, linear = TRUE, - min.bucket=10, naive.bonferroni.type=0) + min.bucket=10, naive.bonferroni.type=0, missing = 'ignore') { options <- list() # verbose output during generation of SEMTree @@ -58,6 +58,8 @@ function(method="naive", min.N = 20, max.depth=NA, alpha=.05, alpha.invariance=N options$report.level <- report.level # type of counting the number of tests (0=all splits, 1=# of variables) options$naive.bonferroni.type <- naive.bonferroni.type + # missing data treatment + options$missing <- "ignore" class(options) <- "semtree.control" diff --git a/man/semtree.control.Rd b/man/semtree.control.Rd index ece4f2a..07b1fa4 100644 --- a/man/semtree.control.Rd +++ b/man/semtree.control.Rd @@ -15,7 +15,7 @@ custom.stopping.rule=NA, mtry=NA, report.level=0, exclude.code=NA, score.tests = list(nominal = "LMuo", ordinal = "maxLMo", metric = "CvM"), information.matrix = "info", scaled_scores = TRUE, linear = TRUE, - min.bucket = 10, naive.bonferroni.type=0) + min.bucket = 10, naive.bonferroni.type=0, missing = 'ignore') } % print.semtree.control(x, ...) %- maybe also 'usage' for other objects documented here. @@ -84,6 +84,7 @@ } \item{naive.bonferroni.type}{Default: 0. When set to zero, bonferroni correction for the naive test counts the number of dichotomous tests. When set to one, bonferroni correction counts the number of variables tested. } } +\item{Missing value treatment. Default is 'ignore'} \value{ A control object containing a list of the above parameters. } diff --git a/vignettes/score-based-tests.Rmd b/vignettes/score-based-tests.Rmd index 83e3a1d..6d8fd09 100644 --- a/vignettes/score-based-tests.Rmd +++ b/vignettes/score-based-tests.Rmd @@ -26,7 +26,12 @@ Load affect dataset from the `psychTools` package. These are data from two studi library(psychTools) data(affect) -knitr::kable(head(affect)) +affect$Film <- as.ordered(affect$Film) + + +tree.data <- affect[,c("state1","Film","neur","ext")] + +knitr::kable(head(tree.data)) ``` ## Create simple model of state anxiety @@ -41,7 +46,7 @@ manifestVars = manifests, latentVars = latents, mxPath(from="one",to=manifests, free=c(TRUE), value=c(1.0) , arrows=1, label=c("mu") ), mxPath(from=manifests,to=manifests, free=c(TRUE), value=c(1.0) , arrows=2, label=c("sigma2") ), -mxData(affect, type = "raw") +mxData(tree.data, type = "raw") ); result <- mxRun(model) @@ -55,11 +60,11 @@ Use score-based tests to create the tree. ```{r} library(semtree) ctrl = semtree.control(method="score") -#tree = semtree(model=result, data=affect, control=ctrl) +#tree = semtree(model=result, data=tree.data, control=ctrl) ``` Plot the tree ```{r} -#plot(tree) +plot(tree) ``` \ No newline at end of file From f7fd7dc6bf930669d880e4547defe7cca253d8df Mon Sep 17 00:00:00 2001 From: Brandmaier Date: Fri, 17 Apr 2020 20:07:58 +0200 Subject: [PATCH 2/5] added missing data model --- R/growTree.R | 8 ++++++++ R/semtree.control.R | 2 +- R/traverse.R | 13 ++++++++++++- man/semtree.control.Rd | 3 ++- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/R/growTree.R b/R/growTree.R index 3aa7727..555d415 100644 --- a/R/growTree.R +++ b/R/growTree.R @@ -382,6 +382,14 @@ growTree <- function(model=NULL, mydata=NULL, } ########################################################## + # build a model for missing data + if (control$missing == "ctree") { + temp = mydata[!is.na(mydata[,result$name.max]),] + result$missing.model = party::ctree( + data = temp, + formula = as.formula(paste0(result$name.max,"~."))) + } + # recursively continue splitting # result1 - RHS; result2 - LHS result2 <- growTree( model, sub2, control, invariance, meta, edgelabel=0, depth=depth+1, constraints) diff --git a/R/semtree.control.R b/R/semtree.control.R index 64dc1db..7cd033a 100644 --- a/R/semtree.control.R +++ b/R/semtree.control.R @@ -59,7 +59,7 @@ function(method="naive", min.N = 20, max.depth=NA, alpha=.05, alpha.invariance=N # type of counting the number of tests (0=all splits, 1=# of variables) options$naive.bonferroni.type <- naive.bonferroni.type # missing data treatment - options$missing <- "ignore" + options$missing <- missing class(options) <- "semtree.control" diff --git a/R/traverse.R b/R/traverse.R index 46d307c..729ee52 100644 --- a/R/traverse.R +++ b/R/traverse.R @@ -10,7 +10,18 @@ value <- tryCatch({ row[[tree$rule$name]] },error=function(cond){ message("ERROR! Incomplete dataset!"); stop(); return(NA);}) -if (is.na(value)) return(tree$node_id); +if (is.na(value)) { + + if (is.null(tree$missing.model)) { + return(tree$node_id); + } else { + browser() + value = predict(tree$missing.model, newdata=matrix(row)) + } + + + +} log.val <- NA diff --git a/man/semtree.control.Rd b/man/semtree.control.Rd index 07b1fa4..2a43975 100644 --- a/man/semtree.control.Rd +++ b/man/semtree.control.Rd @@ -83,8 +83,9 @@ \item{min.bucket}{Minimum bucket size to continue splitting } \item{naive.bonferroni.type}{Default: 0. When set to zero, bonferroni correction for the naive test counts the number of dichotomous tests. When set to one, bonferroni correction counts the number of variables tested. } + +\item{missing}{Missing value treatment. Default is ignore} } -\item{Missing value treatment. Default is 'ignore'} \value{ A control object containing a list of the above parameters. } From e66ed535782d4bbf18f7105abee030c33b04140c Mon Sep 17 00:00:00 2001 From: Brandmaier Date: Fri, 17 Apr 2020 20:55:31 +0200 Subject: [PATCH 3/5] fixed NAMESPACE imports --- NAMESPACE | 2 +- vignettes/score-based-tests.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index fa2de54..d47d430 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,7 +24,7 @@ importFrom("strucchange", "catL2BB", "maxBB", "meanL2BB", "ordL2BB", "ordwmax", importFrom("zoo", "zoo") importFrom("utils","toLatex") importFrom("lavaan", "lavScores", "nobs", "vcov") - +importFrom("stats", "as.formula", "predict") # # S3 methods diff --git a/vignettes/score-based-tests.Rmd b/vignettes/score-based-tests.Rmd index 6d8fd09..3ce09c9 100644 --- a/vignettes/score-based-tests.Rmd +++ b/vignettes/score-based-tests.Rmd @@ -66,5 +66,5 @@ ctrl = semtree.control(method="score") Plot the tree ```{r} -plot(tree) +#plot(tree) ``` \ No newline at end of file From 7b7109c2260b7706cc2501c03ff948793212e8f9 Mon Sep 17 00:00:00 2001 From: Brandmaier Date: Fri, 17 Apr 2020 22:00:24 +0200 Subject: [PATCH 4/5] fixed predict for missing data --- R/growTree.R | 2 +- R/traverse.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/growTree.R b/R/growTree.R index 555d415..7fc012d 100644 --- a/R/growTree.R +++ b/R/growTree.R @@ -385,7 +385,7 @@ growTree <- function(model=NULL, mydata=NULL, # build a model for missing data if (control$missing == "ctree") { temp = mydata[!is.na(mydata[,result$name.max]),] - result$missing.model = party::ctree( + node$missing.model = party::ctree( data = temp, formula = as.formula(paste0(result$name.max,"~."))) } diff --git a/R/traverse.R b/R/traverse.R index 729ee52..b13b984 100644 --- a/R/traverse.R +++ b/R/traverse.R @@ -15,8 +15,8 @@ if (is.na(value)) { if (is.null(tree$missing.model)) { return(tree$node_id); } else { - browser() - value = predict(tree$missing.model, newdata=matrix(row)) + #browser() + value = predict(tree$missing.model, newdata=row) } From d9719eafaab9323436b7aeb7a0f62c9a787da9fb Mon Sep 17 00:00:00 2001 From: Brandmaier Date: Fri, 17 Apr 2020 22:33:23 +0200 Subject: [PATCH 5/5] added rpart missing data strategy --- R/growTree.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/growTree.R b/R/growTree.R index 7fc012d..9913001 100644 --- a/R/growTree.R +++ b/R/growTree.R @@ -384,8 +384,14 @@ growTree <- function(model=NULL, mydata=NULL, # build a model for missing data if (control$missing == "ctree") { + ui_warn("Missing data treatment with ctree is not yet implemented.") + #temp = mydata[!is.na(mydata[,result$name.max]),] + #node$missing.model = party::ctree( + # data = temp, + # formula = as.formula(paste0(result$name.max,"~."))) + } else if (control$missing == "rpart") { temp = mydata[!is.na(mydata[,result$name.max]),] - node$missing.model = party::ctree( + node$missing.model = rpart::rpart( data = temp, formula = as.formula(paste0(result$name.max,"~."))) }