From 6c7443c8f2d96b9b618f3ad37589bc49c16fa853 Mon Sep 17 00:00:00 2001 From: Liang Zhang Date: Fri, 20 Sep 2024 22:51:14 +0800 Subject: [PATCH] Refactor project by replacing obsolete targets pipeline --- R/constants.R | 62 ---- R/preproc.R | 112 ------ R/scoring.R | 153 -------- R/stats.R | 183 ---------- R/targets.R | 36 -- R/utils.R | 188 +++++++--- _scripts/analyze_inter_subject.R | 514 -------------------------- _targets.R | 602 ++++++++++++++++--------------- 8 files changed, 442 insertions(+), 1408 deletions(-) delete mode 100644 R/constants.R delete mode 100644 R/preproc.R delete mode 100644 R/scoring.R delete mode 100644 R/stats.R delete mode 100644 R/targets.R delete mode 100644 _scripts/analyze_inter_subject.R diff --git a/R/constants.R b/R/constants.R deleted file mode 100644 index 10cfbec..0000000 --- a/R/constants.R +++ /dev/null @@ -1,62 +0,0 @@ -# permutations number: 1000 divided into 100 batches of 10 reps -num_batches <- 100 -num_reps <- 10 - -hypers_pred_perf <- tibble::tribble( - ~index_name, ~index_name_sjt, - "remember", "rem_dp", - "knowadj", "know_dp_adj", - "avg_rk", "mean_rem_know", - "old", "coarse_dp", - "avg_score", "sum_mean" -) - -hypers_prep_shared <- tidyr::expand_grid( - resp_trans = "precise", - # c( - # "precise", # use the original response, i.e., 1-4 - # "coarse", # collapse 1-2 and 3-4 respectively - # "compromise" # collapse 3-4 for old and collapse 1-2 for new - # ), - include = c("old", "all") -) |> - dplyr::mutate( - transform_resp = rlang::syms( - sprintf("transform_resp_%s", resp_trans) - ), - tar_resp_mat = rlang::syms( - sprintf("resp_mat_%s_%s", resp_trans, include) - ) - ) -hypers_dist_shared <- tibble::tibble(method = c("sm", "gower")) - -hypers_rs <- tidyr::expand_grid( - type = c("inter", "group"), - acq = c("trial", "whole", "window") -) |> - dplyr::mutate( - tar_name_path = rlang::syms( - sprintf("file_rs_%s_%s", type, acq) - ), - batches_file = dplyr::if_else(acq == "window", 50, 1) - ) - -labels_acq <- c( - "trial" = "Averaged Trial-level", - "whole" = "Concatenated Time-series" -) -labels_index_name <- c( - "knowadj" = "Familiarity", - "remember" = "Recollection", - "avg_rk" = "Average F/R", - "old" = "Coarse d'", - "avg_score" = "Memory Score" -) -labels_include <- c( - "all" = "Both Items", - "old" = "Old Items Only" -) -labels_method <- c( - "gower" = "Gower (ordinal)", - "sm" = "Simple Match (nomial)" -) diff --git a/R/preproc.R b/R/preproc.R deleted file mode 100644 index 57e748e..0000000 --- a/R/preproc.R +++ /dev/null @@ -1,112 +0,0 @@ -# keep pairs of the same response to check the inter-subject similarity -extract_response_shared <- function(events_encoding, events_retrieval) { - events_retrieval |> - filter(memory_type != 0) |> - pivot_wider( - id_cols = word_id, - names_from = subj_id, - values_from = response_type - ) |> - mutate( - dplyover::across2x( - -1, -1, - ~ if_else(.x == .y, .x, NA), - .comb = "minimal" - ) - ) |> - select(contains("_")) |> - pivot_longer( - -word_id, - names_to = "subj_pair", - values_to = "response_type_shared" - ) |> - separate( - subj_pair, - c("subj_id_col", "subj_id_row"), - convert = TRUE - ) |> - mutate( - response_type_shared = case_match( - response_type_shared, - "remember" ~ "Rem", - "know" ~ "Know", - "unsure" ~ "Unsure", - "new" ~ "New", - .ptype = factor(levels = c("Rem", "Know", "Unsure", "New")) - ) - ) |> - nest(.by = word_id, .key = "resp_matched") |> - left_join( - distinct(events_encoding, trial_id, word_id, word_category), - by = "word_id" - ) -} - -filter_shared <- function(file, response_shared) { - arrow::read_parquet(file) |> - inner_join(response_shared, by = "trial_id") |> - mutate( - filtered = map2( - resp_matched, - fisher_z, - ~ .x |> - add_column(fisher_z = .y) |> - filter(!is.na(response_type_shared)) - ), - .keep = "unused" - ) |> - unnest(filtered) |> - filter( - sum(!is.na(fisher_z)) >= 5, - .by = c(region_id, word_category, contains("subj_id")) - ) -} - -# average inter-subject similarity across trials for trial-level analysis -average_rs_trials <- function(file_parquet, - col_rs = fisher_z, - col_trial = trial_id, - scalar_rs = FALSE) { - dat <- arrow::read_parquet(file_parquet) |> - nest(.by = -c({{ col_trial }}, {{ col_rs }})) - if (scalar_rs) { - dat |> - mutate( - mean_fisher_z = map_dbl( - data, - ~ mean(pull(.x, {{ col_rs }}), na.rm = TRUE) - ), - .keep = "unused" - ) - } else { - dat |> - mutate( - mean_fisher_z = map( - data, - ~ do.call(rbind, pull(.x, {{ col_rs }})) |> - colMeans(na.rm = TRUE) - ), - .keep = "unused" - ) - } -} - -# prepare shuffled behavioral measures by permuting subject id -permutate_behav <- function(data, cols_id) { - data_ids <- unique(data[cols_id]) - data_ids_perm <- data_ids[sample.int(nrow(data_ids)), ] - suff_tmp <- "_perm" - names(data_ids_perm) <- paste0(cols_id, suff_tmp) - bind_cols(data_ids, data_ids_perm) |> - left_join(data, by = cols_id) |> - select(-all_of(cols_id)) |> - rename_with( - ~ str_remove(.x, suff_tmp), - ends_with(suff_tmp) - ) -} - -permutate_simil <- function(simil) { - perm <- sample.int(attr(simil, "Size")) - as.dist(as.matrix(simil)[perm, perm]) -} diff --git a/R/scoring.R b/R/scoring.R deleted file mode 100644 index 49aa959..0000000 --- a/R/scoring.R +++ /dev/null @@ -1,153 +0,0 @@ -# memeory ability (performance) ---- -calc_mem_perf <- function(events_retrieval) { - count_trials <- events_retrieval |> - distinct(word_id, old_new) |> - count(old_new, name = "n_total") - events_clean <- events_retrieval |> - mutate( - old_new = factor(old_new, c("old", "new")), - response_type = factor( - response_type, - c("remember", "know", "unsure", "new") - ) - ) |> - filter(memory_type > 0) - dprime_precise <- calc_dprime( - events_clean, count_trials, - types_signal = c("remember", "know") - ) - dprime_adj <- dprime_precise |> - summarise( - across( - c(hr, far), - ~ .x[response_type == "know"] / - (1 - .x[response_type == "remember"]) - ), - .by = subj_id - ) |> - mutate( - response_type = "knowadj", - dprime = qnorm(hr) - qnorm(far) - ) - dprime_avg <- bind_rows(dprime_precise, dprime_adj) |> - summarise( - dprime = mean(dprime[response_type != "know"]), - .by = subj_id - ) |> - mutate(response_type = "avg_rk") - dprime_coarse <- events_clean |> - mutate( - response_type = fct_collapse( - response_type, - old = c("remember", "know"), - new = c("unsure", "new") - ) - ) |> - calc_dprime(count_trials, types_signal = "old") - grades <- events_retrieval |> - filter(memory_type != 0) |> - mutate( - score = if_else( - old_new == "old", - 5 - memory_type, - memory_type - ) - ) |> - summarise( - score = mean(score), - .by = subj_id - ) |> - add_column(index_name = "avg_score", .before = "score") - bind_rows( - dprime_precise, - dprime_adj, - dprime_avg, - dprime_coarse - ) |> - select(subj_id, index_name = response_type, score = dprime) |> - bind_rows(grades) -} - -calc_dist_mem_perf <- function(mem_perf, basis = c("knowadj", "remember")) { - mat <- mem_perf |> - filter(index_name %in% basis) |> - pivot_wider( - names_from = index_name, - values_from = score - ) |> - column_to_rownames("subj_id") - # use separate expression to make a clean "call" attributes - dist(mat, method = "euclidean") -} - -# similarity/distance of participants' responses ---- -transform_resp_precise <- function(events_retrieval) { - events_retrieval -} - -transform_resp_coarse <- function(events_retrieval) { - events_retrieval |> - mutate( - memory_type = case_match( - memory_type, - c(1, 2) ~ 1, - c(3, 4) ~ 2, - .default = 0 - ) - ) -} - -prepare_resp_mat <- function(resp, include) { - if (include == "all") { - include <- c("old", "new") - } - # note: 0's in memory_type are not removed now - resp |> - filter(old_new %in% include) |> - pivot_wider( - id_cols = subj_id, - names_from = word_id, - values_from = memory_type - ) |> - column_to_rownames("subj_id") -} - -calc_dist_resp_mat <- function(resp_mat, method = c("sm", "gower")) { - switch(method, - sm = 1 - nomclust::sm(resp_mat), - gower = resp_mat |> - mutate( - across( - everything(), - # 0 means no response and should be removed here - \(x) factor(na_if(x, 0), ordered = TRUE) - ) - ) |> - proxy::simil(method = "Gower") - ) -} - -# helper functions ---- -calc_dprime <- function(data, count_trials, types_signal) { - data |> - count(subj_id, response_type, old_new, .drop = FALSE) |> - left_join(count_trials, by = "old_new") |> - mutate( - rate = (n + 0.5) / (n_total + 1), - .by = c(subj_id, response_type) - ) |> - filter(response_type %in% types_signal) |> - mutate( - type = case_match( - old_new, - "old" ~ "hr", - "new" ~ "far" - ) - ) |> - pivot_wider( - id_cols = c(subj_id, response_type), - names_from = type, - values_from = rate - ) |> - mutate(dprime = qnorm(hr) - qnorm(far)) -} diff --git a/R/stats.R b/R/stats.R deleted file mode 100644 index b08fb83..0000000 --- a/R/stats.R +++ /dev/null @@ -1,183 +0,0 @@ -# statistics for subsequent memory effect -extract_stats_sme <- function(dat) { - dat |> - group_by(region_id) |> - group_modify( - ~ .x |> - rcompanion::pairwisePermutationMatrix( - mean_fisher_z ~ response_type_shared, - data = _, - method = "holm" - ) |> - tidy_pairwise() - ) |> - ungroup() |> - mutate(across(c(x, y), ~ factor(.x, c("Rem", "Know", "Unsure", "New")))) -} - -tidy_pairwise <- function(m, name_p_col = "p.value") { - m[c("Unadjusted", "Adjusted")] |> - purrr::map(t) |> # the matrix for unadjusted is upper triangle - purrr::map(stretch, name_value = name_p_col) |> - bind_rows(.id = "type") |> - mutate( - type = case_match( - type, - "Unadjusted" ~ "", - "Adjusted" ~ ".adj" - ) - ) |> - pivot_wider( - names_from = "type", - names_prefix = name_p_col, - values_from = all_of(name_p_col) - ) -} - -# Extract statistics for each type of prediction -extract_stats_pred_perf <- function(dat, mem_perf) { - dat |> - left_join(mem_perf, by = "subj_id", relationship = "many-to-many") |> - summarise( - cor.test(mean_fisher_z, score, alternative = "greater") |> - broom::tidy(), - .by = c(region_id, window_id, index_name) - ) -} - -extract_stats_pred_content <- function(dat, simil_content, ..., - covariate = NULL, - col_rs = mean_fisher_z, - keep_perms = FALSE) { - dat |> - mutate( - map( - {{ col_rs }}, - ~ stats_mantel( - as_dist_vec(.x), simil_content, covariate, - keep_perms = keep_perms - ) - ) |> - list_rbind(), - .keep = "unused" - ) -} - -stats_mantel <- function(x, y, z = NULL, ..., keep_perms = FALSE) { - stats <- if (is.null(z)) { - vegan::mantel(x, y, ...) - } else { - vegan::mantel.partial(x, y, z, ...) - } - stats_tbl <- as_tibble_row(stats[c("statistic", "signif")]) - if (keep_perms) { - stats_tbl$perm <- list(stats$perm) - } - stats_tbl -} - -# extract cluster-based permutation p value -extract_cluster_p <- function(stats_real, stats_perm, ..., - cols_region = region_id, - cols_group = NULL, - cols_perm = starts_with("tar")) { - null_distribution <- extract_stats_cluster( - stats_perm, - ..., - by = c({{ cols_region }}, {{cols_group}}, {{ cols_perm }}), - keep = "largest" - ) |> - summarise( - cluster_mass_perm = max(cluster_mass), - .by = c({{cols_group}}, {{ cols_perm }}) - ) |> - select(!{{ cols_perm }}) |> - chop(cluster_mass_perm) - cluster_real <- extract_stats_cluster( - stats_real, - ..., - by = c({{ cols_region }}, {{ cols_group }}) - ) - data <- if (is.null(substitute(cols_group))) { - bind_cols(cluster_real, null_distribution) - } else { - cols_group_chr <- tidyselect::eval_select( - substitute(cols_group), - cluster_real - ) - inner_join(cluster_real, null_distribution, by = names(cols_group_chr)) - } - data |> - mutate( - p_perm = map2_dbl( - cluster_mass, cluster_mass_perm, - \(real, perm) mean(perm > real) - ), - .keep = "unused" - ) -} - -extract_cluster_p_rps <- function(file, ...) { - dat <- R.matlab::readMat(file) - stats_real <- expand_grid( - window_id = 1:47, - region_id = 1:6 - ) |> - add_column( - p.value = as.vector(dat$P.real), - statistic = as.vector(dat$R.real), - ... - ) - clusters_p <- stats_real |> - extract_stats_cluster(by = region_id) |> - mutate( - p_perm = map_dbl( - cluster_mass, - ~ mean(dat$rsum.max.mean > .x) - ), - ... - ) - lst(stats_real, clusters_p) -} - -extract_stats_cluster <- function(stats, by, - col_p_value = p.value, - col_statistic = statistic, - col_window = window_id, - keep = c("all", "largest"), - alpha = 0.05) { - keep <- match.arg(keep) - stats |> - # order is essential for cluster detection - arrange({{ col_window }}) |> - reframe( - find_cluster({{ col_statistic }}, {{ col_p_value }} < alpha, keep), - .by = {{ by }} - ) -} - -find_cluster <- function(statistic, signif, keep) { - # https://stackoverflow.com/a/43875717/5996475 - rle_signif <- rle(signif) - if (!any(rle_signif$values)) { - return(tibble(start = NA, end = NA, cluster_mass = 0)) - } - end <- cumsum(rle_signif$lengths) - start <- c(1, lag(end)[-1] + 1) - clusters <- tibble( - start = start[rle_signif$values], - end = end[rle_signif$values] - ) |> - mutate( - cluster_mass = map2_dbl( - start, end, - \(start, end) { - sum(statistic[start:end]) - } - ) - ) - if (keep == "largest") { - clusters <- slice_max(clusters, cluster_mass) - } - clusters -} diff --git a/R/targets.R b/R/targets.R deleted file mode 100644 index 61234ae..0000000 --- a/R/targets.R +++ /dev/null @@ -1,36 +0,0 @@ -tar_combine_with_meta <- function(name, targets, cols_targets, ..., - prefix = NULL, - fun_pre = NULL, - fun_post = NULL) { - rlang::check_dots_used() - ischar_name <- tryCatch( - is.character(name) && length(name) == 1L, - error = function(e) FALSE - ) - if (!ischar_name) { - name <- deparse1(substitute(name)) - } - if (is.null(prefix)) { - prefix <- name - } - if (is.null(fun_pre)) { - fun_pre <- \(x) x - } - if (is.null(fun_post)) { - fun_post <- \(x) x - } - tarchetypes::tar_combine_raw( - name, - targets, - command = bquote( - list(!!!.x) |> - lapply(.(rlang::as_function(fun_pre))) |> - bind_rows(.id = "id") |> - # note there is delimiter after prefix should be removed too - mutate(id = str_remove(id, str_c(.(prefix), "."))) |> - separate(id, .(cols_targets), convert = TRUE) |> - .(rlang::as_function(fun_post))() - ), - ... - ) -} diff --git a/R/utils.R b/R/utils.R index 98a34ad..9aa4904 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,75 +1,145 @@ -# paths and files ---- -config_files_rs <- function(type, acq) { - switch(acq, - window = fs::path( - "data", - sprintf("type-%s_acq-%s", type, acq) - ) |> - fs::dir_ls( - recurse = TRUE, - type = "file" +calc_iss <- function(patterns, pattern_semantics) { + patterns |> + mutate( + iss = map_dbl( + pattern, + \(pattern) { + cor(atanh(pattern), pattern_semantics, use = "pairwise") + } ), - trial = , - whole = fs::path( - "data", - sprintf( - "type-%s_acq-%s_rs.parquet", - type, acq - ) - ), - stop("Unknown parameter") - ) + .keep = "unused" + ) } -config_files_pred_perf_rps <- function(index_name_sjt) { - fs::path( - "data/representational_space", - sprintf( - "corr_indiv2grp_rps_spc_%s_rightside.mat", - index_name_sjt +calc_iss_stats <- function(data, ..., .by = c(cca_id, time_id)) { + data |> + summarise( + broom::tidy(t.test(iss, ...)), + .by = {{ .by }} ) - ) } -config_files_pred_content_rps <- function(method, type = c("real", "perm")) { - fs::path( - "data/representational_space", - sprintf( - "res_par-mantel_isc_rps_spc_memory_content%s_%s.csv", - if_else(type == "real", "", "_rand1000"), method +calc_igs <- function(patterns, patterns_group) { + patterns |> + mutate( + igs = map2_dbl( + pattern, cca_id, + \(pat, cca) { + with( + patterns_group, + cor( + pattern[[which(cca_id == cca)]], + pat, + use = "pairwise" + ) + ) + } + ) + ) |> + select(!pattern) +} + +calc_sync_smc <- function(sync_whole_trials, smc) { + sync_whole_trials |> + mutate( + mantel = map(neu_sync, ~ vegan::mantel(.x, smc)), + .keep = "unused" ) - ) } -# programming ---- -select_list <- function(.l, ...) { - pos <- tidyselect::eval_select( - rlang::expr(c(...)), - .l - ) - rlang::set_names(.l[pos], names(pos)) +calc_clusters_stats <- function(stats, stats_permuted, + by = "cca_id", + col_statistic = statistic, + col_p_value = p.value, + col_time_id = time_id, + col_id_permuted = starts_with("tar")) { + clusters <- stats |> + reframe( + find_cluster( + {{ col_statistic }}, + {{ col_p_value }}, + {{ col_time_id }} + ), + .by = all_of(by) + ) + clusters_permuted <- stats_permuted |> + reframe( + find_cluster( + {{ col_statistic }}, + {{ col_p_value }}, + {{ col_time_id }}, + keep = "largest" + ), + .by = c(all_of(by), {{ col_id_permuted }}) + ) + clusters |> + left_join( + clusters_permuted |> + select(cca_id, cluster_mass_perm = cluster_mass) |> + chop(cluster_mass_perm), + by = by + ) |> + mutate( + p_perm = map2_dbl( + cluster_mass_perm, + cluster_mass, + ~ (sum(.x >= .y) + 1) / (length(.x) + 1) + ) + ) } -# distance ---- -as_dist_vec <- function(vec, ..., size = NULL, diag = FALSE, upper = FALSE) { - if (is.null(size)) { - size <- 0.5 + sqrt(0.25 + 2 * length(vec)) +find_cluster <- function(statistic, p.value, + index = NULL, + keep = c("all", "largest"), + alpha = 0.05) { + keep <- match.arg(keep) + # https://stackoverflow.com/a/43875717/5996475 + rle_signif <- rle(p.value < alpha) + if (!any(rle_signif$values)) { + return(tibble(start = NA, end = NA, cluster_mass = 0)) } - stopifnot(all.equal(size, as.integer(size))) - structure( - vec, - class = "dist", - Size = size, - Diag = diag, - Upper = upper, - ... + end <- cumsum(rle_signif$lengths) + start <- c(1, lag(end)[-1] + 1) + clusters <- tibble( + start = start[rle_signif$values], + end = end[rle_signif$values] + ) |> + mutate( + cluster_mass = map2_dbl( + start, end, + \(start, end) { + sum(statistic[start:end]) + } + ) + ) + if (!is.null(index)) { + clusters$start <- index[clusters$start] + clusters$end <- index[clusters$end] + } + if (keep == "largest") { + clusters <- slice_max(clusters, cluster_mass) + } + clusters +} + +convert_p2_p1 <- function(statistic, p.value, + alternative = c("greater", "less")) { + alternative <- match.arg(alternative) + ifelse( + xor(alternative == "greater", statistic > 0), + 1 - p.value / 2, + p.value / 2 ) } -stretch <- function(mat, name_value = "val") { - d <- as.dist(mat) - combn(names(d), 2, simplify = FALSE) |> - do.call(rbind, args = _) |> - as_tibble(.name_repair = ~ c("x", "y")) |> - mutate("{name_value}" := as.vector(d)) +tidy_mantel <- function(mantel) { + tibble( + statistic = mantel$statistic, + p.value = mantel$signif, + method = mantel$method + ) } + +get_resid <- function(y, x) { + resid(lm(y ~ x, na.action = na.exclude)) +} \ No newline at end of file diff --git a/_scripts/analyze_inter_subject.R b/_scripts/analyze_inter_subject.R deleted file mode 100644 index 398e51c..0000000 --- a/_scripts/analyze_inter_subject.R +++ /dev/null @@ -1,514 +0,0 @@ -library(targets) - -tar_option_set( - packages = c("tidyverse"), - controller = crew::crew_controller_local( - name = "local", - workers = 12 - ), - garbage_collection = TRUE, - memory = "transient", - retrieval = "worker", - storage = "worker" -) - -# for whole times series analysis, we would remove the first 200 ms baseline -index_onset <- floor(256 * (200 / 1000)) - -calc_iss <- function(patterns, pattern_semantics) { - patterns |> - mutate( - iss = map_dbl( - pattern, - \(pattern) { - cor(atanh(pattern), pattern_semantics, use = "pairwise") - } - ), - .keep = "unused" - ) -} - -calc_iss_stats <- function(data, ..., .by = c(cca_id, time_id)) { - data |> - summarise( - broom::tidy(t.test(iss, ...)), - .by = {{ .by }} - ) -} - -calc_igs <- function(patterns, patterns_group) { - patterns |> - mutate( - igs = map2_dbl( - pattern, cca_id, - \(pat, cca) { - with( - patterns_group, - cor( - pattern[[which(cca_id == cca)]], - pat, - use = "pairwise" - ) - ) - } - ) - ) |> - select(!pattern) -} - -calc_sync_smc <- function(sync_whole_trials, smc) { - sync_whole_trials |> - mutate( - mantel = map(neu_sync, ~ vegan::mantel(.x, smc)), - .keep = "unused" - ) -} - -calc_clusters_stats <- function(stats, stats_permuted, - by = "cca_id", - col_statistic = statistic, - col_p_value = p.value, - col_time_id = time_id, - col_id_permuted = starts_with("tar")) { - clusters <- stats |> - reframe( - find_cluster( - {{ col_statistic }}, - {{ col_p_value }}, - {{ col_time_id }} - ), - .by = all_of(by) - ) - clusters_permuted <- stats_permuted |> - reframe( - find_cluster( - {{ col_statistic }}, - {{ col_p_value }}, - {{ col_time_id }}, - keep = "largest" - ), - .by = c(all_of(by), {{ col_id_permuted }}) - ) - clusters |> - left_join( - clusters_permuted |> - select(cca_id, cluster_mass_perm = cluster_mass) |> - chop(cluster_mass_perm), - by = by - ) |> - mutate( - p_perm = map2_dbl( - cluster_mass_perm, - cluster_mass, - ~ (sum(.x >= .y) + 1) / (length(.x) + 1) - ) - ) -} - -find_cluster <- function(statistic, p.value, - index = NULL, - keep = c("all", "largest"), - alpha = 0.05) { - keep <- match.arg(keep) - # https://stackoverflow.com/a/43875717/5996475 - rle_signif <- rle(p.value < alpha) - if (!any(rle_signif$values)) { - return(tibble(start = NA, end = NA, cluster_mass = 0)) - } - end <- cumsum(rle_signif$lengths) - start <- c(1, lag(end)[-1] + 1) - clusters <- tibble( - start = start[rle_signif$values], - end = end[rle_signif$values] - ) |> - mutate( - cluster_mass = map2_dbl( - start, end, - \(start, end) { - sum(statistic[start:end]) - } - ) - ) - if (!is.null(index)) { - clusters$start <- index[clusters$start] - clusters$end <- index[clusters$end] - } - if (keep == "largest") { - clusters <- slice_max(clusters, cluster_mass) - } - clusters -} - -convert_p2_p1 <- function(statistic, p.value, - alternative = c("greater", "less")) { - alternative <- match.arg(alternative) - ifelse( - xor(alternative == "greater", statistic > 0), - 1 - p.value / 2, - p.value / 2 - ) -} - -tidy_mantel <- function(mantel) { - tibble( - statistic = mantel$statistic, - p.value = mantel$signif, - method = mantel$method - ) -} - -get_resid <- function(y, x) { - resid(lm(y ~ x, na.action = na.exclude)) -} - -list( - tar_target( - file_cca_y, - "data/CorCAExtra/cca_y_subjs206.parquet", - format = "file" - ), - tar_target( - subj_id_loop, - arrow::open_dataset(file_cca_y) |> - distinct(subj_id) |> - pull(subj_id, as_vector = TRUE) - ), - tar_target( - patterns_indiv_dynamic, - arrow::open_dataset(file_cca_y) |> - filter(subj_id == subj_id_loop) |> - collect() |> - pivot_wider(names_from = trial_id, values_from = y) |> - reframe( - pick(!time_id) |> - slider::slide( - \(x) as.dist(cor(x, use = "pairwise")), - .before = 25, - .after = 25, - .step = 5, - .complete = TRUE - ) |> - enframe(name = "time_id", value = "pattern") |> - filter(!map_lgl(pattern, is.null)), - .by = c(subj_id, cca_id) - ), - pattern = map(subj_id_loop) - ), - tar_target(file_seq, "config/sem_sequence.mat", format = "file"), - tar_target(file_w2v, "data/stimuli/words_w2v.txt", format = "file"), - tar_target( - pattern_semantics, - raveio::read_mat(file_seq)$SM[, 1:2] |> - as_tibble(.name_repair = ~ c("trial_id", "word_id")) |> - left_join( - read_table(file_w2v, show_col_types = FALSE, col_names = FALSE), - by = c("word_id" = "X1") - ) |> - filter(trial_id > 0) |> - select(-word_id, -X2) |> - column_to_rownames("trial_id") |> - proxy::simil(method = "cosine") - ), - tar_target(data_iss_dynamic, calc_iss(patterns_indiv_dynamic, pattern_semantics)), - tar_target(stats_iss_dynamic, calc_iss_stats(data_iss_dynamic)), - tarchetypes::tar_rep( - data_iss_dynamic_permuted, - calc_iss( - patterns_indiv_dynamic, - seriation::permute(pattern_semantics, sample.int(150L)) - ), - reps = 10, - batches = 100, - iteration = "list" - ), - tarchetypes::tar_rep2( - stats_iss_dynamic_permuted, - calc_iss_stats(data_iss_dynamic_permuted, alternative = "greater"), - data_iss_dynamic_permuted - ), - tar_target( - clusters_stats_iss, - stats_iss_dynamic |> - mutate(p.value = convert_p2_p1(statistic, p.value)) |> - calc_clusters_stats(stats_iss_dynamic_permuted) - ), - tar_target( - patterns_indiv_whole, - arrow::open_dataset(file_cca_y) |> - filter(time_id >= index_onset) |> - collect() |> - pivot_wider(names_from = trial_id, values_from = y) |> - summarise( - pattern = list(as.dist(cor(pick(matches("^\\d+$")), use = "pairwise"))), - .by = c(subj_id, cca_id) - ) - ), - tar_target(data_iss_whole, calc_iss(patterns_indiv_whole, pattern_semantics)), - tar_target(stats_iss_whole, calc_iss_stats(data_iss_whole, .by = cca_id)), - tar_target( - iss_comparison, - data_iss_whole |> - mutate(cca_id = factor(cca_id)) |> - lmerTest::lmer(iss ~ cca_id + (1 | subj_id), data = _) |> - emmeans::emmeans( - ~cca_id, - lmer.df = "satterthwaite", - lmerTest.limit = Inf - ) |> - emmeans::contrast("pairwise") |> - broom::tidy() |> - separate_wider_delim( - contrast, " - ", - names = c("start", "end") - ) |> - mutate(across(c("start", "end"), parse_number)) - ), - tar_target( - patterns_group_whole, - # `na.rm` not supported in `open_dataset()` - # https://github.com/apache/arrow/issues/44089 - arrow::read_parquet(file_cca_y) |> - filter(time_id >= index_onset) |> - summarise( - y_avg = mean(y, na.rm = TRUE), - .by = c(cca_id, trial_id, time_id) - ) |> - arrange(trial_id) |> - pivot_wider( - names_from = trial_id, - values_from = y_avg - ) |> - summarise( - pattern = list( - atanh(as.dist(cor(pick(matches("\\d+")), use = "pairwise"))) - ), - .by = cca_id - ) - ), - tar_target( - data_igs_whole, - calc_igs(patterns_indiv_whole, patterns_group_whole) - ), - tar_target( - data_igs_partial_whole, - calc_igs( - patterns_indiv_whole |> - mutate(pattern = map(pattern, get_resid, pattern_semantics)), - patterns_group_whole |> - mutate(pattern = map(pattern, get_resid, pattern_semantics)) - ) - ), - tarchetypes::tar_file_read( - subjs, - "data/subj_206.txt", - read = scan(!!.x) - ), - tarchetypes::tar_file_read( - mem_perf, - "data/behav/retrieval.tsv", - read = read_tsv(!!.x, show_col_types = FALSE) |> - mutate(acc = xor(old_new == 1, resp >= 3)) |> - preproc.iquizoo:::calc_sdt( - type_signal = 1, - by = "subj", - name_acc = "acc", - name_type = "old_new" - ) |> - mutate(subj_id = match(subj, subjs)) |> - filter(!is.na(subj_id)) |> - select(subj_id, dprime) - ), - tar_target( - stats_iss_mem_whole, - data_iss_whole |> - left_join(mem_perf, by = "subj_id") |> - summarise(broom::tidy(cor.test(atanh(iss), dprime)), .by = cca_id) - ), - tar_target( - comparison_iss_mem, - expand_grid(start = 1:3, end = 1:3) |> - filter(start > end) |> - mutate( - map2( - start, end, - \(x, y) { - with( - stats_iss_mem_whole, - as_tibble( - psych::r.test(206, estimate[[x]], estimate[[y]])[c("z", "p")] - ) - ) - } - ) |> - list_rbind() - ) - ), - tar_target( - stats_iss_mem_dynamic, - data_iss_dynamic |> - left_join(mem_perf, by = "subj_id") |> - summarise( - broom::tidy(cor.test(iss, dprime, use = "pairwise")), - .by = c(cca_id, time_id) - ) - ), - tarchetypes::tar_rep( - stats_iss_mem_dynamic_permuted, - data_iss_dynamic |> - left_join( - mem_perf |> - mutate(subj_id = sample(subj_id)), - by = "subj_id" - ) |> - summarise( - cor.test(iss, dprime, alternative = "greater", use = "pairwise") |> - broom::tidy(), - .by = c(cca_id, time_id) - ), - reps = 10, - batches = 100 - ), - tar_target( - clusters_stats_iss_mem, - stats_iss_mem_dynamic |> - mutate(p.value = convert_p2_p1(statistic, p.value)) |> - calc_clusters_stats(stats_iss_mem_dynamic_permuted) - ), - tar_target( - cca_y_halves_trials, - arrow::open_dataset(file_cca_y) |> - mutate(half = if_else(trial_id <= 75, "first", "second")) |> - filter(!is.nan(y)) |> - count(subj_id, cca_id, time_id, half) |> - distinct(subj_id, cca_id, half, n) |> - collect() - ), - tar_target( - cca_y_halves, - arrow::open_dataset(file_cca_y) |> - mutate(half = if_else(trial_id <= 75, "first", "second")) |> - filter(!is.nan(y)) |> - summarise( - y_avg = mean(y), - .by = c(subj_id, cca_id, time_id, half) - ) |> - collect() - ), - tar_target( - sync_inter_subjs, - cca_y_halves |> - filter(time_id >= index_onset) |> - pivot_wider(names_from = subj_id, values_from = y_avg) |> - reframe( - cor(pick(matches("^\\d+$")), use = "pairwise") |> - as_tibble(rownames = "row") |> - pivot_longer(cols = -row, names_to = "col", values_to = "r") |> - mutate(across(c(row, col), as.integer)) |> - filter(row < col), - .by = c(cca_id, half) - ) - ), - tar_target( - sync_inter_halves, - cca_y_halves |> - filter(time_id >= index_onset) |> - pivot_wider(names_from = half, values_from = y_avg) |> - reframe( - { - first <- pick(subj_id, time_id, first) |> - pivot_wider(names_from = subj_id, values_from = first) |> - column_to_rownames("time_id") - second <- pick(subj_id, time_id, second) |> - pivot_wider(names_from = subj_id, values_from = second) |> - column_to_rownames("time_id") - cor(first, second, use = "pairwise") |> - as_tibble(rownames = "first") |> - pivot_longer(cols = -first, names_to = "second", values_to = "r") |> - mutate(across(c(first, second), as.integer)) - }, - .by = cca_id - ) - ), - tar_target( - whole_erps, - arrow::read_parquet(file_cca_y) |> - summarise( - y_avg = mean(y, na.rm = TRUE), - .by = c(subj_id, cca_id, time_id) - ) - ), - tarchetypes::tar_file_read( - smc, - "data/behav/simil.rds", - read = readRDS(!!.x)$mat[[4]] - ), - tar_target( - sync_whole_trials, - whole_erps |> - filter(time_id >= index_onset) |> - pivot_wider(names_from = subj_id, values_from = y_avg) |> - summarise( - neu_sync = list(cor(pick(matches("^\\d+$")), use = "pairwise")), - .by = cca_id - ) - ), - tar_target( - sync_smc, - calc_sync_smc(sync_whole_trials, smc) - ), - tar_target( - sync_dynamic, - whole_erps |> - pivot_wider(names_from = subj_id, values_from = y_avg) |> - reframe( - pick(!time_id) |> - slider::slide( - \(x) as.dist(cor(x, use = "pairwise")), - .before = 25, - .after = 25, - .step = 5, - .complete = TRUE - ) |> - enframe(name = "time_id", value = "neu_sync") |> - filter(!map_lgl(neu_sync, is.null)), - .by = cca_id - ) - ), - tar_target( - sync_smc_dynamic, - calc_sync_smc(sync_dynamic, smc) - ), - tarchetypes::tar_rep( - sync_smc_dynamic_permuted, - calc_sync_smc( - sync_dynamic, - seriation::permute(smc, sample.int(206L)) - ), - reps = 10, - batches = 100 - ), - tarchetypes::tar_rep2( - stats_sync_smc_dynamic_permuted, - sync_smc_dynamic_permuted |> - mutate( - map(mantel, tidy_mantel) |> - list_rbind(), - .keep = "unused" - ), - sync_smc_dynamic_permuted - ), - tar_target( - stats_sync_smc_dynamic, - sync_smc_dynamic |> - mutate( - map(mantel, tidy_mantel) |> - list_rbind(), - .keep = "unused" - ) - ), - tar_target( - clusters_stats_sync_smc_dynamic, - stats_sync_smc_dynamic |> - calc_clusters_stats(stats_sync_smc_dynamic_permuted) - ) -) diff --git a/_targets.R b/_targets.R index cdce5f0..bc3274c 100644 --- a/_targets.R +++ b/_targets.R @@ -1,346 +1,370 @@ library(targets) + tar_option_set( packages = c("tidyverse"), - format = "qs", controller = crew::crew_controller_local( name = "local", - workers = 20 + workers = 12 ), - memory = "transient" + garbage_collection = TRUE, + memory = "transient", + retrieval = "worker", + storage = "worker" ) -if (Sys.info()["sysname"] == "Windows") { - future::plan(future.callr::callr) -} else { - future::plan(future::multicore) -} + tar_source() -# config: check inter-subject similarity ---- -if (FALSE) { # we do not need to compare windowed results - # compare abstract and concrete and subsequent memory effect - inter_check_window <- tarchetypes::tar_map( - hypers_rs_window |> - dplyr::filter(type == "inter"), - names = c(type, acq, region), - tar_target( - rsa_inter_common_trials, - lapply( - tar_name_files, - filter_shared, - response_shared - ), - pattern = map(tar_name_files) - ), - tar_target( - summary_word_cat, - lapply( - rsa_inter_common_trials, - summarise, - mean_se(fisher_z), - .by = c(region_id, word_category, window_id) - ) |> - list_rbind(), - pattern = map(rsa_inter_common_trials) - ), - tar_target( - summary_word_mem, - lapply( - rsa_inter_common_trials, - summarise, - mean_se(fisher_z), - .by = c(region_id, response_type_shared, window_id) - ) |> - list_rbind(), - pattern = map(rsa_inter_common_trials) - ) - ) -} +# for whole times series analysis, we would remove the first 200 ms baseline +index_onset <- floor(256 * (200 / 1000)) -# config: predict memory performance ---- -targets_pred_perf <- tarchetypes::tar_map( - hypers_pred_perf, - names = index_name, +list( + tar_target( + file_cca_y, + "data/CorCAExtra/cca_y_subjs206.parquet", + format = "file" + ), + tar_target( + subj_id_loop, + arrow::open_dataset(file_cca_y) |> + distinct(subj_id) |> + pull(subj_id, as_vector = TRUE) + ), tar_target( - cur_mem_perf, - filter(mem_perf, .data[["index_name"]] == index_name) + patterns_indiv_dynamic, + arrow::open_dataset(file_cca_y) |> + filter(subj_id == subj_id_loop) |> + collect() |> + pivot_wider(names_from = trial_id, values_from = y) |> + reframe( + pick(!time_id) |> + slider::slide( + \(x) as.dist(cor(x, use = "pairwise")), + .before = 25, + .after = 25, + .step = 5, + .complete = TRUE + ) |> + enframe(name = "time_id", value = "pattern") |> + filter(!map_lgl(pattern, is.null)), + .by = c(subj_id, cca_id) + ), + pattern = map(subj_id_loop) ), + tar_target(file_seq, "config/sem_sequence.mat", format = "file"), + tar_target(file_w2v, "data/stimuli/words_w2v.txt", format = "file"), tar_target( - stats_pred_perf, - extract_stats_pred_perf(avg_rs_group_window, cur_mem_perf) + pattern_semantics, + raveio::read_mat(file_seq)$SM[, 1:2] |> + as_tibble(.name_repair = ~ c("trial_id", "word_id")) |> + left_join( + read_table(file_w2v, show_col_types = FALSE, col_names = FALSE), + by = c("word_id" = "X1") + ) |> + filter(trial_id > 0) |> + select(-word_id, -X2) |> + column_to_rownames("trial_id") |> + proxy::simil(method = "cosine") ), + tar_target(data_iss_dynamic, calc_iss(patterns_indiv_dynamic, pattern_semantics)), + tar_target(stats_iss_dynamic, calc_iss_stats(data_iss_dynamic)), tarchetypes::tar_rep( - stats_pred_perf_perm, - extract_stats_pred_perf( - avg_rs_group_window, - permutate_behav(cur_mem_perf, "subj_id") + data_iss_dynamic_permuted, + calc_iss( + patterns_indiv_dynamic, + seriation::permute(pattern_semantics, sample.int(150L)) ), - batches = num_batches, - reps = num_reps + reps = 10, + batches = 100, + iteration = "list" + ), + tarchetypes::tar_rep2( + stats_iss_dynamic_permuted, + calc_iss_stats(data_iss_dynamic_permuted, alternative = "greater"), + data_iss_dynamic_permuted ), tar_target( - clusters_p_pred_perf, - extract_cluster_p(stats_pred_perf, stats_pred_perf_perm) |> - add_column(index_name = index_name, .before = 1L) - ) -) - -# config: predict shared memory content ---- -targets_pred_content <- tarchetypes::tar_map( - hypers_prep_shared, - names = c(resp_trans, include), + clusters_stats_iss, + stats_iss_dynamic |> + mutate(p.value = convert_p2_p1(statistic, p.value)) |> + calc_clusters_stats(stats_iss_dynamic_permuted) + ), tar_target( - resp_mat, - events_retrieval |> - transform_resp() |> - prepare_resp_mat(include) - ), - tarchetypes::tar_map( - hypers_dist_shared, - tar_target( - simil_content, - calc_dist_resp_mat(resp_mat, method = method), - deployment = "main" - ), - tarchetypes::tar_map( - tibble::tribble( - ~mantel, ~covariate, - "mantel", NULL, - "partial", quote(dist_mem_perf) - ), - names = mantel, - tar_target( - stats_pred_content, - extract_stats_pred_content( - avg_rs_inter_trial, - simil_content, - covariate = covariate, - keep_perms = TRUE - ) - ) - ) - ) -) - -# config: representational space ---- -targets_rps <- c( - tarchetypes::tar_map( - hypers_pred_perf, - names = index_name, - tar_target( - file_pred_perf_rps, - config_files_pred_perf_rps(index_name_sjt) - ), - tar_target( - stats_pred_perf_rps, - extract_cluster_p_rps( - file_pred_perf_rps, - index_name = index_name + patterns_indiv_whole, + arrow::open_dataset(file_cca_y) |> + filter(time_id >= index_onset) |> + collect() |> + pivot_wider(names_from = trial_id, values_from = y) |> + summarise( + pattern = list(as.dist(cor(pick(matches("^\\d+$")), use = "pairwise"))), + .by = c(subj_id, cca_id) ) - ) ), - tarchetypes::tar_map( - hypers_dist_shared, - tar_target( - file_pred_content_rps_real, - config_files_pred_content_rps(method, type = "real") - ), - tar_target( - file_pred_content_rps_perm, - config_files_pred_content_rps(method, type = "perm") - ), - tar_target( - clusters_p_pred_content_rps, - extract_cluster_p( - read_csv(file_pred_content_rps_real, show_col_types = FALSE) |> - rename(statistic.r = statistic_r), - read_csv(file_pred_content_rps_perm, show_col_types = FALSE), - cols_region = region, - cols_group = c("method", "include", "mantel_type"), - cols_perm = perm_id, - col_window = time, - col_statistic = statistic.r + tar_target(data_iss_whole, calc_iss(patterns_indiv_whole, pattern_semantics)), + tar_target(stats_iss_whole, calc_iss_stats(data_iss_whole, .by = cca_id)), + tar_target( + iss_comparison, + data_iss_whole |> + mutate(cca_id = factor(cca_id)) |> + lmerTest::lmer(iss ~ cca_id + (1 | subj_id), data = _) |> + emmeans::emmeans( + ~cca_id, + lmer.df = "satterthwaite", + lmerTest.limit = Inf + ) |> + emmeans::contrast("pairwise") |> + broom::tidy() |> + separate_wider_delim( + contrast, " - ", + names = c("start", "end") + ) |> + mutate(across(c("start", "end"), parse_number)) + ), + tar_target( + patterns_group_whole, + # `na.rm` not supported in `open_dataset()` + # https://github.com/apache/arrow/issues/44089 + arrow::read_parquet(file_cca_y) |> + filter(time_id >= index_onset) |> + summarise( + y_avg = mean(y, na.rm = TRUE), + .by = c(cca_id, trial_id, time_id) + ) |> + arrange(trial_id) |> + pivot_wider( + names_from = trial_id, + values_from = y_avg + ) |> + summarise( + pattern = list( + atanh(as.dist(cor(pick(matches("\\d+")), use = "pairwise"))) + ), + .by = cca_id ) + ), + tar_target( + data_igs_whole, + calc_igs(patterns_indiv_whole, patterns_group_whole) + ), + tar_target( + data_igs_partial_whole, + calc_igs( + patterns_indiv_whole |> + mutate(pattern = map(pattern, get_resid, pattern_semantics)), + patterns_group_whole |> + mutate(pattern = map(pattern, get_resid, pattern_semantics)) ) - ) -) - -# main targets definition ---- -list( - # prepare files and paths ---- + ), tarchetypes::tar_file_read( - events_encoding, - "data/group_task-wordencoding_events.csv", - read = readr::read_csv(!!.x, show_col_types = FALSE) + subjs, + "data/subj_206.txt", + read = scan(!!.x) ), tarchetypes::tar_file_read( - events_retrieval, - "data/group_task-wordretrieval_events.csv", - read = readr::read_csv(!!.x, show_col_types = FALSE) - ), - tarchetypes::tar_eval( - tarchetypes::tar_files_input( - tar_name_path, - config_files_rs(type, acq), - batches = batches_file - ), - hypers_rs - ), - tarchetypes::tar_map( - dplyr::filter(hypers_rs, acq != "whole"), - names = c(type, acq), - tar_target( - avg_rs, - lapply( - tar_name_path, - average_rs_trials, - scalar_rs = type == "group" + mem_perf, + "data/behav/retrieval.tsv", + read = read_tsv(!!.x, show_col_types = FALSE) |> + mutate(acc = xor(old_new == 1, resp >= 3)) |> + preproc.iquizoo:::calc_sdt( + type_signal = 1, + by = "subj", + name_acc = "acc", + name_type = "old_new" ) |> - list_rbind(), - pattern = map(tar_name_path) - ) + mutate(subj_id = match(subj, subjs)) |> + filter(!is.na(subj_id)) |> + select(subj_id, dprime) ), - # check inter-subject similarity ---- tar_target( - response_shared, - extract_response_shared(events_encoding, events_retrieval) + stats_iss_mem_whole, + data_iss_whole |> + left_join(mem_perf, by = "subj_id") |> + summarise(broom::tidy(cor.test(atanh(iss), dprime)), .by = cca_id) ), tar_target( - rsa_inter_common_trials, - filter_shared(file_rs_inter_trial, response_shared) + comparison_iss_mem, + expand_grid(start = 1:3, end = 1:3) |> + filter(start > end) |> + mutate( + map2( + start, end, + \(x, y) { + with( + stats_iss_mem_whole, + as_tibble( + psych::r.test(206, estimate[[x]], estimate[[y]])[c("z", "p")] + ) + ) + } + ) |> + list_rbind() + ) ), tar_target( - rsa_inter_avg_by_category, - rsa_inter_common_trials |> + stats_iss_mem_dynamic, + data_iss_dynamic |> + left_join(mem_perf, by = "subj_id") |> summarise( - mean_fisher_z = mean(fisher_z, na.rm = TRUE), - .by = c(region_id, subj_id_col, subj_id_row, word_category) + broom::tidy(cor.test(iss, dprime, use = "pairwise")), + .by = c(cca_id, time_id) ) ), - tar_target( - rsa_inter_avg_by_resp, - rsa_inter_common_trials |> + tarchetypes::tar_rep( + stats_iss_mem_dynamic_permuted, + data_iss_dynamic |> + left_join( + mem_perf |> + mutate(subj_id = sample(subj_id)), + by = "subj_id" + ) |> summarise( - mean_fisher_z = mean(fisher_z, na.rm = TRUE), - .by = c(region_id, subj_id_col, subj_id_row, response_type_shared) - ) + cor.test(iss, dprime, alternative = "greater", use = "pairwise") |> + broom::tidy(), + .by = c(cca_id, time_id) + ), + reps = 10, + batches = 100 ), tar_target( - stats_rsa_inter_by_resp, - extract_stats_sme(rsa_inter_avg_by_resp) - ), - if (FALSE) list( - inter_check_window, - tarchetypes::tar_combine( - summary_word_cat_rsa_inter_common_trials_window, - inter_check_window$summary_word_cat - ), - tarchetypes::tar_combine( - summary_word_mem_rsa_inter_common_trials_window, - inter_check_window$summary_word_mem - ) + clusters_stats_iss_mem, + stats_iss_mem_dynamic |> + mutate(p.value = convert_p2_p1(statistic, p.value)) |> + calc_clusters_stats(stats_iss_mem_dynamic_permuted) ), - # predict memory performance ---- - tar_target(mem_perf, calc_mem_perf(events_retrieval)), - tar_target(dist_mem_perf, calc_dist_mem_perf(mem_perf)), - targets_pred_perf, - tarchetypes::tar_combine( - stats_pred_perf, - targets_pred_perf$stats_pred_perf - ), - tarchetypes::tar_combine( - clusters_p_pred_perf, - targets_pred_perf$clusters_p_pred_perf - ), - # predict shared memory content ---- - targets_pred_content, - tar_combine_with_meta( - stats_pred_content, - select_list( - targets_pred_content, - starts_with("stats_pred_content") - ), - cols_targets = c("mantel", "method", "resp_trans", "include"), - prefix = "stats_pred_content" + tar_target( + cca_y_halves_trials, + arrow::open_dataset(file_cca_y) |> + mutate(half = if_else(trial_id <= 75, "first", "second")) |> + filter(!is.nan(y)) |> + count(subj_id, cca_id, time_id, half) |> + distinct(subj_id, cca_id, half, n) |> + collect() ), tar_target( - file_stats_pred_content_real, - "data/pred_content/res_par-mantel_isc_rps_trial_avg_smc_gower.csv", - format = "file" + cca_y_halves, + arrow::open_dataset(file_cca_y) |> + mutate(half = if_else(trial_id <= 75, "first", "second")) |> + filter(!is.nan(y)) |> + summarise( + y_avg = mean(y), + .by = c(subj_id, cca_id, time_id, half) + ) |> + collect() ), - tarchetypes::tar_group_by( - stats_pred_content_real, - read_csv(file_stats_pred_content_real, show_col_types = FALSE) |> - rename(statistic.r = statistic_r), - mantel_type, method, include + tar_target( + sync_inter_subjs, + cca_y_halves |> + filter(time_id >= index_onset) |> + pivot_wider(names_from = subj_id, values_from = y_avg) |> + reframe( + cor(pick(matches("^\\d+$")), use = "pairwise") |> + as_tibble(rownames = "row") |> + pivot_longer(cols = -row, names_to = "col", values_to = "r") |> + mutate(across(c(row, col), as.integer)) |> + filter(row < col), + .by = c(cca_id, half) + ) ), tar_target( - file_stats_pred_content_perm, - "data/pred_content/res_par-mantel_isc_rps_trial-avg_smc_rand1000_gower.csv", - format = "file" + sync_inter_halves, + cca_y_halves |> + filter(time_id >= index_onset) |> + pivot_wider(names_from = half, values_from = y_avg) |> + reframe( + { + first <- pick(subj_id, time_id, first) |> + pivot_wider(names_from = subj_id, values_from = first) |> + column_to_rownames("time_id") + second <- pick(subj_id, time_id, second) |> + pivot_wider(names_from = subj_id, values_from = second) |> + column_to_rownames("time_id") + cor(first, second, use = "pairwise") |> + as_tibble(rownames = "first") |> + pivot_longer(cols = -first, names_to = "second", values_to = "r") |> + mutate(across(c(first, second), as.integer)) + }, + .by = cca_id + ) + ), + tar_target( + whole_erps, + arrow::read_parquet(file_cca_y) |> + summarise( + y_avg = mean(y, na.rm = TRUE), + .by = c(subj_id, cca_id, time_id) + ) ), - tarchetypes::tar_group_by( - stats_pred_content_perm, - read_csv(file_stats_pred_content_perm, show_col_types = FALSE), - mantel_type, method, include + tarchetypes::tar_file_read( + smc, + "data/behav/simil.rds", + read = readRDS(!!.x)$mat[[4]] ), tar_target( - clusters_p_pred_content, - extract_cluster_p( - stats_pred_content_real, - stats_pred_content_perm, - cols_group = c(method, include, mantel_type), - cols_perm = perm_id, - col_statistic = statistic.r - ), - pattern = map(stats_pred_content_real, stats_pred_content_perm) - ), - # representational space ---- - targets_rps, - tarchetypes::tar_combine( - stats_pred_perf_rps, - targets_rps$stats_pred_perf_rps, - command = { - res <- list(!!!.x) - setNames(nm = c("stats_real", "clusters_p")) |> - purrr::map( - ~ purrr::map(res, .x) |> - bind_rows() - ) - } - ), - tarchetypes::tar_combine( - clusters_p_pred_content_rps, - targets_rps$clusters_p_pred_content_rps + sync_whole_trials, + whole_erps |> + filter(time_id >= index_onset) |> + pivot_wider(names_from = subj_id, values_from = y_avg) |> + summarise( + neu_sync = list(cor(pick(matches("^\\d+$")), use = "pairwise")), + .by = cca_id + ) ), tar_target( - file_pred_content_rps_real_manhattan, - fs::path( - "data", "representational_space", - "res_par-mantel_isc_rps_spc_memory_content_rem_allitems.csv" - ), - format = "file" + sync_smc, + calc_sync_smc(sync_whole_trials, smc) ), tar_target( - file_pred_content_rps_perm_manhattan, - fs::path( - "data", "representational_space", - "res_par-mantel_isc_rps_spc_memory_content_rand1000_rem_allitems.csv" + sync_dynamic, + whole_erps |> + pivot_wider(names_from = subj_id, values_from = y_avg) |> + reframe( + pick(!time_id) |> + slider::slide( + \(x) as.dist(cor(x, use = "pairwise")), + .before = 25, + .after = 25, + .step = 5, + .complete = TRUE + ) |> + enframe(name = "time_id", value = "neu_sync") |> + filter(!map_lgl(neu_sync, is.null)), + .by = cca_id + ) + ), + tar_target( + sync_smc_dynamic, + calc_sync_smc(sync_dynamic, smc) + ), + tarchetypes::tar_rep( + sync_smc_dynamic_permuted, + calc_sync_smc( + sync_dynamic, + seriation::permute(smc, sample.int(206L)) ), - format = "file" + reps = 10, + batches = 100 + ), + tarchetypes::tar_rep2( + stats_sync_smc_dynamic_permuted, + sync_smc_dynamic_permuted |> + mutate( + map(mantel, tidy_mantel) |> + list_rbind(), + .keep = "unused" + ), + sync_smc_dynamic_permuted ), tar_target( - clusters_p_pred_content_rps_manhattan, - extract_cluster_p( - read_csv(file_pred_content_rps_real_manhattan, show_col_types = FALSE) |> - rename(statistic.r = statistic_r), - read_csv(file_pred_content_rps_perm_manhattan, show_col_types = FALSE), - cols_region = region, - cols_group = c(memory_precision, mantel_type), - cols_perm = perm_id, - col_window = time, - col_statistic = statistic.r - ) + stats_sync_smc_dynamic, + sync_smc_dynamic |> + mutate( + map(mantel, tidy_mantel) |> + list_rbind(), + .keep = "unused" + ) ), - # render website ---- - tarchetypes::tar_quarto(website) + tar_target( + clusters_stats_sync_smc_dynamic, + stats_sync_smc_dynamic |> + calc_clusters_stats(stats_sync_smc_dynamic_permuted) + ) )