updated function inputs to better match fdasrvf + some clean up of do…

…cumentation + adding badges
sandialabs · Jan 11, 2025 · feab578 · feab578
1 parent 147874c
commit feab578
Show file tree

Hide file tree

Showing 37 changed files with 723 additions and 89 deletions.
diff --git a/.github/.gitignore b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -0,0 +1,51 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+
+name: R-CMD-check.yaml
+
+permissions: read-all
+
+jobs:
+  R-CMD-check:
+    runs-on: ${{ matrix.config.os }}
+
+    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - {os: macos-latest,   r: 'release'}
+          - {os: windows-latest, r: 'release'}
+          - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
+          - {os: ubuntu-latest,   r: 'release'}
+          - {os: ubuntu-latest,   r: 'oldrel-1'}
+
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: ${{ matrix.config.r }}
+          http-user-agent: ${{ matrix.config.http-user-agent }}
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck
+          needs: check
+
+      - uses: r-lib/actions/check-r-package@v2
+        with:
+          upload-snapshots: true
+          build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: veesa
 Type: Package
 Title: VEESA Pipeline for Explainable Machine Learning with Functional Data
-Version: 0.1.2
+Version: 0.1.3
 Author: Katherine Goode, J. Derek Tucker
 Maintainer: Katherine Goode <[email protected]>
 Description: Contains functions for applying the VEESA pipeline in R.
@@ -18,3 +18,5 @@ Imports:
   stats,
   stringr,
   tidyr
+Suggests:
+  randomForest
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# veesa 0.1.3
+
+- Updated inputs in prep_training_data to match fdasrvf
+- Added examples to documentation
+- Cleaned up wording in documentation a bit
+
 # veesa 0.1.2
 
 - Added shifted peaks data
@@ -10,4 +16,4 @@
 
 # veesa 0.1.0
 
-Initial version of package
+- Initial version of package
diff --git a/R/compute_pfi.R b/R/compute_pfi.R
@@ -16,7 +16,84 @@
 #' @importFrom purrr map2
 #' @importFrom stats setNames
 #' 
-#' @return xxx
+#' @return List containing
+#' \itemize{
+#'   \item \code{pfi}: Vector of PFI values (averaged over replicates)
+#'   \item \code{pfi_single_reps}: Matrix of containing the feature importance values from each replicate (rows associated with reps; columns associated with data observations)
+#' }
+#' 
+#' @examples
+#' # Load packages
+#' library(dplyr)
+#' library(tidyr)
+#' library(randomForest)
+#' 
+#' # Select a subset of functions from shifted peaks data
+#' sub_ids <- 
+#'   shifted_peaks$data |>
+#'   select(data, group, id) |> 
+#'   distinct() |>
+#'   group_by(data, group) |> 
+#'   slice(1:5) |> 
+#'   ungroup()
+#'   
+#' # Create a smaller version of shifted data
+#' shifted_peaks_sub <-
+#'   shifted_peaks$data |>
+#'   filter(id %in% sub_ids$id)
+#'   
+#' # Extract times
+#' shifted_peaks_times = unique(shifted_peaks_sub$t)
+#' 
+#' # Convert training data to matrix
+#' shifted_peaks_train_matrix <-
+#'   shifted_peaks_sub |> 
+#'   filter(data == "Training") |>
+#'   select(-t) |>
+#'   mutate(index = paste0("t", index)) |>
+#'   pivot_wider(names_from = index, values_from = y) |>
+#'   select(-data, -id, -group) |> 
+#'   as.matrix() |> 
+#'   t()
+#'  
+#' # Obtain veesa pipeline training data
+#' veesa_train <- 
+#'   prep_training_data(
+#'     f = shifted_peaks_train_matrix, 
+#'     time = shifted_peaks_times,
+#'     fpca_method = "jfpca"
+#'   )
+#'   
+#' # Obtain response variable values
+#' model_output <-
+#'   shifted_peaks_sub |> 
+#'   filter(data == "Training") |>
+#'   select(id, group) |> 
+#'   distinct()
+#'   
+#' # Prepare data for model
+#' model_data <- 
+#'   veesa_train$fpca_res$coef |> 
+#'   data.frame() |> 
+#'   mutate(group = factor(model_output$group))
+#'   
+#' # Train model
+#' set.seed(20210301)
+#' rf <-
+#'   randomForest(
+#'     formula = group ~ .,
+#'     data = model_data
+#'   )
+#'    
+#' # Compute feature importance values
+#' pfi <-
+#'   compute_pfi(
+#'     x = model_data |> select(-group),
+#'     y = model_data$group,
+#'     f = rf,
+#'     K = 5,
+#'     metric = "accuracy"
+#'  )
 
 compute_pfi <- function(x, y, f, K, metric, eps = 1e-15) {
 

diff --git a/R/plot_pc_directions.R b/R/plot_pc_directions.R
@@ -7,10 +7,10 @@
 #' @param fpca_method Character string specifying the type of elastic fPCA method to use ('jfpca', 'hfpca', or 'vfpca')
 #' @param times Optional vector of times (if not included, times will be represented on the interval from 0 to 1)
 #' @param digits Number of digits to print in the title for the proportion of variability explained by a PC
-#' @param alpha XXX
 #' @param nrow Number of rows to use when creating a grid of plots
-#' @param linesizes XXX
-#' @param linetype XXX
+#' @param alpha Vector of alpha values associated with lines in plot (length must match number of lines in plot)
+#' @param linesizes Vector of line widths associated with lines in plot (length must match number of lines in plot)
+#' @param linetype Vector of line types (e.g., "solid" or "dashed") associated with lines in plot (length must match number of lines in plot)
 #' @param freey Indicator for whether y-axis should be freed across facets
 #'  
 #' @export plot_pc_directions
@@ -23,9 +23,71 @@
 #' @importFrom stringr str_replace
 #' @importFrom tidyr pivot_longer
 #' 
-#' @return XXX
+#' @return ggplot2 plot of specified principal component directions
+#' 
+#' @examples
+#' # Load packages
+#' library(dplyr)
+#' library(tidyr)
+#' 
+#' # Select a subset of functions from shifted peaks data
+#' sub_ids <- 
+#'   shifted_peaks$data |>
+#'   select(data, group, id) |> 
+#'   distinct() |>
+#'   group_by(data, group) |> 
+#'   slice(1:5) |> 
+#'   ungroup()
+#'   
+#' # Create a smaller version of shifted data
+#' shifted_peaks_sub <-
+#'   shifted_peaks$data |>
+#'   filter(id %in% sub_ids$id)
+#'   
+#' # Extract times
+#' shifted_peaks_times = unique(shifted_peaks_sub$t)
+#' 
+#' # Convert training data to matrix
+#' shifted_peaks_train_matrix <-
+#'   shifted_peaks_sub |> 
+#'   filter(data == "Training") |>
+#'   select(-t) |>
+#'   mutate(index = paste0("t", index)) |>
+#'   pivot_wider(names_from = index, values_from = y) |>
+#'   select(-data, -id, -group) |> 
+#'   as.matrix() |> 
+#'   t()
+#'  
+#' # Obtain veesa pipeline training data
+#' veesa_train <- 
+#'   prep_training_data(
+#'     f = shifted_peaks_train_matrix, 
+#'     time = shifted_peaks_times,
+#'     fpca_method = "jfpca"
+#'   )
+#'
+#' # Plot principal directions of PC1
+#' plot_pc_directions(
+#'   fpcs = 1,
+#'   fdasrvf = veesa_train$fpca_res,
+#'   fpca_method = "jfpca",
+#'   times = -shifted_peaks_times, 
+#'   linesizes = rep(0.75,5),
+#'   alpha = 0.9
+#'  )
 
-plot_pc_directions <- function(fpcs, fdasrvf, fpca_method, times = NULL, digits = 0, alpha = 1, nrow = 1, linesizes = NULL, linetype = TRUE, freey = F) {
+plot_pc_directions <- function(
+    fpcs, 
+    fdasrvf, 
+    fpca_method, 
+    times = NULL, 
+    digits = 0, 
+    alpha = 1, 
+    nrow = 1, 
+    linesizes = NULL, 
+    linetype = TRUE,
+    freey = F
+  ) {
 
   # Compute prop var
   prop_var = (fdasrvf$latent)^2 / sum((fdasrvf$latent)^2)

diff --git a/R/prep_testing_data.R b/R/prep_testing_data.R
@@ -1,32 +1,101 @@
 #' Align test data and apply fPCA using elastic method applied to training data
 #'
-#' Applies steps 2 and 3 of the VEESA pipeline (alignment and elastic fPCA (jfpca, hfpca, or vfpca)) to 
-#'     the testing data based on the training data prepared using "prep_training_data".
+#' Applies steps 2 and 3 of the VEESA pipeline (alignment and elastic fPCA
+#'     (jfpca, hfpca, or vfpca)) to the testing data based on the training 
+#'     data prepared using "prep_training_data".
 #'     
-#' @param f Test data matrix (M x N) of N functions with M samples (already smoothed)
+#' @param f Matrix (size M x N) of test data with N functions and M samples.
 #' @param time Vector of size M describing the sample points
-#' @param train_prep Object returned from applying "prep_training_data" to training data
-#' @param optim_method Method used for optimization when computing the Karcher mean (DP, DP2, RBFGS, or DPo)
+#' @param train_prep Object returned from applying "prep_training_data" to 
+#'        training data.
+#' @param optim_method Method used for optimization when computing the Karcher 
+#'        mean. "DP", "DPo", and "RBFGS".
 #' 
 #' @export prep_testing_data
 #'
-#' @importFrom fdasrvf f_to_srvf gradient optimum.reparam time_warping warp_f_gamma
+#' @importFrom fdasrvf f_to_srvf gradient optimum.reparam time_warping 
+#'             warp_f_gamma
 #' @importFrom purrr map map2 pmap
 #' 
 #' @return List containing (varies slightly based on fpca method used): 
 #' \itemize{
 #'   \item time: vector of times when functions are observed (length of M)
-#'   \item f0: original test data functions - matrix (M x N) of N functions with M samples
+#'   \item f0: original test data functions - matrix (M x N) of N functions 
+#'         with M samples
 #'   \item fn: aligned test data functions - similar structure to f0
 #'   \item q0: original test data SRSFs - similar structure to f0
 #'   \item qn: aligned test data SRSFs - similar structure to f0
-#'   \item mqn: training data SRSF mean (test data functions are aligned to this function)
+#'   \item mqn: training data SRSF mean (test data functions are aligned to 
+#'         this function)
 #'   \item gam: test data warping functions - similar structure to f0
 #'   \item coef: test data principal component coefficients
-#'   \item psi: test data warping function SRVFs - similar structure to f0 (jfpca and hfpca only)
-#'   \item nu: test data shooting functions - similar structure to f0 (jfpca and hfpca only)
-#'   \item g: test data combination of aligned and shooting functions (jfpca only)
+#'   \item psi: test data warping function SRVFs - similar structure to f0 
+#'         (jfpca and hfpca only)
+#'   \item nu: test data shooting functions - similar structure to f0 (jfpca 
+#'         and hfpca only)
+#'   \item g: test data combination of aligned and shooting functions (jfpca 
+#'         only)
 #' }
+#' 
+#' @examples
+#' # Load packages
+#' library(dplyr)
+#' library(tidyr)
+#' 
+#' # Select a subset of functions from shifted peaks data
+#' sub_ids <- 
+#'   shifted_peaks$data |>
+#'   select(data, group, id) |> 
+#'   distinct() |>
+#'   group_by(data, group) |> 
+#'   slice(1:5) |> 
+#'   ungroup()
+#'   
+#' # Create a smaller version of shifted data
+#' shifted_peaks_sub <-
+#'   shifted_peaks$data |>
+#'   filter(id %in% sub_ids$id)
+#'   
+#' # Extract times
+#' shifted_peaks_times = unique(shifted_peaks_sub$t)
+#' 
+#' # Convert training data to matrix
+#' shifted_peaks_train_matrix <-
+#'   shifted_peaks_sub |> 
+#'   filter(data == "Training") |>
+#'   select(-t) |>
+#'   mutate(index = paste0("t", index)) |>
+#'   pivot_wider(names_from = index, values_from = y) |>
+#'   select(-data, -id, -group) |> 
+#'   as.matrix() |> 
+#'   t()
+#'  
+#' # Obtain veesa pipeline training data
+#' veesa_train <- 
+#'   prep_training_data(
+#'     f = shifted_peaks_train_matrix, 
+#'     time = shifted_peaks_times,
+#'     fpca_method = "jfpca"
+#'   )
+#'   
+#' # Convert testing data to matrix
+#' shifted_peaks_test_matrix <-
+#'   shifted_peaks_sub |> 
+#'   filter(data == "Testing") |>
+#'   select(-t) |>
+#'   mutate(index = paste0("t", index)) |>
+#'   pivot_wider(names_from = index, values_from = y) |>
+#'   select(-data, -id, -group) |> 
+#'   as.matrix() |> 
+#'   t()
+#'   
+#' # Obtain veesa pipeline testing data
+#' veesa_test <- prep_testing_data(
+#'   f = shifted_peaks_test_matrix,
+#'   time = shifted_peaks_times,
+#'   train_prep = veesa_train,
+#'   optim_method = "DP"
+#'  )
 
 prep_testing_data <- function(f, time, train_prep, optim_method = "DP") {
 
@@ -82,7 +151,11 @@ prep_testing_data <- function(f, time, train_prep, optim_method = "DP") {
   if (fpca_type %in% c("jfpca", "hfpca")) {
     # Compute SRSFs of test data warping functions:
     psi <-
-      purrr::map(.x = gamma, .f = fdasrvf::gradient, binsize = mean(diff(time))) %>%
+      purrr::map(
+        .x = gamma, 
+        .f = fdasrvf::gradient, 
+        binsize = mean(diff(time))
+      ) %>%
       purrr::map(.f = sqrt)
     # Compute test data shooting functions:
     if (fpca_type == "jfpca") {
@@ -96,7 +169,10 @@ prep_testing_data <- function(f, time, train_prep, optim_method = "DP") {
   # 2. If applying jfpca or vfpca, obtain id value:
   if (fpca_type %in% c("jfpca", "vfpca")) {
     f_id = purrr::map(.x = fn, .f = fpca_train$id)
-    q_id = purrr::map(.x = f_id, .f = function(f_id) sign(f_id) * sqrt(abs(f_id)))
+    q_id = purrr::map(
+      .x = f_id, 
+      .f = function(f_id) sign(f_id) * sqrt(abs(f_id))
+    )
   }
 
   # 3. Compute the principal components for the test data: