Skip to content

Commit

Permalink
updated function inputs to better match fdasrvf + some clean up of do…
Browse files Browse the repository at this point in the history
…cumentation + adding badges
  • Loading branch information
goodekat committed Jan 11, 2025
1 parent 147874c commit feab578
Show file tree
Hide file tree
Showing 37 changed files with 723 additions and 89 deletions.
1 change: 1 addition & 0 deletions .github/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.html
51 changes: 51 additions & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:

name: R-CMD-check.yaml

permissions: read-all

jobs:
R-CMD-check:
runs-on: ${{ matrix.config.os }}

name: ${{ matrix.config.os }} (${{ matrix.config.r }})

strategy:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: veesa
Type: Package
Title: VEESA Pipeline for Explainable Machine Learning with Functional Data
Version: 0.1.2
Version: 0.1.3
Author: Katherine Goode, J. Derek Tucker
Maintainer: Katherine Goode <[email protected]>
Description: Contains functions for applying the VEESA pipeline in R.
Expand All @@ -18,3 +18,5 @@ Imports:
stats,
stringr,
tidyr
Suggests:
randomForest
8 changes: 7 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# veesa 0.1.3

- Updated inputs in prep_training_data to match fdasrvf
- Added examples to documentation
- Cleaned up wording in documentation a bit

# veesa 0.1.2

- Added shifted peaks data
Expand All @@ -10,4 +16,4 @@

# veesa 0.1.0

Initial version of package
- Initial version of package
79 changes: 78 additions & 1 deletion R/compute_pfi.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,84 @@
#' @importFrom purrr map2
#' @importFrom stats setNames
#'
#' @return xxx
#' @return List containing
#' \itemize{
#' \item \code{pfi}: Vector of PFI values (averaged over replicates)
#' \item \code{pfi_single_reps}: Matrix of containing the feature importance values from each replicate (rows associated with reps; columns associated with data observations)
#' }
#'
#' @examples
#' # Load packages
#' library(dplyr)
#' library(tidyr)
#' library(randomForest)
#'
#' # Select a subset of functions from shifted peaks data
#' sub_ids <-
#' shifted_peaks$data |>
#' select(data, group, id) |>
#' distinct() |>
#' group_by(data, group) |>
#' slice(1:5) |>
#' ungroup()
#'
#' # Create a smaller version of shifted data
#' shifted_peaks_sub <-
#' shifted_peaks$data |>
#' filter(id %in% sub_ids$id)
#'
#' # Extract times
#' shifted_peaks_times = unique(shifted_peaks_sub$t)
#'
#' # Convert training data to matrix
#' shifted_peaks_train_matrix <-
#' shifted_peaks_sub |>
#' filter(data == "Training") |>
#' select(-t) |>
#' mutate(index = paste0("t", index)) |>
#' pivot_wider(names_from = index, values_from = y) |>
#' select(-data, -id, -group) |>
#' as.matrix() |>
#' t()
#'
#' # Obtain veesa pipeline training data
#' veesa_train <-
#' prep_training_data(
#' f = shifted_peaks_train_matrix,
#' time = shifted_peaks_times,
#' fpca_method = "jfpca"
#' )
#'
#' # Obtain response variable values
#' model_output <-
#' shifted_peaks_sub |>
#' filter(data == "Training") |>
#' select(id, group) |>
#' distinct()
#'
#' # Prepare data for model
#' model_data <-
#' veesa_train$fpca_res$coef |>
#' data.frame() |>
#' mutate(group = factor(model_output$group))
#'
#' # Train model
#' set.seed(20210301)
#' rf <-
#' randomForest(
#' formula = group ~ .,
#' data = model_data
#' )
#'
#' # Compute feature importance values
#' pfi <-
#' compute_pfi(
#' x = model_data |> select(-group),
#' y = model_data$group,
#' f = rf,
#' K = 5,
#' metric = "accuracy"
#' )

compute_pfi <- function(x, y, f, K, metric, eps = 1e-15) {

Expand Down
72 changes: 67 additions & 5 deletions R/plot_pc_directions.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
#' @param fpca_method Character string specifying the type of elastic fPCA method to use ('jfpca', 'hfpca', or 'vfpca')
#' @param times Optional vector of times (if not included, times will be represented on the interval from 0 to 1)
#' @param digits Number of digits to print in the title for the proportion of variability explained by a PC
#' @param alpha XXX
#' @param nrow Number of rows to use when creating a grid of plots
#' @param linesizes XXX
#' @param linetype XXX
#' @param alpha Vector of alpha values associated with lines in plot (length must match number of lines in plot)
#' @param linesizes Vector of line widths associated with lines in plot (length must match number of lines in plot)
#' @param linetype Vector of line types (e.g., "solid" or "dashed") associated with lines in plot (length must match number of lines in plot)
#' @param freey Indicator for whether y-axis should be freed across facets
#'
#' @export plot_pc_directions
Expand All @@ -23,9 +23,71 @@
#' @importFrom stringr str_replace
#' @importFrom tidyr pivot_longer
#'
#' @return XXX
#' @return ggplot2 plot of specified principal component directions
#'
#' @examples
#' # Load packages
#' library(dplyr)
#' library(tidyr)
#'
#' # Select a subset of functions from shifted peaks data
#' sub_ids <-
#' shifted_peaks$data |>
#' select(data, group, id) |>
#' distinct() |>
#' group_by(data, group) |>
#' slice(1:5) |>
#' ungroup()
#'
#' # Create a smaller version of shifted data
#' shifted_peaks_sub <-
#' shifted_peaks$data |>
#' filter(id %in% sub_ids$id)
#'
#' # Extract times
#' shifted_peaks_times = unique(shifted_peaks_sub$t)
#'
#' # Convert training data to matrix
#' shifted_peaks_train_matrix <-
#' shifted_peaks_sub |>
#' filter(data == "Training") |>
#' select(-t) |>
#' mutate(index = paste0("t", index)) |>
#' pivot_wider(names_from = index, values_from = y) |>
#' select(-data, -id, -group) |>
#' as.matrix() |>
#' t()
#'
#' # Obtain veesa pipeline training data
#' veesa_train <-
#' prep_training_data(
#' f = shifted_peaks_train_matrix,
#' time = shifted_peaks_times,
#' fpca_method = "jfpca"
#' )
#'
#' # Plot principal directions of PC1
#' plot_pc_directions(
#' fpcs = 1,
#' fdasrvf = veesa_train$fpca_res,
#' fpca_method = "jfpca",
#' times = -shifted_peaks_times,
#' linesizes = rep(0.75,5),
#' alpha = 0.9
#' )

plot_pc_directions <- function(fpcs, fdasrvf, fpca_method, times = NULL, digits = 0, alpha = 1, nrow = 1, linesizes = NULL, linetype = TRUE, freey = F) {
plot_pc_directions <- function(
fpcs,
fdasrvf,
fpca_method,
times = NULL,
digits = 0,
alpha = 1,
nrow = 1,
linesizes = NULL,
linetype = TRUE,
freey = F
) {

# Compute prop var
prop_var = (fdasrvf$latent)^2 / sum((fdasrvf$latent)^2)
Expand Down
102 changes: 89 additions & 13 deletions R/prep_testing_data.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,101 @@
#' Align test data and apply fPCA using elastic method applied to training data
#'
#' Applies steps 2 and 3 of the VEESA pipeline (alignment and elastic fPCA (jfpca, hfpca, or vfpca)) to
#' the testing data based on the training data prepared using "prep_training_data".
#' Applies steps 2 and 3 of the VEESA pipeline (alignment and elastic fPCA
#' (jfpca, hfpca, or vfpca)) to the testing data based on the training
#' data prepared using "prep_training_data".
#'
#' @param f Test data matrix (M x N) of N functions with M samples (already smoothed)
#' @param f Matrix (size M x N) of test data with N functions and M samples.
#' @param time Vector of size M describing the sample points
#' @param train_prep Object returned from applying "prep_training_data" to training data
#' @param optim_method Method used for optimization when computing the Karcher mean (DP, DP2, RBFGS, or DPo)
#' @param train_prep Object returned from applying "prep_training_data" to
#' training data.
#' @param optim_method Method used for optimization when computing the Karcher
#' mean. "DP", "DPo", and "RBFGS".
#'
#' @export prep_testing_data
#'
#' @importFrom fdasrvf f_to_srvf gradient optimum.reparam time_warping warp_f_gamma
#' @importFrom fdasrvf f_to_srvf gradient optimum.reparam time_warping
#' warp_f_gamma
#' @importFrom purrr map map2 pmap
#'
#' @return List containing (varies slightly based on fpca method used):
#' \itemize{
#' \item time: vector of times when functions are observed (length of M)
#' \item f0: original test data functions - matrix (M x N) of N functions with M samples
#' \item f0: original test data functions - matrix (M x N) of N functions
#' with M samples
#' \item fn: aligned test data functions - similar structure to f0
#' \item q0: original test data SRSFs - similar structure to f0
#' \item qn: aligned test data SRSFs - similar structure to f0
#' \item mqn: training data SRSF mean (test data functions are aligned to this function)
#' \item mqn: training data SRSF mean (test data functions are aligned to
#' this function)
#' \item gam: test data warping functions - similar structure to f0
#' \item coef: test data principal component coefficients
#' \item psi: test data warping function SRVFs - similar structure to f0 (jfpca and hfpca only)
#' \item nu: test data shooting functions - similar structure to f0 (jfpca and hfpca only)
#' \item g: test data combination of aligned and shooting functions (jfpca only)
#' \item psi: test data warping function SRVFs - similar structure to f0
#' (jfpca and hfpca only)
#' \item nu: test data shooting functions - similar structure to f0 (jfpca
#' and hfpca only)
#' \item g: test data combination of aligned and shooting functions (jfpca
#' only)
#' }
#'
#' @examples
#' # Load packages
#' library(dplyr)
#' library(tidyr)
#'
#' # Select a subset of functions from shifted peaks data
#' sub_ids <-
#' shifted_peaks$data |>
#' select(data, group, id) |>
#' distinct() |>
#' group_by(data, group) |>
#' slice(1:5) |>
#' ungroup()
#'
#' # Create a smaller version of shifted data
#' shifted_peaks_sub <-
#' shifted_peaks$data |>
#' filter(id %in% sub_ids$id)
#'
#' # Extract times
#' shifted_peaks_times = unique(shifted_peaks_sub$t)
#'
#' # Convert training data to matrix
#' shifted_peaks_train_matrix <-
#' shifted_peaks_sub |>
#' filter(data == "Training") |>
#' select(-t) |>
#' mutate(index = paste0("t", index)) |>
#' pivot_wider(names_from = index, values_from = y) |>
#' select(-data, -id, -group) |>
#' as.matrix() |>
#' t()
#'
#' # Obtain veesa pipeline training data
#' veesa_train <-
#' prep_training_data(
#' f = shifted_peaks_train_matrix,
#' time = shifted_peaks_times,
#' fpca_method = "jfpca"
#' )
#'
#' # Convert testing data to matrix
#' shifted_peaks_test_matrix <-
#' shifted_peaks_sub |>
#' filter(data == "Testing") |>
#' select(-t) |>
#' mutate(index = paste0("t", index)) |>
#' pivot_wider(names_from = index, values_from = y) |>
#' select(-data, -id, -group) |>
#' as.matrix() |>
#' t()
#'
#' # Obtain veesa pipeline testing data
#' veesa_test <- prep_testing_data(
#' f = shifted_peaks_test_matrix,
#' time = shifted_peaks_times,
#' train_prep = veesa_train,
#' optim_method = "DP"
#' )

prep_testing_data <- function(f, time, train_prep, optim_method = "DP") {

Expand Down Expand Up @@ -82,7 +151,11 @@ prep_testing_data <- function(f, time, train_prep, optim_method = "DP") {
if (fpca_type %in% c("jfpca", "hfpca")) {
# Compute SRSFs of test data warping functions:
psi <-
purrr::map(.x = gamma, .f = fdasrvf::gradient, binsize = mean(diff(time))) %>%
purrr::map(
.x = gamma,
.f = fdasrvf::gradient,
binsize = mean(diff(time))
) %>%
purrr::map(.f = sqrt)
# Compute test data shooting functions:
if (fpca_type == "jfpca") {
Expand All @@ -96,7 +169,10 @@ prep_testing_data <- function(f, time, train_prep, optim_method = "DP") {
# 2. If applying jfpca or vfpca, obtain id value:
if (fpca_type %in% c("jfpca", "vfpca")) {
f_id = purrr::map(.x = fn, .f = fpca_train$id)
q_id = purrr::map(.x = f_id, .f = function(f_id) sign(f_id) * sqrt(abs(f_id)))
q_id = purrr::map(
.x = f_id,
.f = function(f_id) sign(f_id) * sqrt(abs(f_id))
)
}

# 3. Compute the principal components for the test data:
Expand Down
Loading

0 comments on commit feab578

Please sign in to comment.