Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 86 - added sample malahanobis distance fxn based on svd lowD & allow for diff input in get_design_tbl() #87

Merged
merged 3 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: romic
Type: Package
Title: R for High-Dimensional Omic Data
Version: 1.2.2
Version: 1.2.3
Authors@R: c(
person(
given = "Sean",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export(add_pcs)
export(app_flow)
export(app_heatmap)
export(app_pcs)
export(calculate_sample_mahalanobis_distances)
export(center_tomic)
export(check_design)
export(check_tomic)
Expand Down
16 changes: 13 additions & 3 deletions R/design.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,25 @@
#'
#' Get a tabular summary of all variables.
#'
#' @inheritParams tomic_to
#' @param tomic_or_design Either a \code{tomic} object or its embedded design list
#'
#' @returns a tibble reflecting the \code{tomic} object's design.
#'
#' @examples
#' get_design_tbl(brauer_2008_triple)
#' get_design_tbl(brauer_2008_triple$design)
#'
#' @export
get_design_tbl <- function(tomic) {
tomic$design[c("features", "samples", "measurements")] %>%
get_design_tbl <- function(tomic_or_design) {

if (inherits(tomic_or_design, "tomic")) {
design <- tomic_or_design$design
} else {
check_design(tomic_or_design)
design <- tomic_or_design
}

design[c("features", "samples", "measurements")] %>%
{
purrr::map2(unname(.), names(.), function(x, y) {
x %>%
Expand Down
74 changes: 74 additions & 0 deletions R/dim_reduction.R
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,77 @@ find_triple_omic_missing_values <- function(

return(output)
}

#' Calculate Sample Mahalanobis Distances
#'
#' Determine each samples distance from the center of the data using Mahalanobis distance.
#'
#' @inheritParams tomic_to
#' @param value_var the measurement variable to use for calculating distances
#' @param max_pcs the maximum number of principal components to used for
#' representing the covariance matrix.
#' @param scale if TRUE then the data will be scaled before calculating distances
#'
#' @returns The samples tibble with a new column `pc_distance` which contains the
#' Mahalanobis distances of individual samples from the PC elipsoid
#'
#' @details
#' Since `romic` is built around using tall data where there are more features than
#' samples calculating Mahalanobis distance off of the covariance matrix is not
#' possible. Instead, we use SVD to create a low-dimensional representation of the
#' covariance matrix and calculate distances from the center of the data in this
#' space. This essentially involves weighting the principal components by their
#' loadings.
#'
#' @examples
#' calculate_sample_mahalanobis_distances(brauer_2008_tidy)
#' @export
calculate_sample_mahalanobis_distances <- function (
tomic,
value_var = NULL,
max_pcs = 10,
scale = FALSE
) {

checkmate::assertClass(tomic, "tomic")
value_var <- value_var_handler(value_var, tomic$design)
checkmate::assertIntegerish(max_pcs, len = 1)

n_features <- nrow(romic::get_tomic_table(tomic, "features"))
n_samples <- nrow(romic::get_tomic_table(tomic, "samples"))
npcs <- pmin(max_pcs, n_features, n_samples)

X_std <- tomic %>%
# remove features with missing values
romic::remove_missing_values(value_var, "drop_features", verbose = FALSE) %>%
# convert to a matrix
romic::tomic_to_matrix(value_var) %>%
{scale(t(.), scale = scale) %>% t()}

# if X is constant for a feature (such all reads are 0) then
# scaling will put it to NaN. We need to remove these features
valid_X_std <- X_std[rowSums(is.nan(X_std)) == 0,]

# perform a singular value decomposition
svd_results <- svd(valid_X_std, nv = npcs)

# re-weight principal components by eigenvalues
X_std_v <- svd_results$v * matrix(svd_results$d[1:npcs], ncol = npcs, nrow = ncol(valid_X_std), byrow = TRUE)
rownames(X_std_v) <- colnames(valid_X_std)

pc_distances_by_sample <- tibble::tibble(
!!rlang::sym(tomic$design$sample_pk) := rownames(X_std_v),
pc_distance = rowSums(X_std_v^2)
) %>%
dplyr::arrange(dplyr::desc(pc_distance))

# validate calculation
varex_leading_components <- sum(svd_results$d^2) * sum((svd_results$d^2 / sum(svd_results$d^2))[1:npcs])

stopifnot((sum(pc_distances_by_sample$pc_distance) - varex_leading_components)/varex_leading_components < 0.01)

samples_w_pc_distances <- romic::get_tomic_table(tomic, "samples") %>%
dplyr::left_join(pc_distances_by_sample, by = tomic$design$sample_pk)

return(samples_w_pc_distances)
}
8 changes: 5 additions & 3 deletions R/mutates.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,11 @@ center <- function(x) {
#' select(-DR)
#' new_variable_tables <- c("new_sample_var" = "samples")
#' @export
update_tidy_omic <- function(tidy_omic,
updated_tidy_data,
new_variable_tables = c()) {
update_tidy_omic <- function(
tidy_omic,
updated_tidy_data,
new_variable_tables = c()
) {
checkmate::assertClass(tidy_omic, "tomic")
checkmate::assertClass(tidy_omic, "tidy_omic")
checkmate::assertDataFrame(updated_tidy_data)
Expand Down
1 change: 1 addition & 0 deletions R/romic-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ utils::globalVariables(c(
"sample_label",
"ordered_sampleId",
"orderedId",
"pc_distance",
"valid_tables",
"NAME",
"name",
Expand Down
41 changes: 41 additions & 0 deletions man/calculate_sample_mahalanobis_distances.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions man/get_design_tbl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading