From ba5ecd7b377fe7d1c5d15f89a466a8c415c59d3b Mon Sep 17 00:00:00 2001 From: albert-ying <59846322+albert-ying@users.noreply.github.com> Date: Sun, 25 Jun 2023 01:33:19 -0400 Subject: [PATCH] add option that allows clump locally --- R/calculate_SP.R | 4 ++-- R/gettingSP_ldscMR.R | 52 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/R/calculate_SP.R b/R/calculate_SP.R index 84c7f59..ad56513 100644 --- a/R/calculate_SP.R +++ b/R/calculate_SP.R @@ -28,7 +28,7 @@ #' #' @examples calculate_SP <- function(input.df,trait.names,run_ldsc=TRUE,run_MR=TRUE,saveRFiles=TRUE,hm3=NA,ld=NA,nStep=2, - SP_single=3,SP_pair=50,SNP_filter=10,SNP_filter_ldsc=NA,nCores=1,M=1e7){ + SP_single=3,SP_pair=50,SNP_filter=10,SNP_filter_ldsc=NA,nCores=1,M=1e7, b_file=NULL, plink_path=NULL){ if(nStep>2 || nStep<1){ cat(print("Please choose 1 or 2 for the number of analysis steps\n")) @@ -54,7 +54,7 @@ calculate_SP <- function(input.df,trait.names,run_ldsc=TRUE,run_MR=TRUE,saveRFil OUT = trait.names[2] # Get estimates of starting points from LDSC and standard MR method (IVW) - SP = gettingSP_ldscMR(input.df,trait.names,run_ldsc,run_MR,saveRFiles,SNP_filter_ldsc,hm3,ld) + SP = gettingSP_ldscMR(input.df,trait.names,run_ldsc,run_MR,saveRFiles,SNP_filter_ldsc,hm3,ld, b_file = b_file, plink_path = plink_path) i_XY = as.numeric(SP[[1]]) axy_MR = as.numeric(SP[[2]]) ayx_MR = as.numeric(SP[[3]]) diff --git a/R/gettingSP_ldscMR.R b/R/gettingSP_ldscMR.R index 59e3078..fad3e9d 100644 --- a/R/gettingSP_ldscMR.R +++ b/R/gettingSP_ldscMR.R @@ -14,7 +14,44 @@ #' @return #' @keywords internal # NOT EXPORTED @export -gettingSP_ldscMR = function(input.df,trait.names,run_ldsc=TRUE,run_MR=TRUE,saveRFiles=TRUE,SNP_filter_ldsc,hm3,ld){ +ld_clump_local <- function(dat, clump_kb = 10000, clump_r2 = 0.001, clump_p1 = 1, clump_p2 = 1, pop = "EUR", b_file, plink_path) { + pval_column <- "pval.exposure" + if (!is.data.frame(dat)) { + stop("Expecting data frame returned from format_data") + } + if ("pval.exposure" %in% names(dat) & "pval.outcome" %in% + names(dat)) { + message("pval.exposure and pval.outcome columns present. Using pval.exposure for clumping.") + } else if (!"pval.exposure" %in% names(dat) & "pval.outcome" %in% + names(dat)) { + message("pval.exposure column not present, using pval.outcome column for clumping.") + pval_column <- "pval.outcome" + } else if (!"pval.exposure" %in% names(dat)) { + message("pval.exposure not present, setting clumping p-value to 0.99 for all variants") + dat$pval.exposure <- 0.99 + } else { + pval_column <- "pval.exposure" + } + if (!"id.exposure" %in% names(dat)) { + dat$id.exposure <- random_string(1) + } + d <- data.frame( + rsid = dat$SNP, pval = dat[[pval_column]], + id = dat$id.exposure + ) + out <- ieugwasr::ld_clump(d, + clump_kb = clump_kb, clump_r2 = clump_r2, + clump_p = clump_p1, pop = pop, + bfile = b_file, plink_bin = plink_path + ) + keep <- paste(dat$SNP, dat$id.exposure) %in% paste( + out$rsid, + out$id + ) + return(dat[keep, ]) +} + +gettingSP_ldscMR = function(input.df,trait.names,run_ldsc=TRUE,run_MR=TRUE,saveRFiles=TRUE,SNP_filter_ldsc,hm3,ld,plink_path=NULL,b_file=NULL){ EXP = trait.names[1] OUT = trait.names[2] @@ -146,7 +183,12 @@ gettingSP_ldscMR = function(input.df,trait.names,run_ldsc=TRUE,run_MR=TRUE,saveR exp_data = c() for (x in 1:length(clump_bin)) { temp = exp_dat[exp_dat$SNP %in% clump_bin[[x]]$SNP,] - temp1 = TwoSampleMR::clump_data(temp) + if (is.null(plink_path)) { + temp1 = TwoSampleMR::clump_data(temp) + } else { + temp1 = ld_clump_local(temp, plink_path=plink_path,b_file = b_file) + + } exp_data=rbind(exp_data,temp1) } @@ -237,7 +279,11 @@ gettingSP_ldscMR = function(input.df,trait.names,run_ldsc=TRUE,run_MR=TRUE,saveR exp_data = c() for (x in 1:length(clump_bin)) { temp = exp_dat[exp_dat$SNP %in% clump_bin[[x]]$SNP,] - temp1 = TwoSampleMR::clump_data(temp) + if (is.null(plink_path)) { + temp1 = TwoSampleMR::clump_data(temp) + } else { + temp1 = ld_clump_local(temp, plink_path=plink_path,b_file = b_file) + } exp_data=rbind(exp_data,temp1) }