diff --git a/.gitignore b/.gitignore
index 583cfde..56ccab1 100755
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,8 @@ slurm*
 Rcode/.Rproj.user
 Rcode/markdowns/*
 Rcode/*.html
+Rcode/.Rhistory
+Rcode/Rprofile
 !Rcode/markdowns/template.Rmd
 experiments/.snakemake/*
 .Rapp.history
diff --git a/Rcode/.Rhistory b/Rcode/.Rhistory
index a56c7aa..f9265e5 100644
--- a/Rcode/.Rhistory
+++ b/Rcode/.Rhistory
@@ -1,512 +1,512 @@
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-summary(fit)
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-ab_line(intercept = coef(fit[1]), slope = coef(fit[2]))+
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit[1]), slope = coef(fit[2]))+
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-?geom_abline
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])+
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-summary(fit)
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-geom_line(data = data.frame(x = X), aes(x = log(x), y = (coef(fit)[1] + coef(fit)[2] * log(x))-log(x) )) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-#  geom_line(data = data.frame(x = X), aes(x = log(x), y = (coef(fit)[1] + coef(fit)[2] * log(x))-log(x) )) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-geom_line(data = data.frame(x = X), aes(x = log(x), y = (coef(fit)[1] + coef(fit)[2] * log(x))-log(x) )) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-#  geom_line(data = data.frame(x = X), aes(x = log(x), y = (coef(fit)[1] + coef(fit)[2] * log(x))-log(x) )) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = mean(prop_av), response = inv.logit(empirical_link(fraction_of_clusters, family = binomial(link = 'log')))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(q)', y = 'log(p)') +
-geom_line(data = data.frame(x = X), aes(x = x, y = inv.logit(coef(fit)[1] + coef(fit)[2] * logit(x))))
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = mean(prop_av), response = inv.logit(empirical_link(fraction_of_clusters, family = binomial(link = 'log')))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(q)', y = 'log(p)') +
-geom_line(data = data.frame(x = X), aes(x = x, y = exp(coef(fit)[1] + coef(fit)[2] * log(x))))
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_smooth(method = glm, se = TRUE, family = binomial(link = 'log'))
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = mean(prop_av), response = inv.logit(empirical_link(fraction_of_clusters, family = binomial(link = 'log')))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(q)', y = 'log(p)') +
-geom_line(data = data.frame(x = X), aes(x = x, y = exp(coef(fit)[1] + coef(fit)[2] * log(x))))
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = mean(prop_av), response = inv.logit(empirical_link(fraction_of_clusters, family = binomial(link = 'log'))))
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = mean(prop_av), response = exp(empirical_link(fraction_of_clusters, family = binomial(link = 'log')))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'tumorVAF', y = 'log(p)') +
-geom_line(data = data.frame(x = X), aes(x = x, y = exp(coef(fit)[1] + coef(fit)[2] * log(x))))
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_smooth(method = glm, se = TRUE, family = binomial(link = 'log'))
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_smooth(method = glm, se = TRUE, family = binomial(link = 'log'))
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_smooth(method = glm, se = TRUE, family = binomial(link = 'log'))
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1)
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_smooth(method = glm, se = TRUE, family = binomial(link = 'log')) +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-summary(fit)
-library(tidyverse)
-setwd('~/Documents/projects/CTC_backup/validation_experiment/')
-data <- readRDS('combi_df.rds')
-View(data)
-data <- data %>% mutate(counts/freq, complexity = as.numeric(complexity))
-summary(data)
-data2 <- data %>% group_by(observations) %>% slice(rep(1:n(), counts/freq)) %>% mutate(present = row_number() <= first(counts)) %>%
-ungroup()
-fit <- glm(present ~ log(prop_av), data = data2, family = binomial(link = 'log'))
-summary(fit)
-confint(fit)
-coef(fit)
-beta1 <- coef(fit)[2]
-beta0 <- coef(fit)[1]
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-summary(fit)
-??bin_by_quantile
-library(regressinator)
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log')) - log(mean(prop_av))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log')) - log(mean(prop_av))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log')) - log(mean(prop_av))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log')) - log(mean(prop_av))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log')) - log(mean(prop_av))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)') +
-geom_smooth(method = 'glm',method.args = list(family = binomial(link = 'log')))
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log')) - log(mean(prop_av))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)') +
-geom_smooth(method = 'glm',method.args = list(family = binomial(link = 'log'))) +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log')) - log(mean(prop_av))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log')) - log(mean(prop_av))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log')) - log(mean(prop_av))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av)))
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) %>%
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]) +
-geom_smooth(method = 'glm', se = TRUE, family = binomial(link = 'log'), color = 'red')
-data %>%
-ggplot(aes(y = freq, x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]) +
-geom_smooth(method = 'glm', se = TRUE, family = binomial(link = 'log'), color = 'red')
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]) +
-geom_smooth(method = 'glm', se = TRUE, family = binomial(link = 'log'), color = 'red')
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]) +
-geom_smooth(method = 'glm', se = TRUE, family = binomial(link = 'log'), color = 'red')
-?geom_smooth
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]) +
-geom_smooth(method = 'glm', se = TRUE, method.args = list(family = binomial(link = 'log')), color = 'red')
-data %>%
-ggplot(aes(y = freq, x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]) +
-geom_smooth(method = 'glm', se = TRUE, method.args = list(family = binomial(link = 'log')), color = 'red')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]) +
-geom_smooth(method = 'glm', se = TRUE, method.args = list(family = binomial(link = 'log')), color = 'red')
-data %>%
-ggplot(aes(y = freq, x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data2 %>% group_by(prop_av) %>% dplyr::summarize(fraction_of_clusters = sum(present)/n()) %>%
-bin_by_quantile(prop_av, breaks = 100) %>%
-summarize(x = log(mean(prop_av)), response = empirical_link(fraction_of_clusters, family = binomial(link = 'log'))) %>%
-ggplot(aes(x = x, y = response)) + geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-summary(fit)
-library(tidyverse)
-setwd('~/Documents/projects/CTC_backup/validation_experiment/')
-data <- readRDS('combi_df.rds')
-View(data)
-data <- data %>% mutate(counts/freq, complexity = as.numeric(complexity))
-summary(data)
-data2 <- data %>% group_by(observations) %>% slice(rep(1:n(), counts/freq)) %>% mutate(present = row_number() <= first(counts)) %>%
-ungroup()
-fit <- glm(present ~ log(prop_av), data = data2, family = binomial(link = 'log'))
-summary(fit)
-confint(fit)
-coef(fit)
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-summary(fit)
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-fit <- glm(present ~ log(prop_av), data = data2, family = binomial(link = 'log'))
-fit
-summary(fit)
-confint(fit)
-summary(fit)
-summary(fit)
-(coef(fit)[1]-1)/summary(fit)$coefficients['log(prop_av)','Std. Error'])
-coef(fit)[1]-1)/summary(fit)$coefficients['log(prop_av)','Std. Error'])
-coef(fit)[1]-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-(coef(fit)[1]-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-critical_value <- qt(0.975, df)
-df <- nrow(data)-ncol(model.matrix(fit))
-df
-df <- nrow(data2)-ncol(model.matrix(fit))
-df
-summary(fit)
-critical_value <- qt(0.975, df)
-critical_value
-t-value <- (coef(fit)[1]-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-t-value
-t-value <- (coef(fit)[1]-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-t.value <- (coef(fit)[1]-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-df <- nrow(data2)-ncol(model.matrix(fit))
-p.value <- 2* pt(0.975, df)
-p.value <- 2* 1-pt(abs(t.value), df)
-p.value
-t.value <- (coef(fit)[2]-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-coef(fit)
-t.value <- (coef(fit)['log(prop_av)']-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-t.value
-coef(fit)['log(prop_av)']
-t.value <- (coef(fit)['log(prop_av)']-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-df <- nrow(data2)-ncol(model.matrix(fit))
-p.value <- 2* 1-pt(abs(t.value), df)
-p.value
-summary(fit)
-df
-p.value <- 2* (1-pt(abs(t.value), df))
-p.value
-p.value <- 2* (1-pt(abs(t.value), df))
-pt(abs(t.value), df)
-p.value <- 2* (1-pt(abs(t.value), df))
-fit <- glm(present ~ log(prop_av), data = data2, offset = 1,family = binomial(link = 'log'))
-t.value <- (coef(fit)['log(prop_av)']-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-t.value
-p.value <- 2* (1-pt(abs(t.value), df))
-p.value
-t.value
-df
-coef(fit)
-coef(fit)['log(prop_av)']
-coef(fit)['log(prop_av)']-1)
-summary(fit)$coefficients['log(prop_av)','Std. Error']
-?confint
-confint(fit, level = 0.99999999)
-confint(fit, level = 0.999999999999)
-?glm
-fit <- glm(present ~ log(prop_av), offset = 1,data = data2, family = binomial(link = 'log'))
-fit <- glm(present ~ log(prop_av), data = data2, family = binomial(link = 'log'))
-t.value <- (coef(fit)['log(prop_av)']-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-df <- nrow(data2)-ncol(model.matrix(fit))
-p.value <- 2* (1-pt(abs(t.value), df))
-p.value
-summary(fit)
-p.value <- 2.2e-16
-p.value <- 2* (1-pt(abs(t.value), df))
-print(p.value)
-p.value <- 2.2e-16
-t
-t
-t
-t
-library(tidyverse)
-setwd('~/Documents/projects/CTC_backup/validation_experiment/')
-data <- readRDS('combi_df.rds')
-View(data)
-data <- data %>% mutate(counts/freq, complexity = as.numeric(complexity))
-summary(data)
-data2 <- data %>% group_by(observations) %>% slice(rep(1:n(), counts/freq)) %>% mutate(present = row_number() <= first(counts)) %>%
-ungroup()
-fit <- glm(present ~ log(prop_av), data = data2, family = binomial(link = 'log'))
-summary(fit)
-confint(fit)
-coef(fit)
-data %>%
-ggplot(aes(y = log(freq), x = log(prop_av))) +
-geom_point() +
-labs(x = 'log(tumorVAF)', y = 'log(fraction among CTC clusters)') +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2])
-data %>%
-ggplot(aes(y = log(ratio), x = log(prop_av))) +
-geom_point() +
-geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2]-1) +
-labs(x = 'log(tumorVAF)', y = 'log(fold-change in prevalence)')
-summary(fit)
-t.value <- (coef(fit)['log(prop_av)']-1)/summary(fit)$coefficients['log(prop_av)','Std. Error']
-df <- nrow(data2)-ncol(model.matrix(fit))
-p.value <- 2* (1-pt(abs(t.value), df))
-print(p.value)
+},
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("*", "**", "***", "-"), type = "points", labels = c("< 0.05", "<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+columns_annotation <- c()
+for(value in p_values_columns){
+if(value < 0.001){
+columns_annotation <- c(columns_annotation, "***")
+}
+else if(value < 0.01){
+columns_annotation <- c(columns_annotation, "**")
+}
+else if(value < 0.05){
+columns_annotation <- c(columns_annotation, "*")
+}
+else {
+columns_annotation <- c(columns_annotation, "")
+}
+}
+row_annotation <- c()
+for(value in p_values_rows){
+if(value < 0.001){
+row_annotation <- c(row_annotation, "***")
+}
+else if(value < 0.01){
+row_annotation <- c(row_annotation, "**")
+}
+else if(value < 0.05){
+row_annotation <- c(row_annotation, "*")
+}
+else {
+row_annotation <- c(row_annotation, "")
+}
+}
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE),
+annotation_label = c("Cluster size"))
+ha_row = rowAnnotation(foo = anno_text(x = row_annotation, which = "row", show_name= TRUE),
+annotation_label = c("Sample"))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y)
+}
+},
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE),
+annotation_label = c("Cluster size"))
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE),
+annotation_label = c("Cluster size"))
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE),
+annotation_label = c("Cluster size"))
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE),
+annotation_label = c("Cluster size"))
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE),
+annotation_label = c("Cluster size"))
+ha_row = rowAnnotation(foo = anno_text(x = row_annotation, which = "row", show_name= TRUE, gp = gpar(fontsize = 8)),
+annotation_label = c("Sample"))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y)
+}
+},
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE, gp = gpar(fontsize = 8)),
+annotation_label = c("Cluster size"))
+ha_row = rowAnnotation(foo = anno_text(x = row_annotation, which = "row", show_name= TRUE, gp = gpar(fontsize = 8)),
+annotation_label = c("Sample"))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y)
+}
+},
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+},
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+},
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 8),
+columns_names_gp = gpar(fontsize = 8),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 8),
+columns_names_gp = gpar(fontsize = 8),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+?Heatmap
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 3),
+columns_names_gp = gpar(fontsize = 3),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 1),
+columns_names_gp = gpar(fontsize = 1),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 1),
+columns_names_gp = gpar(fontsize = 1),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 5),
+columns_names_gp = gpar(fontsize = 5),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+?Heatmap
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 5),
+column_names_gp = gpar(fontsize = 5),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 8),
+column_names_gp = gpar(fontsize = 8),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE, gp = gpar(fontsize = 8)),
+annotation_label = c("Cluster size", gpar(fontsize = 5)))
+ha_row = rowAnnotation(foo = anno_text(x = row_annotation, which = "row", show_name= TRUE, gp = gpar(fontsize = 8)),
+annotation_label = c("Sample"))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 8),
+column_names_gp = gpar(fontsize = 8),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE, gp = gpar(fontsize = 8)),
+annotation_label = c("Cluster size", gp = gpar(fontsize = 5)))
+ha_row = rowAnnotation(foo = anno_text(x = row_annotation, which = "row", show_name= TRUE, gp = gpar(fontsize = 8)),
+annotation_label = c("Sample"))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 8),
+column_names_gp = gpar(fontsize = 8),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE, gp = gpar(fontsize = 8)),
+annotation_label = c("Cluster size"))
+ha_col <- HeatmapAnnotation(foo = anno_text(x = columns_annotation, location = 1, rot = 45, show_name= TRUE, gp = gpar(fontsize = 8)),
+annotation_label = c("Cluster size"), annotation_name_gp= gpar(fontsize = 10))
+ha_row = rowAnnotation(foo = anno_text(x = row_annotation, which = "row", show_name= TRUE, gp = gpar(fontsize = 8)),
+annotation_label = c("Sample"), annotation_name_gp= gpar(fontsize = 10))
+ht <- Heatmap(frequency_table, cell_fun = function(j, i, x, y, w, h, fill) {
+if(!is.na(p_values_table[i, j])){
+if(p_values_table[i, j] < 0.001) {
+grid.text("***", x, y, gp = gpar(fontsize = 8) )
+} else if(p_values_table[i, j] < 0.01) {
+grid.text("**", x, y, gp = gpar(fontsize = 8))
+}
+else if (p_values_table[i, j] < 0.05) {
+grid.text("*", x, y, gp = gpar(fontsize = 8))
+}
+}
+else if(is.na(p_values_table[i, j])) {
+grid.text("-", x, y, gp = gpar(fontsize = 8))
+}
+},
+row_names_gp = gpar(fontsize = 8),
+column_names_gp = gpar(fontsize = 8),
+cluster_rows = FALSE,
+cluster_columns = FALSE,
+col = col_fun,
+heatmap_legend_param = list(title = "% mono",
+at = c(0, 0.5, 1), labels = c("0", "50", "100")),
+top_annotation = ha_col,
+left_annotation = ha_row
+)
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+?Legend
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"), labels_gp = gpar(fontsize = 10))
+draw(ht, annotation_legend_list = list(lgd_sig))
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"), labels_gp = gpar(fontsize = 8))
+draw(ht, annotation_legend_list = list(lgd_sig))
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"))
+draw(ht, annotation_legend_list = list(lgd_sig))
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"), legend_gp = gpar(fontsize = 10))
+draw(ht, annotation_legend_list = list(lgd_sig))
+lgd_sig = Legend(pch = c("**", "***", "-"), type = "points", labels = c("<0.01", "<0.001", "NA"), legend_gp = gpar(fontsize = 8))
+draw(ht, annotation_legend_list = list(lgd_sig))
+all_pvalues
+fisher(unlist(all_pvalues))
+?ComplexHeatmap
+?Heatmap
+?Legend
diff --git a/Rcode/.Rprofile b/Rcode/.Rprofile
deleted file mode 100644
index 81b960f..0000000
--- a/Rcode/.Rprofile
+++ /dev/null
@@ -1 +0,0 @@
-source("renv/activate.R")
diff --git a/Rcode/.Rproj.user/shared/notebooks/paths b/Rcode/.Rproj.user/shared/notebooks/paths
index 51b1f34..49ded41 100644
--- a/Rcode/.Rproj.user/shared/notebooks/paths
+++ b/Rcode/.Rproj.user/shared/notebooks/paths
@@ -1,2 +1,12 @@
+/Users/jgawron/Desktop/get_validation_p_values.R="0D6D114C"
 /Users/jgawron/Documents/projects/CTC-SCITE/CTC-SCITE/Rcode/renv/activate.R="42F1FF6D"
+/Users/jgawron/Documents/projects/CTC-SCITE/CTC-SCITE/Rcode/simulateCTCclusters.R="38C0ECFF"
+/Users/jgawron/Documents/projects/CTC-SCITE/CTC-SCITE/Rcode/statistical_test_source.R="1386B5D8"
+/Users/jgawron/Documents/projects/CTC-SCITE/CTC-SCITE/Rcode/validation_cluster_data.R="B0DE1791"
+/Users/jgawron/Documents/projects/CTC-SCITE/CTC-SCITE/Rcode/validation_statistical_test.R="CE4D8CE4"
+/Users/jgawron/Documents/projects/CTC-SCITE/CTC-SCITE/experiments/data/markdowns/Br11.Rmd="2C66048E"
+/Users/jgawron/Documents/projects/CTC-SCITE/CTC-SCITE/experiments/workflow/resources/template.Rmd="3914C49A"
+/Users/jgawron/Documents/projects/CTC_backup/software/Rcode/validation_cluster_data.R="645DA8AD"
+/Users/jgawron/Documents/projects/CTC_backup/software/Rcode/validation_statistical_test.R="7B50B5CC"
+/Users/jgawron/Documents/projects/CTC_backup/topSeparatingMutations/Br16_AC_topSeparators.Rmd="37A59E67"
 /Users/jgawron/Documents/projects/CTC_backup/validation_experiment/foldchange_in_barcode_prevalence.R="097865BC"
diff --git a/Rcode/statistical_test_source.R b/Rcode/statistical_test_source.R
index 8f0c06d..aa69fff 100644
--- a/Rcode/statistical_test_source.R
+++ b/Rcode/statistical_test_source.R
@@ -25,9 +25,9 @@ load_cluster_df <- function(filename) {
 
 
 
-files <- list.files(path = "../../validation_experiment/Cluster_csv_files/", pattern = "\\.csv$")
+files <- list.files(path = "../../../CTC_backup/validation_experiment/Cluster_csv_files/", pattern = "\\.csv$")
 
-files <- map_chr(files, reverse_paste, "../../validation_experiment/Cluster_csv_files/")
+files <- map_chr(files, reverse_paste, "../../../CTC_backup/validation_experiment/Cluster_csv_files/")
 
 myfiles <- map(files, load_cluster_df)
 
@@ -48,14 +48,14 @@ myfiles <- map(files, load_cluster_df)
 #' @examples load_cluster_files()
 #' @noRD
 load_cluster_files <- function() {
-  files <- list.files(path = "../../validation_experiment/Cluster_csv_files/", pattern = "\\.csv$")
+  files <- list.files(path = "../../../CTC_backup/validation_experiment/Cluster_csv_files/", pattern = "\\.csv$")
 
 
 
-  files <- map_chr(files, reverse_paste, "../../validation_experiment/Cluster_csv_files/")
+  files <- map_chr(files, reverse_paste, "../../../CTC_backup/validation_experiment/Cluster_csv_files/")
 
   df_list <- map(files, load_cluster_df)
-  save(df_list, file = "../../validation")
+  # save(df_list, file = "../../../CTC_backup/validatovalidation")
 
 
   # create empty data frame to store summary information
@@ -96,7 +96,7 @@ load_cluster_files <- function() {
     )
     summary_df <- rbind(summary_df, summary_row)
   }
-  save(summary_df, file = "../../validation_experiment/output/summary_df_nine_ninefive.rds")
+  save(summary_df, file = "../../../CTC_backup/validation_experiment/output/summary_df_nine_ninefive.rds")
   return(list("summary" = summary_df, "cluster_data" = df_list))
 }
 
@@ -115,7 +115,7 @@ df_list <- list()
 
 
 basename <- gsub("\\.csv", "", "50k_7_910_b_S276_R2_001.fastq.gz_stats.csv")
-df <- read_delim(paste0("../../validation_experiment/Cluster_csv_files/", file), delim = "\t", col_names = F)
+df <- read_delim(paste0("../../../CTC_backup/validation_experiment/Cluster_csv_files/", file), delim = "\t", col_names = F)
 colnames(df)[1] <- "barcodes"
 colnames(df)[3] <- "counts"
 df_sorted <- df %>% arrange(desc(counts))
@@ -127,7 +127,7 @@ for (file in files) {
   basename <- gsub("\\.csv", "", file)
   tryCatch(
     {
-      df <- read_delim(paste0("../../validation_experiment/Cluster_csv_files/", file), delim = "\t", col_names = F)
+      df <- read_delim(paste0("../../../CTC_backup/validation_experiment/Cluster_csv_files/", file), delim = "\t", col_names = F)
       colnames(df)[1] <- "barcodes"
       colnames(df)[3] <- "counts"
       df_sorted <- df %>% arrange(desc(counts))