diff --git a/Old Code/not-a-toy.R b/Old Code/not-a-toy.R deleted file mode 100644 index b177648..0000000 --- a/Old Code/not-a-toy.R +++ /dev/null @@ -1,293 +0,0 @@ -# Analysis code for the SERC/UMD 13CH4-labeling incubation -# Adapted from von Fischer and Hedin 2002, 10.1029/2001GB001448 -# Kendalynn Morris -# October 2022 - -library(dplyr) -library(ggplot2) -library(ggpmisc) -library(lubridate) -library(readr) -library(scales) -library(tibble) -library(tidyr) - -theme_set(theme_bw()) - - -# ----- Read in and clean up data ----- - -# Get names of data files -files <- list.files("picarro/", pattern = "*.csv", full.names = TRUE) -# Helper function -read_file <- function(f) { - message("Reading ", f) - read_csv(f, col_types = "ccdcddcddddddddddddddddddddddccccc") %>% - mutate(File = basename(f)) -} -# Read in and pre-process data -lapply(files, read_file) %>% - bind_rows() %>% - # filter out standards and clean up columns - filter(! id %in% c("SERC100ppm", "UMDzero", "SERCzero", "desert5000")) %>% - mutate(Timestamp = mdy_hm(`Date/Time`, tz = "UTC")) %>% - select(Timestamp, id, round, vol, - `HR 12CH4 Mean`, `HR 12CH4 Std`, - `HR 13CH4 Mean`, `HR 13CH4 Std`, - `HR Delta iCH4 Mean`, `HR Delta iCH4 Std`, - `12CO2 Mean`, `12CO2 Std`, - `13CO2 Mean`, `13CO2 Std`, - `Delta 13CO2 Mean`, `Delta 13CO2 Std`, - notes) %>% - # calculate elapsed time for each sample - arrange(id, round) %>% - group_by(id) %>% - mutate(time_days = difftime(Timestamp, min(Timestamp), - units = "days"), - time_days = as.numeric(time_days)) -> - incdat_raw - - -# ----- Constants ----- - -FRAC_K <- 0.98 # 13C consumption as a fraction of 12C consumption (alpha in Eq. 11) -FRAC_P <- 0.01 # 13C production as a fraction of 12C production -AP_P <- FRAC_P / (1 + FRAC_P) * 100 # 13C atom percent of total methane production -VOL_ML <- 130 - -# ----- Unit conversion ----- - -incdat_raw %>% - # Volume of jar = 130 ml or 0.130 l, 1 ppm = 0.001 ml/l, sample is diluted 1:1 - # The Picarro takes 20 ml, but 10 ml injected (see vol); - # therefore ppm to ml = ppm * 0.001 * VOL_ML/1000 * 2 - mutate(cal12CH4ml = `HR 12CH4 Mean` * 0.001 * VOL_ML/1000 * 2 * 1000, - cal13CH4ml = `HR 13CH4 Mean` * 0.001 * VOL_ML/1000 * 2 * 1000, - # for each 10 ml sample. 10 ml of zero air injected - # remaining gas is jar is diluted 12:1 - cal12CH4ml = if_else(round != "T0", cal12CH4ml * 1.083, cal12CH4ml), - cal13CH4ml = if_else(round != "T0", cal13CH4ml * 1.083, cal13CH4ml), - # calculate atom percent (AP) of 13C methane in sample over time - AP_obs = cal13CH4ml / (cal12CH4ml + cal13CH4ml) * 100, - sdC = `HR 12CH4 Std`, sdD = `HR Delta iCH4 Std`) -> - incdat - - -#now we generate sample specific scaling factors for feeding into the cost function -#see vFH2002 Eqs. 12, 13, & 14 -#need standard deviation of isotopic and mass across all measurements for each sample -> sd_obs -#and standard deviation of the analytical precision (instrument stdev) for those obsrvations -#coined here as sd_inst -incdat %>% - group_by(id) %>% - mutate(sd_obsDelta = sd(`HR Delta iCH4 Mean`), - sd_instDelta = sd(sdD), #sdD is the instrument stdev for the delta measurement - sd_obsMass = sd(`HR 12CH4 Mean`), - sd_instMass = sd(sdC), #sdC is the instrument stdev for the 12C mass measurement - Nd = sd_obsDelta/sd_instDelta, - Nm = sd_obsMass/sd_instMass) -> incdat - -# ----- Data visualization ----- - -incdat %>% - mutate(id_numeric = as.factor(id)) %>% - pivot_longer(cols = c(cal12CH4ml, cal13CH4ml, AP_obs)) %>% - ggplot(aes(round, value, group = id, color = id)) + - geom_point() + geom_line() + - ggtitle("POST DATA EXCLUSION") + - facet_wrap( ~ name, scales = "free") -> - p -print(p) -ggsave("./outputs/over_time.png", width = 8, height = 5) - - -# ----- Model-fitting functions ----- - -# Prediction function -# time: vector of time values, numeric (e.g. days); first should be zero -# m0: amount of total methane at time zero -# n0: amount of labeled methane at time zero -# P: production rate of total methane, unit gas/unit time -# k: first-order rate constant for methane consumption, 1/unit time -# Returns a data frame with mt, nt, and AP (atom percent) predictions for each t -ap_prediction <- function(time, m0, n0, P, k) { - # Combined, this is Eq. 11 - # ...except modified for what I think are two mistakes - # 1. Added *100 so that the left part is correctly a percent (per their Appendix A) - # 2. We've just predicted nt and mt, so now doesn't APt flow directly from them?!? - # How does it make sense to add AP_P (as in vF&H eq. 10 and 11)? - - # Equation 9 (and numerator in Eq. 11) is simplified in vFH2002, because as - # the authors assume (in paragraph 15) that - # "there is no production of labeled methane during incubation" - # This may not be true, and thus the following equation for labeled methane - # tracks Equation 5, i.e. it includes both production and consumption terms - kfrac <- k * FRAC_K - pfrac <- P * FRAC_P - nt <- pfrac/kfrac - (pfrac/kfrac - n0) * exp(-kfrac * time) - # Equation 5 (and denominator in Eq. 11): - mt <- P/k - (P/k - m0) * exp(-k * time) - - tibble(mt = mt, - nt = nt, - # Modified Equation 10/11 - AP_pred = nt / mt * 100) # + AP_P -} - -# Cost function called by optim() -# params: named vector holding optimizer-assigned values for P and k -# time: vector of time values, numeric (e.g. days); first should be zero -# m: observed total methane values, same length as time -# n: observed labeled methane values, same length as time -# Nd: normalization factor for delta values, see Eq. 12 -# Nm: normalization factor for methane mass, see Eq. 13 -# Returns the sum of squares between predicted and observed m and AP -cost_function <- function(params, time, m, n, Nd, Nm) { - # message(params["P"], ",", params["k"]) - pred <- ap_prediction(time = time, - m0 = m[1], - n0 = n[1], - P = params["P"], - k = params["k"] - ) - sum((abs(m - pred$mt)/sd(m))*Nm + (abs(n - pred$nt)/sd(n))*Nd) - } - - -# ----- Main ----- - -pk_results <- list() -incdat_out <- list() - -for(i in unique(incdat$id)) { - message("------------------- ", i) - # Isolate this sample's data - incdat %>% - filter(id == i) %>% - select(id, round, vol, time_days, cal12CH4ml, cal13CH4ml, - AP_obs, Nd, Nm) -> - dat - - # Estimate starting k by slope of 13C. This follows para. 21: - # "We then calculate k as the slope of the linear regression of ln(n) - # versus time... - m <- lm(log(cal13CH4ml) ~ time_days, data = dat) - m_slope <- unname(m$coefficients["time_days"]) - message("m_slope = ", m_slope) - # Generally, this slope is negative (net 13CH4 consumption) - # If not, our k0 estimate below won't work - # For now, just ensure it's positive; there's probably a more sophisticated - # way to estimate k0 in this case but save that for the future - m_slope <- -abs(m_slope) - - # "...multiplied by 1/a to correct for fractionation against the - # labeled methane." - # BBL: this should be "1/-a" (see equation 8) - k0 = m_slope / -FRAC_K - message("k0 = ", k0) - - # Let optim() try different values for P and k until it finds best fit to data - result <- optim(par = c("P" = 0.001, "k"= k0), - fn = cost_function, - # Constrain the optimizer so it can't produce <0 values - # for P, nor values <=0 for k - method = "L-BFGS-B", - lower = c("P" = 0.0, "k"= 0.0001), - upper = c("P" = Inf, "k"= Inf), - - # "..." that the optimizer will pass to cost_function: - time = dat$time_days, - m = dat$cal12CH4ml + dat$cal13CH4ml, - n = dat$cal13CH4ml, - Nd = dat$Nd, - Nm = dat$Nm) - - message("Optimizer solution:") - print(result) - P <- result$par["P"] - pk_results[[i]] <- tibble(P = P, - k = result$par["k"], - k0 = k0, - convergence = result$convergence, - message = result$message) - - # Predict based on the optimized parameters - pred <- ap_prediction(time = dat$time_days, - m0 = dat$cal12CH4ml[1] + dat$cal13CH4ml[1], - n0 = dat$cal13CH4ml[1], - P = P, - k = result$par["k"]) - dat <- bind_cols(dat, pred) - - # Calculate implied consumption (ml/day) based on predictions - # Ct = (P*time - ([CH4t] - [CH4t-1]))/time - # or expressed in the notation of Equation 4: dm/dt = P - C - # so C = P - dm/dt - # (could also just use equation 2, but this is a good check on things) - total_methane <- dat$cal12CH4ml + dat$cal13CH4ml - change_methane <- c(0, diff(total_methane)) - change_time <- c(0, diff(dat$time_days)) - dat$Pt <- P * change_time - dat$Ct <- (-change_methane + (P * change_time)) / change_time - - incdat_out[[i]] <- dat -} - -pk_results <- bind_rows(pk_results, .id = "id") -incdat_out <- bind_rows(incdat_out) - -incdat_out %>% - # compute correlation between predictions and observations - group_by(id) %>% - summarise(m_cor = cor(cal12CH4ml + cal13CH4ml, mt), - ap_cor = cor(AP_obs, AP_pred)) -> - performance_summary - -performance_summary %>% - right_join(pk_results, by = "id") -> - pk_results -performance_summary %>% - right_join(incdat_out, by = "id") -> - incdat_out - -message("Done with optimization") - -# ----- Plot AP results ----- - -ap_pred <- ggplot(incdat_out, aes(time_days)) + - geom_point(aes(y = AP_obs)) + - geom_line(aes(y = AP_pred, color = ap_cor), size = 1) + - facet_wrap(~as.numeric(id), scales = "free") -print(ap_pred) -ggsave("./outputs/ap_pred.png", width = 8, height = 6) - -# ----- Plot total methane results ----- - -m_pred <- ggplot(incdat_out, aes(time_days)) + - geom_point(aes(y = cal12CH4ml + cal13CH4ml)) + - geom_line(aes(y = mt, color = m_cor,), size = 1) + - facet_wrap(~as.numeric(id), scales = "free") -print(m_pred) -ggsave("./outputs/m_pred.png", width = 8, height = 6) - -# ----- Visualize data, coloring by fit ----- - -incdat_out %>% - pivot_longer(cols = c(cal12CH4ml, cal13CH4ml, AP_obs)) %>% - ggplot(aes(round, value, group = id, color = ap_cor)) + - geom_point() + geom_line() + - facet_wrap(~name, scales = "free") -> - ap_fits -print(ap_fits) -ggsave("./outputs/ap_fits.png", width = 8, height = 6) - -ggplot(pk_results, - aes(ap_cor, - fill = as.factor(convergence))) + - geom_histogram() - - -print(pk_results) - -message("All done.") - diff --git a/misc/Methane play.xlsx b/misc/Methane play.xlsx deleted file mode 100644 index 8289d09..0000000 Binary files a/misc/Methane play.xlsx and /dev/null differ diff --git a/picarro/KMspike_screenshot.png b/picarro/KMspike_screenshot.png deleted file mode 100644 index b379d82..0000000 Binary files a/picarro/KMspike_screenshot.png and /dev/null differ diff --git a/picarro/Trial1_C.jpeg b/picarro/Trial1_C.jpeg deleted file mode 100644 index fbd9743..0000000 Binary files a/picarro/Trial1_C.jpeg and /dev/null differ diff --git a/picarro/Trial1_D.jpeg b/picarro/Trial1_D.jpeg deleted file mode 100644 index ca74e11..0000000 Binary files a/picarro/Trial1_D.jpeg and /dev/null differ