diff --git a/.lintr b/.lintr index b6e900b0..e0df7d03 100644 --- a/.lintr +++ b/.lintr @@ -5,5 +5,7 @@ linters: linters_with_defaults( object_name_linter(c("snake_case", "SNAKE_CASE")), commented_code_linter = NULL ) -exclusions: list() +exclusions: list( + "R/create_dataset.R" + ) encoding: "UTF-8" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9dd611d5..c019686a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,6 +6,7 @@ repos: hooks: - id: style-files args: [--style_fun=tidyverse_style] + exclude: .R/create_dataset.R - id: roxygenize additional_dependencies: - git2r @@ -47,6 +48,7 @@ repos: data/.*| )$ - id: lintr + exclude: .R/create_dataset.R - id: readme-rmd-rendered - id: parsable-R - id: no-browser-statement diff --git a/R/create_dataset.R b/R/create_dataset.R index dccc443a..57ade248 100644 --- a/R/create_dataset.R +++ b/R/create_dataset.R @@ -1,180 +1,186 @@ #' assign the classes to the dataset list -#' @param data_list list of the header information for each block -create_dataset <- function(data_list) { - block_type <- data_list$block_type - text_type <- data_list$text_type - channel_type <- data_list$channel_type - additional_type <- data_list$additional_type - - # select read_class and block_type_name - # TODO: if there is a simpler method, happy to change! - if (block_type == 0) { - read_class <- "text" - - - if (text_type == 8) { - read_class <- "parameter" - block_type_name <- "info_block" - } else if (text_type == 104) { - block_type_name <- "history" - } else if (text_type == 152) { - block_type_name <- "curve_fit" - } else if (text_type == 168) { - block_type_name <- "signature" - } else if (text_type == 240) { - block_type_name <- "integration_method" - } else { - block_type_name <- "text_information" - } - } else if (block_type == 7) { - read_class <- "data" - - if (channel_type == 4) { - block_type_name <- "sc_sample" - } else if (channel_type == 8) { - block_type_name <- "ig_sample" - } else if (channel_type == 12) { - block_type_name <- "ph_sample" - } - } else if (block_type == 11) { - read_class <- "data" - - if (channel_type == 4) { - block_type_name <- "sc_ref" - } else if (channel_type == 8) { - block_type_name <- "ig_ref" - } else { - stop("There is a new block type, open an issue.") - } - } else if (block_type == 15) { - if (channel_type == 16) { - if (text_type == 112) { - read_class <- "parameter" - block_type_name <- "quant_report_ab" - } else if (text_type == 104) { - read_class <- "parameter" - block_type_name <- "me_test_report_ab" - } else { - read_class <- "data" - if (additional_type == 64) { - block_type_name <- "ab_no_atm_comp" - } else if (additional_type == 0) { - block_type_name <- "ab" - } else { - stop("There is a new block type, open an issue.") - } - } - } else if (channel_type == 48) { - if (text_type == 112) { - read_class <- "parameter" - block_type_name <- "quant_report_refl" - } else if (text_type == 104) { - read_class <- "parameter" - block_type_name <- "me_test_report_refl" - } else { - read_class <- "data" - if (additional_type == 64) { - block_type_name <- "refl_no_atm_comp" - } else if (additional_type == 0) { - block_type_name <- "refl" - } else { - stop("There is a new block type, open an issue.") - } - } - } else if (channel_type == 88) { - read_class <- "data" - block_type_name <- "match" - } else if (channel_type == 216) { - read_class <- "data" - block_type_name <- "match_2_chn" - } else { - stop("There is a new block type, open an issue.") - } - } else if (block_type == 23) { - read_class <- "parameter" - - if (channel_type == 4) { - block_type_name <- "sc_sample_data_param" - } else if (channel_type == 8) { - block_type_name <- "ig_sample_data_param" - } else if (channel_type == 12) { - block_type_name <- "ph_sample_data_param" - } else { - stop("There is a new block type, open an issue.") - } - } else if (block_type == 27) { - read_class <- "parameter" - - if (channel_type == 4) { - block_type_name <- "sc_ref_data_param" - } else if (channel_type == 8) { - block_type_name <- "ig_ref_data_param" - } else { - stop("There is a new block type, open an issue.") - } - } else { - read_class <- "parameter" - - if (block_type == 31) { - if (channel_type == 16) { - if (additional_type == 64) { - block_type_name <- "ab_no_atm_comp_data_param" - } else if (additional_type == 0) { - block_type_name <- "ab_data_param" - } else { - stop("There is a new block type, open an issue.") - } - } else if (channel_type == 48) { - if (additional_type == 64) { - block_type_name <- "refl_no_atm_comp_data_param" - } else if (additional_type == 0) { - block_type_name <- "refl_data_param" - } else { - stop("There is a new block type, open an issue.") - } - } else if (channel_type == 88) { - block_type_name <- "match_data_param" - } else if (channel_type == 216) { - block_type_name <- "match_2_chn_data_param" - } else { - stop("There is a new block type, open an issue.") - } - } else if (block_type == 32) { - block_type_name <- "instrument" - } else if (block_type == 40) { - block_type_name <- "instrument_ref" - } else if (block_type == 48) { - block_type_name <- "acquisition" - } else if (block_type == 56) { - block_type_name <- "acquisition_ref" - } else if (block_type == 64) { - block_type_name <- "fourier_transformation" - } else if (block_type == 72) { - block_type_name <- "fourier_transformation_ref" - } else if (block_type == 96) { - block_type_name <- "optics" - } else if (block_type == 104) { - block_type_name <- "optics_ref" - } else if (block_type == 160) { - block_type_name <- "sample" - } else if (block_type == 176) { - if (additional_type == 64) { - block_type_name <- "lab_and_process_param_raw" - } else if (additional_type == 0) { - block_type_name <- "lab_and_process_param_processed" - } else { - stop("There is a new block type, open an issue.") - } - } else { - stop("block not known") +#' @param header_data list of the header information for each block +create_dataset <- function(header_data) { + block_type <- as.character(header_data$block_type) + text_type <- as.character(header_data$text_type) + channel_type <- as.character(header_data$channel_type) + additional_type <- as.character(header_data$additional_type) + + # define compoosite key for chunk + composite_key <- paste( + paste0("b", block_type), paste0("c", channel_type), + paste0("t", text_type), paste0("a", additional_type), + sep = "-" + ) + + # nolint start + + # keys ("---") : + # values (read_class, block_type_name) + # general mapping rules for codes (keys): + # - additional type = 64 => default, no macro processing + # - additional type = 0 => extra processing, found atmospheric compensation + key_value_map <- list( + # block code 0, channel code 0 ------------------------------------------------------------ + # additional information and/or OPUS processing macros + "b0-c0-t0-a(0|64)" = c(read_class = "text", block_type_name = "text_information"), + "b0-c0-t8-a0" = c(read_class = "parameter", block_type_name = "info_block"), + "b0-c0-t104-a64" = c(read_class = "text", block_type_name = "history"), + "b0-c0-t152-a(0|64)" = c(read_class = "text", block_type_name = "curve_fit"), + "b0-c0-t168-a(0|64)" = c(read_class = "text", block_type_name = "signature"), + "b0-c0-t240-a(0|64)" = c(read_class = "text", block_type_name = "integration_method"), + # guess general text + "b0-c0-t\\d+-a(0|64)" = c(read_class = "text", block_type_name = "text_information"), + + # block code 7 ----------------------------------------------------------------------------- + # spectrum types of sample + "b7-c4-t0-a(0|64)" = c(read_class = "data", block_type_name = "sc_sample"), + "b7-c8-t0-a(0|64)" = c(read_class = "data", block_type_name = "ig_sample"), + "b7-c12-t0-a(0|64)" = c(read_class = "data", block_type_name = "ph_sample"), + + # block code 11 ---------------------------------------------------------------------------- + # spectrum types of reference (background) + "b11-c4-t0-a(0|64)" = c(read_class = "data", block_type_name = "sc_ref"), + "b11-c8-t0-a(0|64)" = c(read_class = "data", block_type_name = "ig_ref"), + "b11-c12-t0-a(0|64)" = c(read_class = "data", block_type_name = "ph_ref"), + # block code 15 ----------------------------------------------------------------------------- + # spectrum report blocs + # channel code 15: save (apparent) absorbance + "b15-c16-t112-a0" = c(read_class = "parameter", block_type_name = "quant_report_ab"), + "b15-c16-t104-a1" = c(read_class = "parameter", block_type_name = "me_test_report_ab"), + "b15-c16-t0-a64" = c(read_class = "data", block_type_name = "ab_no_atm_comp"), + "b15-c16-t0-a0" = c(read_class = "data", block_type_name = "ab"), + # channel code 48: save reflectance (settings + "b15-c48-t112-a0" = c(read_class = "parameter", block_type_name = "quant_report_refl"), + "b15-c48-t104-a1" = c(read_class = "parameter", block_type_name = "me_test_report_refl"), # check "a1" + "b15-c48-t0-a64" = c(read_class = "data", block_type_name = "refl_no_atm_comp"), + "b15-c48-t0-a0" = c(read_class = "data", block_type_name = "refl"), + # channel code 88 and 216: spectra matching + "b15-c88-t0-a(0|64)" = c(read_class = "data", block_type_name = "match"), + "b15-c216-t0-a(0|64)" = c(read_class = "data", block_type_name = "match_2_chn"), + + # block code 23 ----------------------------------------------------------------------------- + # data parameters (metadata) for spectrum types of sample + "b23-c4-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sc_sample_data_param"), + "b23-c8-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ig_sample_data_param"), + "b23-c12-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ph_sample_data_param"), + + # block code 27 ----------------------------------------------------------------------------- + # data parameters (metadata) for spectrum types of reference (background) + "b27-c4-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sc_ref_data_param"), + "b27-c8-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ig_ref_data_param"), + "b27-c12-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ph_ref_data_param"), + + # block code 31 ----------------------------------------------------------------------------- + # data parameters (metadata) when spectra (normalized single channels) saved in apparent absorbance + "b31-c16-t0-a64" = c(read_class = "parameter", block_type_name = "ab_no_atm_comp_data_param"), + "b31-c16-t0-a0" = c(read_class = "parameter", block_type_name = "ab_data_param"), + + # data parameters (metadata) when spectra (normalized single channels) saved in reflectance + "b31-c48-t0-a64" = c(read_class = "parameter", block_type_name = "refl_no_atm_comp_data_param"), + "b31-c48-t0-a0" = c(read_class = "parameter", block_type_name = "refl_data_param"), + + # data parameters (metadata) for spectra matching + "b31-c88-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "match_data_param"), + "b31-c216-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "match_2_chn_data_param"), + + ## General metadata blocks + + # block code 32 ----------------------------------------------------------------------------- + "b32-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "instrument"), + + # block code 40 ----------------------------------------------------------------------------- + "b40-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "instrument_ref"), + + # block code 48 ----------------------------------------------------------------------------- + "b48-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "acquisition"), + + # block code 56 ----------------------------------------------------------------------------- + "b56-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "acquisition_ref"), + + # block code 64 ----------------------------------------------------------------------------- + "b64-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "fourier_transformation"), + + # block code 72 ----------------------------------------------------------------------------- + "b72-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "fourier_transformation_ref"), + + # block code 96 ----------------------------------------------------------------------------- + "b96-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "optics"), + + # block code 104 ----------------------------------------------------------------------------- + "b104-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "optics_ref"), + + # block code 160 ----------------------------------------------------------------------------- + "b160-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sample"), + + # block code 176 ----------------------------------------------------------------------------- + "b176-c0-t0-a64" = c(read_class = "parameter", block_type_name = "lab_and_process_param_raw"), + "b176-c0-t0-a0" = c(read_class = "parameter", block_type_name = "lab_and_process_param_processed") + ) + + # nolint end + + # check key names for matching composite key + key_names <- names(key_value_map) + is_match <- unlist(lapply(key_names, function(pat) grepl(pat, composite_key))) + key_value_match <- key_value_map[is_match] + nm_matches <- names(key_value_match) + # because of "b0-c0-t\\d+-a(0|64)" regex + is_match_guess <- grepl("\\\\d\\+", nm_matches) + + if (length(key_value_match) == 1L) { + if (any(is_match_guess)) { + message(paste( + "Guessing header entry for block type 0 to be text information:\n", + "* Composite key :=", composite_key + )) } + } else if (length(key_value_match) > 1L) { + # in block code 0, the less specific guess ("b0-c0-t\\d+-a(0|64)") + # has to be removed + key_value_match[is_match_guess] <- NULL + } else if (length(key_value_match) == 0L) { + # inform about details and what to do for improving {opusreader2} + stop_proactively(composite_key) } + key_value_match_vec <- key_value_match[[1]] + read_class <- unname(key_value_match_vec["read_class"]) + block_type_name <- unname(key_value_match_vec["block_type_name"]) + # create a dataset ds <- structure( - c(data_list, list(block_type_name = block_type_name)), + c(header_data, list(block_type_name = block_type_name)), class = c(read_class) ) return(ds) } + +stop_proactively <- function(composite_key) { + stop( + paste( + "Unknown header entry.\n The following 'composite key' is not yet", + "mapped in the {opusreader2} key-value map of the header:\n", + "*", paste0('"', composite_key, '"'), "\nWe encourage your contribution", + "to feature this new OPUS block by opening a new issue on + https://github.com/spectral-cockpit/opusreader2/issues", + "\nPlease\n", + "1. report reproducibly, using short code with {opusreader2} + (recommended: https://reprex.tidyverse.org)", "\n", + "2. describe briefly\n", + " a) Bruker instrument used\n", + " b) equipment\n", + " c) measurement mode and spectral blocks saved (OPUS settings)\n", + " d) OPUS software version\n", + " e) your general workflow for spectroscopic diagnostics\n", + "3. provide an example OPUS binary file uploaded for public access\n", + " on GitHub (best in issue)\n", + "4. to facilitate widespread support of Bruker devices in open source\n", + " based infrastructure, show the data blocks as print screens in the\n", + " Bruker OPUS software (right-click in Viewer). Please upload the\n", + " contents of all OPUS blocks in individual screenshots." + ), + call. = FALSE + ) +} diff --git a/R/parse_opus.R b/R/parse_opus.R index 40c265c1..b9fb9675 100644 --- a/R/parse_opus.R +++ b/R/parse_opus.R @@ -26,7 +26,7 @@ #' data block (`refl`). #' * **`refl_no_atm_comp`**: class "data" (spectrum; viewer: "Refl"). #' Unprocessed (raw; i.e, not atmospherically compensated) reflectance spectra -#' (`:= sc_sample_corr / sc_ref_corr`). Note that this element is the +#' (`:= sc_sample / sc_ref`). Note that this element is the #' untreated spectra before an eventual "atmospheric compensation" #' routine is applied. #' * **`refl_data_param`** : class "parameter" (viewer: "Data Parameters Refl"). @@ -54,7 +54,7 @@ #' data block (spectrum; see `ab` output). #' * **`ab_no_atm_comp`**: class "data" (spectrum; viewer: "Refl"). #' Unprocessed (raw; i.e, not atmospherically compensated) reflectance spectra -#' (`:= sc_sample_corr / sc_ref_corr`). +#' (`:= sc_sample/ sc_ref`). #' * **`ab_data_param`** : class "parameter" (viewer: "Data Parameters Refl"). #' Parameter list with metadata for `ab` data block (spectrum; see `ab`). #' Note that this element only results if "atmospheric compensation" was diff --git a/R/prepare_spectra.R b/R/prepare_spectra.R index df3af3cd..f14229e5 100644 --- a/R/prepare_spectra.R +++ b/R/prepare_spectra.R @@ -1,10 +1,15 @@ prepare_spectra <- function(ds_list, data_type) { data_pattern <- paste0(data_type, "$") + # block names containing "ab" can also be class "parameters", rather than + # "data" required (e.g., "quant_report_ab", "me_test_report_ab") + pat_match <- grepl(data_pattern, names(ds_list)) + data_class <- vapply(ds_list, function(x) class(x) == "data", logical(1)) + data_match <- pat_match & data_class - ds_data <- ds_list[grepl(data_pattern, names(ds_list))] - ds_param <- ds_list[grepl(paste0(data_type, "_data_param"), names(ds_list))] + index <- which(data_match) - index <- which(grepl(data_pattern, names(ds_list))) + ds_data <- ds_list[data_match] + ds_param <- ds_list[grepl(paste0(data_type, "_data_param"), names(ds_list))] NPT <- ds_param[[1]]$parameters$NPT$parameter_value FXV <- ds_param[[1]]$parameters$FXV$parameter_value @@ -38,7 +43,7 @@ prepare_spectra <- function(ds_list, data_type) { get_data_types <- function(ds_list) { block_names <- names(ds_list) - data_types <- block_names[grepl("sc|ig|ph|^ab|^refl", block_names)] + data_types <- block_names[grepl("sc|ig|ph|^ab|^refl|^match", block_names)] data_types <- unique(gsub("_data_param", "", data_types)) return(data_types) diff --git a/man/create_dataset.Rd b/man/create_dataset.Rd index e02a92bf..2acff7dc 100644 --- a/man/create_dataset.Rd +++ b/man/create_dataset.Rd @@ -4,10 +4,10 @@ \alias{create_dataset} \title{assign the classes to the dataset list} \usage{ -create_dataset(data_list) +create_dataset(header_data) } \arguments{ -\item{data_list}{list of the header information for each block} +\item{header_data}{list of the header information for each block} } \description{ assign the classes to the dataset list diff --git a/man/parse_opus.Rd b/man/parse_opus.Rd index 9bc1a068..ac2c976f 100644 --- a/man/parse_opus.Rd +++ b/man/parse_opus.Rd @@ -32,7 +32,7 @@ information provided include: data block (\code{refl}). \item \strong{\code{refl_no_atm_comp}}: class "data" (spectrum; viewer: "Refl"). Unprocessed (raw; i.e, not atmospherically compensated) reflectance spectra -(\verb{:= sc_sample_corr / sc_ref_corr}). Note that this element is the +(\verb{:= sc_sample / sc_ref}). Note that this element is the untreated spectra before an eventual "atmospheric compensation" routine is applied. \item \strong{\code{refl_data_param}} : class "parameter" (viewer: "Data Parameters Refl"). @@ -60,7 +60,7 @@ the header parsing algorithm. data block (spectrum; see \code{ab} output). \item \strong{\code{ab_no_atm_comp}}: class "data" (spectrum; viewer: "Refl"). Unprocessed (raw; i.e, not atmospherically compensated) reflectance spectra -(\verb{:= sc_sample_corr / sc_ref_corr}). +(\verb{:= sc_sample/ sc_ref}). \item \strong{\code{ab_data_param}} : class "parameter" (viewer: "Data Parameters Refl"). Parameter list with metadata for \code{ab} data block (spectrum; see \code{ab}). Note that this element only results if "atmospheric compensation" was