Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New key-value mapping to simplify header decoding (& fix CI code check) #61

Merged
merged 13 commits into from
Jan 2, 2023
4 changes: 3 additions & 1 deletion .lintr
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,7 @@ linters: linters_with_defaults(
object_name_linter(c("snake_case", "SNAKE_CASE")),
commented_code_linter = NULL
)
exclusions: list()
exclusions: list(
"R/create_dataset.R"
)
encoding: "UTF-8"
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ repos:
hooks:
- id: style-files
args: [--style_fun=tidyverse_style]
exclude: .R/create_dataset.R
- id: roxygenize
additional_dependencies:
- git2r
Expand Down Expand Up @@ -47,6 +48,7 @@ repos:
data/.*|
)$
- id: lintr
exclude: .R/create_dataset.R
- id: readme-rmd-rendered
- id: parsable-R
- id: no-browser-statement
Expand Down
344 changes: 175 additions & 169 deletions R/create_dataset.R
Original file line number Diff line number Diff line change
@@ -1,180 +1,186 @@
#' assign the classes to the dataset list
#' @param data_list list of the header information for each block
create_dataset <- function(data_list) {
block_type <- data_list$block_type
text_type <- data_list$text_type
channel_type <- data_list$channel_type
additional_type <- data_list$additional_type

# select read_class and block_type_name
# TODO: if there is a simpler method, happy to change!
if (block_type == 0) {
read_class <- "text"


if (text_type == 8) {
read_class <- "parameter"
block_type_name <- "info_block"
} else if (text_type == 104) {
block_type_name <- "history"
} else if (text_type == 152) {
block_type_name <- "curve_fit"
} else if (text_type == 168) {
block_type_name <- "signature"
} else if (text_type == 240) {
block_type_name <- "integration_method"
} else {
block_type_name <- "text_information"
}
} else if (block_type == 7) {
read_class <- "data"

if (channel_type == 4) {
block_type_name <- "sc_sample"
} else if (channel_type == 8) {
block_type_name <- "ig_sample"
} else if (channel_type == 12) {
block_type_name <- "ph_sample"
}
} else if (block_type == 11) {
read_class <- "data"

if (channel_type == 4) {
block_type_name <- "sc_ref"
} else if (channel_type == 8) {
block_type_name <- "ig_ref"
} else {
stop("There is a new block type, open an issue.")
}
} else if (block_type == 15) {
if (channel_type == 16) {
if (text_type == 112) {
read_class <- "parameter"
block_type_name <- "quant_report_ab"
} else if (text_type == 104) {
read_class <- "parameter"
block_type_name <- "me_test_report_ab"
} else {
read_class <- "data"
if (additional_type == 64) {
block_type_name <- "ab_no_atm_comp"
} else if (additional_type == 0) {
block_type_name <- "ab"
} else {
stop("There is a new block type, open an issue.")
}
}
} else if (channel_type == 48) {
if (text_type == 112) {
read_class <- "parameter"
block_type_name <- "quant_report_refl"
} else if (text_type == 104) {
read_class <- "parameter"
block_type_name <- "me_test_report_refl"
} else {
read_class <- "data"
if (additional_type == 64) {
block_type_name <- "refl_no_atm_comp"
} else if (additional_type == 0) {
block_type_name <- "refl"
} else {
stop("There is a new block type, open an issue.")
}
}
} else if (channel_type == 88) {
read_class <- "data"
block_type_name <- "match"
} else if (channel_type == 216) {
read_class <- "data"
block_type_name <- "match_2_chn"
} else {
stop("There is a new block type, open an issue.")
}
} else if (block_type == 23) {
read_class <- "parameter"

if (channel_type == 4) {
block_type_name <- "sc_sample_data_param"
} else if (channel_type == 8) {
block_type_name <- "ig_sample_data_param"
} else if (channel_type == 12) {
block_type_name <- "ph_sample_data_param"
} else {
stop("There is a new block type, open an issue.")
}
} else if (block_type == 27) {
read_class <- "parameter"

if (channel_type == 4) {
block_type_name <- "sc_ref_data_param"
} else if (channel_type == 8) {
block_type_name <- "ig_ref_data_param"
} else {
stop("There is a new block type, open an issue.")
}
} else {
read_class <- "parameter"

if (block_type == 31) {
if (channel_type == 16) {
if (additional_type == 64) {
block_type_name <- "ab_no_atm_comp_data_param"
} else if (additional_type == 0) {
block_type_name <- "ab_data_param"
} else {
stop("There is a new block type, open an issue.")
}
} else if (channel_type == 48) {
if (additional_type == 64) {
block_type_name <- "refl_no_atm_comp_data_param"
} else if (additional_type == 0) {
block_type_name <- "refl_data_param"
} else {
stop("There is a new block type, open an issue.")
}
} else if (channel_type == 88) {
block_type_name <- "match_data_param"
} else if (channel_type == 216) {
block_type_name <- "match_2_chn_data_param"
} else {
stop("There is a new block type, open an issue.")
}
} else if (block_type == 32) {
block_type_name <- "instrument"
} else if (block_type == 40) {
block_type_name <- "instrument_ref"
} else if (block_type == 48) {
block_type_name <- "acquisition"
} else if (block_type == 56) {
block_type_name <- "acquisition_ref"
} else if (block_type == 64) {
block_type_name <- "fourier_transformation"
} else if (block_type == 72) {
block_type_name <- "fourier_transformation_ref"
} else if (block_type == 96) {
block_type_name <- "optics"
} else if (block_type == 104) {
block_type_name <- "optics_ref"
} else if (block_type == 160) {
block_type_name <- "sample"
} else if (block_type == 176) {
if (additional_type == 64) {
block_type_name <- "lab_and_process_param_raw"
} else if (additional_type == 0) {
block_type_name <- "lab_and_process_param_processed"
} else {
stop("There is a new block type, open an issue.")
}
} else {
stop("block not known")
#' @param header_data list of the header information for each block
create_dataset <- function(header_data) {
block_type <- as.character(header_data$block_type)
text_type <- as.character(header_data$text_type)
channel_type <- as.character(header_data$channel_type)
additional_type <- as.character(header_data$additional_type)

# define compoosite key for chunk
composite_key <- paste(
paste0("b", block_type), paste0("c", channel_type),
paste0("t", text_type), paste0("a", additional_type),
sep = "-"
)

# nolint start

# keys ("<block>-<text>-<channel>-<additional_type>") :
# values (read_class, block_type_name)
# general mapping rules for codes (keys):
# - additional type = 64 => default, no macro processing
# - additional type = 0 => extra processing, found atmospheric compensation
key_value_map <- list(
# block code 0, channel code 0 ------------------------------------------------------------
# additional information and/or OPUS processing macros
"b0-c0-t0-a(0|64)" = c(read_class = "text", block_type_name = "text_information"),
"b0-c0-t8-a0" = c(read_class = "parameter", block_type_name = "info_block"),
"b0-c0-t104-a64" = c(read_class = "text", block_type_name = "history"),
"b0-c0-t152-a(0|64)" = c(read_class = "text", block_type_name = "curve_fit"),
"b0-c0-t168-a(0|64)" = c(read_class = "text", block_type_name = "signature"),
"b0-c0-t240-a(0|64)" = c(read_class = "text", block_type_name = "integration_method"),
# guess general text
"b0-c0-t\\d+-a(0|64)" = c(read_class = "text", block_type_name = "text_information"),

# block code 7 -----------------------------------------------------------------------------
# spectrum types of sample
"b7-c4-t0-a(0|64)" = c(read_class = "data", block_type_name = "sc_sample"),
"b7-c8-t0-a(0|64)" = c(read_class = "data", block_type_name = "ig_sample"),
"b7-c12-t0-a(0|64)" = c(read_class = "data", block_type_name = "ph_sample"),

# block code 11 ----------------------------------------------------------------------------
# spectrum types of reference (background)
"b11-c4-t0-a(0|64)" = c(read_class = "data", block_type_name = "sc_ref"),
"b11-c8-t0-a(0|64)" = c(read_class = "data", block_type_name = "ig_ref"),
"b11-c12-t0-a(0|64)" = c(read_class = "data", block_type_name = "ph_ref"),
# block code 15 -----------------------------------------------------------------------------
# spectrum report blocs
# channel code 15: save (apparent) absorbance
"b15-c16-t112-a0" = c(read_class = "parameter", block_type_name = "quant_report_ab"),
"b15-c16-t104-a1" = c(read_class = "parameter", block_type_name = "me_test_report_ab"),
"b15-c16-t0-a64" = c(read_class = "data", block_type_name = "ab_no_atm_comp"),
"b15-c16-t0-a0" = c(read_class = "data", block_type_name = "ab"),
# channel code 48: save reflectance (settings
"b15-c48-t112-a0" = c(read_class = "parameter", block_type_name = "quant_report_refl"),
"b15-c48-t104-a1" = c(read_class = "parameter", block_type_name = "me_test_report_refl"), # check "a1"
"b15-c48-t0-a64" = c(read_class = "data", block_type_name = "refl_no_atm_comp"),
"b15-c48-t0-a0" = c(read_class = "data", block_type_name = "refl"),
# channel code 88 and 216: spectra matching
"b15-c88-t0-a(0|64)" = c(read_class = "data", block_type_name = "match"),
"b15-c216-t0-a(0|64)" = c(read_class = "data", block_type_name = "match_2_chn"),

# block code 23 -----------------------------------------------------------------------------
# data parameters (metadata) for spectrum types of sample
"b23-c4-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sc_sample_data_param"),
"b23-c8-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ig_sample_data_param"),
"b23-c12-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ph_sample_data_param"),

# block code 27 -----------------------------------------------------------------------------
# data parameters (metadata) for spectrum types of reference (background)
"b27-c4-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sc_ref_data_param"),
"b27-c8-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ig_ref_data_param"),
"b27-c12-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ph_ref_data_param"),

# block code 31 -----------------------------------------------------------------------------
# data parameters (metadata) when spectra (normalized single channels) saved in apparent absorbance
"b31-c16-t0-a64" = c(read_class = "parameter", block_type_name = "ab_no_atm_comp_data_param"),
"b31-c16-t0-a0" = c(read_class = "parameter", block_type_name = "ab_data_param"),

# data parameters (metadata) when spectra (normalized single channels) saved in reflectance
"b31-c48-t0-a64" = c(read_class = "parameter", block_type_name = "refl_no_atm_comp_data_param"),
"b31-c48-t0-a0" = c(read_class = "parameter", block_type_name = "refl_data_param"),

# data parameters (metadata) for spectra matching
"b31-c88-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "match_data_param"),
"b31-c216-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "match_2_chn_data_param"),

## General metadata blocks

# block code 32 -----------------------------------------------------------------------------
"b32-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "instrument"),

# block code 40 -----------------------------------------------------------------------------
"b40-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "instrument_ref"),

# block code 48 -----------------------------------------------------------------------------
"b48-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "acquisition"),

# block code 56 -----------------------------------------------------------------------------
"b56-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "acquisition_ref"),

# block code 64 -----------------------------------------------------------------------------
"b64-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "fourier_transformation"),

# block code 72 -----------------------------------------------------------------------------
"b72-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "fourier_transformation_ref"),

# block code 96 -----------------------------------------------------------------------------
"b96-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "optics"),

# block code 104 -----------------------------------------------------------------------------
"b104-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "optics_ref"),

# block code 160 -----------------------------------------------------------------------------
"b160-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sample"),

# block code 176 -----------------------------------------------------------------------------
"b176-c0-t0-a64" = c(read_class = "parameter", block_type_name = "lab_and_process_param_raw"),
"b176-c0-t0-a0" = c(read_class = "parameter", block_type_name = "lab_and_process_param_processed")
)

# nolint end

# check key names for matching composite key
key_names <- names(key_value_map)
is_match <- unlist(lapply(key_names, function(pat) grepl(pat, composite_key)))
key_value_match <- key_value_map[is_match]
nm_matches <- names(key_value_match)
# because of "b0-c0-t\\d+-a(0|64)" regex
is_match_guess <- grepl("\\\\d\\+", nm_matches)

if (length(key_value_match) == 1L) {
if (any(is_match_guess)) {
message(paste(
"Guessing header entry for block type 0 to be text information:\n",
"* Composite key :=", composite_key
))
}
} else if (length(key_value_match) > 1L) {
# in block code 0, the less specific guess ("b0-c0-t\\d+-a(0|64)")
# has to be removed
key_value_match[is_match_guess] <- NULL
} else if (length(key_value_match) == 0L) {
# inform about details and what to do for improving {opusreader2}
stop_proactively(composite_key)
}

key_value_match_vec <- key_value_match[[1]]
read_class <- unname(key_value_match_vec["read_class"])
block_type_name <- unname(key_value_match_vec["block_type_name"])

# create a dataset
ds <- structure(
c(data_list, list(block_type_name = block_type_name)),
c(header_data, list(block_type_name = block_type_name)),
class = c(read_class)
)

return(ds)
}

stop_proactively <- function(composite_key) {
stop(
paste(
"Unknown header entry.\n The following 'composite key' is not yet",
"mapped in the {opusreader2} key-value map of the header:\n",
"*", paste0('"', composite_key, '"'), "\nWe encourage your contribution",
"to feature this new OPUS block by opening a new issue on
https://github.com/spectral-cockpit/opusreader2/issues",
"\nPlease\n",
"1. report reproducibly, using short code with {opusreader2}
(recommended: https://reprex.tidyverse.org)", "\n",
"2. describe briefly\n",
" a) Bruker instrument used\n",
" b) equipment\n",
" c) measurement mode and spectral blocks saved (OPUS settings)\n",
" d) OPUS software version\n",
" e) your general workflow for spectroscopic diagnostics\n",
"3. provide an example OPUS binary file uploaded for public access\n",
" on GitHub (best in issue)\n",
"4. to facilitate widespread support of Bruker devices in open source\n",
" based infrastructure, show the data blocks as print screens in the\n",
" Bruker OPUS software (right-click in Viewer). Please upload the\n",
" contents of all OPUS blocks in individual screenshots."
),
call. = FALSE
)
}
4 changes: 2 additions & 2 deletions R/parse_opus.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#' data block (`refl`).
#' * **`refl_no_atm_comp`**: class "data" (spectrum; viewer: "Refl").
#' Unprocessed (raw; i.e, not atmospherically compensated) reflectance spectra
#' (`:= sc_sample_corr / sc_ref_corr`). Note that this element is the
#' (`:= sc_sample / sc_ref`). Note that this element is the
#' untreated spectra before an eventual "atmospheric compensation"
#' routine is applied.
#' * **`refl_data_param`** : class "parameter" (viewer: "Data Parameters Refl").
Expand Down Expand Up @@ -54,7 +54,7 @@
#' data block (spectrum; see `ab` output).
#' * **`ab_no_atm_comp`**: class "data" (spectrum; viewer: "Refl").
#' Unprocessed (raw; i.e, not atmospherically compensated) reflectance spectra
#' (`:= sc_sample_corr / sc_ref_corr`).
#' (`:= sc_sample/ sc_ref`).
#' * **`ab_data_param`** : class "parameter" (viewer: "Data Parameters Refl").
#' Parameter list with metadata for `ab` data block (spectrum; see `ab`).
#' Note that this element only results if "atmospheric compensation" was
Expand Down
Loading