-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #61 from spectral-cockpit/opt-55-simplify-create-d…
…ataset New key-value mapping to simplify header decoding (& fix CI code check)
- Loading branch information
Showing
7 changed files
with
195 additions
and
180 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,180 +1,186 @@ | ||
#' assign the classes to the dataset list | ||
#' @param data_list list of the header information for each block | ||
create_dataset <- function(data_list) { | ||
block_type <- data_list$block_type | ||
text_type <- data_list$text_type | ||
channel_type <- data_list$channel_type | ||
additional_type <- data_list$additional_type | ||
|
||
# select read_class and block_type_name | ||
# TODO: if there is a simpler method, happy to change! | ||
if (block_type == 0) { | ||
read_class <- "text" | ||
|
||
|
||
if (text_type == 8) { | ||
read_class <- "parameter" | ||
block_type_name <- "info_block" | ||
} else if (text_type == 104) { | ||
block_type_name <- "history" | ||
} else if (text_type == 152) { | ||
block_type_name <- "curve_fit" | ||
} else if (text_type == 168) { | ||
block_type_name <- "signature" | ||
} else if (text_type == 240) { | ||
block_type_name <- "integration_method" | ||
} else { | ||
block_type_name <- "text_information" | ||
} | ||
} else if (block_type == 7) { | ||
read_class <- "data" | ||
|
||
if (channel_type == 4) { | ||
block_type_name <- "sc_sample" | ||
} else if (channel_type == 8) { | ||
block_type_name <- "ig_sample" | ||
} else if (channel_type == 12) { | ||
block_type_name <- "ph_sample" | ||
} | ||
} else if (block_type == 11) { | ||
read_class <- "data" | ||
|
||
if (channel_type == 4) { | ||
block_type_name <- "sc_ref" | ||
} else if (channel_type == 8) { | ||
block_type_name <- "ig_ref" | ||
} else { | ||
stop("There is a new block type, open an issue.") | ||
} | ||
} else if (block_type == 15) { | ||
if (channel_type == 16) { | ||
if (text_type == 112) { | ||
read_class <- "parameter" | ||
block_type_name <- "quant_report_ab" | ||
} else if (text_type == 104) { | ||
read_class <- "parameter" | ||
block_type_name <- "me_test_report_ab" | ||
} else { | ||
read_class <- "data" | ||
if (additional_type == 64) { | ||
block_type_name <- "ab_no_atm_comp" | ||
} else if (additional_type == 0) { | ||
block_type_name <- "ab" | ||
} else { | ||
stop("There is a new block type, open an issue.") | ||
} | ||
} | ||
} else if (channel_type == 48) { | ||
if (text_type == 112) { | ||
read_class <- "parameter" | ||
block_type_name <- "quant_report_refl" | ||
} else if (text_type == 104) { | ||
read_class <- "parameter" | ||
block_type_name <- "me_test_report_refl" | ||
} else { | ||
read_class <- "data" | ||
if (additional_type == 64) { | ||
block_type_name <- "refl_no_atm_comp" | ||
} else if (additional_type == 0) { | ||
block_type_name <- "refl" | ||
} else { | ||
stop("There is a new block type, open an issue.") | ||
} | ||
} | ||
} else if (channel_type == 88) { | ||
read_class <- "data" | ||
block_type_name <- "match" | ||
} else if (channel_type == 216) { | ||
read_class <- "data" | ||
block_type_name <- "match_2_chn" | ||
} else { | ||
stop("There is a new block type, open an issue.") | ||
} | ||
} else if (block_type == 23) { | ||
read_class <- "parameter" | ||
|
||
if (channel_type == 4) { | ||
block_type_name <- "sc_sample_data_param" | ||
} else if (channel_type == 8) { | ||
block_type_name <- "ig_sample_data_param" | ||
} else if (channel_type == 12) { | ||
block_type_name <- "ph_sample_data_param" | ||
} else { | ||
stop("There is a new block type, open an issue.") | ||
} | ||
} else if (block_type == 27) { | ||
read_class <- "parameter" | ||
|
||
if (channel_type == 4) { | ||
block_type_name <- "sc_ref_data_param" | ||
} else if (channel_type == 8) { | ||
block_type_name <- "ig_ref_data_param" | ||
} else { | ||
stop("There is a new block type, open an issue.") | ||
} | ||
} else { | ||
read_class <- "parameter" | ||
|
||
if (block_type == 31) { | ||
if (channel_type == 16) { | ||
if (additional_type == 64) { | ||
block_type_name <- "ab_no_atm_comp_data_param" | ||
} else if (additional_type == 0) { | ||
block_type_name <- "ab_data_param" | ||
} else { | ||
stop("There is a new block type, open an issue.") | ||
} | ||
} else if (channel_type == 48) { | ||
if (additional_type == 64) { | ||
block_type_name <- "refl_no_atm_comp_data_param" | ||
} else if (additional_type == 0) { | ||
block_type_name <- "refl_data_param" | ||
} else { | ||
stop("There is a new block type, open an issue.") | ||
} | ||
} else if (channel_type == 88) { | ||
block_type_name <- "match_data_param" | ||
} else if (channel_type == 216) { | ||
block_type_name <- "match_2_chn_data_param" | ||
} else { | ||
stop("There is a new block type, open an issue.") | ||
} | ||
} else if (block_type == 32) { | ||
block_type_name <- "instrument" | ||
} else if (block_type == 40) { | ||
block_type_name <- "instrument_ref" | ||
} else if (block_type == 48) { | ||
block_type_name <- "acquisition" | ||
} else if (block_type == 56) { | ||
block_type_name <- "acquisition_ref" | ||
} else if (block_type == 64) { | ||
block_type_name <- "fourier_transformation" | ||
} else if (block_type == 72) { | ||
block_type_name <- "fourier_transformation_ref" | ||
} else if (block_type == 96) { | ||
block_type_name <- "optics" | ||
} else if (block_type == 104) { | ||
block_type_name <- "optics_ref" | ||
} else if (block_type == 160) { | ||
block_type_name <- "sample" | ||
} else if (block_type == 176) { | ||
if (additional_type == 64) { | ||
block_type_name <- "lab_and_process_param_raw" | ||
} else if (additional_type == 0) { | ||
block_type_name <- "lab_and_process_param_processed" | ||
} else { | ||
stop("There is a new block type, open an issue.") | ||
} | ||
} else { | ||
stop("block not known") | ||
#' @param header_data list of the header information for each block | ||
create_dataset <- function(header_data) { | ||
block_type <- as.character(header_data$block_type) | ||
text_type <- as.character(header_data$text_type) | ||
channel_type <- as.character(header_data$channel_type) | ||
additional_type <- as.character(header_data$additional_type) | ||
|
||
# define compoosite key for chunk | ||
composite_key <- paste( | ||
paste0("b", block_type), paste0("c", channel_type), | ||
paste0("t", text_type), paste0("a", additional_type), | ||
sep = "-" | ||
) | ||
|
||
# nolint start | ||
|
||
# keys ("<block>-<text>-<channel>-<additional_type>") : | ||
# values (read_class, block_type_name) | ||
# general mapping rules for codes (keys): | ||
# - additional type = 64 => default, no macro processing | ||
# - additional type = 0 => extra processing, found atmospheric compensation | ||
key_value_map <- list( | ||
# block code 0, channel code 0 ------------------------------------------------------------ | ||
# additional information and/or OPUS processing macros | ||
"b0-c0-t0-a(0|64)" = c(read_class = "text", block_type_name = "text_information"), | ||
"b0-c0-t8-a0" = c(read_class = "parameter", block_type_name = "info_block"), | ||
"b0-c0-t104-a64" = c(read_class = "text", block_type_name = "history"), | ||
"b0-c0-t152-a(0|64)" = c(read_class = "text", block_type_name = "curve_fit"), | ||
"b0-c0-t168-a(0|64)" = c(read_class = "text", block_type_name = "signature"), | ||
"b0-c0-t240-a(0|64)" = c(read_class = "text", block_type_name = "integration_method"), | ||
# guess general text | ||
"b0-c0-t\\d+-a(0|64)" = c(read_class = "text", block_type_name = "text_information"), | ||
|
||
# block code 7 ----------------------------------------------------------------------------- | ||
# spectrum types of sample | ||
"b7-c4-t0-a(0|64)" = c(read_class = "data", block_type_name = "sc_sample"), | ||
"b7-c8-t0-a(0|64)" = c(read_class = "data", block_type_name = "ig_sample"), | ||
"b7-c12-t0-a(0|64)" = c(read_class = "data", block_type_name = "ph_sample"), | ||
|
||
# block code 11 ---------------------------------------------------------------------------- | ||
# spectrum types of reference (background) | ||
"b11-c4-t0-a(0|64)" = c(read_class = "data", block_type_name = "sc_ref"), | ||
"b11-c8-t0-a(0|64)" = c(read_class = "data", block_type_name = "ig_ref"), | ||
"b11-c12-t0-a(0|64)" = c(read_class = "data", block_type_name = "ph_ref"), | ||
# block code 15 ----------------------------------------------------------------------------- | ||
# spectrum report blocs | ||
# channel code 15: save (apparent) absorbance | ||
"b15-c16-t112-a0" = c(read_class = "parameter", block_type_name = "quant_report_ab"), | ||
"b15-c16-t104-a1" = c(read_class = "parameter", block_type_name = "me_test_report_ab"), | ||
"b15-c16-t0-a64" = c(read_class = "data", block_type_name = "ab_no_atm_comp"), | ||
"b15-c16-t0-a0" = c(read_class = "data", block_type_name = "ab"), | ||
# channel code 48: save reflectance (settings | ||
"b15-c48-t112-a0" = c(read_class = "parameter", block_type_name = "quant_report_refl"), | ||
"b15-c48-t104-a1" = c(read_class = "parameter", block_type_name = "me_test_report_refl"), # check "a1" | ||
"b15-c48-t0-a64" = c(read_class = "data", block_type_name = "refl_no_atm_comp"), | ||
"b15-c48-t0-a0" = c(read_class = "data", block_type_name = "refl"), | ||
# channel code 88 and 216: spectra matching | ||
"b15-c88-t0-a(0|64)" = c(read_class = "data", block_type_name = "match"), | ||
"b15-c216-t0-a(0|64)" = c(read_class = "data", block_type_name = "match_2_chn"), | ||
|
||
# block code 23 ----------------------------------------------------------------------------- | ||
# data parameters (metadata) for spectrum types of sample | ||
"b23-c4-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sc_sample_data_param"), | ||
"b23-c8-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ig_sample_data_param"), | ||
"b23-c12-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ph_sample_data_param"), | ||
|
||
# block code 27 ----------------------------------------------------------------------------- | ||
# data parameters (metadata) for spectrum types of reference (background) | ||
"b27-c4-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sc_ref_data_param"), | ||
"b27-c8-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ig_ref_data_param"), | ||
"b27-c12-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ph_ref_data_param"), | ||
|
||
# block code 31 ----------------------------------------------------------------------------- | ||
# data parameters (metadata) when spectra (normalized single channels) saved in apparent absorbance | ||
"b31-c16-t0-a64" = c(read_class = "parameter", block_type_name = "ab_no_atm_comp_data_param"), | ||
"b31-c16-t0-a0" = c(read_class = "parameter", block_type_name = "ab_data_param"), | ||
|
||
# data parameters (metadata) when spectra (normalized single channels) saved in reflectance | ||
"b31-c48-t0-a64" = c(read_class = "parameter", block_type_name = "refl_no_atm_comp_data_param"), | ||
"b31-c48-t0-a0" = c(read_class = "parameter", block_type_name = "refl_data_param"), | ||
|
||
# data parameters (metadata) for spectra matching | ||
"b31-c88-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "match_data_param"), | ||
"b31-c216-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "match_2_chn_data_param"), | ||
|
||
## General metadata blocks | ||
|
||
# block code 32 ----------------------------------------------------------------------------- | ||
"b32-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "instrument"), | ||
|
||
# block code 40 ----------------------------------------------------------------------------- | ||
"b40-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "instrument_ref"), | ||
|
||
# block code 48 ----------------------------------------------------------------------------- | ||
"b48-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "acquisition"), | ||
|
||
# block code 56 ----------------------------------------------------------------------------- | ||
"b56-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "acquisition_ref"), | ||
|
||
# block code 64 ----------------------------------------------------------------------------- | ||
"b64-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "fourier_transformation"), | ||
|
||
# block code 72 ----------------------------------------------------------------------------- | ||
"b72-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "fourier_transformation_ref"), | ||
|
||
# block code 96 ----------------------------------------------------------------------------- | ||
"b96-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "optics"), | ||
|
||
# block code 104 ----------------------------------------------------------------------------- | ||
"b104-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "optics_ref"), | ||
|
||
# block code 160 ----------------------------------------------------------------------------- | ||
"b160-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sample"), | ||
|
||
# block code 176 ----------------------------------------------------------------------------- | ||
"b176-c0-t0-a64" = c(read_class = "parameter", block_type_name = "lab_and_process_param_raw"), | ||
"b176-c0-t0-a0" = c(read_class = "parameter", block_type_name = "lab_and_process_param_processed") | ||
) | ||
|
||
# nolint end | ||
|
||
# check key names for matching composite key | ||
key_names <- names(key_value_map) | ||
is_match <- unlist(lapply(key_names, function(pat) grepl(pat, composite_key))) | ||
key_value_match <- key_value_map[is_match] | ||
nm_matches <- names(key_value_match) | ||
# because of "b0-c0-t\\d+-a(0|64)" regex | ||
is_match_guess <- grepl("\\\\d\\+", nm_matches) | ||
|
||
if (length(key_value_match) == 1L) { | ||
if (any(is_match_guess)) { | ||
message(paste( | ||
"Guessing header entry for block type 0 to be text information:\n", | ||
"* Composite key :=", composite_key | ||
)) | ||
} | ||
} else if (length(key_value_match) > 1L) { | ||
# in block code 0, the less specific guess ("b0-c0-t\\d+-a(0|64)") | ||
# has to be removed | ||
key_value_match[is_match_guess] <- NULL | ||
} else if (length(key_value_match) == 0L) { | ||
# inform about details and what to do for improving {opusreader2} | ||
stop_proactively(composite_key) | ||
} | ||
|
||
key_value_match_vec <- key_value_match[[1]] | ||
read_class <- unname(key_value_match_vec["read_class"]) | ||
block_type_name <- unname(key_value_match_vec["block_type_name"]) | ||
|
||
# create a dataset | ||
ds <- structure( | ||
c(data_list, list(block_type_name = block_type_name)), | ||
c(header_data, list(block_type_name = block_type_name)), | ||
class = c(read_class) | ||
) | ||
|
||
return(ds) | ||
} | ||
|
||
stop_proactively <- function(composite_key) { | ||
stop( | ||
paste( | ||
"Unknown header entry.\n The following 'composite key' is not yet", | ||
"mapped in the {opusreader2} key-value map of the header:\n", | ||
"*", paste0('"', composite_key, '"'), "\nWe encourage your contribution", | ||
"to feature this new OPUS block by opening a new issue on | ||
https://github.com/spectral-cockpit/opusreader2/issues", | ||
"\nPlease\n", | ||
"1. report reproducibly, using short code with {opusreader2} | ||
(recommended: https://reprex.tidyverse.org)", "\n", | ||
"2. describe briefly\n", | ||
" a) Bruker instrument used\n", | ||
" b) equipment\n", | ||
" c) measurement mode and spectral blocks saved (OPUS settings)\n", | ||
" d) OPUS software version\n", | ||
" e) your general workflow for spectroscopic diagnostics\n", | ||
"3. provide an example OPUS binary file uploaded for public access\n", | ||
" on GitHub (best in issue)\n", | ||
"4. to facilitate widespread support of Bruker devices in open source\n", | ||
" based infrastructure, show the data blocks as print screens in the\n", | ||
" Bruker OPUS software (right-click in Viewer). Please upload the\n", | ||
" contents of all OPUS blocks in individual screenshots." | ||
), | ||
call. = FALSE | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.