Skip to content

Commit

Permalink
Merge pull request #61 from spectral-cockpit/opt-55-simplify-create-d…
Browse files Browse the repository at this point in the history
…ataset

New key-value mapping to simplify header decoding (& fix CI code check)
  • Loading branch information
philipp-baumann authored Jan 2, 2023
2 parents ecd4d5f + dfa29b4 commit c4be741
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 180 deletions.
4 changes: 3 additions & 1 deletion .lintr
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,7 @@ linters: linters_with_defaults(
object_name_linter(c("snake_case", "SNAKE_CASE")),
commented_code_linter = NULL
)
exclusions: list()
exclusions: list(
"R/create_dataset.R"
)
encoding: "UTF-8"
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ repos:
hooks:
- id: style-files
args: [--style_fun=tidyverse_style]
exclude: .R/create_dataset.R
- id: roxygenize
additional_dependencies:
- git2r
Expand Down Expand Up @@ -47,6 +48,7 @@ repos:
data/.*|
)$
- id: lintr
exclude: .R/create_dataset.R
- id: readme-rmd-rendered
- id: parsable-R
- id: no-browser-statement
Expand Down
344 changes: 175 additions & 169 deletions R/create_dataset.R
Original file line number Diff line number Diff line change
@@ -1,180 +1,186 @@
#' assign the classes to the dataset list
#' @param data_list list of the header information for each block
create_dataset <- function(data_list) {
block_type <- data_list$block_type
text_type <- data_list$text_type
channel_type <- data_list$channel_type
additional_type <- data_list$additional_type

# select read_class and block_type_name
# TODO: if there is a simpler method, happy to change!
if (block_type == 0) {
read_class <- "text"


if (text_type == 8) {
read_class <- "parameter"
block_type_name <- "info_block"
} else if (text_type == 104) {
block_type_name <- "history"
} else if (text_type == 152) {
block_type_name <- "curve_fit"
} else if (text_type == 168) {
block_type_name <- "signature"
} else if (text_type == 240) {
block_type_name <- "integration_method"
} else {
block_type_name <- "text_information"
}
} else if (block_type == 7) {
read_class <- "data"

if (channel_type == 4) {
block_type_name <- "sc_sample"
} else if (channel_type == 8) {
block_type_name <- "ig_sample"
} else if (channel_type == 12) {
block_type_name <- "ph_sample"
}
} else if (block_type == 11) {
read_class <- "data"

if (channel_type == 4) {
block_type_name <- "sc_ref"
} else if (channel_type == 8) {
block_type_name <- "ig_ref"
} else {
stop("There is a new block type, open an issue.")
}
} else if (block_type == 15) {
if (channel_type == 16) {
if (text_type == 112) {
read_class <- "parameter"
block_type_name <- "quant_report_ab"
} else if (text_type == 104) {
read_class <- "parameter"
block_type_name <- "me_test_report_ab"
} else {
read_class <- "data"
if (additional_type == 64) {
block_type_name <- "ab_no_atm_comp"
} else if (additional_type == 0) {
block_type_name <- "ab"
} else {
stop("There is a new block type, open an issue.")
}
}
} else if (channel_type == 48) {
if (text_type == 112) {
read_class <- "parameter"
block_type_name <- "quant_report_refl"
} else if (text_type == 104) {
read_class <- "parameter"
block_type_name <- "me_test_report_refl"
} else {
read_class <- "data"
if (additional_type == 64) {
block_type_name <- "refl_no_atm_comp"
} else if (additional_type == 0) {
block_type_name <- "refl"
} else {
stop("There is a new block type, open an issue.")
}
}
} else if (channel_type == 88) {
read_class <- "data"
block_type_name <- "match"
} else if (channel_type == 216) {
read_class <- "data"
block_type_name <- "match_2_chn"
} else {
stop("There is a new block type, open an issue.")
}
} else if (block_type == 23) {
read_class <- "parameter"

if (channel_type == 4) {
block_type_name <- "sc_sample_data_param"
} else if (channel_type == 8) {
block_type_name <- "ig_sample_data_param"
} else if (channel_type == 12) {
block_type_name <- "ph_sample_data_param"
} else {
stop("There is a new block type, open an issue.")
}
} else if (block_type == 27) {
read_class <- "parameter"

if (channel_type == 4) {
block_type_name <- "sc_ref_data_param"
} else if (channel_type == 8) {
block_type_name <- "ig_ref_data_param"
} else {
stop("There is a new block type, open an issue.")
}
} else {
read_class <- "parameter"

if (block_type == 31) {
if (channel_type == 16) {
if (additional_type == 64) {
block_type_name <- "ab_no_atm_comp_data_param"
} else if (additional_type == 0) {
block_type_name <- "ab_data_param"
} else {
stop("There is a new block type, open an issue.")
}
} else if (channel_type == 48) {
if (additional_type == 64) {
block_type_name <- "refl_no_atm_comp_data_param"
} else if (additional_type == 0) {
block_type_name <- "refl_data_param"
} else {
stop("There is a new block type, open an issue.")
}
} else if (channel_type == 88) {
block_type_name <- "match_data_param"
} else if (channel_type == 216) {
block_type_name <- "match_2_chn_data_param"
} else {
stop("There is a new block type, open an issue.")
}
} else if (block_type == 32) {
block_type_name <- "instrument"
} else if (block_type == 40) {
block_type_name <- "instrument_ref"
} else if (block_type == 48) {
block_type_name <- "acquisition"
} else if (block_type == 56) {
block_type_name <- "acquisition_ref"
} else if (block_type == 64) {
block_type_name <- "fourier_transformation"
} else if (block_type == 72) {
block_type_name <- "fourier_transformation_ref"
} else if (block_type == 96) {
block_type_name <- "optics"
} else if (block_type == 104) {
block_type_name <- "optics_ref"
} else if (block_type == 160) {
block_type_name <- "sample"
} else if (block_type == 176) {
if (additional_type == 64) {
block_type_name <- "lab_and_process_param_raw"
} else if (additional_type == 0) {
block_type_name <- "lab_and_process_param_processed"
} else {
stop("There is a new block type, open an issue.")
}
} else {
stop("block not known")
#' @param header_data list of the header information for each block
create_dataset <- function(header_data) {
block_type <- as.character(header_data$block_type)
text_type <- as.character(header_data$text_type)
channel_type <- as.character(header_data$channel_type)
additional_type <- as.character(header_data$additional_type)

# define compoosite key for chunk
composite_key <- paste(
paste0("b", block_type), paste0("c", channel_type),
paste0("t", text_type), paste0("a", additional_type),
sep = "-"
)

# nolint start

# keys ("<block>-<text>-<channel>-<additional_type>") :
# values (read_class, block_type_name)
# general mapping rules for codes (keys):
# - additional type = 64 => default, no macro processing
# - additional type = 0 => extra processing, found atmospheric compensation
key_value_map <- list(
# block code 0, channel code 0 ------------------------------------------------------------
# additional information and/or OPUS processing macros
"b0-c0-t0-a(0|64)" = c(read_class = "text", block_type_name = "text_information"),
"b0-c0-t8-a0" = c(read_class = "parameter", block_type_name = "info_block"),
"b0-c0-t104-a64" = c(read_class = "text", block_type_name = "history"),
"b0-c0-t152-a(0|64)" = c(read_class = "text", block_type_name = "curve_fit"),
"b0-c0-t168-a(0|64)" = c(read_class = "text", block_type_name = "signature"),
"b0-c0-t240-a(0|64)" = c(read_class = "text", block_type_name = "integration_method"),
# guess general text
"b0-c0-t\\d+-a(0|64)" = c(read_class = "text", block_type_name = "text_information"),

# block code 7 -----------------------------------------------------------------------------
# spectrum types of sample
"b7-c4-t0-a(0|64)" = c(read_class = "data", block_type_name = "sc_sample"),
"b7-c8-t0-a(0|64)" = c(read_class = "data", block_type_name = "ig_sample"),
"b7-c12-t0-a(0|64)" = c(read_class = "data", block_type_name = "ph_sample"),

# block code 11 ----------------------------------------------------------------------------
# spectrum types of reference (background)
"b11-c4-t0-a(0|64)" = c(read_class = "data", block_type_name = "sc_ref"),
"b11-c8-t0-a(0|64)" = c(read_class = "data", block_type_name = "ig_ref"),
"b11-c12-t0-a(0|64)" = c(read_class = "data", block_type_name = "ph_ref"),
# block code 15 -----------------------------------------------------------------------------
# spectrum report blocs
# channel code 15: save (apparent) absorbance
"b15-c16-t112-a0" = c(read_class = "parameter", block_type_name = "quant_report_ab"),
"b15-c16-t104-a1" = c(read_class = "parameter", block_type_name = "me_test_report_ab"),
"b15-c16-t0-a64" = c(read_class = "data", block_type_name = "ab_no_atm_comp"),
"b15-c16-t0-a0" = c(read_class = "data", block_type_name = "ab"),
# channel code 48: save reflectance (settings
"b15-c48-t112-a0" = c(read_class = "parameter", block_type_name = "quant_report_refl"),
"b15-c48-t104-a1" = c(read_class = "parameter", block_type_name = "me_test_report_refl"), # check "a1"
"b15-c48-t0-a64" = c(read_class = "data", block_type_name = "refl_no_atm_comp"),
"b15-c48-t0-a0" = c(read_class = "data", block_type_name = "refl"),
# channel code 88 and 216: spectra matching
"b15-c88-t0-a(0|64)" = c(read_class = "data", block_type_name = "match"),
"b15-c216-t0-a(0|64)" = c(read_class = "data", block_type_name = "match_2_chn"),

# block code 23 -----------------------------------------------------------------------------
# data parameters (metadata) for spectrum types of sample
"b23-c4-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sc_sample_data_param"),
"b23-c8-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ig_sample_data_param"),
"b23-c12-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ph_sample_data_param"),

# block code 27 -----------------------------------------------------------------------------
# data parameters (metadata) for spectrum types of reference (background)
"b27-c4-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sc_ref_data_param"),
"b27-c8-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ig_ref_data_param"),
"b27-c12-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "ph_ref_data_param"),

# block code 31 -----------------------------------------------------------------------------
# data parameters (metadata) when spectra (normalized single channels) saved in apparent absorbance
"b31-c16-t0-a64" = c(read_class = "parameter", block_type_name = "ab_no_atm_comp_data_param"),
"b31-c16-t0-a0" = c(read_class = "parameter", block_type_name = "ab_data_param"),

# data parameters (metadata) when spectra (normalized single channels) saved in reflectance
"b31-c48-t0-a64" = c(read_class = "parameter", block_type_name = "refl_no_atm_comp_data_param"),
"b31-c48-t0-a0" = c(read_class = "parameter", block_type_name = "refl_data_param"),

# data parameters (metadata) for spectra matching
"b31-c88-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "match_data_param"),
"b31-c216-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "match_2_chn_data_param"),

## General metadata blocks

# block code 32 -----------------------------------------------------------------------------
"b32-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "instrument"),

# block code 40 -----------------------------------------------------------------------------
"b40-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "instrument_ref"),

# block code 48 -----------------------------------------------------------------------------
"b48-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "acquisition"),

# block code 56 -----------------------------------------------------------------------------
"b56-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "acquisition_ref"),

# block code 64 -----------------------------------------------------------------------------
"b64-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "fourier_transformation"),

# block code 72 -----------------------------------------------------------------------------
"b72-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "fourier_transformation_ref"),

# block code 96 -----------------------------------------------------------------------------
"b96-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "optics"),

# block code 104 -----------------------------------------------------------------------------
"b104-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "optics_ref"),

# block code 160 -----------------------------------------------------------------------------
"b160-c0-t0-a(0|64)" = c(read_class = "parameter", block_type_name = "sample"),

# block code 176 -----------------------------------------------------------------------------
"b176-c0-t0-a64" = c(read_class = "parameter", block_type_name = "lab_and_process_param_raw"),
"b176-c0-t0-a0" = c(read_class = "parameter", block_type_name = "lab_and_process_param_processed")
)

# nolint end

# check key names for matching composite key
key_names <- names(key_value_map)
is_match <- unlist(lapply(key_names, function(pat) grepl(pat, composite_key)))
key_value_match <- key_value_map[is_match]
nm_matches <- names(key_value_match)
# because of "b0-c0-t\\d+-a(0|64)" regex
is_match_guess <- grepl("\\\\d\\+", nm_matches)

if (length(key_value_match) == 1L) {
if (any(is_match_guess)) {
message(paste(
"Guessing header entry for block type 0 to be text information:\n",
"* Composite key :=", composite_key
))
}
} else if (length(key_value_match) > 1L) {
# in block code 0, the less specific guess ("b0-c0-t\\d+-a(0|64)")
# has to be removed
key_value_match[is_match_guess] <- NULL
} else if (length(key_value_match) == 0L) {
# inform about details and what to do for improving {opusreader2}
stop_proactively(composite_key)
}

key_value_match_vec <- key_value_match[[1]]
read_class <- unname(key_value_match_vec["read_class"])
block_type_name <- unname(key_value_match_vec["block_type_name"])

# create a dataset
ds <- structure(
c(data_list, list(block_type_name = block_type_name)),
c(header_data, list(block_type_name = block_type_name)),
class = c(read_class)
)

return(ds)
}

stop_proactively <- function(composite_key) {
stop(
paste(
"Unknown header entry.\n The following 'composite key' is not yet",
"mapped in the {opusreader2} key-value map of the header:\n",
"*", paste0('"', composite_key, '"'), "\nWe encourage your contribution",
"to feature this new OPUS block by opening a new issue on
https://github.com/spectral-cockpit/opusreader2/issues",
"\nPlease\n",
"1. report reproducibly, using short code with {opusreader2}
(recommended: https://reprex.tidyverse.org)", "\n",
"2. describe briefly\n",
" a) Bruker instrument used\n",
" b) equipment\n",
" c) measurement mode and spectral blocks saved (OPUS settings)\n",
" d) OPUS software version\n",
" e) your general workflow for spectroscopic diagnostics\n",
"3. provide an example OPUS binary file uploaded for public access\n",
" on GitHub (best in issue)\n",
"4. to facilitate widespread support of Bruker devices in open source\n",
" based infrastructure, show the data blocks as print screens in the\n",
" Bruker OPUS software (right-click in Viewer). Please upload the\n",
" contents of all OPUS blocks in individual screenshots."
),
call. = FALSE
)
}
4 changes: 2 additions & 2 deletions R/parse_opus.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#' data block (`refl`).
#' * **`refl_no_atm_comp`**: class "data" (spectrum; viewer: "Refl").
#' Unprocessed (raw; i.e, not atmospherically compensated) reflectance spectra
#' (`:= sc_sample_corr / sc_ref_corr`). Note that this element is the
#' (`:= sc_sample / sc_ref`). Note that this element is the
#' untreated spectra before an eventual "atmospheric compensation"
#' routine is applied.
#' * **`refl_data_param`** : class "parameter" (viewer: "Data Parameters Refl").
Expand Down Expand Up @@ -54,7 +54,7 @@
#' data block (spectrum; see `ab` output).
#' * **`ab_no_atm_comp`**: class "data" (spectrum; viewer: "Refl").
#' Unprocessed (raw; i.e, not atmospherically compensated) reflectance spectra
#' (`:= sc_sample_corr / sc_ref_corr`).
#' (`:= sc_sample/ sc_ref`).
#' * **`ab_data_param`** : class "parameter" (viewer: "Data Parameters Refl").
#' Parameter list with metadata for `ab` data block (spectrum; see `ab`).
#' Note that this element only results if "atmospheric compensation" was
Expand Down
Loading

0 comments on commit c4be741

Please sign in to comment.