From 6184235accd290c26104c3005cadb4e4b427f310 Mon Sep 17 00:00:00 2001 From: Henry Date: Thu, 16 May 2024 18:09:01 +0200 Subject: [PATCH] :bug: regression: merging branches led to a name mismatch - in case of no categorical variables, the error surfaced: input_config has to be used! --- src/move/tasks/encode_data.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/move/tasks/encode_data.py b/src/move/tasks/encode_data.py index bd9936ed..f243518b 100644 --- a/src/move/tasks/encode_data.py +++ b/src/move/tasks/encode_data.py @@ -32,6 +32,7 @@ def encode_data(config: DataConfig): mappings = {} for dataset_name in config.categorical_names: + # ! the string representation seems to be the variable value (here a string) logger.info(f"Encoding '{dataset_name}'") filepath = raw_data_path / f"{dataset_name}.tsv" names, values = io.read_tsv(filepath, sample_names) @@ -45,8 +46,9 @@ def encode_data(config: DataConfig): for input_config in config.continuous_inputs: scale = not hasattr(input_config, "scale") or input_config.scale action_name = "Encoding" if scale else "Reading" - logger.info(f"{action_name} '{input_config.name}'") - filepath = raw_data_path / f"{input_config.name}.tsv" + dataset_name = input_config.name + logger.info(f"{action_name} '{dataset_name}'") + filepath = raw_data_path / f"{dataset_name}.tsv" names, values = io.read_tsv(filepath, sample_names) # Plotting the value distribution for all continuous datasets before preprocessing: