From d8ca8a5668fb2bfe2df215c298e3816a7df96a2a Mon Sep 17 00:00:00 2001 From: Mostafa Kalhor Date: Mon, 7 Aug 2023 13:49:16 +0000 Subject: [PATCH] rescoring is working but xl_fdr should be added --- spectrum_fundamentals/constants.py | 8 + .../metrics/fragments_ratio.py | 627 +++++++++++++----- spectrum_fundamentals/metrics/metric.py | 19 +- spectrum_fundamentals/metrics/percolator.py | 138 ++-- spectrum_fundamentals/metrics/similarity.py | 304 +++++++-- 5 files changed, 821 insertions(+), 275 deletions(-) diff --git a/spectrum_fundamentals/constants.py b/spectrum_fundamentals/constants.py index b395607..01650e4 100644 --- a/spectrum_fundamentals/constants.py +++ b/spectrum_fundamentals/constants.py @@ -218,6 +218,14 @@ DOUBLE_CHARGED_MASK = np.tile([0, 1, 0, 0, 1, 0], SEQ_LEN - 1) TRIPLE_CHARGED_MASK = np.tile([0, 0, 1, 0, 0, 1], SEQ_LEN - 1) +B_ION_MASK_XL = np.tile([0, 0, 0, 1, 1, 1], (SEQ_LEN - 1)*2) +Y_ION_MASK_XL = np.tile([1, 1, 1, 0, 0, 0], (SEQ_LEN - 1)*2) +SINGLE_CHARGED_MASK_XL = np.tile([1, 0, 0, 1, 0, 0], (SEQ_LEN - 1)*2) +DOUBLE_CHARGED_MASK_XL = np.tile([0, 1, 0, 0, 1, 0], (SEQ_LEN - 1)*2) +TRIPLE_CHARGED_MASK_XL = np.tile([0, 0, 1, 0, 0, 1], (SEQ_LEN - 1)*2) + + + SHARED_DATA_COLUMNS = ["RAW_FILE", "SCAN_NUMBER"] META_DATA_ONLY_COLUMNS = [ diff --git a/spectrum_fundamentals/metrics/fragments_ratio.py b/spectrum_fundamentals/metrics/fragments_ratio.py index 26a1fa9..a8176f2 100644 --- a/spectrum_fundamentals/metrics/fragments_ratio.py +++ b/spectrum_fundamentals/metrics/fragments_ratio.py @@ -6,6 +6,9 @@ from .. 
import constants from .metric import Metric +from oktoberfest.utils.config import Config +from oktoberfest.constants_dir import CONFIG_PATH + class ObservationState(enum.IntEnum): @@ -44,10 +47,22 @@ def count_with_ion_mask( """ if ion_mask is None: ion_mask = [] - if len(ion_mask) == 0: - ion_mask = scipy.sparse.csr_matrix(np.ones((174, 1))) + + config = Config() + config.read(CONFIG_PATH) + if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]): + if len(ion_mask) == 0: + + ion_mask = scipy.sparse.csr_matrix(np.ones((348, 1))) + else: + ion_mask = scipy.sparse.csr_matrix(ion_mask).T + else: - ion_mask = scipy.sparse.csr_matrix(ion_mask).T + if len(ion_mask) == 0: + ion_mask = scipy.sparse.csr_matrix(np.ones((174, 1))) + else: + ion_mask = scipy.sparse.csr_matrix(ion_mask).T + return scipy.sparse.csr_matrix.dot(boolean_array, ion_mask).toarray().flatten() @staticmethod @@ -130,164 +145,458 @@ def get_observation_state( def calc(self): """Adds columns with count, fraction and fraction_predicted features to metrics_val dataframe.""" - mask_observed_valid = FragmentsRatio.get_mask_observed_valid(self.true_intensities) - observed_boolean = FragmentsRatio.make_boolean(self.true_intensities, mask_observed_valid) - predicted_boolean = FragmentsRatio.make_boolean(self.pred_intensities, mask_observed_valid, cutoff=0.05) - observation_state = FragmentsRatio.get_observation_state( - observed_boolean, predicted_boolean, mask_observed_valid - ) - valid_ions = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid)) - valid_ions_b = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid, constants.B_ION_MASK)) - valid_ions_y = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid, constants.Y_ION_MASK)) - - # counting metrics - self.metrics_val["count_predicted"] = FragmentsRatio.count_with_ion_mask(predicted_boolean) - self.metrics_val["count_predicted_b"] = FragmentsRatio.count_with_ion_mask( - 
predicted_boolean, constants.B_ION_MASK - ) - self.metrics_val["count_predicted_y"] = FragmentsRatio.count_with_ion_mask( - predicted_boolean, constants.Y_ION_MASK - ) - - self.metrics_val["count_observed"] = FragmentsRatio.count_with_ion_mask(observed_boolean) - self.metrics_val["count_observed_b"] = FragmentsRatio.count_with_ion_mask( - observed_boolean, constants.B_ION_MASK - ) - self.metrics_val["count_observed_y"] = FragmentsRatio.count_with_ion_mask( - observed_boolean, constants.Y_ION_MASK - ) - - self.metrics_val["count_observed_and_predicted"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.OBS_AND_PRED - ) - self.metrics_val["count_observed_and_predicted_b"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.OBS_AND_PRED, constants.B_ION_MASK - ) - self.metrics_val["count_observed_and_predicted_y"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.OBS_AND_PRED, constants.Y_ION_MASK - ) - - self.metrics_val["count_not_observed_and_not_predicted"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.NOT_OBS_AND_NOT_PRED - ) - self.metrics_val["count_not_observed_and_not_predicted_b"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.NOT_OBS_AND_NOT_PRED, constants.B_ION_MASK - ) - self.metrics_val["count_not_observed_and_not_predicted_y"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.NOT_OBS_AND_NOT_PRED, constants.Y_ION_MASK - ) - - self.metrics_val["count_observed_but_not_predicted"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.OBS_BUT_NOT_PRED - ) - self.metrics_val["count_observed_but_not_predicted_b"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.OBS_BUT_NOT_PRED, constants.B_ION_MASK - ) - self.metrics_val["count_observed_but_not_predicted_y"] = FragmentsRatio.count_observation_states( - 
observation_state, ObservationState.OBS_BUT_NOT_PRED, constants.Y_ION_MASK - ) - - self.metrics_val["count_not_observed_but_predicted"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.NOT_OBS_BUT_PRED - ) - self.metrics_val["count_not_observed_but_predicted_b"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.NOT_OBS_BUT_PRED, constants.B_ION_MASK - ) - self.metrics_val["count_not_observed_but_predicted_y"] = FragmentsRatio.count_observation_states( - observation_state, ObservationState.NOT_OBS_BUT_PRED, constants.Y_ION_MASK - ) - - # fractional count metrics - self.metrics_val["fraction_predicted"] = self.metrics_val["count_predicted"].values / valid_ions - self.metrics_val["fraction_predicted_b"] = self.metrics_val["count_predicted_b"] / valid_ions_b - self.metrics_val["fraction_predicted_y"] = self.metrics_val["count_predicted_y"] / valid_ions_y - - self.metrics_val["fraction_observed"] = self.metrics_val["count_observed"] / valid_ions - self.metrics_val["fraction_observed_b"] = self.metrics_val["count_observed_b"] / valid_ions_b - self.metrics_val["fraction_observed_y"] = self.metrics_val["count_observed_y"] / valid_ions_y - - self.metrics_val["fraction_observed_and_predicted"] = ( - self.metrics_val["count_observed_and_predicted"] / valid_ions - ) - self.metrics_val["fraction_observed_and_predicted_b"] = ( - self.metrics_val["count_observed_and_predicted_b"] / valid_ions_b - ) - self.metrics_val["fraction_observed_and_predicted_y"] = ( - self.metrics_val["count_observed_and_predicted_y"] / valid_ions_y - ) - - self.metrics_val["fraction_not_observed_and_not_predicted"] = ( - self.metrics_val["count_not_observed_and_not_predicted"] / valid_ions - ) - self.metrics_val["fraction_not_observed_and_not_predicted_b"] = ( - self.metrics_val["count_not_observed_and_not_predicted_b"] / valid_ions_b - ) - self.metrics_val["fraction_not_observed_and_not_predicted_y"] = ( - 
self.metrics_val["count_not_observed_and_not_predicted_y"] / valid_ions_y - ) - - self.metrics_val["fraction_observed_but_not_predicted"] = ( - self.metrics_val["count_observed_but_not_predicted"] / valid_ions - ) - self.metrics_val["fraction_observed_but_not_predicted_b"] = ( - self.metrics_val["count_observed_but_not_predicted_b"] / valid_ions_b - ) - self.metrics_val["fraction_observed_but_not_predicted_y"] = ( - self.metrics_val["count_observed_but_not_predicted_y"] / valid_ions_y - ) - - self.metrics_val["fraction_not_observed_but_predicted"] = ( - self.metrics_val["count_not_observed_but_predicted"] / valid_ions - ) - self.metrics_val["fraction_not_observed_but_predicted_b"] = ( - self.metrics_val["count_not_observed_but_predicted_b"] / valid_ions_b - ) - self.metrics_val["fraction_not_observed_but_predicted_y"] = ( - self.metrics_val["count_not_observed_but_predicted_y"] / valid_ions_y - ) + config = Config() + config.read(CONFIG_PATH) + if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]): + true_intensities_a = self.true_intensities[:,0:348] + true_intensities_b = self.true_intensities[:,348:] + pred_intensities_a = self.pred_intensities[:,0:348] + pred_intensities_b = self.pred_intensities[:,348:] + mask_observed_valid_a = FragmentsRatio.get_mask_observed_valid(true_intensities_a) + mask_observed_valid_b = FragmentsRatio.get_mask_observed_valid(true_intensities_b) + observed_boolean_a = FragmentsRatio.make_boolean(true_intensities_a, mask_observed_valid_a) + observed_boolean_b = FragmentsRatio.make_boolean(true_intensities_b, mask_observed_valid_b) + predicted_boolean_a = FragmentsRatio.make_boolean(pred_intensities_a, mask_observed_valid_a, cutoff=0.05) + predicted_boolean_b = FragmentsRatio.make_boolean(pred_intensities_b, mask_observed_valid_b, cutoff=0.05) + observation_state_a = FragmentsRatio.get_observation_state( + observed_boolean_a, predicted_boolean_a, mask_observed_valid_a + ) + observation_state_b = 
FragmentsRatio.get_observation_state( + observed_boolean_b, predicted_boolean_b, mask_observed_valid_b + ) + valid_ions_a = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a)) + valid_ions_b = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b)) + valid_ions_b_a = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, constants.B_ION_MASK_XL)) + valid_ions_b_b = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, constants.B_ION_MASK_XL)) + valid_ions_y_a = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, constants.Y_ION_MASK_XL)) + valid_ions_y_b = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, constants.Y_ION_MASK_XL)) + # counting metrics + self.metrics_val["count_predicted_a"] = FragmentsRatio.count_with_ion_mask(predicted_boolean_a) + self.metrics_val["count_predicted_b"] = FragmentsRatio.count_with_ion_mask(predicted_boolean_b) + self.metrics_val["count_predicted_b_a"] = FragmentsRatio.count_with_ion_mask( + predicted_boolean_a, constants.B_ION_MASK_XL + ) + self.metrics_val["count_predicted_b_b"] = FragmentsRatio.count_with_ion_mask( + predicted_boolean_b, constants.B_ION_MASK_XL + ) + self.metrics_val["count_predicted_y_a"] = FragmentsRatio.count_with_ion_mask( + predicted_boolean_a, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_predicted_y_b"] = FragmentsRatio.count_with_ion_mask( + predicted_boolean_b, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_observed_a"] = FragmentsRatio.count_with_ion_mask(observed_boolean_a) + self.metrics_val["count_observed_b"] = FragmentsRatio.count_with_ion_mask(observed_boolean_b) + self.metrics_val["count_observed_b_a"] = FragmentsRatio.count_with_ion_mask( + observed_boolean_a, constants.B_ION_MASK_XL + ) + self.metrics_val["count_observed_b_b"] = FragmentsRatio.count_with_ion_mask( + observed_boolean_b, constants.B_ION_MASK_XL + ) + self.metrics_val["count_observed_y_a"] = 
FragmentsRatio.count_with_ion_mask( + observed_boolean_a, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_observed_y_b"] = FragmentsRatio.count_with_ion_mask( + observed_boolean_b, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_observed_and_predicted_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.OBS_AND_PRED + ) + self.metrics_val["count_observed_and_predicted_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.OBS_AND_PRED + ) + self.metrics_val["count_observed_and_predicted_b_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.OBS_AND_PRED, constants.B_ION_MASK_XL + ) + self.metrics_val["count_observed_and_predicted_b_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.OBS_AND_PRED, constants.B_ION_MASK_XL + ) + self.metrics_val["count_observed_and_predicted_y_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.OBS_AND_PRED, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_observed_and_predicted_y_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.OBS_AND_PRED, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_not_observed_and_not_predicted_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED + ) + self.metrics_val["count_not_observed_and_not_predicted_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED + ) + self.metrics_val["count_not_observed_and_not_predicted_b_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, constants.B_ION_MASK_XL + ) + self.metrics_val["count_not_observed_and_not_predicted_b_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, constants.B_ION_MASK_XL + ) + 
self.metrics_val["count_not_observed_and_not_predicted_y_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_not_observed_and_not_predicted_y_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_observed_but_not_predicted_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.OBS_BUT_NOT_PRED + ) + self.metrics_val["count_observed_but_not_predicted_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.OBS_BUT_NOT_PRED + ) + self.metrics_val["count_observed_but_not_predicted_b_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.OBS_BUT_NOT_PRED, constants.B_ION_MASK_XL + ) + self.metrics_val["count_observed_but_not_predicted_b_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.OBS_BUT_NOT_PRED, constants.B_ION_MASK_XL + ) + self.metrics_val["count_observed_but_not_predicted_y_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.OBS_BUT_NOT_PRED, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_observed_but_not_predicted_y_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.OBS_BUT_NOT_PRED, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_not_observed_but_predicted_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.NOT_OBS_BUT_PRED + ) + self.metrics_val["count_not_observed_but_predicted_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.NOT_OBS_BUT_PRED + ) + self.metrics_val["count_not_observed_but_predicted_b_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.NOT_OBS_BUT_PRED, constants.B_ION_MASK_XL + ) + 
self.metrics_val["count_not_observed_but_predicted_b_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.NOT_OBS_BUT_PRED, constants.B_ION_MASK_XL + ) + self.metrics_val["count_not_observed_but_predicted_y_a"] = FragmentsRatio.count_observation_states( + observation_state_a, ObservationState.NOT_OBS_BUT_PRED, constants.Y_ION_MASK_XL + ) + self.metrics_val["count_not_observed_but_predicted_y_b"] = FragmentsRatio.count_observation_states( + observation_state_b, ObservationState.NOT_OBS_BUT_PRED, constants.Y_ION_MASK_XL + ) + # fractional count metrics + self.metrics_val["fraction_predicted_a"] = self.metrics_val["count_predicted_a"].values / valid_ions_a + self.metrics_val["fraction_predicted_b"] = self.metrics_val["count_predicted_b"].values / valid_ions_b + self.metrics_val["fraction_predicted_b_a"] = self.metrics_val["count_predicted_b_a"] / valid_ions_b_a + self.metrics_val["fraction_predicted_b_b"] = self.metrics_val["count_predicted_b_b"] / valid_ions_b_b + self.metrics_val["fraction_predicted_y_a"] = self.metrics_val["count_predicted_y_a"] / valid_ions_y_a + self.metrics_val["fraction_predicted_y_b"] = self.metrics_val["count_predicted_y_b"] / valid_ions_y_b + self.metrics_val["fraction_observed_a"] = self.metrics_val["count_observed_a"] / valid_ions_a + self.metrics_val["fraction_observed_b"] = self.metrics_val["count_observed_b"] / valid_ions_b + self.metrics_val["fraction_observed_b_a"] = self.metrics_val["count_observed_b_a"] / valid_ions_b_a + self.metrics_val["fraction_observed_b_b"] = self.metrics_val["count_observed_b_b"] / valid_ions_b_b + self.metrics_val["fraction_observed_y_a"] = self.metrics_val["count_observed_y_a"] / valid_ions_y_a + self.metrics_val["fraction_observed_y_b"] = self.metrics_val["count_observed_y_b"] / valid_ions_y_b + self.metrics_val["fraction_observed_and_predicted_a"] = ( + self.metrics_val["count_observed_and_predicted_a"] / valid_ions_a + ) + 
self.metrics_val["fraction_observed_and_predicted_b"] = ( + self.metrics_val["count_observed_and_predicted_b"] / valid_ions_b + ) + self.metrics_val["fraction_observed_and_predicted_b_a"] = ( + self.metrics_val["count_observed_and_predicted_b_a"] / valid_ions_b_a + ) + self.metrics_val["fraction_observed_and_predicted_b_b"] = ( + self.metrics_val["count_observed_and_predicted_b_b"] / valid_ions_b_b + ) + self.metrics_val["fraction_observed_and_predicted_y_a"] = ( + self.metrics_val["count_observed_and_predicted_y_a"] / valid_ions_y_a + ) + self.metrics_val["fraction_observed_and_predicted_y_b"] = ( + self.metrics_val["count_observed_and_predicted_y_b"] / valid_ions_y_b + ) + self.metrics_val["fraction_not_observed_and_not_predicted_a"] = ( + self.metrics_val["count_not_observed_and_not_predicted_a"] / valid_ions_a + ) + self.metrics_val["fraction_not_observed_and_not_predicted_b"] = ( + self.metrics_val["count_not_observed_and_not_predicted_b"] / valid_ions_b + ) + self.metrics_val["fraction_not_observed_and_not_predicted_b_a"] = ( + self.metrics_val["count_not_observed_and_not_predicted_b_a"] / valid_ions_b_a + ) + self.metrics_val["fraction_not_observed_and_not_predicted_b_b"] = ( + self.metrics_val["count_not_observed_and_not_predicted_b_b"] / valid_ions_b_b + ) + self.metrics_val["fraction_not_observed_and_not_predicted_y_a"] = ( + self.metrics_val["count_not_observed_and_not_predicted_y_a"] / valid_ions_y_a + ) + self.metrics_val["fraction_not_observed_and_not_predicted_y_b"] = ( + self.metrics_val["count_not_observed_and_not_predicted_y_b"] / valid_ions_y_b + ) + self.metrics_val["fraction_observed_but_not_predicted_a"] = ( + self.metrics_val["count_observed_but_not_predicted_a"] / valid_ions_a + ) + self.metrics_val["fraction_observed_but_not_predicted_b"] = ( + self.metrics_val["count_observed_but_not_predicted_b"] / valid_ions_b + ) + self.metrics_val["fraction_observed_but_not_predicted_b_a"] = ( + self.metrics_val["count_observed_but_not_predicted_b_a"] 
/ valid_ions_b_a + ) + self.metrics_val["fraction_observed_but_not_predicted_b_b"] = ( + self.metrics_val["count_observed_but_not_predicted_b_b"] / valid_ions_b_b + ) + self.metrics_val["fraction_observed_but_not_predicted_y_a"] = ( + self.metrics_val["count_observed_but_not_predicted_y_a"] / valid_ions_y_a + ) + self.metrics_val["fraction_observed_but_not_predicted_y_b"] = ( + self.metrics_val["count_observed_but_not_predicted_y_b"] / valid_ions_y_b + ) + self.metrics_val["fraction_not_observed_but_predicted_a"] = ( + self.metrics_val["count_not_observed_but_predicted_a"] / valid_ions_a + ) + self.metrics_val["fraction_not_observed_but_predicted_b"] = ( + self.metrics_val["count_not_observed_but_predicted_b"] / valid_ions_b + ) + self.metrics_val["fraction_not_observed_but_predicted_b_a"] = ( + self.metrics_val["count_not_observed_but_predicted_b_a"] / valid_ions_b_a + ) + self.metrics_val["fraction_not_observed_but_predicted_b_b"] = ( + self.metrics_val["count_not_observed_but_predicted_b_b"] / valid_ions_b_b + ) + self.metrics_val["fraction_not_observed_but_predicted_y_a"] = ( + self.metrics_val["count_not_observed_but_predicted_y_a"] / valid_ions_y_a + ) + self.metrics_val["fraction_not_observed_but_predicted_y_b"] = ( + self.metrics_val["count_not_observed_but_predicted_y_b"] / valid_ions_y_b + ) + # fractional count metrics relative to predictions + num_predicted_ions_a = np.maximum(1, self.metrics_val["count_predicted_a"]) + num_predicted_ions_b = np.maximum(1, self.metrics_val["count_predicted_b"]) + num_predicted_ions_b_a = np.maximum(1, self.metrics_val["count_predicted_b_a"]) + num_predicted_ions_b_b = np.maximum(1, self.metrics_val["count_predicted_b_b"]) + num_predicted_ions_y_a = np.maximum(1, self.metrics_val["count_predicted_y_a"]) + num_predicted_ions_y_b = np.maximum(1, self.metrics_val["count_predicted_y_b"]) + self.metrics_val["fraction_observed_and_predicted_vs_predicted_a"] = ( + self.metrics_val["count_observed_and_predicted_a"] / 
num_predicted_ions_a + ) + self.metrics_val["fraction_observed_and_predicted_vs_predicted_b"] = ( + self.metrics_val["count_observed_and_predicted_b"] / num_predicted_ions_b + ) + self.metrics_val["fraction_observed_and_predicted_b_vs_predicted_b_a"] = ( + self.metrics_val["count_observed_and_predicted_b_a"] / num_predicted_ions_b_a + ) + self.metrics_val["fraction_observed_and_predicted_b_vs_predicted_b_b"] = ( + self.metrics_val["count_observed_and_predicted_b_b"] / num_predicted_ions_b_b + ) + self.metrics_val["fraction_observed_and_predicted_y_vs_predicted_y_a"] = ( + self.metrics_val["count_observed_and_predicted_y_a"] / num_predicted_ions_y_a + ) + self.metrics_val["fraction_observed_and_predicted_y_vs_predicted_y_b"] = ( + self.metrics_val["count_observed_and_predicted_y_b"] / num_predicted_ions_y_b + ) + self.metrics_val["fraction_not_observed_and_not_predicted_vs_predicted_a"] = ( + self.metrics_val["count_not_observed_and_not_predicted_a"] / num_predicted_ions_a + ) + self.metrics_val["fraction_not_observed_and_not_predicted_vs_predicted_b"] = ( + self.metrics_val["count_not_observed_and_not_predicted_b"] / num_predicted_ions_b + ) + self.metrics_val["fraction_not_observed_and_not_predicted_b_vs_predicted_b_a"] = ( + self.metrics_val["count_not_observed_and_not_predicted_b_a"] / num_predicted_ions_b_a + ) + self.metrics_val["fraction_not_observed_and_not_predicted_b_vs_predicted_b_b"] = ( + self.metrics_val["count_not_observed_and_not_predicted_b_b"] / num_predicted_ions_b_b + ) + self.metrics_val["fraction_not_observed_and_not_predicted_y_vs_predicted_y_a"] = ( + self.metrics_val["count_not_observed_and_not_predicted_y_a"] / num_predicted_ions_y_a + ) + self.metrics_val["fraction_not_observed_and_not_predicted_y_vs_predicted_y_b"] = ( + self.metrics_val["count_not_observed_and_not_predicted_y_b"] / num_predicted_ions_y_b + ) + self.metrics_val["fraction_observed_but_not_predicted_vs_predicted_a"] = ( + 
self.metrics_val["count_observed_but_not_predicted_a"] / num_predicted_ions_a + ) + self.metrics_val["fraction_observed_but_not_predicted_vs_predicted_b"] = ( + self.metrics_val["count_observed_but_not_predicted_b"] / num_predicted_ions_b + ) + self.metrics_val["fraction_observed_but_not_predicted_b_vs_predicted_b"] = ( + self.metrics_val["count_observed_but_not_predicted_b"] / num_predicted_ions_b + ) + self.metrics_val["fraction_observed_but_not_predicted_b_a_vs_predicted_b_a"] = ( + self.metrics_val["count_observed_but_not_predicted_b_a"] / num_predicted_ions_b_a + ) + self.metrics_val["fraction_observed_but_not_predicted_y_b_vs_predicted_y_b"] = ( + self.metrics_val["count_observed_but_not_predicted_y_b"] / num_predicted_ions_y_b + ) + # not needed, as these are simply (1 - fraction_observed_and_predicted_vs_predicted) + self.metrics_val["fraction_not_observed_but_predicted_vs_predicted_a"] = ( + self.metrics_val["count_not_observed_but_predicted_a"] / num_predicted_ions_a + ) + self.metrics_val["fraction_not_observed_but_predicted_vs_predicted_b"] = ( + self.metrics_val["count_not_observed_but_predicted_b"] / num_predicted_ions_b + ) + self.metrics_val["fraction_not_observed_but_predicted_b_a_vs_predicted_a"] = ( + self.metrics_val["count_not_observed_but_predicted_b_a"] / num_predicted_ions_b_a + ) + self.metrics_val["fraction_not_observed_but_predicted_b_b_vs_predicted_b"] = ( + self.metrics_val["count_not_observed_but_predicted_b_b"] / num_predicted_ions_b_b + ) + self.metrics_val["fraction_not_observed_but_predicted_y_a_vs_predicted"] = ( + self.metrics_val["count_not_observed_but_predicted_y_a"] / num_predicted_ions_y_a + ) + self.metrics_val["fraction_not_observed_but_predicted_y_b_vs_predicted"] = ( + self.metrics_val["count_not_observed_but_predicted_y_b"] / num_predicted_ions_y_b + ) + else: + mask_observed_valid = FragmentsRatio.get_mask_observed_valid(self.true_intensities) + observed_boolean = FragmentsRatio.make_boolean(self.true_intensities, 
mask_observed_valid) +            predicted_boolean = FragmentsRatio.make_boolean(self.pred_intensities, mask_observed_valid, cutoff=0.05) +            observation_state = FragmentsRatio.get_observation_state( +                observed_boolean, predicted_boolean, mask_observed_valid +            ) +            valid_ions = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid)) +            valid_ions_b = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid, constants.B_ION_MASK)) +            valid_ions_y = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid, constants.Y_ION_MASK)) + +            # counting metrics +            self.metrics_val["count_predicted"] = FragmentsRatio.count_with_ion_mask(predicted_boolean) +            self.metrics_val["count_predicted_b"] = FragmentsRatio.count_with_ion_mask( +                predicted_boolean, constants.B_ION_MASK +            ) +            self.metrics_val["count_predicted_y"] = FragmentsRatio.count_with_ion_mask( +                predicted_boolean, constants.Y_ION_MASK +            ) + +            self.metrics_val["count_observed"] = FragmentsRatio.count_with_ion_mask(observed_boolean) +            self.metrics_val["count_observed_b"] = FragmentsRatio.count_with_ion_mask( +                observed_boolean, constants.B_ION_MASK +            ) +            self.metrics_val["count_observed_y"] = FragmentsRatio.count_with_ion_mask( +                observed_boolean, constants.Y_ION_MASK +            ) + +            self.metrics_val["count_observed_and_predicted"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.OBS_AND_PRED +            ) +            self.metrics_val["count_observed_and_predicted_b"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.OBS_AND_PRED, constants.B_ION_MASK +            ) +            self.metrics_val["count_observed_and_predicted_y"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.OBS_AND_PRED, constants.Y_ION_MASK +            ) + +            self.metrics_val["count_not_observed_and_not_predicted"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.NOT_OBS_AND_NOT_PRED +            ) + 
self.metrics_val["count_not_observed_and_not_predicted_b"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.NOT_OBS_AND_NOT_PRED, constants.B_ION_MASK +            ) +            self.metrics_val["count_not_observed_and_not_predicted_y"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.NOT_OBS_AND_NOT_PRED, constants.Y_ION_MASK +            ) + +            self.metrics_val["count_observed_but_not_predicted"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.OBS_BUT_NOT_PRED +            ) +            self.metrics_val["count_observed_but_not_predicted_b"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.OBS_BUT_NOT_PRED, constants.B_ION_MASK +            ) +            self.metrics_val["count_observed_but_not_predicted_y"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.OBS_BUT_NOT_PRED, constants.Y_ION_MASK +            ) + +            self.metrics_val["count_not_observed_but_predicted"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.NOT_OBS_BUT_PRED +            ) +            self.metrics_val["count_not_observed_but_predicted_b"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.NOT_OBS_BUT_PRED, constants.B_ION_MASK +            ) +            self.metrics_val["count_not_observed_but_predicted_y"] = FragmentsRatio.count_observation_states( +                observation_state, ObservationState.NOT_OBS_BUT_PRED, constants.Y_ION_MASK +            ) + +            # fractional count metrics +            self.metrics_val["fraction_predicted"] = self.metrics_val["count_predicted"].values / valid_ions +            self.metrics_val["fraction_predicted_b"] = self.metrics_val["count_predicted_b"] / valid_ions_b +            self.metrics_val["fraction_predicted_y"] = self.metrics_val["count_predicted_y"] / valid_ions_y + +            self.metrics_val["fraction_observed"] = self.metrics_val["count_observed"] / valid_ions +            self.metrics_val["fraction_observed_b"] = self.metrics_val["count_observed_b"] / valid_ions_b +            self.metrics_val["fraction_observed_y"] = 
self.metrics_val["count_observed_y"] / valid_ions_y + + self.metrics_val["fraction_observed_and_predicted"] = ( + self.metrics_val["count_observed_and_predicted"] / valid_ions + ) + self.metrics_val["fraction_observed_and_predicted_b"] = ( + self.metrics_val["count_observed_and_predicted_b"] / valid_ions_b + ) + self.metrics_val["fraction_observed_and_predicted_y"] = ( + self.metrics_val["count_observed_and_predicted_y"] / valid_ions_y + ) + + self.metrics_val["fraction_not_observed_and_not_predicted"] = ( + self.metrics_val["count_not_observed_and_not_predicted"] / valid_ions + ) + self.metrics_val["fraction_not_observed_and_not_predicted_b"] = ( + self.metrics_val["count_not_observed_and_not_predicted_b"] / valid_ions_b + ) + self.metrics_val["fraction_not_observed_and_not_predicted_y"] = ( + self.metrics_val["count_not_observed_and_not_predicted_y"] / valid_ions_y + ) + + self.metrics_val["fraction_observed_but_not_predicted"] = ( + self.metrics_val["count_observed_but_not_predicted"] / valid_ions + ) + self.metrics_val["fraction_observed_but_not_predicted_b"] = ( + self.metrics_val["count_observed_but_not_predicted_b"] / valid_ions_b + ) + self.metrics_val["fraction_observed_but_not_predicted_y"] = ( + self.metrics_val["count_observed_but_not_predicted_y"] / valid_ions_y + ) + + self.metrics_val["fraction_not_observed_but_predicted"] = ( + self.metrics_val["count_not_observed_but_predicted"] / valid_ions + ) + self.metrics_val["fraction_not_observed_but_predicted_b"] = ( + self.metrics_val["count_not_observed_but_predicted_b"] / valid_ions_b + ) + self.metrics_val["fraction_not_observed_but_predicted_y"] = ( + self.metrics_val["count_not_observed_but_predicted_y"] / valid_ions_y + ) + + # fractional count metrics relative to predictions + num_predicted_ions = np.maximum(1, self.metrics_val["count_predicted"]) + num_predicted_ions_b = np.maximum(1, self.metrics_val["count_predicted_b"]) + num_predicted_ions_y = np.maximum(1, 
self.metrics_val["count_predicted_y"]) + + self.metrics_val["fraction_observed_and_predicted_vs_predicted"] = ( + self.metrics_val["count_observed_and_predicted"] / num_predicted_ions + ) + self.metrics_val["fraction_observed_and_predicted_b_vs_predicted_b"] = ( + self.metrics_val["count_observed_and_predicted_b"] / num_predicted_ions_b + ) + self.metrics_val["fraction_observed_and_predicted_y_vs_predicted_y"] = ( + self.metrics_val["count_observed_and_predicted_y"] / num_predicted_ions_y + ) + + self.metrics_val["fraction_not_observed_and_not_predicted_vs_predicted"] = ( + self.metrics_val["count_not_observed_and_not_predicted"] / num_predicted_ions + ) + self.metrics_val["fraction_not_observed_and_not_predicted_b_vs_predicted_b"] = ( + self.metrics_val["count_not_observed_and_not_predicted_b"] / num_predicted_ions_b + ) + self.metrics_val["fraction_not_observed_and_not_predicted_y_vs_predicted_y"] = ( + self.metrics_val["count_not_observed_and_not_predicted_y"] / num_predicted_ions_y + ) + + self.metrics_val["fraction_observed_but_not_predicted_vs_predicted"] = ( + self.metrics_val["count_observed_but_not_predicted"] / num_predicted_ions + ) + self.metrics_val["fraction_observed_but_not_predicted_b_vs_predicted_b"] = ( + self.metrics_val["count_observed_but_not_predicted_b"] / num_predicted_ions_b + ) + self.metrics_val["fraction_observed_but_not_predicted_y_vs_predicted_y"] = ( + self.metrics_val["count_observed_but_not_predicted_y"] / num_predicted_ions_y + ) + + # not needed, as these are simply (1 - fraction_observed_and_predicted_vs_predicted) + self.metrics_val["fraction_not_observed_but_predicted_vs_predicted"] = ( + self.metrics_val["count_not_observed_but_predicted"] / num_predicted_ions + ) + self.metrics_val["fraction_not_observed_but_predicted_b_vs_predicted"] = ( + self.metrics_val["count_not_observed_but_predicted_b"] / num_predicted_ions_b + ) + self.metrics_val["fraction_not_observed_but_predicted_y_vs_predicted"] = ( + 
self.metrics_val["count_not_observed_but_predicted_y"] / num_predicted_ions_y + ) - # fractional count metrics relative to predictions - num_predicted_ions = np.maximum(1, self.metrics_val["count_predicted"]) - num_predicted_ions_b = np.maximum(1, self.metrics_val["count_predicted_b"]) - num_predicted_ions_y = np.maximum(1, self.metrics_val["count_predicted_y"]) - self.metrics_val["fraction_observed_and_predicted_vs_predicted"] = ( - self.metrics_val["count_observed_and_predicted"] / num_predicted_ions - ) - self.metrics_val["fraction_observed_and_predicted_b_vs_predicted_b"] = ( - self.metrics_val["count_observed_and_predicted_b"] / num_predicted_ions_b - ) - self.metrics_val["fraction_observed_and_predicted_y_vs_predicted_y"] = ( - self.metrics_val["count_observed_and_predicted_y"] / num_predicted_ions_y - ) - self.metrics_val["fraction_not_observed_and_not_predicted_vs_predicted"] = ( - self.metrics_val["count_not_observed_and_not_predicted"] / num_predicted_ions - ) - self.metrics_val["fraction_not_observed_and_not_predicted_b_vs_predicted_b"] = ( - self.metrics_val["count_not_observed_and_not_predicted_b"] / num_predicted_ions_b - ) - self.metrics_val["fraction_not_observed_and_not_predicted_y_vs_predicted_y"] = ( - self.metrics_val["count_not_observed_and_not_predicted_y"] / num_predicted_ions_y - ) - self.metrics_val["fraction_observed_but_not_predicted_vs_predicted"] = ( - self.metrics_val["count_observed_but_not_predicted"] / num_predicted_ions - ) - self.metrics_val["fraction_observed_but_not_predicted_b_vs_predicted_b"] = ( - self.metrics_val["count_observed_but_not_predicted_b"] / num_predicted_ions_b - ) - self.metrics_val["fraction_observed_but_not_predicted_y_vs_predicted_y"] = ( - self.metrics_val["count_observed_but_not_predicted_y"] / num_predicted_ions_y - ) - # not needed, as these are simply (1 - fraction_observed_and_predicted_vs_predicted) - self.metrics_val["fraction_not_observed_but_predicted_vs_predicted"] = ( - 
self.metrics_val["count_not_observed_but_predicted"] / num_predicted_ions - ) - self.metrics_val["fraction_not_observed_but_predicted_b_vs_predicted"] = ( - self.metrics_val["count_not_observed_but_predicted_b"] / num_predicted_ions_b - ) - self.metrics_val["fraction_not_observed_but_predicted_y_vs_predicted"] = ( - self.metrics_val["count_not_observed_but_predicted_y"] / num_predicted_ions_y - ) diff --git a/spectrum_fundamentals/metrics/metric.py b/spectrum_fundamentals/metrics/metric.py index 5d34d4d..5bfae90 100644 --- a/spectrum_fundamentals/metrics/metric.py +++ b/spectrum_fundamentals/metrics/metric.py @@ -12,14 +12,25 @@ class Metric: # check https://gitlab.lrz.de/proteomics/prosit_tools/oktoberfest/-/blob/develop/oktoberfest/rescoring/annotate.R # for all metrics pred_intensities: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] # list of lists + #pred_intensities_a: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] # list of lists + #pred_intensities_b: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] # list of lists true_intensities: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] # list of lists + #true_intensities_a: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] # list of lists + #true_intensities_b: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] # list of lists metrics_val: pd.DataFrame def __init__( self, pred_intensities: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #pred_intensities_a: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #pred_intensities_b: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, true_intensities: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #true_intensities_a: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #true_intensities_b: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, mz: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #mz_a: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, 
+ #mz_b: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + ): """ Initialize a Metric object. @@ -29,8 +40,14 @@ def __init__( :param mz: observed mz values """ self.pred_intensities = pred_intensities + #self.pred_intensities_a = pred_intensities_a + #self.pred_intensities_b = pred_intensities_b self.true_intensities = true_intensities + #self.true_intensities_a = true_intensities_a + #self.true_intensities_b = true_intensities_b self.mz = mz + #self.mz_a = mz_a + #self.mz_b = mz_b self.metrics_val = pd.DataFrame() @abstractmethod @@ -40,4 +57,4 @@ def calc(self, all_features: bool): def write_to_file(self, file_path: str): """Write to file_path.""" - self.metrics_val.to_csv(file_path, sep="\t", index=False) + self.metrics_val.to_csv(file_path, sep="\t", index=False) \ No newline at end of file diff --git a/spectrum_fundamentals/metrics/percolator.py b/spectrum_fundamentals/metrics/percolator.py index e36cb09..e3bcf35 100644 --- a/spectrum_fundamentals/metrics/percolator.py +++ b/spectrum_fundamentals/metrics/percolator.py @@ -1,8 +1,10 @@ import enum import hashlib import logging +import re +import subprocess from typing import Optional, Tuple, Union - +from pathlib import Path import numpy as np import pandas as pd import scipy.optimize as opt @@ -10,10 +12,11 @@ from moepy import lowess from scipy import interpolate from sklearn.discriminant_analysis import LinearDiscriminantAnalysis - +from oktoberfest.utils.config import Config from . import fragments_ratio as fr from . 
import similarity as sim from .metric import Metric +from oktoberfest.constants_dir import CONFIG_PATH logger = logging.getLogger(__name__) @@ -47,17 +50,26 @@ class Percolator(Metric): target_decoy_labels: np.ndarray input_type: str fdr_cutoff: float + config: Config def __init__( self, metadata: pd.DataFrame, input_type: str, pred_intensities: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #pred_intensities_a: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #pred_intensities_b: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, true_intensities: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #true_intensities_a: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #true_intensities_b: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, mz: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #mz_a: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + #mz_b: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None, + config_path: Optional[Union[str, Path]]= None, all_features_flag: bool = False, regression_method: str = "lowess", fdr_cutoff: float = 0.01, + percolator_version: Optional[float] = 3.05, ): """Initialize a Percolator obj.""" self.metadata = metadata @@ -65,7 +77,28 @@ def __init__( self.all_features_flag = all_features_flag self.regression_method = regression_method self.fdr_cutoff = fdr_cutoff + if config_path is None: + config_path = CONFIG_PATH + if isinstance(config_path, str): + config_path = Path(config_path) + self.config_path = config_path + self.config = Config() + self.config.read(config_path) + + + self._resolve_percolator_compatibility(percolator_version) super().__init__(pred_intensities, true_intensities, mz) + #super().__init__(pred_intensities,pred_intensities_a,pred_intensities_b, + #true_intensities, true_intensities_a, true_intensities_b, + #mz, mz_a, mz_b) + + def _resolve_percolator_compatibility(self, percolator_version: 
Optional[float] = None): + if percolator_version is None: + result = subprocess.run(["percolator", "-h"], capture_output=True, text=True) + version_line = result.stderr.splitlines()[0].strip() + version = version_line.split("version ")[1] + percolator_version = float(re.sub(r"\.[^.]+$", "", version)) + self.prot_col_name = "Proteins" if percolator_version >= 3.06 else "Protein" @staticmethod def sample_balanced_over_bins(retention_time_df: pd.DataFrame, sample_size: int = 5000) -> pd.Index: @@ -258,38 +291,52 @@ def get_target_decoy_label(reverse: bool): def add_common_features(self): """Add features used by both Andromeda and Prosit feature scoring sets.""" - self.metrics_val["missedCleavages"] = self.metadata["SEQUENCE"].apply(Percolator.count_missed_cleavages) - self.metrics_val["KR"] = self.metadata["SEQUENCE"].apply(Percolator.count_arginines_and_lysines) - self.metrics_val["sequence_length"] = self.metadata["SEQUENCE"].apply(lambda x: len(x)) + if any(self.config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]): + self.metrics_val["missedCleavages_A"] = self.metadata["SEQUENCE_A"].apply(Percolator.count_missed_cleavages) + self.metrics_val["missedCleavages_B"] = self.metadata["SEQUENCE_B"].apply(Percolator.count_missed_cleavages) + self.metrics_val["KR_A"] = self.metadata["SEQUENCE_A"].apply(Percolator.count_arginines_and_lysines) + self.metrics_val["KR_B"] = self.metadata["SEQUENCE_B"].apply(Percolator.count_arginines_and_lysines) + self.metrics_val["sequence_length_a"] = self.metadata["SEQUENCE_A"].apply(lambda x: len(x)) + self.metrics_val["sequence_length_b"] = self.metadata["SEQUENCE_B"].apply(lambda x: len(x)) + self.metrics_val["Mass_A"] = self.metadata["CALCULATED_MASS_A"] # this is the calculated mass of cross-linked peptide used as a feature + self.metrics_val["Mass_B"] = self.metadata["CALCULATED_MASS_B"] # this is the calculated mass of cross-linked peptide used as a feature + else: + self.metrics_val["missedCleavages"] = 
self.metadata["SEQUENCE"].apply(Percolator.count_missed_cleavages) + self.metrics_val["KR"] = self.metadata["SEQUENCE"].apply(Percolator.count_arginines_and_lysines) + self.metrics_val["sequence_length"] = self.metadata["SEQUENCE"].apply(lambda x: len(x)) + self.metrics_val["Mass"] = self.metadata["CALCULATED_MASS"] # this is the calculated mass used as a feature - self.metrics_val["Mass"] = self.metadata["CALCULATED_MASS"] # this is the calculated mass used as a feature self.metrics_val["Charge1"] = (self.metadata["PRECURSOR_CHARGE"] == 1).astype(int) self.metrics_val["Charge2"] = (self.metadata["PRECURSOR_CHARGE"] == 2).astype(int) self.metrics_val["Charge3"] = (self.metadata["PRECURSOR_CHARGE"] == 3).astype(int) self.metrics_val["Charge4"] = (self.metadata["PRECURSOR_CHARGE"] == 4).astype(int) self.metrics_val["Charge5"] = (self.metadata["PRECURSOR_CHARGE"] == 5).astype(int) self.metrics_val["Charge6"] = (self.metadata["PRECURSOR_CHARGE"] == 6).astype(int) - - self.metrics_val["UnknownFragmentationMethod"] = (~self.metadata["FRAGMENTATION"].isin(["HCD", "CID"])).astype( - int - ) + self.metrics_val["UnknownFragmentationMethod"] = (~self.metadata["FRAGMENTATION"].isin(["HCD", "CID"])).astype(int) self.metrics_val["HCD"] = (self.metadata["FRAGMENTATION"] == "HCD").astype(int) self.metrics_val["CID"] = (self.metadata["FRAGMENTATION"] == "CID").astype(int) + def add_percolator_metadata_columns(self): """Add metadata columns needed by percolator, e.g. to identify a PSM.""" - spec_id_cols = ["RAW_FILE", "SCAN_NUMBER", "MODIFIED_SEQUENCE", "PRECURSOR_CHARGE"] + if any(self.config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]): + spec_id_cols = ["RAW_FILE", "SCAN_NUMBER", "MODIFIED_SEQUENCE_A","MODIFIED_SEQUENCE_B", "PRECURSOR_CHARGE"] + self.metrics_val["Peptide"] = (self.metadata["MODIFIED_SEQUENCE_A"]+ "_" + + self.metadata["MODIFIED_SEQUENCE_B"]).apply(lambda x: "_." 
+ x + "._") + self.metrics_val[self.prot_col_name] = (self.metadata["MODIFIED_SEQUENCE_A"]+ "_" + + self.metadata["MODIFIED_SEQUENCE_B"]) + else: + spec_id_cols = ["RAW_FILE", "SCAN_NUMBER", "MODIFIED_SEQUENCE", "PRECURSOR_CHARGE"] + self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._") + self.metrics_val[self.prot_col_name] = self.metadata["MODIFIED_SEQUENCE"] + if "SCAN_EVENT_NUMBER" in self.metadata.columns: spec_id_cols.append("SCAN_EVENT_NUMBER") self.metrics_val["SpecId"] = self.metadata[spec_id_cols].apply(Percolator.get_specid, axis=1) self.metrics_val["Label"] = self.target_decoy_labels self.metrics_val["ScanNr"] = self.metadata[["RAW_FILE", "SCAN_NUMBER"]].apply(Percolator.get_scannr, axis=1) - # self.metrics_val['ExpMass'] = self.metadata['MASS'] - self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._") - self.metrics_val["Protein"] = self.metadata[ - "MODIFIED_SEQUENCE" - ] # we don't need the protein ID to get PSM / peptide results, fill with peptide sequence + # we don't need the protein ID to get PSM / peptide results, fill with peptide sequence def apply_lda_and_get_indices_below_fdr( self, initial_scoring_feature: str = "spectral_angle", fdr_cutoff: float = 0.01 @@ -385,7 +432,7 @@ def fdrs_to_qvals(fdrs: np.ndarray) -> np.ndarray: def _reorder_columns_for_percolator(self): all_columns = self.metrics_val.columns first_columns = ["SpecId", "Label", "ScanNr"] - last_columns = ["Peptide", "Protein"] + last_columns = ["Peptide", "Protein"] if "Protein" in all_columns else ["Peptide", "Proteins"] mid_columns = list(set(all_columns) - set(first_columns) - set(last_columns)) new_columns = first_columns + sorted(mid_columns) + last_columns self.metrics_val = self.metrics_val[new_columns] @@ -418,33 +465,36 @@ def calc(self): if current_fdr >= 0.1: lda_failed = True break - - if lda_failed: - sampled_idxs = 
Percolator.sample_balanced_over_bins(self.metadata[["RETENTION_TIME", "PREDICTED_IRT"]]) + if any(self.config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]): + self.metrics_val["collision_energy_aligned"] = self.metadata["COLLISION_ENERGY"] / 100.0 else: - sampled_idxs = Percolator.sample_balanced_over_bins( - self.metadata[["RETENTION_TIME", "PREDICTED_IRT"]].iloc[idxs_below_lda_fdr, :] - ) - - file_sample = self.metadata.iloc[sampled_idxs].sort_values("PREDICTED_IRT") - aligned_predicted_rts = Percolator.get_aligned_predicted_retention_times( - file_sample["RETENTION_TIME"], - file_sample["PREDICTED_IRT"], - self.metadata["PREDICTED_IRT"], - self.regression_method, - ) - - self.metrics_val["RT"] = self.metadata["RETENTION_TIME"] - self.metrics_val["pred_RT"] = self.metadata["PREDICTED_IRT"] - self.metrics_val["iRT"] = aligned_predicted_rts - self.metrics_val["collision_energy_aligned"] = self.metadata["COLLISION_ENERGY"] / 100.0 - self.metrics_val["abs_rt_diff"] = np.abs(self.metadata["RETENTION_TIME"] - aligned_predicted_rts) - - median_abs_error_lda_targets = np.median(self.metrics_val["abs_rt_diff"].iloc[idxs_below_lda_fdr]) - logger.info( - f"Median absolute error predicted vs observed retention time on targets < 1% FDR: {median_abs_error_lda_targets}" - ) - logger.debug(self.metrics_val[["RT", "pred_RT", "abs_rt_diff", "lda_scores"]].iloc[idxs_below_lda_fdr, :]) + if lda_failed: + sampled_idxs = Percolator.sample_balanced_over_bins(self.metadata[["RETENTION_TIME", "PREDICTED_IRT"]]) + else: + sampled_idxs = Percolator.sample_balanced_over_bins( + self.metadata[["RETENTION_TIME", "PREDICTED_IRT"]].iloc[idxs_below_lda_fdr, :] + ) + + file_sample = self.metadata.iloc[sampled_idxs].sort_values("PREDICTED_IRT") + aligned_predicted_rts = Percolator.get_aligned_predicted_retention_times( + file_sample["RETENTION_TIME"], + file_sample["PREDICTED_IRT"], + self.metadata["PREDICTED_IRT"], + self.regression_method, + ) + + self.metrics_val["RT"] = 
self.metadata["RETENTION_TIME"] + self.metrics_val["pred_RT"] = self.metadata["PREDICTED_IRT"] + self.metrics_val["iRT"] = aligned_predicted_rts + self.metrics_val["collision_energy_aligned"] = self.metadata["COLLISION_ENERGY"] / 100.0 + self.metrics_val["abs_rt_diff"] = np.abs(self.metadata["RETENTION_TIME"] - aligned_predicted_rts) + + median_abs_error_lda_targets = np.median(self.metrics_val["abs_rt_diff"].iloc[idxs_below_lda_fdr]) + logger.info( + f"Median absolute error predicted vs observed retention time on targets < 1% FDR: {median_abs_error_lda_targets}" + ) + logger.debug(self.metrics_val[["RT", "pred_RT", "abs_rt_diff", "lda_scores"]].iloc[idxs_below_lda_fdr, :]) + else: self.metrics_val["andromeda"] = self.metadata["SCORE"] @@ -493,4 +543,4 @@ def spline(knots: int, x: np.ndarray, y: np.ndarray): def logistic(x: Union[pd.Series, np.ndarray], a: float, b: float, c: float, d: float): """Calculates logistic regression function.""" exponent = np.clip(-c * (x - d), -700, 700) # make this stable, i.e. avoid 0.0 or inf - return a / (1.0 + np.exp(exponent)) + b + return a / (1.0 + np.exp(exponent)) + b \ No newline at end of file diff --git a/spectrum_fundamentals/metrics/similarity.py b/spectrum_fundamentals/metrics/similarity.py index feb2a81..0bf17d5 100644 --- a/spectrum_fundamentals/metrics/similarity.py +++ b/spectrum_fundamentals/metrics/similarity.py @@ -7,7 +7,8 @@ from numpy import absolute, mean, std from scipy import spatial from sklearn.metrics import mean_squared_error - +from oktoberfest.utils.config import Config +from oktoberfest.constants_dir import CONFIG_PATH from .. 
import constants from .metric import Metric @@ -58,23 +59,44 @@ def spectral_angle( :param charge: to filter by the peak charges, 0 means everything :return: SA values """ - if charge != 0: - if charge == 1: - boolean_array = constants.SINGLE_CHARGED_MASK - elif charge == 2: - boolean_array = constants.DOUBLE_CHARGED_MASK - elif charge == 3: - boolean_array = constants.TRIPLE_CHARGED_MASK - elif charge == 4: - boolean_array = constants.B_ION_MASK - else: - boolean_array = constants.Y_ION_MASK - - boolean_array = scipy.sparse.csr_matrix(boolean_array) - observed_intensities = scipy.sparse.csr_matrix(observed_intensities) - predicted_intensities = scipy.sparse.csr_matrix(predicted_intensities) - observed_intensities = observed_intensities.multiply(boolean_array).toarray() - predicted_intensities = predicted_intensities.multiply(boolean_array).toarray() + config = Config() + config.read(CONFIG_PATH) + if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]): + if charge != 0: + if charge == 1: + boolean_array = constants.SINGLE_CHARGED_MASK_XL + elif charge == 2: + boolean_array = constants.DOUBLE_CHARGED_MASK_XL + elif charge == 3: + boolean_array = constants.TRIPLE_CHARGED_MASK_XL + elif charge == 4: + boolean_array = constants.B_ION_MASK_XL + else: + boolean_array = constants.Y_ION_MASK_XL + + boolean_array = scipy.sparse.csr_matrix(boolean_array) + observed_intensities = scipy.sparse.csr_matrix(observed_intensities) + predicted_intensities = scipy.sparse.csr_matrix(predicted_intensities) + observed_intensities = observed_intensities.multiply(boolean_array).toarray() + predicted_intensities = predicted_intensities.multiply(boolean_array).toarray() + else: + if charge != 0: + if charge == 1: + boolean_array = constants.SINGLE_CHARGED_MASK + elif charge == 2: + boolean_array = constants.DOUBLE_CHARGED_MASK + elif charge == 3: + boolean_array = constants.TRIPLE_CHARGED_MASK + elif charge == 4: + boolean_array = constants.B_ION_MASK + else: + 
boolean_array = constants.Y_ION_MASK + + boolean_array = scipy.sparse.csr_matrix(boolean_array) + observed_intensities = scipy.sparse.csr_matrix(observed_intensities) + predicted_intensities = scipy.sparse.csr_matrix(predicted_intensities) + observed_intensities = observed_intensities.multiply(boolean_array).toarray() + predicted_intensities = predicted_intensities.multiply(boolean_array).toarray() predicted_non_zero_mask = predicted_intensities > constants.EPSILON @@ -411,69 +433,209 @@ def calc(self, all_features: bool): :param all_features: if True, calculcate all metrics """ - self.metrics_val["spectral_angle"] = SimilarityMetrics.spectral_angle( - self.true_intensities, self.pred_intensities, 0 + config = Config() + config.read(CONFIG_PATH) + if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]): + true_intensities_a = self.true_intensities[:,0:348] + true_intensities_b = self.true_intensities[:,348:] + pred_intensities_a = self.pred_intensities[:,0:348] + pred_intensities_b = self.pred_intensities[:,348:] + self.metrics_val["spectral_angle_a"] = SimilarityMetrics.spectral_angle( + true_intensities_a, pred_intensities_a, 0 ) - self.metrics_val["pearson_corr"] = SimilarityMetrics.correlation( - self.true_intensities, self.pred_intensities, 0, "pearson" + self.metrics_val["spectral_angle_b"] = SimilarityMetrics.spectral_angle( + true_intensities_b, pred_intensities_b, 0 ) - self.metrics_val["modified_cosine"] = SimilarityMetrics.modified_cosine( - self.true_intensities, self.pred_intensities, self.mz, self.mz + self.metrics_val["spectral_angle"] = (self.metrics_val["spectral_angle_a"] + self.metrics_val["spectral_angle_b"])/2 + + self.metrics_val["pearson_corr_a"] = SimilarityMetrics.correlation( + true_intensities_a, pred_intensities_a, 0 ) - if all_features: - self.metrics_val["spectral_entropy_similarity"] = SimilarityMetrics.spectral_entropy_similarity( - self.true_intensities, self.pred_intensities - ) - - col_names_spectral_angle = 
[ - f"spectral_angle_{amount}_charge" for amount in ["single", "double", "triple"] - ] + ["spectral_angle_b_ions", "spectral_angle_y_ions"] - col_names_pearson_corr = [f"pearson_corr_{amount}_charge" for amount in ["single", "double", "triple"]] + [ - "pearson_corr_b_ions", - "pearson_corr_y_ions", - ] - col_names_spearman_corr = [ - f"spearman_corr_{amount}_charge" for amount in ["single", "double", "triple"] - ] + ["spearman_corr_b_ions", "spearman_corr_y_ions"] - - for i, col_name_spectral_angle in enumerate(col_names_spectral_angle): - self.metrics_val[col_name_spectral_angle] = SimilarityMetrics.spectral_angle( - self.true_intensities, self.pred_intensities, i + 1 - ) - - for i, col_name_pearson_corr in enumerate(col_names_pearson_corr): - self.metrics_val[col_name_pearson_corr] = SimilarityMetrics.correlation( - self.true_intensities, self.pred_intensities, i + 1, "pearson" - ) - - self.metrics_val["cos"] = SimilarityMetrics.cos(self.true_intensities, self.pred_intensities) - self.metrics_val["mean_abs_diff"] = SimilarityMetrics.abs_diff( - self.true_intensities, self.pred_intensities, "mean" + self.metrics_val["pearson_corr_b"] = SimilarityMetrics.correlation( + true_intensities_b, pred_intensities_b, 0 + ) + if all_features: + self.metrics_val["modified_cosine_a"] = SimilarityMetrics.modified_cosine( + true_intensities_a, pred_intensities_a, 0 ) - self.metrics_val["std_abs_diff"] = SimilarityMetrics.abs_diff( - self.true_intensities, self.pred_intensities, "std" + self.metrics_val["modified_cosine_b"] = SimilarityMetrics.modified_cosine( + true_intensities_b, pred_intensities_b, 0 ) - self.metrics_val["abs_diff_Q3"] = SimilarityMetrics.abs_diff( - self.true_intensities, self.pred_intensities, "q3" + self.metrics_val["spectral_entropy_similarity_a"] = SimilarityMetrics.spectral_entropy_similarity( + true_intensities_a, pred_intensities_a, 0 ) - self.metrics_val["abs_diff_Q2"] = SimilarityMetrics.abs_diff( - self.true_intensities, self.pred_intensities, "q2" 
+ self.metrics_val["spectral_entropy_similarity_b"] = SimilarityMetrics.spectral_entropy_similarity( + true_intensities_b, pred_intensities_b, 0 ) - self.metrics_val["abs_diff_Q1"] = SimilarityMetrics.abs_diff( - self.true_intensities, self.pred_intensities, "q1" + self.metrics_val["spectral_entropy_similarity_a"] = SimilarityMetrics.spectral_entropy_similarity( + true_intensities_a, pred_intensities_a, 0 ) - self.metrics_val["min_abs_diff"] = SimilarityMetrics.abs_diff( - self.true_intensities, self.pred_intensities, "min" + self.metrics_val["spectral_entropy_similarity_b"] = SimilarityMetrics.spectral_entropy_similarity( + true_intensities_b, pred_intensities_b, 0 ) - self.metrics_val["max_abs_diff"] = SimilarityMetrics.abs_diff( - self.true_intensities, self.pred_intensities, "max" + + col_names_spectral_angle_a = [ + f"spectral_angle_{amount}_charge_a" for amount in ["single", "double", "triple"] + ] + ["spectral_angle_b_ions_a", "spectral_angle_y_ions_a"] + col_names_spectral_angle_b = [ + f"spectral_angle_{amount}_charge_b" for amount in ["single", "double", "triple"] + ] + ["spectral_angle_b_ions_b", "spectral_angle_y_ions_b"] + col_names_pearson_corr_a = [f"pearson_corr_{amount}_charge_a" for amount in ["single", "double", "triple"]] + [ + "pearson_corr_b_ions_a", + "pearson_corr_y_ions_a", + ] + col_names_pearson_corr_b = [f"pearson_corr_{amount}_charge_b" for amount in ["single", "double", "triple"]] + [ + "pearson_corr_b_ions_b", + "pearson_corr_y_ions_b", + ] + col_names_spearman_corr_a = [ + f"spearman_corr_{amount}_charge_a" for amount in ["single", "double", "triple"] + ] + ["spearman_corr_b_ions_a", "spearman_corr_y_ions_a"] + col_names_spearman_corr_b = [ + f"spearman_corr_{amount}_charge_b" for amount in ["single", "double", "triple"] + ] + ["spearman_corr_b_ions_b", "spearman_corr_y_ions_b"] + + for i, col_name_spectral_angle_a in enumerate(col_names_spectral_angle_a): + self.metrics_val[col_name_spectral_angle_a] = 
SimilarityMetrics.spectral_angle( + true_intensities_a, pred_intensities_a, i + 1 + ) + for i, col_name_spectral_angle_b in enumerate(col_names_spectral_angle_b): + self.metrics_val[col_name_spectral_angle_b] = SimilarityMetrics.spectral_angle( + true_intensities_b, pred_intensities_b, i + 1 + ) + for i, col_name_pearson_corr_a in enumerate(col_names_pearson_corr_a): + self.metrics_val[col_name_pearson_corr_a] = SimilarityMetrics.correlation( + true_intensities_a, pred_intensities_a, i + 1, "pearson" + ) + for i, col_name_pearson_corr_b in enumerate(col_names_pearson_corr_b): + self.metrics_val[col_name_pearson_corr_b] = SimilarityMetrics.correlation( + true_intensities_b, pred_intensities_b, i + 1, "pearson" + ) + for i, col_name_spearman_corr_a in enumerate(col_names_spearman_corr_a): + self.metrics_val[col_name_spearman_corr_a] = SimilarityMetrics.correlation( + true_intensities_a, pred_intensities_a, i + 1, "spearman" + ) + for i, col_name_spearman_corr_b in enumerate(col_names_spearman_corr_b): + self.metrics_val[col_name_spearman_corr_b] = SimilarityMetrics.correlation( + true_intensities_b, pred_intensities_b, i + 1, "spearman" + ) + self.metrics_val["cos_a"] = SimilarityMetrics.cos(true_intensities_a, pred_intensities_a) + self.metrics_val["cos_b"] = SimilarityMetrics.cos(true_intensities_b, pred_intensities_b) + self.metrics_val["mean_abs_diff_a"] = SimilarityMetrics.abs_diff( + true_intensities_a, pred_intensities_a, "mean" + ) + self.metrics_val["mean_abs_diff_b"] = SimilarityMetrics.abs_diff( + true_intensities_b, pred_intensities_b, "mean" + ) + self.metrics_val["std_abs_diff_a"] = SimilarityMetrics.abs_diff( + true_intensities_a, pred_intensities_a, "std" + ) + self.metrics_val["std_abs_diff_b"] = SimilarityMetrics.abs_diff( + true_intensities_b, pred_intensities_b, "std" + ) + self.metrics_val["abs_diff_Q3_a"] = 
SimilarityMetrics.abs_diff( + true_intensities_a, pred_intensities_a, "q3" + ) + self.metrics_val["abs_diff_Q3_b"] = SimilarityMetrics.abs_diff( + true_intensities_b, pred_intensities_b, "q3" + ) + self.metrics_val["abs_diff_Q2_a"] = SimilarityMetrics.abs_diff( + true_intensities_a, pred_intensities_a, "q2" + ) + self.metrics_val["abs_diff_Q2_b"] = SimilarityMetrics.abs_diff( + true_intensities_b, pred_intensities_b, "q2" + ) + self.metrics_val["abs_diff_Q1_a"] = SimilarityMetrics.abs_diff( + true_intensities_a, pred_intensities_a, "q1" + ) + self.metrics_val["abs_diff_Q1_b"] = SimilarityMetrics.abs_diff( + true_intensities_b, pred_intensities_b, "q1" + ) + self.metrics_val["min_abs_diff_a"] = SimilarityMetrics.abs_diff( + true_intensities_a, pred_intensities_a, "min" + ) + self.metrics_val["min_abs_diff_b"] = SimilarityMetrics.abs_diff( + true_intensities_b, pred_intensities_b, "min" + ) + self.metrics_val["max_abs_diff_a"] = SimilarityMetrics.abs_diff( + true_intensities_a, pred_intensities_a, "max" + ) + self.metrics_val["max_abs_diff_b"] = SimilarityMetrics.abs_diff( + true_intensities_b, pred_intensities_b, "max" + ) + self.metrics_val["mse_a"] = SimilarityMetrics.abs_diff(true_intensities_a, pred_intensities_a, "mse") + self.metrics_val["mse_b"] = SimilarityMetrics.abs_diff(true_intensities_b, pred_intensities_b, "mse") + + self.metrics_val["spearman_corr_a"] = SimilarityMetrics.correlation( + true_intensities_a, pred_intensities_a, 0, "spearman" + ) + self.metrics_val["spearman_corr_b"] = SimilarityMetrics.correlation( + true_intensities_b, pred_intensities_b, 0, "spearman" + ) + else: + self.metrics_val["spectral_angle"] = SimilarityMetrics.spectral_angle( + self.true_intensities, self.pred_intensities, 0 ) - self.metrics_val["mse"] = SimilarityMetrics.abs_diff(self.true_intensities, self.pred_intensities, "mse") - 
self.metrics_val["spearman_corr"] = SimilarityMetrics.correlation( - self.true_intensities, self.pred_intensities, 0, "spearman" + self.metrics_val["pearson_corr"] = SimilarityMetrics.correlation( + self.true_intensities, self.pred_intensities, 0, "pearson" ) + if all_features: + self.metrics_val["modified_cosine"] = SimilarityMetrics.modified_cosine( + self.true_intensities, self.pred_intensities, self.mz, self.mz + ) + self.metrics_val["spectral_entropy_similarity"] = SimilarityMetrics.spectral_entropy_similarity( + self.true_intensities, self.pred_intensities + ) - for i, col_name_spearman_corr in enumerate(col_names_spearman_corr): - self.metrics_val[col_name_spearman_corr] = SimilarityMetrics.correlation( - self.true_intensities, self.pred_intensities, i + 1, "spearman" + col_names_spectral_angle = [ + f"spectral_angle_{amount}_charge" for amount in ["single", "double", "triple"] + ] + ["spectral_angle_b_ions", "spectral_angle_y_ions"] + col_names_pearson_corr = [f"pearson_corr_{amount}_charge" for amount in ["single", "double", "triple"]] + [ + "pearson_corr_b_ions", + "pearson_corr_y_ions", + ] + col_names_spearman_corr = [ + f"spearman_corr_{amount}_charge" for amount in ["single", "double", "triple"] + ] + ["spearman_corr_b_ions", "spearman_corr_y_ions"] + + for i, col_name_spectral_angle in enumerate(col_names_spectral_angle): + self.metrics_val[col_name_spectral_angle] = SimilarityMetrics.spectral_angle( + self.true_intensities, self.pred_intensities, i + 1 + ) + + for i, col_name_pearson_corr in enumerate(col_names_pearson_corr): + self.metrics_val[col_name_pearson_corr] = SimilarityMetrics.correlation( + self.true_intensities, self.pred_intensities, i + 1, "pearson" + ) + + self.metrics_val["cos"] = SimilarityMetrics.cos(self.true_intensities, self.pred_intensities) + self.metrics_val["mean_abs_diff"] = SimilarityMetrics.abs_diff( + self.true_intensities, self.pred_intensities, "mean" + ) + self.metrics_val["std_abs_diff"] = SimilarityMetrics.abs_diff( 
+ self.true_intensities, self.pred_intensities, "std" ) + self.metrics_val["abs_diff_Q3"] = SimilarityMetrics.abs_diff( + self.true_intensities, self.pred_intensities, "q3" + ) + self.metrics_val["abs_diff_Q2"] = SimilarityMetrics.abs_diff( + self.true_intensities, self.pred_intensities, "q2" + ) + self.metrics_val["abs_diff_Q1"] = SimilarityMetrics.abs_diff( + self.true_intensities, self.pred_intensities, "q1" + ) + self.metrics_val["min_abs_diff"] = SimilarityMetrics.abs_diff( + self.true_intensities, self.pred_intensities, "min" + ) + self.metrics_val["max_abs_diff"] = SimilarityMetrics.abs_diff( + self.true_intensities, self.pred_intensities, "max" + ) + self.metrics_val["mse"] = SimilarityMetrics.abs_diff(self.true_intensities, self.pred_intensities, "mse") + self.metrics_val["spearman_corr"] = SimilarityMetrics.correlation( + self.true_intensities, self.pred_intensities, 0, "spearman" + ) + + for i, col_name_spearman_corr in enumerate(col_names_spearman_corr): + self.metrics_val[col_name_spearman_corr] = SimilarityMetrics.correlation( + self.true_intensities, self.pred_intensities, i + 1, "spearman" + ) \ No newline at end of file