Skip to content

Commit

Permalink
Changed the percolator and fragments_ratio functions to accommodate a …
Browse files Browse the repository at this point in the history
…custom ion

dictionary as input.
  • Loading branch information
jlapin1 committed Dec 19, 2024
1 parent f459be2 commit 5a2f3cf
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 4 deletions.
130 changes: 128 additions & 2 deletions spectrum_fundamentals/metrics/fragments_ratio.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

import numpy as np
import scipy.sparse
import pandas as pd
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

from .. import constants
from .metric import Metric
Expand Down Expand Up @@ -49,7 +52,7 @@ def count_with_ion_mask(
if xl:
array_size = 348
else:
array_size = 174
array_size = boolean_array.shape[1]

if ion_mask is None:
ion_mask = scipy.sparse.csr_matrix(np.ones((array_size, 1)))
Expand Down Expand Up @@ -136,7 +139,7 @@ def get_observation_state(
)
return observation_state

def calc(self, xl: bool = False):
def calc(self, xl: bool = False, ion_dict: pd.DataFrame = None):
"""Adds columns with count, fraction and fraction_predicted features to metrics_val dataframe."""
if self.true_intensities is None or self.pred_intensities is None:
return None
Expand Down Expand Up @@ -418,6 +421,129 @@ def calc(self, xl: bool = False):
self.metrics_val["fraction_observed_but_not_predicted_y_vs_predicted_y_b"] = (
self.metrics_val["count_observed_but_not_predicted_y_b"] / num_predicted_ions_y_b
)

elif ion_dict is not None:
unique_ions = ion_dict['ion'].unique()
ION_MASKS = {ion: (ion_dict['ion'] == ion).to_numpy().astype(int) for ion in unique_ions}

mask_observed_valid = FragmentsRatio.get_mask_observed_valid(self.true_intensities)
observed_boolean = FragmentsRatio.make_boolean(self.true_intensities, mask_observed_valid)
predicted_boolean = FragmentsRatio.make_boolean(self.pred_intensities, mask_observed_valid, cutoff=0.05)
observation_state = FragmentsRatio.get_observation_state(
observed_boolean, predicted_boolean, mask_observed_valid
)

# counting metrics
# - count_predicted
# - count_observed
# - count_not_observed_and_not_predicted
# - count_observed_and_predicted
# - count_observed_but_not_predicted
# - count_not_observed_but_predicted

self.metrics_val["count_predicted"] = FragmentsRatio.count_with_ion_mask(predicted_boolean)
self.metrics_val["count_observed"] = FragmentsRatio.count_with_ion_mask(observed_boolean)
self.metrics_val["count_observed_and_predicted"] = FragmentsRatio.count_observation_states(
observation_state, ObservationState.OBS_AND_PRED
)
self.metrics_val["count_not_observed_and_not_predicted"] = FragmentsRatio.count_observation_states(
observation_state, ObservationState.NOT_OBS_AND_NOT_PRED
)
self.metrics_val["count_observed_but_not_predicted"] = FragmentsRatio.count_observation_states(
observation_state, ObservationState.OBS_BUT_NOT_PRED
)
self.metrics_val["count_not_observed_but_predicted"] = FragmentsRatio.count_observation_states(
observation_state, ObservationState.NOT_OBS_BUT_PRED
)
for ion, mask in ION_MASKS.items():
self.metrics_val[f"count_predicted_{ion}"] = FragmentsRatio.count_with_ion_mask(
predicted_boolean, mask
)
self.metrics_val[f"count_observed_{ion}"] = FragmentsRatio.count_with_ion_mask(
observed_boolean, mask
)
self.metrics_val[f"count_observed_and_predicted_{ion}"] = FragmentsRatio.count_observation_states(
observation_state, ObservationState.OBS_AND_PRED, mask
)
self.metrics_val[f"count_not_observed_and_not_predicted_{ion}"] = FragmentsRatio.count_observation_states(
observation_state, ObservationState.NOT_OBS_AND_NOT_PRED, mask
)
self.metrics_val[f"count_observed_but_not_predicted_{ion}"] = FragmentsRatio.count_observation_states(
observation_state, ObservationState.OBS_BUT_NOT_PRED, mask
)
self.metrics_val[f"count_not_observed_but_predicted_{ion}"] = FragmentsRatio.count_observation_states(
observation_state, ObservationState.NOT_OBS_BUT_PRED, mask
)

# fractional count metrics

valid_ions = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid))
self.metrics_val["fraction_predicted"] = self.metrics_val["count_predicted"].values / valid_ions
self.metrics_val["fraction_observed"] = self.metrics_val["count_observed"].values / valid_ions
self.metrics_val["fraction_observed_and_predicted"] = (
self.metrics_val["count_observed_and_predicted"].values / valid_ions
)
self.metrics_val["fraction_not_observed_and_not_predicted"] = (
self.metrics_val["count_not_observed_and_not_predicted"].values / valid_ions
)
self.metrics_val["fraction_observed_but_not_predicted"] = (
self.metrics_val["count_observed_but_not_predicted"].values / valid_ions
)
self.metrics_val["fraction_not_observed_but_predicted"] = (
self.metrics_val["count_not_observed_but_predicted"].values / valid_ions
)
for ion, mask in ION_MASKS.items():
valid_ions = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid, mask))
self.metrics_val[f"fraction_predicted_{ion}"] = self.metrics_val[f"count_predicted_{ion}"].values / valid_ions
self.metrics_val[f"fraction_observed_{ion}"] = self.metrics_val[f"count_observed_{ion}"].values / valid_ions
self.metrics_val[f"fraction_observed_and_predicted_{ion}"] = (
self.metrics_val[f"count_observed_and_predicted_{ion}"].values / valid_ions
)
self.metrics_val[f"fraction_not_observed_and_not_predicted_{ion}"] = (
self.metrics_val[f"count_not_observed_and_not_predicted_{ion}"].values / valid_ions
)
self.metrics_val[f"fraction_observed_but_not_predicted_{ion}"] = (
self.metrics_val[f"count_observed_but_not_predicted_{ion}"].values / valid_ions
)
self.metrics_val[f"fraction_not_observed_but_predicted_{ion}"] = (
self.metrics_val[f"count_not_observed_but_predicted_{ion}"].values / valid_ions
)

# fractional count metrics relative to predictions

num_predicted_ions = np.maximum(1, self.metrics_val["count_predicted"])
self.metrics_val["fraction_observed_and_predicted_vs_predicted"] = (
self.metrics_val["count_observed_and_predicted"].values / num_predicted_ions
)
self.metrics_val["fraction_not_observed_and_not_predicted_vs_predicted"] = (
self.metrics_val["count_not_observed_and_not_predicted"].values / num_predicted_ions
)
self.metrics_val["fraction_observed_but_not_predicted_vs_predicted"] = (
self.metrics_val["count_observed_but_not_predicted"].values / num_predicted_ions
)
for ion, mask in ION_MASKS.items():
num_predicted_ions = np.maximum(1, self.metrics_val[f"count_predicted_{ion}"])
self.metrics_val[f"fraction_observed_and_predicted_{ion}_vs_predicted_{ion}"] = (
self.metrics_val[f"count_observed_and_predicted_{ion}"].values / num_predicted_ions
)
self.metrics_val[f"fraction_not_observed_and_not_predicted_{ion}_vs_predicted_{ion}"] = (
self.metrics_val[f"count_not_observed_and_not_predicted_{ion}"].values / num_predicted_ions
)
self.metrics_val[f"fraction_observed_but_not_predicted_{ion}_vs_predicted_{ion}"] = (
self.metrics_val[f"count_observed_but_not_predicted_{ion}"].values / num_predicted_ions
)

# not needed, as these are simply (1 - fraction_observed_and_predicted_vs_predicted)

num_predicted_ions = np.maximum(1, self.metrics_val["count_predicted"])
self.metrics_val["fraction_not_observed_but_predicted_vs_predicted"] = (
self.metrics_val["count_not_observed_but_predicted"].values / num_predicted_ions
)
for ion, mask in ION_MASKS.items():
num_predicted_ions = np.maximum(1, self.metrics_val[f"count_predicted_{ion}"])
self.metrics_val[f"fraction_not_observed_but_predicted_{ion}_vs_predicted"] = (
self.metrics_val[f"count_not_observed_but_predicted_{ion}"].values / num_predicted_ions
)

else:
mask_observed_valid = FragmentsRatio.get_mask_observed_valid(self.true_intensities)
Expand Down
4 changes: 2 additions & 2 deletions spectrum_fundamentals/metrics/percolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def _reorder_columns_for_percolator(self):
new_columns = first_columns + sorted(mid_columns) + last_columns
self.metrics_val = self.metrics_val[new_columns]

def calc(self): # noqa: C901
def calc(self, ion_dict): # noqa: C901
"""Adds percolator metadata and feature columns to metrics_val based on PSM metadata."""
self.add_common_features()
self.target_decoy_labels = self.metadata["REVERSE"].apply(Percolator.get_target_decoy_label).to_numpy()
Expand All @@ -455,7 +455,7 @@ def calc(self): # noqa: C901
# add additional features
self.add_additional_features()
fragments_ratio = fr.FragmentsRatio(self.pred_intensities, self.true_intensities)
fragments_ratio.calc(xl=self.xl)
fragments_ratio.calc(xl=self.xl, ion_dict=ion_dict)
similarity = sim.SimilarityMetrics(self.pred_intensities, self.true_intensities, self.mz)
similarity.calc(self.all_features_flag, xl=self.xl)

Expand Down

0 comments on commit 5a2f3cf

Please sign in to comment.