Skip to content

Commit

Permalink
fixed issues found by nox
Browse files: browse the repository at this point in the history
  • Loading branch information
Mostafa Kalhor committed Apr 2, 2024
1 parent c513695 commit 9b8573c
Show file tree
Hide file tree
Showing 11 changed files with 1,539 additions and 97 deletions.
1 change: 0 additions & 1 deletion spectrum_fundamentals/annotation/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,6 @@ def parallel_annotate(
:return: a tuple containing intensity values (np.ndarray), masses (np.ndarray), calculated mass (float),
and any removed peaks (List[str])
"""

mod_seq_column = "MODIFIED_SEQUENCE"
if "MODIFIED_SEQUENCE_MSA" in index_columns:
mod_seq_column = "MODIFIED_SEQUENCE_MSA"
Expand Down
20 changes: 13 additions & 7 deletions spectrum_fundamentals/fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,18 @@ def initialize_peaks(
charge_delta = charge * constants.PARTICLE_MASSES["PROTON"]
for ion_type in range(0, number_of_ion_types): # generate all ion types
# Check for neutral loss here
if noncl_xl == 1 and ion_type == 0 and i + 1 >= xl_pos: # for the b ions
ion_mass_with_peptide_beta = ion_type_masses[ion_type] + peptide_beta_mass
mass = (ion_mass_with_peptide_beta + charge_delta) / charge
elif noncl_xl == 1 and ion_type == 1 and i >= peptide_length - xl_pos: # for the y-ions
ion_mass_with_peptide_beta = ion_type_masses[ion_type] + peptide_beta_mass
mass = (ion_mass_with_peptide_beta + charge_delta) / charge
if noncl_xl == 1 and ion_type == 0 and xl_pos is not None and i + 1 >= xl_pos: # for the b ions
if peptide_beta_mass is not None:
ion_mass_with_peptide_beta = ion_type_masses[ion_type] + peptide_beta_mass
mass = (ion_mass_with_peptide_beta + charge_delta) / charge
else:
raise ValueError("peptide_beta_mass cannot be None. Please check your input data.")
elif noncl_xl == 1 and ion_type == 1 and xl_pos is not None and i >= peptide_length - xl_pos: # for the y-ions
if peptide_beta_mass is not None:
ion_mass_with_peptide_beta = ion_type_masses[ion_type] + peptide_beta_mass
mass = (ion_mass_with_peptide_beta + charge_delta) / charge
else:
raise ValueError("peptide_beta_mass cannot be None. Please check your input data.")
else:
mass = (ion_type_masses[ion_type] + charge_delta) / charge
min_mass, max_mass = get_min_max_mass(mass_analyzer, mass, mass_tolerance, unit_mass_tolerance)
Expand Down Expand Up @@ -296,7 +302,7 @@ def initialize_peaks_xl(
mass = compute_peptide_mass(sequence_without_crosslinker)

elif crosslinker_type in ["BS3", "DSS"]: # non-cleavable XL
charge=3 # generate only peaks with charge 1, 2 and 3
charge = 3 # generate only peaks with charge 1, 2 and 3

sequence_without_crosslinker = sequence.replace("[UNIMOD:1898]", "")
sequence_beta_without_crosslinker = sequence_beta.replace("[UNIMOD:1898]", "")
Expand Down
95 changes: 47 additions & 48 deletions spectrum_fundamentals/metrics/fragments_ratio.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import enum
from typing import Optional, Union
import pandas as pd

import numpy as np
import scipy.sparse
from . import percolator

from .. import constants
from .metric import Metric

Expand All @@ -25,9 +25,10 @@ class ObservationState(enum.IntEnum):
OBS_BUT_NOT_PRED = 1
INVALID_ION = 0


class FragmentsRatio(Metric):
"""Main to initialize a FragmentsRatio obj."""

@staticmethod
def count_with_ion_mask(
boolean_array: scipy.sparse.csr_matrix,
Expand All @@ -44,7 +45,6 @@ def count_with_ion_mask(
:param xl: whether to process with crosslinked or linear peptides
:return: number of observed/predicted peaks not masked by ion_mask
"""

if ion_mask is None:
ion_mask = []

Expand Down Expand Up @@ -78,7 +78,7 @@ def count_observation_states(
"""
state_boolean = observation_state == test_state
if xl:
return FragmentsRatio.count_with_ion_mask(state_boolean, ion_mask, xl = True)
return FragmentsRatio.count_with_ion_mask(state_boolean, ion_mask, xl=True)
else:
return FragmentsRatio.count_with_ion_mask(state_boolean, ion_mask)

Expand Down Expand Up @@ -145,7 +145,6 @@ def get_observation_state(
def calc(self):
"""Adds columns with count, fraction and fraction_predicted features to metrics_val dataframe."""
if self.true_intensities.shape[1] == 696:
true_intensities = self.true_intensities
self.xl = True
if self.xl:
true_intensities_a = self.true_intensities[:, 0:348]
Expand All @@ -164,120 +163,120 @@ def calc(self):
observation_state_b = FragmentsRatio.get_observation_state(
observed_boolean_b, predicted_boolean_b, mask_observed_valid_b
)
valid_ions_a = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, xl = True))
valid_ions_b = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, xl = True))
valid_ions_a = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, xl=True))
valid_ions_b = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, xl=True))
valid_ions_b_a = np.maximum(
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, constants.B_ION_MASK_XL, xl = True)
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, constants.B_ION_MASK_XL, xl=True)
)
valid_ions_b_b = np.maximum(
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, constants.B_ION_MASK_XL, xl = True)
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, constants.B_ION_MASK_XL, xl=True)
)
valid_ions_y_a = np.maximum(
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, constants.Y_ION_MASK_XL, xl = True)
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, constants.Y_ION_MASK_XL, xl=True)
)
valid_ions_y_b = np.maximum(
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, constants.Y_ION_MASK_XL, xl = True)
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, constants.Y_ION_MASK_XL, xl=True)
)
# counting metrics
self.metrics_val["count_predicted_a"] = FragmentsRatio.count_with_ion_mask(predicted_boolean_a, xl = True)
self.metrics_val["count_predicted_b"] = FragmentsRatio.count_with_ion_mask(predicted_boolean_b, xl = True)
self.metrics_val["count_predicted_a"] = FragmentsRatio.count_with_ion_mask(predicted_boolean_a, xl=True)
self.metrics_val["count_predicted_b"] = FragmentsRatio.count_with_ion_mask(predicted_boolean_b, xl=True)
self.metrics_val["count_predicted_b_a"] = FragmentsRatio.count_with_ion_mask(
predicted_boolean_a, constants.B_ION_MASK_XL, xl = True
predicted_boolean_a, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_predicted_b_b"] = FragmentsRatio.count_with_ion_mask(
predicted_boolean_b, constants.B_ION_MASK_XL, xl = True
predicted_boolean_b, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_predicted_y_a"] = FragmentsRatio.count_with_ion_mask(
predicted_boolean_a, constants.Y_ION_MASK_XL, xl = True
predicted_boolean_a, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_predicted_y_b"] = FragmentsRatio.count_with_ion_mask(
predicted_boolean_b, constants.Y_ION_MASK_XL, xl = True
predicted_boolean_b, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_a"] = FragmentsRatio.count_with_ion_mask(observed_boolean_a, xl = True)
self.metrics_val["count_observed_b"] = FragmentsRatio.count_with_ion_mask(observed_boolean_b, xl = True)
self.metrics_val["count_observed_a"] = FragmentsRatio.count_with_ion_mask(observed_boolean_a, xl=True)
self.metrics_val["count_observed_b"] = FragmentsRatio.count_with_ion_mask(observed_boolean_b, xl=True)
self.metrics_val["count_observed_b_a"] = FragmentsRatio.count_with_ion_mask(
observed_boolean_a, constants.B_ION_MASK_XL, xl = True
observed_boolean_a, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_b_b"] = FragmentsRatio.count_with_ion_mask(
observed_boolean_b, constants.B_ION_MASK_XL, xl = True
observed_boolean_b, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_y_a"] = FragmentsRatio.count_with_ion_mask(
observed_boolean_a, constants.Y_ION_MASK_XL, xl = True
observed_boolean_a, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_y_b"] = FragmentsRatio.count_with_ion_mask(
observed_boolean_b, constants.Y_ION_MASK_XL, xl = True
observed_boolean_b, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_and_predicted_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.OBS_AND_PRED, xl = True
observation_state_a, ObservationState.OBS_AND_PRED, xl=True
)
self.metrics_val["count_observed_and_predicted_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.OBS_AND_PRED, xl = True
observation_state_b, ObservationState.OBS_AND_PRED, xl=True
)
self.metrics_val["count_observed_and_predicted_b_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.OBS_AND_PRED, constants.B_ION_MASK_XL, xl = True
observation_state_a, ObservationState.OBS_AND_PRED, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_and_predicted_b_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.OBS_AND_PRED, constants.B_ION_MASK_XL, xl = True
observation_state_b, ObservationState.OBS_AND_PRED, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_and_predicted_y_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.OBS_AND_PRED, constants.Y_ION_MASK_XL, xl = True
observation_state_a, ObservationState.OBS_AND_PRED, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_and_predicted_y_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.OBS_AND_PRED, constants.Y_ION_MASK_XL, xl = True
observation_state_b, ObservationState.OBS_AND_PRED, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_not_observed_and_not_predicted_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, xl = True
observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, xl=True
)
self.metrics_val["count_not_observed_and_not_predicted_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, xl = True
observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, xl=True
)
self.metrics_val["count_not_observed_and_not_predicted_b_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, constants.B_ION_MASK_XL, xl = True
observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_not_observed_and_not_predicted_b_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, constants.B_ION_MASK_XL, xl = True
observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_not_observed_and_not_predicted_y_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, constants.Y_ION_MASK_XL, xl = True
observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_not_observed_and_not_predicted_y_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, constants.Y_ION_MASK_XL, xl = True
observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_but_not_predicted_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.OBS_BUT_NOT_PRED, xl = True
observation_state_a, ObservationState.OBS_BUT_NOT_PRED, xl=True
)
self.metrics_val["count_observed_but_not_predicted_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.OBS_BUT_NOT_PRED, xl = True
observation_state_b, ObservationState.OBS_BUT_NOT_PRED, xl=True
)
self.metrics_val["count_observed_but_not_predicted_b_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.OBS_BUT_NOT_PRED, constants.B_ION_MASK_XL, xl = True
observation_state_a, ObservationState.OBS_BUT_NOT_PRED, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_but_not_predicted_b_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.OBS_BUT_NOT_PRED, constants.B_ION_MASK_XL, xl = True
observation_state_b, ObservationState.OBS_BUT_NOT_PRED, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_but_not_predicted_y_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.OBS_BUT_NOT_PRED, constants.Y_ION_MASK_XL, xl = True
observation_state_a, ObservationState.OBS_BUT_NOT_PRED, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_observed_but_not_predicted_y_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.OBS_BUT_NOT_PRED, constants.Y_ION_MASK_XL, xl = True
observation_state_b, ObservationState.OBS_BUT_NOT_PRED, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_not_observed_but_predicted_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.NOT_OBS_BUT_PRED, xl = True
observation_state_a, ObservationState.NOT_OBS_BUT_PRED, xl=True
)
self.metrics_val["count_not_observed_but_predicted_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.NOT_OBS_BUT_PRED, xl = True
observation_state_b, ObservationState.NOT_OBS_BUT_PRED, xl=True
)
self.metrics_val["count_not_observed_but_predicted_b_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.NOT_OBS_BUT_PRED, constants.B_ION_MASK_XL, xl = True
observation_state_a, ObservationState.NOT_OBS_BUT_PRED, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_not_observed_but_predicted_b_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.NOT_OBS_BUT_PRED, constants.B_ION_MASK_XL, xl = True
observation_state_b, ObservationState.NOT_OBS_BUT_PRED, constants.B_ION_MASK_XL, xl=True
)
self.metrics_val["count_not_observed_but_predicted_y_a"] = FragmentsRatio.count_observation_states(
observation_state_a, ObservationState.NOT_OBS_BUT_PRED, constants.Y_ION_MASK_XL, xl = True
observation_state_a, ObservationState.NOT_OBS_BUT_PRED, constants.Y_ION_MASK_XL, xl=True
)
self.metrics_val["count_not_observed_but_predicted_y_b"] = FragmentsRatio.count_observation_states(
observation_state_b, ObservationState.NOT_OBS_BUT_PRED, constants.Y_ION_MASK_XL, xl = True
observation_state_b, ObservationState.NOT_OBS_BUT_PRED, constants.Y_ION_MASK_XL, xl=True
)
# fractional count metrics
self.metrics_val["fraction_predicted_a"] = self.metrics_val["count_predicted_a"].values / valid_ions_a
Expand Down
2 changes: 0 additions & 2 deletions spectrum_fundamentals/metrics/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ def spectral_angle(
default_value = constants.B_ION_MASK

if charge != 0:

boolean_array = masks.get(charge, default_value)

boolean_array = scipy.sparse.csr_matrix(boolean_array)
Expand Down Expand Up @@ -448,7 +447,6 @@ def calc(self, all_features: bool, xl: bool = False):
def _calc_additional_metrics(
self, true_intensities: np.ndarray, pred_intensities: np.ndarray, key_suffix: str = ""
):

if key_suffix != "":
# dirty fix, if the key_suffix is not "", that means we have XL mode.
# TODO: fix self.mz for XL mode
Expand Down
690 changes: 689 additions & 1 deletion tests/unit_tests/data/annotation_xl_input.json

Large diffs are not rendered by default.

408 changes: 407 additions & 1 deletion tests/unit_tests/data/annotation_xl_output.json

Large diffs are not rendered by default.

Loading

0 comments on commit 9b8573c

Please sign in to comment.