# Neutral losses considered for each residue, keyed by one-letter amino acid code. Oxidised
# methionine is keyed by its UNIMOD-annotated form. The '[]-' and '-[]' keys presumably stand
# for the peptide N- and C-terminus respectively -- TODO confirm against the consumers.
AA_Neutral_losses = {
    "R": ["NH3", "CH2N2", "C3H9N3"],
    "N": ["NH3", "CH3NO", "C2H5NO", "C3H5NO"],
    "D": ["H2O", "CO2", "C2H4O2"],
    "C": ["CH2S"],
    "E": ["H2O", "C2H4O2"],
    "Q": ["NH3", "CH3NO", "C2H5NO", "C3H5NO"],
    "I": ["C2H4"],
    "L": ["C3H6", "C4H8"],
    "K": ["C2H5N", "C4H9N", "C4H11N", "C3H9N"],
    "M": ["C2H4S", "C3H6S"],
    "M[UNIMOD:35]": ["CH4SO", "C3H8SO", "C3H6SO"],
    "S": ["H2O", "CH4O"],
    "T": ["H2O", "C2H4O"],
    "W": ["C8H7N", "C9H9N"],
    "V": ["C3H6"],
    "[]-": ["NH3"],
    "-[]": ["H2O"],
}

# Neutral losses that are specific to a modified residue, keyed by the UNIMOD-annotated residue.
# Phosphorylated threonine loses phosphoric acid just like phosphorylated serine, so both are listed.
Mod_Neutral_losses = {
    "R[UNIMOD:7]": ["CHNO"],
    "S[UNIMOD:21]": ["H3O4P"],
    "T[UNIMOD:21]": ["H3O4P"],
}


def _neutral_loss_mass(formula, atom_masses):
    """
    Compute the mass of a flat chemical formula such as 'C2H4O2'.

    The formula is read left to right as a sequence of element symbols (one upper-case letter,
    optionally followed by lower-case letters), each optionally followed by a count; a missing
    count means 1. Terms are summed in the order they appear in the formula.

    :param formula: chemical formula without brackets or charges
    :param atom_masses: mapping of element symbol to its mass
    :raises KeyError: if the formula contains a symbol that is not a key of `atom_masses`
    :return: sum of ``atom_masses[element] * count`` over all terms of the formula
    """
    mass = 0.0
    pos = 0
    length = len(formula)
    while pos < length:
        symbol_start = pos
        pos += 1
        while pos < length and formula[pos].islower():
            pos += 1
        element = formula[symbol_start:pos]

        count_start = pos
        while pos < length and formula[pos].isdigit():
            pos += 1
        count = int(formula[count_start:pos] or 1)

        mass += atom_masses[element] * count
    return mass


# Mass of every neutral loss listed above, keyed by its formula. The table is derived from the
# two loss tables so that a loss can never be listed without a mass. The formulas used here
# require ATOM_MASSES to provide H, C, N, O, S and P.
Neutral_losses_Mass = {
    formula: _neutral_loss_mass(formula, ATOM_MASSES)
    for losses in (*AA_Neutral_losses.values(), *Mod_Neutral_losses.values())
    for formula in losses
}
# Largest absolute difference (in Da) at which a modification mass delta is still treated as one
# of the known modifications below. Exact float equality would miss deltas that are stored with
# a different number of decimals than the literals used in this module.
_NL_MOD_MASS_TOLERANCE = 1e-3


def _add_nl(neutral_losses, nl_dict, start_aa_index, end_aa_index):
    """
    Register neutral losses (NL) for a range of fragment indices.

    Every entry of `neutral_losses` that is not yet listed under an index in
    ``range(start_aa_index, end_aa_index)`` is appended to the list stored for that index.
    `nl_dict` is modified in place.

    :param list neutral_losses: neutral losses to register
    :param dict nl_dict: mapping of fragment index to the list of neutral losses known for it
    :param int start_aa_index: first index to update (inclusive)
    :param int end_aa_index: end of the index range (exclusive)
    :raises KeyError: if an index of the range is missing from `nl_dict`
    :returns: the same `nl_dict` object, after the update
    """
    for nl in neutral_losses:
        for index in range(start_aa_index, end_aa_index):
            known_losses = nl_dict[index]
            if nl not in known_losses:
                known_losses.append(nl)
    return nl_dict


def _select_residue_losses(aa, mod_delta, aa_neutral_losses, mod_neutral_losses):
    """
    Pick the neutral losses that apply to one residue given its modification mass delta.

    :param aa: one-letter amino acid code, must be a key of `aa_neutral_losses`
    :param mod_delta: mass delta stored for the residue, 0.0 if it is unmodified
    :param aa_neutral_losses: table in the format of `c.AA_Neutral_losses`
    :param mod_neutral_losses: table in the format of `c.Mod_Neutral_losses`
    :return: list of neutral losses; empty if the residue carries a modification not handled here
    """

    def _is(expected_delta):
        return abs(mod_delta - expected_delta) <= _NL_MOD_MASS_TOLERANCE

    if _is(0.0):
        return aa_neutral_losses[aa]
    if aa == "M" and _is(15.9949146):
        return aa_neutral_losses["M[UNIMOD:35]"]
    if aa == "R" and _is(0.984016):
        return mod_neutral_losses["R[UNIMOD:7]"]
    if aa in ("S", "T") and _is(79.9663):
        # the phospho loss is the same for S and T and is stored under the serine key
        return mod_neutral_losses["S[UNIMOD:21]"]
    return []


def _get_neutral_losses(peptide_sequence, modifications, aa_neutral_losses=None, mod_neutral_losses=None):
    """
    Get possible neutral losses and their positions for the b and y ions of a peptide.

    Both returned dicts are keyed by the fragment index ``0 .. len(peptide_sequence) - 2``.
    A residue at position ``i`` contributes its losses to the b-ion indices ``i`` and above and to
    the y-ion indices ``len(peptide_sequence) - i - 1`` and above.

    A position whose stored delta is (close to) zero is handled like an unmodified residue. This
    matters because `initialize_peaks` always creates an entry for position 0 before calling this
    function. Residues with a modification other than the ones handled here get no losses.

    :param peptide_sequence: unmodified peptide sequence
    :param modifications: dict mapping a position in the sequence to its modification mass delta
    :param aa_neutral_losses: optional replacement for `c.AA_Neutral_losses`
    :param mod_neutral_losses: optional replacement for `c.Mod_Neutral_losses`
    :return: tuple (b-ion losses, y-ion losses), each a dict of fragment index to list of losses
    """
    if aa_neutral_losses is None:
        aa_neutral_losses = c.AA_Neutral_losses
    if mod_neutral_losses is None:
        mod_neutral_losses = c.Mod_Neutral_losses

    n_fragments = len(peptide_sequence) - 1
    nl_b_ions = {index: [] for index in range(n_fragments)}
    nl_y_ions = {index: [] for index in range(n_fragments)}

    for position, aa in enumerate(peptide_sequence):
        if aa not in aa_neutral_losses:
            continue
        losses = _select_residue_losses(
            aa, modifications.get(position, 0.0), aa_neutral_losses, mod_neutral_losses
        )
        _add_nl(losses, nl_b_ions, position, n_fragments)
        _add_nl(losses, nl_y_ions, n_fragments - position, n_fragments)
    return nl_b_ions, nl_y_ions


def _calculate_nl_score_mass(neutral_loss, neutral_loss_masses=None):
    """
    Calculate the score and mass for a given neutral loss (NL).

    The score starts at 100 and is reduced by 5 for a loss of 'H2O' or 'NH3' and by 30 for any
    other loss.

    :param str neutral_loss: formula of the neutral loss, must be a key of the mass table
    :param neutral_loss_masses: optional replacement for `c.Neutral_losses_Mass`
    :raises KeyError: if `neutral_loss` is not present in the mass table
    :returns: tuple of the adjusted score and the mass of the neutral loss
    """
    if neutral_loss_masses is None:
        neutral_loss_masses = c.Neutral_losses_Mass
    mass = neutral_loss_masses[neutral_loss]
    penalty = 5 if neutral_loss in ("H2O", "NH3") else 30
    return 100 - penalty, mass
class TestNeutralLossFunctions(unittest.TestCase):
    """Unit tests for the neutral loss helper functions of the fragments module."""

    def test_add_nl(self):
        """Test the _add_nl function with a range of amino acids and neutral losses."""
        neutral_losses = ["H2O", "NH3"]
        nl_dict = {
            0: ["CO2"],  # index 0 starts with CO2 only
            1: ["H2O"],  # index 1 already contains H2O
        }
        start_aa_index = 0
        end_aa_index = 2  # the end is exclusive, so indices 0 and 1 are updated

        expected_nl_dict = {
            0: ["CO2", "H2O", "NH3"],  # both H2O and NH3 are appended
            1: ["H2O", "NH3"],  # only NH3 is appended, H2O is not duplicated
        }

        result = fragments._add_nl(neutral_losses, nl_dict, start_aa_index, end_aa_index)
        self.assertEqual(result, expected_nl_dict)
        # the dictionary is updated in place and handed back
        self.assertIs(result, nl_dict)

    def test_calculate_nl_score_mass(self):
        """Test the _calculate_nl_score_mass function with various neutral losses."""
        # (neutral loss, expected score, expected mass); H2O and NH3 cost 5 points, all others 30
        cases = [
            ("H2O", 95, 18.01056467),
            ("NH3", 95, 17.026549105),
            ("CO2", 70, 43.9898292),
            ("C2H4O2", 70, 60.02112934),
        ]
        for neutral_loss, expected_score, expected_mass in cases:
            with self.subTest(neutral_loss=neutral_loss):
                score, mass = fragments._calculate_nl_score_mass(neutral_loss)
                self.assertEqual(score, expected_score)
                # masses are sums of floats, so compare with a tolerance instead of exact equality
                self.assertAlmostEqual(mass, expected_mass, places=6)