Skip to content

Commit

Permalink
Added neutral losses annotation
Browse files Browse the repository at this point in the history
  • Loading branch information
WassimG committed Sep 19, 2024
1 parent 8710a79 commit 847ddce
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 1 deletion.
59 changes: 59 additions & 0 deletions spectrum_fundamentals/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@
"C": 12.0,
"O": 15.9949146,
"N": 14.003074,
'S': 31.9720712,
'P': 30.9737619
}

MASSES = {**PARTICLE_MASSES, **ATOM_MASSES}
Expand Down Expand Up @@ -283,6 +285,63 @@

AA_MOD = {**AA_MASSES, **AA_MOD_MASSES}


AA_Neutral_losses = {
'R': ['NH3', 'CH2N2', 'C3H9N3'],
'N': ['NH3', 'CH3NO', 'C2H5NO', 'C3H5NO'],
'D': ['H2O', 'CO2', 'C2H4O2'],
'C': ['CH2S'],
'E': ['H2O', 'C2H4O2'],
'Q': ['NH3', 'CH3NO', 'C2H5NO', 'C3H5NO'],
'I': ['C2H4'],
'L': ['C3H6', 'C4H8'],
'K': ['C2H5N', 'C4H9N', 'C4H11N', 'C3H9N'],
'M': ['C2H4S', 'C3H6S'],
'M[UNIMOD:35]': ['CH4SO', 'C3H8SO', 'C3H6SO'],
'S': ['H2O', 'CH4O'],
'T': ['H2O', 'C2H4O'],
'W': ['C8H7N', 'C9H9N'],
'V': ['C3H6'],
'[]-': ['NH3'],
'-[]': ['H2O'],
}

Mod_Neutral_losses = {
'R[UNIMOD:7]': ['CHNO'],
'S[UNIMOD:21]': ['H3O4P']
}

Neutral_losses_Mass = {
'C2H4': (ATOM_MASSES['C']*2) + (ATOM_MASSES['H']*4),
'C2H4O': (ATOM_MASSES['C']*2) + (ATOM_MASSES['H']*4) + ATOM_MASSES['O'],
'C2H4O2': (ATOM_MASSES['C']*2) + (ATOM_MASSES['H']*4) + (ATOM_MASSES['O']*2),
'C2H4S': (ATOM_MASSES['C']*2) + (ATOM_MASSES['H']*4) + ATOM_MASSES['S'],
'C2H5N': (ATOM_MASSES['C']*2) + (ATOM_MASSES['H']*5) + ATOM_MASSES['N'],
'CHNO': (ATOM_MASSES['C']) + (ATOM_MASSES['H']) + ATOM_MASSES['N'] + ATOM_MASSES['O'],
'C2H5NO': (ATOM_MASSES['C']*2) + (ATOM_MASSES['H']*5) + ATOM_MASSES['N'] + ATOM_MASSES['O'],
'C3H5NO': (ATOM_MASSES['C']*3) + (ATOM_MASSES['H']*5) + ATOM_MASSES['N'] + ATOM_MASSES['O'],
'C3H6': (ATOM_MASSES['C']*3) + (ATOM_MASSES['H']*6),
'C3H6S': (ATOM_MASSES['C']*3) + (ATOM_MASSES['H']*6) + ATOM_MASSES['S'],
'C3H6SO': (ATOM_MASSES['C']*3) + (ATOM_MASSES['H']*6) + ATOM_MASSES['S'] + ATOM_MASSES['O'],
'C3H8SO': (ATOM_MASSES['C']*3) + (ATOM_MASSES['H']*8) + ATOM_MASSES['S'] + ATOM_MASSES['O'],
'C3H9N': (ATOM_MASSES['C']*3) + (ATOM_MASSES['H']*9) + ATOM_MASSES['N'],
'C3H9N3': (ATOM_MASSES['C']*3) + (ATOM_MASSES['H']*9) + (ATOM_MASSES['N']*3),
'C4H11N': (ATOM_MASSES['C']*4) + (ATOM_MASSES['H']*11) + ATOM_MASSES['N'],
'C4H8': (ATOM_MASSES['C']*4) + (ATOM_MASSES['H']*8),
'C4H9N': (ATOM_MASSES['C']*4) + (ATOM_MASSES['H']*9) + ATOM_MASSES['N'],
'C8H7N': (ATOM_MASSES['C']*8) + (ATOM_MASSES['H']*7) + ATOM_MASSES['N'],
'C9H9N': (ATOM_MASSES['C']*9) + (ATOM_MASSES['H']*9) + ATOM_MASSES['N'],
'CH2N2': ATOM_MASSES['C'] + (ATOM_MASSES['H']*2) + (ATOM_MASSES['N']*2),
'CH2S': ATOM_MASSES['C'] + (ATOM_MASSES['H']*2) + ATOM_MASSES['S'],
'CH3NO': ATOM_MASSES['C'] + (ATOM_MASSES['H']*3) + ATOM_MASSES['N'] + ATOM_MASSES['O'],
'CH4O': ATOM_MASSES['C'] + (ATOM_MASSES['H']*4) + ATOM_MASSES['O'],
'CH4SO': ATOM_MASSES['C'] + (ATOM_MASSES['H']*4) + ATOM_MASSES['S'] + ATOM_MASSES['O'],
'CO2': ATOM_MASSES['C'] + (ATOM_MASSES['O']*2),
'H2O': (ATOM_MASSES['H']*2) + ATOM_MASSES['O'],
'NH3': ATOM_MASSES['N'] + (ATOM_MASSES['H']*3),
'H3O4P': (ATOM_MASSES['H']*3) + (ATOM_MASSES['O']*4) + ATOM_MASSES['P']
}

#######################################
# HELPERS FOR FRAGMENT MZ CALCULATION #
#######################################
Expand Down
106 changes: 105 additions & 1 deletion spectrum_fundamentals/fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,83 @@ def get_ion_delta(ion_types: List[str]) -> np.ndarray:
return np.array([c.ION_DELTAS[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1)


def _add_nl(neutral_losses, nl_dict, start_aa_index, end_aa_index):
"""
Adds neutral losses (NL) to a dictionary of neutral losses for specific amino acid indices.
This function updates the `nl_dict` by incorporating the provided `neutral_losses` into
the amino acid indices between `start_aa_index` and `end_aa_index`.
:param list neutral_losses: A list of neutral losses to be added to the amino acids.
:param dict nl_dict: A dictionary where the keys are amino acid indices and the values are lists of neutral
losses associated with each index.
:param int start_aa_index: The starting index of the amino acid range to which the neutral losses should be added.
:param int end_aa_index: The ending index of the amino acid range to which the neutral losses should be added.
:returns: Updated dictionary with the added neutral losses for the specified amino acid indices.
"""
first_nl = True
new_nls = {}
for nl in neutral_losses:
for i in range(start_aa_index, end_aa_index):
current_aa_nl = nl_dict[i]
if first_nl:
new_nls[i] = list(set(neutral_losses) - set(current_aa_nl))
if nl not in current_aa_nl:
current_aa_nl.append(nl)
first_nl = False
return nl_dict


def _get_neutral_losses(peptide_sequence, modifications):
"""
Get possible neutral losses and position in a peptide sequence.
:param peptide_sequence: Unmodified peptide sequence
:modifications: modifications dict generated by _get_modifications from modified petide sequence.
:return: Dict with neutral losses position as an ID and composition as its value.
"""
sequence_length = len(peptide_sequence)
keys = range(0, sequence_length - 1)

NL_b_ions = dict([(key, []) for key in keys])
NL_y_ions = dict([(key, []) for key in keys])

for i in range(0, sequence_length):
aa = peptide_sequence[i]
if aa in c.AA_Neutral_losses:
if i in modifications:
if aa == 'M' and modifications[i] == 15.9949146:
NL_b_ions = _add_nl(c.AA_Neutral_losses['M[UNIMOD:35]'], NL_b_ions, i, sequence_length - 1)
NL_y_ions = _add_nl(c.AA_Neutral_losses['M[UNIMOD:35]'], NL_y_ions, sequence_length - i - 1,
sequence_length - 1)
elif aa == 'R' and modifications[i] == 0.984016:
NL_b_ions = _add_nl(c.Mod_Neutral_losses['R[UNIMOD:7]'], NL_b_ions, i, sequence_length - 1)
NL_y_ions = _add_nl(c.Mod_Neutral_losses['R[UNIMOD:7]'], NL_y_ions, sequence_length - i - 1,
sequence_length - 1)
elif (aa == 'S' or aa=='T') and modifications[i] == 79.9663:
NL_b_ions = _add_nl(c.Mod_Neutral_losses['R[UNIMOD:7]'], NL_b_ions, i, sequence_length - 1)
NL_y_ions = _add_nl(c.Mod_Neutral_losses['R[UNIMOD:7]'], NL_y_ions, sequence_length - i - 1,
sequence_length - 1)
else:
NL_b_ions = _add_nl(c.AA_Neutral_losses[aa], NL_b_ions, i, sequence_length - 1)
NL_y_ions = _add_nl(c.AA_Neutral_losses[aa], NL_y_ions, sequence_length - i - 1, sequence_length - 1)
return NL_b_ions, NL_y_ions

def _calculate_nl_score_mass(neutral_loss):
"""
Calculates the score and mass for a given neutral loss (NL).
:param str neutral_loss: The type of neutral loss for which to calculate the score and mass.
This should be a key present in the `Neutral_losses_Mass` dictionary.
:returns: A tuple containing the adjusted score and the mass of the specified neutral loss.
"""
score = 100
mass = 0
mass = c.Neutral_losses_Mass[neutral_loss]
if neutral_loss == 'H2O' or neutral_loss == 'NH3':
score -= 5
else:
score -= 30
return score, mass

def initialize_peaks(
sequence: str,
mass_analyzer: str,
Expand All @@ -141,6 +218,7 @@ def initialize_peaks(
xl_pos: int = -1,
fragmentation_method: str = "HCD",
custom_mods: Optional[Dict[str, float]] = None,
add_neutral_losses: Optional[bool] = False
) -> Tuple[List[dict], int, str, float]:
"""
Generate theoretical peaks for a modified peptide sequence.
Expand All @@ -155,6 +233,7 @@ def initialize_peaks(
:param xl_pos: the position of the crosslinker for non-cleavable XL
:param fragmentation_method: fragmentation method that was used
:param custom_mods: mapping of custom UNIMOD string identifiers ('[UNIMOD:xyz]') to their mass
:param add_neutral_losses: Flag to indicate whether to annotate neutral losses or not
:return: List of theoretical peaks, Flag to indicate if there is a tmt on n-terminus, Un modified peptide sequence
"""
_xl_sanity_check(noncl_xl, peptide_beta_mass, xl_pos)
Expand All @@ -181,6 +260,9 @@ def initialize_peaks(
# add n_term mass to first aa for easy processing in the following calculation
modification_deltas[0] = modification_deltas.get(0, 0.0) + n_term_delta

if add_neutral_losses:
nl_b_ions, nl_y_ions = _get_neutral_losses(sequence, modification_deltas)
nl_ions = [nl_y_ions,nl_b_ions]
mass_arr = np.array([c.AA_MASSES[_] for _ in sequence])
for pos, mod_mass in modification_deltas.items():
mass_arr[pos] += mod_mass
Expand All @@ -201,7 +283,6 @@ def initialize_peaks(
# shape of ion_mzs: (n_ions, n_fragments, max_charge)
charges = np.arange(1, max_charge + 1)
ion_mzs = (sum_array[..., np.newaxis] + charges * c.PARTICLE_MASSES["PROTON"]) / charges

min_mzs, max_mzs = get_min_max_mass(mass_analyzer, ion_mzs, mass_tolerance, unit_mass_tolerance)

# write mz together with min and max value in output list with one dictionary for each ion
Expand All @@ -216,8 +297,31 @@ def initialize_peaks(
"mass": ion_mzs[ion_type, number, charge], # mz
"min_mass": min_mzs[ion_type, number, charge], # min mz
"max_mass": max_mzs[ion_type, number, charge], # max mz
"neutral_loss": '',
"fragment_score": 100,
}
)
if not add_neutral_losses:
continue
for nl in nl_ions[ion_type][number]:
nl_score, nl_mass = _calculate_nl_score_mass(nl)
ion_mass = sum_array[ion_type,number] - nl_mass
ion_mz = (ion_mass + (charge+1) * c.PARTICLE_MASSES["PROTON"]) / (charge+1)
min_mz, max_mz = get_min_max_mass(mass_analyzer, ion_mz, mass_tolerance, unit_mass_tolerance)

fragments_meta_data.append(
{
"ion_type": ion_types[ion_type], # ion type
"no": number + 1, # no
"charge": charge + 1, # charge
"mass": ion_mz, # mz
"min_mass": min_mz, # min mz
"max_mass": max_mz, # max mz
"neutral_loss": nl,
"fragment_score": 100- nl_score,
}
)


fragments_meta_data = sorted(fragments_meta_data, key=itemgetter("mass"))

Expand Down
48 changes: 48 additions & 0 deletions tests/unit_tests/test_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,51 @@ def test_catches_redundant_order(self):
_ = fragments.generate_fragment_ion_annotations(
ion_types=["y", "b"], order=("ion_type", "position", "ion_type")
)


class TestNeutralLossFunctions(unittest.TestCase):

def test_add_nl(self):
"""
Test the _add_nl function with a range of amino acids and neutral losses.
"""
neutral_losses = ['H2O', 'NH3']
nl_dict = {
0: ['CO2'], # Initial dictionary contains CO2 for index 0
1: ['H2O'], # Initial dictionary contains H2O for index 1
}
start_aa_index = 0
end_aa_index = 2 # Only indices 0 and 1 will be modified

# Expected output after the neutral losses are added
expected_nl_dict = {
0: ['CO2', 'H2O', 'NH3'], # Both H2O and NH3 added
1: ['H2O', 'NH3'] # NH3 added, H2O already present
}

result = fragments._add_nl(neutral_losses, nl_dict, start_aa_index, end_aa_index)
self.assertEqual(result, expected_nl_dict)

def test_calculate_nl_score_mass(self):
"""
Test the _calculate_nl_score_mass function with various neutral losses.
"""
# Test for H2O, which should reduce the score by 5
score, mass = fragments._calculate_nl_score_mass('H2O')
self.assertEqual(score, 95) # Starting score of 100 - 5
self.assertEqual(mass, 18.01056467) # Mass of H2O

# Test for NH3, which should also reduce the score by 5
score, mass = fragments._calculate_nl_score_mass('NH3')
self.assertEqual(score, 95) # Starting score of 100 - 5
self.assertEqual(mass, 17.026549105) # Mass of NH3

# Test for CO2, which should reduce the score by 30
score, mass = fragments._calculate_nl_score_mass('CO2')
self.assertEqual(score, 70) # Starting score of 100 - 30
self.assertEqual(mass, 43.9898292) # Mass of CO2

# Test for C2H4O2, which should reduce the score by 30
score, mass = fragments._calculate_nl_score_mass('C2H4O2')
self.assertEqual(score, 70) # Starting score of 100 - 30
self.assertEqual(mass, 60.02112934)

0 comments on commit 847ddce

Please sign in to comment.