Skip to content

Commit

Permalink
add log sum of intensities
Browse files Browse the repository at this point in the history
  • Loading branch information
maim02 committed Oct 24, 2023
1 parent fc40523 commit 1804412
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 11 deletions.
21 changes: 14 additions & 7 deletions spectrum_fundamentals/annotation/annotation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import math
from typing import Dict, List, Optional, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -28,11 +29,11 @@ def match_peaks(
:param unmod_sequence: Unmodified peptide sequence
:param charge: Precursor charge
:return: List of matched/annotated peaks
:return: float with sum of experimental peaks intensities
:return: Float with natural logarithm of the sum of labeled (matched) experimental peak intensities, or 0 if no peak was matched
"""
start_peak = 0
no_of_peaks = len(peaks_intensity)
sum_intensities = sum(peaks_intensity)
sum_intensities = 0.0
max_intensity = 1.0
row_list = []
temp_list = []
Expand Down Expand Up @@ -71,6 +72,7 @@ def match_peaks(
"intensity": peak_intensity,
}
)
sum_intensities += peak_intensity
if peak_intensity > max_intensity and fragment_no < seq_len:
max_intensity = float(peak_intensity)
matched_peak = True
Expand All @@ -79,7 +81,12 @@ def match_peaks(
for row in row_list:
row["intensity"] = float(row["intensity"]) / max_intensity
temp_list.append(row)
return temp_list, sum_intensities
if sum_intensities == 0:
log_sum_intensities = 0 # todo check if it's okay?
else:
log_sum_intensities = math.log(sum_intensities)

return temp_list, log_sum_intensities


def handle_multiple_matches(
Expand Down Expand Up @@ -151,7 +158,7 @@ def annotate_spectra(
continue
raw_file_annotations.append(results)
results_df = pd.DataFrame(raw_file_annotations)
results_df.columns = ["INTENSITIES", "MZ", "CALCULATED_MASS", "removed_peaks", "SUM_INTENSITIES"]
results_df.columns = ["INTENSITIES", "MZ", "CALCULATED_MASS", "removed_peaks", "LOG_SUM_INTENSITIES"]
logger.info(f"Removed {results_df['removed_peaks'].describe()} redundant peaks")

return results_df
Expand Down Expand Up @@ -247,7 +254,7 @@ def parallel_annotate(
)
if not unmod_sequence:
return None
matched_peaks, sum_intensities = match_peaks(
matched_peaks, log_sum_intensities = match_peaks(
fragments_meta_data,
spectrum[index_columns["INTENSITIES"]],
spectrum[index_columns["MZ"]],
Expand All @@ -258,9 +265,9 @@ def parallel_annotate(
if len(matched_peaks) == 0:
intensity = np.full(174, 0.0)
mass = np.full(174, 0.0)
return intensity, mass, calc_mass, 0, sum_intensities
return intensity, mass, calc_mass, 0, log_sum_intensities
matched_peaks, removed_peaks = handle_multiple_matches(matched_peaks)
intensities, mass = generate_annotation_matrix(
matched_peaks, unmod_sequence, spectrum[index_columns["PRECURSOR_CHARGE"]]
)
return intensities, mass, calc_mass, removed_peaks, sum_intensities
return intensities, mass, calc_mass, removed_peaks, log_sum_intensities
8 changes: 4 additions & 4 deletions tests/unit_tests/data/spectrum_output.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
,INTENSITIES,MZ,CALCULATED_MASS,removed_peaks,SUM_INTENSITIES
0,"[0.83793371,0.,0.29878655,0.,0.,0.,0.,0.,0.09085768,0.,0.,1.,0.,0.34441502,0.,0.,0.,0.,0.,0.25216961,0.04101572,0.,0.25216961,0.,0.54783487,0.,0.,0.54783487,0.,0.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.]","[132.44059902,0.,44.83719672,0.,0.,0.,0.,0.,68.5568945,0.,0.,68.43836233,0.,182.38682974,0.,0.,0.,0.,0.,239.34301275,160.41780244,0.,239.34301275,0.,549.28001133,0.,0.,549.28001133,0.,0.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.]",679.26692967,1,174.55355364659576
1,"[0.,0.,-1.,0.,0.,-1.,0.,1.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.94578593,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.]","[0.,0.,-1.,0.,0.,-1.,0.,141.09999273,-1.,0.,0.,-1.,0.,0.,-1.,0.,197.77185605,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.]",3949.2251146699996,0,156.28084972251995
2,"[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]","[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]",280.09153467,0,17.290124545430817
,INTENSITIES,MZ,CALCULATED_MASS,removed_peaks,LOG_SUM_INTENSITIES
0,"[0.83793371,0.,0.29878655,0.,0.,0.,0.,0.,0.09085768,0.,0.,1.,0.,0.34441502,0.,0.,0.,0.,0.,0.25216961,0.04101572,0.,0.25216961,0.,0.54783487,0.,0.,0.54783487,0.,0.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.]","[132.44059902,0.,44.83719672,0.,0.,0.,0.,0.,68.5568945,0.,0.,68.43836233,0.,182.38682974,0.,0.,0.,0.,0.,239.34301275,160.41780244,0.,239.34301275,0.,549.28001133,0.,0.,549.28001133,0.,0.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.]",679.26692967,1,3.685252
1,"[0.,0.,-1.,0.,0.,-1.,0.,1.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.94578593,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.]","[0.,0.,-1.,0.,0.,-1.,0.,141.09999273,-1.,0.,0.,-1.,0.,0.,-1.,0.,197.77185605,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.]",3949.2251146699996,0,2.892972
2,"[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]","[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]",280.09153467,0,0.000000
1 change: 1 addition & 0 deletions tests/unit_tests/test_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def test_annotate_spectra(self):
spectrum_input["MZ"] = spectrum_input["MZ"].map(lambda mz: np.array(mz))

result = annotation.annotate_spectra(spectrum_input)
print(result)
pd.testing.assert_frame_equal(expected_result, result)

def test_handle_multiple_matches(self):
Expand Down

0 comments on commit 1804412

Please sign in to comment.