diff --git a/spectrum_fundamentals/annotation/annotation.py b/spectrum_fundamentals/annotation/annotation.py index fceeef9..db0a8fa 100644 --- a/spectrum_fundamentals/annotation/annotation.py +++ b/spectrum_fundamentals/annotation/annotation.py @@ -1,4 +1,5 @@ import logging +import math from typing import Dict, List, Optional, Tuple, Union import numpy as np @@ -28,11 +29,11 @@ def match_peaks( :param unmod_sequence: Unmodified peptide sequence :param charge: Precursor charge :return: List of matched/annotated peaks - :return: float with sum of experimental peaks intensities + :return: Float with natural log of the sum of labeled experimental peak intensities (0 if no peak was labeled) """ start_peak = 0 no_of_peaks = len(peaks_intensity) - sum_intensities = sum(peaks_intensity) + sum_intensities = 0.0 max_intensity = 1.0 row_list = [] temp_list = [] @@ -71,6 +72,7 @@ def match_peaks( "intensity": peak_intensity, } ) + sum_intensities += peak_intensity if peak_intensity > max_intensity and fragment_no < seq_len: max_intensity = float(peak_intensity) matched_peak = True @@ -79,7 +81,12 @@ def match_peaks( for row in row_list: row["intensity"] = float(row["intensity"]) / max_intensity temp_list.append(row) - return temp_list, sum_intensities + if sum_intensities == 0: + log_sum_intensities = 0 # TODO: check whether this is okay
+ else: + log_sum_intensities = math.log(sum_intensities) + + return temp_list, log_sum_intensities def handle_multiple_matches( @@ -151,7 +158,7 @@ def annotate_spectra( continue raw_file_annotations.append(results) results_df = pd.DataFrame(raw_file_annotations) - results_df.columns = ["INTENSITIES", "MZ", "CALCULATED_MASS", "removed_peaks", "SUM_INTENSITIES"] + results_df.columns = ["INTENSITIES", "MZ", "CALCULATED_MASS", "removed_peaks", "LOG_SUM_INTENSITIES"] logger.info(f"Removed {results_df['removed_peaks'].describe()} redundant peaks") return results_df @@ -247,7 +254,7 @@ def parallel_annotate( ) if not unmod_sequence: return None - matched_peaks, sum_intensities = match_peaks( + matched_peaks, log_sum_intensities = match_peaks( fragments_meta_data, spectrum[index_columns["INTENSITIES"]], spectrum[index_columns["MZ"]], @@ -258,9 +265,9 @@ def parallel_annotate( if len(matched_peaks) == 0: intensity = np.full(174, 0.0) mass = np.full(174, 0.0) - return intensity, mass, calc_mass, 0, sum_intensities + return intensity, mass, calc_mass, 0, log_sum_intensities matched_peaks, removed_peaks = handle_multiple_matches(matched_peaks) intensities, mass = generate_annotation_matrix( matched_peaks, unmod_sequence, spectrum[index_columns["PRECURSOR_CHARGE"]] ) - return intensities, mass, calc_mass, removed_peaks, sum_intensities + return intensities, mass, calc_mass, removed_peaks, log_sum_intensities diff --git a/tests/unit_tests/data/spectrum_output.csv b/tests/unit_tests/data/spectrum_output.csv index cbe1f78..8cb4fac 100644 --- a/tests/unit_tests/data/spectrum_output.csv +++ b/tests/unit_tests/data/spectrum_output.csv @@ -1,4 +1,4 @@ -,INTENSITIES,MZ,CALCULATED_MASS,removed_peaks,SUM_INTENSITIES 
-0,"[0.83793371,0.,0.29878655,0.,0.,0.,0.,0.,0.09085768,0.,0.,1.,0.,0.34441502,0.,0.,0.,0.,0.,0.25216961,0.04101572,0.,0.25216961,0.,0.54783487,0.,0.,0.54783487,0.,0.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.]","[132.44059902,0.,44.83719672,0.,0.,0.,0.,0.,68.5568945,0.,0.,68.43836233,0.,182.38682974,0.,0.,0.,0.,0.,239.34301275,160.41780244,0.,239.34301275,0.,549.28001133,0.,0.,549.28001133,0.,0.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.]",679.26692967,1,174.55355364659576 
-1,"[0.,0.,-1.,0.,0.,-1.,0.,1.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.94578593,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.]","[0.,0.,-1.,0.,0.,-1.,0.,141.09999273,-1.,0.,0.,-1.,0.,0.,-1.,0.,197.77185605,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.]",3949.2251146699996,0,156.28084972251995 
-2,"[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]","[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]",280.09153467,0,17.290124545430817 +,INTENSITIES,MZ,CALCULATED_MASS,removed_peaks,LOG_SUM_INTENSITIES 
+0,"[0.83793371,0.,0.29878655,0.,0.,0.,0.,0.,0.09085768,0.,0.,1.,0.,0.34441502,0.,0.,0.,0.,0.,0.25216961,0.04101572,0.,0.25216961,0.,0.54783487,0.,0.,0.54783487,0.,0.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.]","[132.44059902,0.,44.83719672,0.,0.,0.,0.,0.,68.5568945,0.,0.,68.43836233,0.,182.38682974,0.,0.,0.,0.,0.,239.34301275,160.41780244,0.,239.34301275,0.,549.28001133,0.,0.,549.28001133,0.,0.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.]",679.26692967,1,3.685252 
+1,"[0.,0.,-1.,0.,0.,-1.,0.,1.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.94578593,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.]","[0.,0.,-1.,0.,0.,-1.,0.,141.09999273,-1.,0.,0.,-1.,0.,0.,-1.,0.,197.77185605,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.,0.,0.,-1.]",3949.2251146699996,0,2.892972 
+2,"[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]","[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]",280.09153467,0,0.000000 diff --git a/tests/unit_tests/test_annotation.py b/tests/unit_tests/test_annotation.py index 25d800a..44b38c7 100644 --- a/tests/unit_tests/test_annotation.py +++ b/tests/unit_tests/test_annotation.py @@ -27,6 +27,7 @@ def test_annotate_spectra(self): spectrum_input["MZ"] = spectrum_input["MZ"].map(lambda mz: np.array(mz)) result = annotation.annotate_spectra(spectrum_input) + print(result) pd.testing.assert_frame_equal(expected_result, result) def test_handle_multiple_matches(self):