
Commit

Cover xisearch and fix issue where matched_peaks are zero
Mostafa Kalhor committed Nov 15, 2023
1 parent d8ca8a5 commit 125adee
Showing 4 changed files with 69 additions and 25 deletions.
58 changes: 42 additions & 16 deletions spectrum_fundamentals/annotation/annotation.py
@@ -320,6 +320,7 @@ def generate_annotation_matrix(

return intensity, mass


def parallel_annotate(spectrum: np.ndarray, index_columns: dict, xl:bool = False) -> Optional[Tuple[np.ndarray, np.ndarray, float, int]]:
"""
Perform parallel annotation of a spectrum.
@@ -368,6 +369,7 @@ def parallel_annotate(spectrum: np.ndarray, index_columns: dict, xl:bool = Fals
unmod_sequence_a,
spectrum[index_columns["PRECURSOR_CHARGE"]],
)

matched_peaks_b = match_peaks(
fragments_meta_data_b,
spectrum[index_columns["INTENSITIES"]],
@@ -377,23 +379,38 @@
spectrum[index_columns["PRECURSOR_CHARGE"]],
)

-if len(matched_peaks_a) == 0:
-    intensity = np.full(348, 0.0)
-    mass = np.full(348, 0.0)
-    return intensity_a, mass_a, calc_mass_a, 0
-if len(matched_peaks_b) == 0:
-    intensity = np.full(348, 0.0)
-    mass = np.full(348, 0.0)
-    return intensity_b, mass_b, calc_mass_b, 0
-matched_peaks_a, removed_peaks_a = handle_multiple_matches(matched_peaks_a)
-matched_peaks_b, removed_peaks_b = handle_multiple_matches(matched_peaks_b)
-intensities_a, mass_a = generate_annotation_matrix_xl(
-    matched_peaks_a, unmod_sequence_a, spectrum[index_columns["CROSSLINKER_POSITION_A"]]
-)
-intensities_b, mass_b = generate_annotation_matrix_xl(
-    matched_peaks_b, unmod_sequence_b, spectrum[index_columns["CROSSLINKER_POSITION_B"]]
-)
-return intensities_a, intensities_b, mass_a, mass_b, calc_mass_a, calc_mass_b, removed_peaks_a, removed_peaks_b
+if len(matched_peaks_a) == 0 and len(matched_peaks_b)==0 :
+    intensities_a = np.full(348, 0.0)
+    mass_a = np.full(348, 0.0)
+    intensities_b = np.full(348, 0.0)
+    mass_b = np.full(348, 0.0)
+    return intensities_a, intensities_b, mass_a, mass_b, calc_mass_a, calc_mass_b, 0, 0
+elif len(matched_peaks_a) == 0 and len(matched_peaks_b)!=0 :
+    intensities_a = np.full(348, 0.0)
+    mass_a = np.full(348, 0.0)
+    matched_peaks_b, removed_peaks_b = handle_multiple_matches(matched_peaks_b)
+    intensities_b, mass_b = generate_annotation_matrix_xl(
+        matched_peaks_b, unmod_sequence_b, spectrum[index_columns["CROSSLINKER_POSITION_B"]]
+    )
+    return intensities_a, intensities_b, mass_a, mass_b, calc_mass_a, calc_mass_b, 0, removed_peaks_b
+elif len(matched_peaks_a) != 0 and len(matched_peaks_b)==0 :
+    intensities_b = np.full(348, 0.0)
+    mass_b = np.full(348, 0.0)
+    matched_peaks_a, removed_peaks_a = handle_multiple_matches(matched_peaks_a)
+    intensities_a, mass_a = generate_annotation_matrix_xl(
+        matched_peaks_a, unmod_sequence_a, spectrum[index_columns["CROSSLINKER_POSITION_A"]]
+    )
+    return intensities_a, intensities_b, mass_a, mass_b, calc_mass_a, calc_mass_b, removed_peaks_a, 0
+else:
+    matched_peaks_a, removed_peaks_a = handle_multiple_matches(matched_peaks_a)
+    matched_peaks_b, removed_peaks_b = handle_multiple_matches(matched_peaks_b)
+    intensities_a, mass_a = generate_annotation_matrix_xl(
+        matched_peaks_a, unmod_sequence_a, spectrum[index_columns["CROSSLINKER_POSITION_A"]]
+    )
+    intensities_b, mass_b = generate_annotation_matrix_xl(
+        matched_peaks_b, unmod_sequence_b, spectrum[index_columns["CROSSLINKER_POSITION_B"]]
+    )
+    return intensities_a, intensities_b, mass_a, mass_b, calc_mass_a, calc_mass_b, removed_peaks_a, removed_peaks_b


else:
@@ -425,4 +442,13 @@ def parallel_annotate(spectrum: np.ndarray, index_columns: dict, xl:bool = Fals
matched_peaks, unmod_sequence, spectrum[index_columns["PRECURSOR_CHARGE"]]
)
return intensities, mass, calc_mass, removed_peaks
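The reworked crosslinked branch above distinguishes the four combinations of empty and non-empty matched-peak lists for the two peptides: any peptide without matches now gets zero-filled 348-length intensity and mass vectors and a removed-peak count of 0, instead of the old early four-value return that used undefined names. A condensed, illustrative sketch of that control flow follows; everything in it is a stand-in (`annotate` and `resolve_multiple` take the place of `generate_annotation_matrix_xl` and `handle_multiple_matches`, and the `calc_mass` values returned by the real function are omitted):

```python
import numpy as np
from typing import List, Tuple

VEC_LEN = 348  # fixed length of the XL annotation vectors used in the diff above


def _zero_pair() -> Tuple[np.ndarray, np.ndarray]:
    """Zero-filled intensity and mass vectors for a peptide with no matched peaks."""
    return np.full(VEC_LEN, 0.0), np.full(VEC_LEN, 0.0)


def annotate_xl_pair(matched_peaks_a: List[dict], matched_peaks_b: List[dict],
                     annotate, resolve_multiple) -> tuple:
    """Sketch of the four-way branching: each peptide with matches is annotated,
    each peptide without matches gets zero vectors and a removed-peak count of 0."""
    if not matched_peaks_a and not matched_peaks_b:
        (int_a, mass_a), (int_b, mass_b) = _zero_pair(), _zero_pair()
        removed_a = removed_b = 0
    elif not matched_peaks_a:
        int_a, mass_a = _zero_pair()
        removed_a = 0
        matched_peaks_b, removed_b = resolve_multiple(matched_peaks_b)
        int_b, mass_b = annotate(matched_peaks_b)
    elif not matched_peaks_b:
        int_b, mass_b = _zero_pair()
        removed_b = 0
        matched_peaks_a, removed_a = resolve_multiple(matched_peaks_a)
        int_a, mass_a = annotate(matched_peaks_a)
    else:
        matched_peaks_a, removed_a = resolve_multiple(matched_peaks_a)
        matched_peaks_b, removed_b = resolve_multiple(matched_peaks_b)
        int_a, mass_a = annotate(matched_peaks_a)
        int_b, mass_b = annotate(matched_peaks_b)
    return int_a, int_b, mass_a, mass_b, removed_a, removed_b
```

Calling this with two empty lists yields intensity and mass vectors that are all zeros for both peptides, which is the shape downstream metrics now expect in every case.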











4 changes: 2 additions & 2 deletions spectrum_fundamentals/metrics/fragments_ratio.py
@@ -50,7 +50,7 @@ def count_with_ion_mask(

config = Config()
config.read(CONFIG_PATH)
-if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]):
+if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx","xisearch"]):
if len(ion_mask) == 0:

ion_mask = scipy.sparse.csr_matrix(np.ones((348, 1)))
@@ -147,7 +147,7 @@ def calc(self):
"""Adds columns with count, fraction and fraction_predicted features to metrics_val dataframe."""
config = Config()
config.read(CONFIG_PATH)
-if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]):
+if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx","xisearch"]):
true_intensities_a = self.true_intensities[:,0:348]
true_intensities_b = self.true_intensities[:,348:]
pred_intensities_a = self.pred_intensities[:,0:348]
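The same case-insensitive engine check is repeated across annotation, fragment-ratio, percolator, and similarity code, and this commit extends each occurrence with "xisearch". Because every entry in the list is already a lowercase literal, the `any(...)` comparison is equivalent to a plain set-membership test. A small sketch (the `is_crosslink_search` helper and `XL_SEARCH_ENGINES` name are illustrative, not part of the package):

```python
# Engines whose results are treated as crosslink searches in these branches.
XL_SEARCH_ENGINES = {"plink2", "xlinkx", "xisearch"}


def is_crosslink_search(search_type: str) -> bool:
    """Case-insensitive check, equivalent to
    any(search_type.lower() == s.lower() for s in ["plink2", "xlinkx", "xisearch"])."""
    return search_type.lower() in XL_SEARCH_ENGINES


assert is_crosslink_search("XiSearch")
assert not is_crosslink_search("maxquant")
```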
28 changes: 23 additions & 5 deletions spectrum_fundamentals/metrics/percolator.py
Expand Up @@ -291,7 +291,7 @@ def get_target_decoy_label(reverse: bool):

def add_common_features(self):
"""Add features used by both Andromeda and Prosit feature scoring sets."""
-if any(self.config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]):
+if any(self.config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx","xisearch"]):
self.metrics_val["missedCleavages_A"] = self.metadata["SEQUENCE_A"].apply(Percolator.count_missed_cleavages)
self.metrics_val["missedCleavages_B"] = self.metadata["SEQUENCE_B"].apply(Percolator.count_missed_cleavages)
self.metrics_val["KR_A"] = self.metadata["SEQUENCE_A"].apply(Percolator.count_arginines_and_lysines)
@@ -319,12 +319,15 @@ def add_common_features(self):

def add_percolator_metadata_columns(self):
"""Add metadata columns needed by percolator, e.g. to identify a PSM."""
-if any(self.config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]):
+if any(self.config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx","xisearch"]):
spec_id_cols = ["RAW_FILE", "SCAN_NUMBER", "MODIFIED_SEQUENCE_A","MODIFIED_SEQUENCE_B", "PRECURSOR_CHARGE"]
self.metrics_val["Peptide"] = (self.metadata["MODIFIED_SEQUENCE_A"]+ "_" +
self.metadata["MODIFIED_SEQUENCE_B"]).apply(lambda x: "_." + x + "._")
self.metrics_val[self.prot_col_name] = (self.metadata["MODIFIED_SEQUENCE_A"]+ "_" +
self.metadata["MODIFIED_SEQUENCE_B"])
#self.metrics_val["label_pep_a"] = self.metadata["label_pep_a"]
#self.metrics_val["label_pep_b"] = self.metadata["label_pep_b"]
self.metrics_val["Label"] = self.target_decoy_labels
else:
spec_id_cols = ["RAW_FILE", "SCAN_NUMBER", "MODIFIED_SEQUENCE", "PRECURSOR_CHARGE"]
self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._")
@@ -380,9 +383,8 @@ def get_indices_below_fdr(self, feature_name: str, fdr_cutoff: float = 0.01) ->
# scores_df['Sequence'] = self.metadata['SEQUENCE']
scores_df = scores_df.sort_values(feature_name, ascending=False)
logger.debug(scores_df.head(100))

scores_df["fdr"] = Percolator.calculate_fdrs(scores_df["Label"])

# filter for targets only
scores_df = scores_df[scores_df["Label"] == TargetDecoyLabel.TARGET]

@@ -413,6 +415,22 @@ def calculate_fdrs(sorted_labels: Union[pd.Series, np.ndarray]) -> np.ndarray:
cumulative_decoy_count = np.cumsum(sorted_labels == TargetDecoyLabel.DECOY) + 1
cumulative_target_count = np.cumsum(sorted_labels == TargetDecoyLabel.TARGET) + 1
return Percolator.fdrs_to_qvals(cumulative_decoy_count / cumulative_target_count)

#def calculate_fdrs_xl(sorted_labels: Union[pd.Series, np.ndarray]) -> np.ndarray:
"""
Calculate FDR.
:param sorted_labels: array with labels sorted (target, decoy)
:return: array with calculated FDRs
"""
#if isinstance(sorted_labels, pd.Series):
#sorted_labels = sorted_labels.to_numpy()
#cumulative_decoy_count_1 = np.cumsum(sorted_labels == TargetDecoyLabel.DECOY) + 1
#cumulative_decoy_count_2 = np.cumsum(sorted_labels == TargetDecoyLabel.DECOY) + 1
#cumulative_target_count = np.cumsum(sorted_labels == TargetDecoyLabel.TARGET) + 1
#return Percolator.fdrs_to_qvals(cumulative_decoy_count_1 - cumulative_decoy_count_2) / (cumulative_target_count)



@staticmethod
def fdrs_to_qvals(fdrs: np.ndarray) -> np.ndarray:
@@ -465,7 +483,7 @@ def calc(self):
if current_fdr >= 0.1:
lda_failed = True
break
-if any(self.config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]):
+if any(self.config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx","xisearch"]):
self.metrics_val["collision_energy_aligned"] = self.metadata["COLLISION_ENERGY"] / 100.0
else:
if lda_failed:
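calculate_fdrs in the hunk above estimates the FDR at each score threshold as (cumulative decoys + 1) / (cumulative targets + 1) over the label array sorted by decreasing score, then hands the result to fdrs_to_qvals. That conversion is not shown in this diff, so the running-minimum form below is an assumption, as is the plain-integer label encoding standing in for TargetDecoyLabel:

```python
import numpy as np

TARGET, DECOY = 1, -1  # assumed encoding; the package uses a TargetDecoyLabel enum


def fdrs_to_qvals(fdrs: np.ndarray) -> np.ndarray:
    """Assumed conversion: q-value = minimum FDR at this threshold or any looser one."""
    return np.minimum.accumulate(fdrs[::-1])[::-1]


def calculate_fdrs(sorted_labels: np.ndarray) -> np.ndarray:
    """Mirrors the diff: +1 pseudo-counts avoid division by zero at the top of the list."""
    cumulative_decoy_count = np.cumsum(sorted_labels == DECOY) + 1
    cumulative_target_count = np.cumsum(sorted_labels == TARGET) + 1
    return fdrs_to_qvals(cumulative_decoy_count / cumulative_target_count)


labels = np.array([TARGET, TARGET, DECOY, TARGET, DECOY])  # already sorted by score
print(calculate_fdrs(labels))
```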
4 changes: 2 additions & 2 deletions spectrum_fundamentals/metrics/similarity.py
@@ -61,7 +61,7 @@ def spectral_angle(
"""
config = Config()
config.read(CONFIG_PATH)
-if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]):
+if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx", "xisearch"]):
if charge != 0:
if charge == 1:
boolean_array = constants.SINGLE_CHARGED_MASK_XL
@@ -435,7 +435,7 @@ def calc(self, all_features: bool):
"""
config = Config()
config.read(CONFIG_PATH)
-if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx"]):
+if any(config.search_type.lower() == s.lower() for s in ["plink2", "xlinkx","xisearch"]):
true_intensities_a = self.true_intensities[:,0:348]
true_intensities_b = self.true_intensities[:,348:]
pred_intensities_a = self.pred_intensities[:,0:348]
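For crosslink searches, the true and predicted intensity matrices hold both peptides side by side (2 × 348 columns), so the similarity and fragment-ratio features are computed per peptide by slicing columns 0:348 (peptide A) and 348: (peptide B), as in the calc() hunks above. A minimal sketch under that layout; the spectral_angle helper here is a simplified stand-in for the package's SimilarityMetrics.spectral_angle and applies none of the charge-dependent XL masks from the diff:

```python
import numpy as np

XL_VEC_LEN = 348  # per-peptide annotation length used throughout the diff

# Illustrative stacked matrix: 5 PSMs x 696 columns (peptide A | peptide B).
true_intensities = np.random.rand(5, 2 * XL_VEC_LEN)
pred_intensities = np.random.rand(5, 2 * XL_VEC_LEN)

true_a, true_b = true_intensities[:, :XL_VEC_LEN], true_intensities[:, XL_VEC_LEN:]
pred_a, pred_b = pred_intensities[:, :XL_VEC_LEN], pred_intensities[:, XL_VEC_LEN:]


def spectral_angle(true_vec: np.ndarray, pred_vec: np.ndarray) -> float:
    """Normalized spectral contrast angle for one PSM (simplified stand-in)."""
    t = true_vec / (np.linalg.norm(true_vec) or 1.0)
    p = pred_vec / (np.linalg.norm(pred_vec) or 1.0)
    return 1 - 2 * np.arccos(np.clip(np.dot(t, p), -1.0, 1.0)) / np.pi


# One spectral-angle feature per peptide, computed on the matching halves.
sa_a = [spectral_angle(t, p) for t, p in zip(true_a, pred_a)]
sa_b = [spectral_angle(t, p) for t, p in zip(true_b, pred_b)]
```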
