Skip to content

Commit

Permalink
Merge pull request #77 from wilhelm-lab/release/0.4.3
Browse files Browse the repository at this point in the history
Release/0.4.3
  • Loading branch information
picciama authored Oct 31, 2023
2 parents 57135ee + 4739dbc commit 9b90193
Show file tree
Hide file tree
Showing 16 changed files with 622 additions and 679 deletions.
2 changes: 1 addition & 1 deletion .cookietemple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ full_name: Victor Giurcoiu
email: [email protected]
project_name: spectrum_fundamentals
project_short_description: Fundamentals public repo
version: 0.4.2
version: 0.4.3
license: MIT
4 changes: 2 additions & 2 deletions .github/release-drafter.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name-template: "0.4.2 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
tag-template: 0.4.2 # <<COOKIETEMPLE_FORCE_BUMP>>
name-template: "0.4.3 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
tag-template: 0.4.3 # <<COOKIETEMPLE_FORCE_BUMP>>
exclude-labels:
- "skip-changelog"

Expand Down
2 changes: 1 addition & 1 deletion cookietemple.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.2
current_version = 0.4.3

[bumpversion_files_whitelisted]
init_file = spectrum_fundamentals/__init__.py
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@
# the built documents.
#
# The short X.Y version.
version = "0.4.2"
version = "0.4.3"
# The full version, including alpha/beta/rc tags.
release = "0.4.2"
release = "0.4.3"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
1,196 changes: 583 additions & 613 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "spectrum_fundamentals"
version = "0.4.2" # <<COOKIETEMPLE_FORCE_BUMP>>
version = "0.4.3" # <<COOKIETEMPLE_FORCE_BUMP>>
description = "Fundamentals public repo"
authors = ["WassimG <[email protected]>"]
license = "MIT"
Expand All @@ -20,7 +20,7 @@ classifiers = [


[tool.poetry.dependencies]
python = "^3.8.0"
python = ">=3.8.0,<3.11.0"
click = ">=8.0.0"
rich = ">=10.3.0"
PyYAML = ">=5.4.1"
Expand Down
2 changes: 1 addition & 1 deletion spectrum_fundamentals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Initialize fundamentals."""
__author__ = "Victor Giurcoiu"
__email__ = "[email protected]"
__version__ = "0.4.2"
__version__ = "0.4.3"

import logging
import logging.handlers
Expand Down
2 changes: 1 addition & 1 deletion spectrum_fundamentals/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@click.command()
@click.version_option(version="0.4.2", message=click.style("spectrum_fundamentals Version: 0.4.2"))
@click.version_option(version="0.4.3", message=click.style("spectrum_fundamentals Version: 0.4.3"))
def main() -> None:
"""spectrum_fundamentals."""

Expand Down
1 change: 0 additions & 1 deletion spectrum_fundamentals/annotation/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ def annotate_spectra(
raw_file_annotations.append(results)
results_df = pd.DataFrame(raw_file_annotations)
results_df.columns = ["INTENSITIES", "MZ", "CALCULATED_MASS", "removed_peaks"]
logger.info(f"Removed {results_df['removed_peaks'].describe()} redundant peaks")

return results_df

Expand Down
9 changes: 4 additions & 5 deletions spectrum_fundamentals/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
"C[UNIMOD:4]": 2,
"K[UNIMOD:737]": 22,
"K[UNIMOD:2016]": 22,
"K[UNIMOD:2016]": 22,
"K[UNIMOD:214]": 22,
"K[UNIMOD:730]": 22,
"S[UNIMOD:21]": 25,
Expand All @@ -73,16 +72,16 @@
"(ox)": "[UNIMOD:35]",
"(Oxidation (M))": "[UNIMOD:35]",
"(tm)": "[UNIMOD:737]",
"_(tm)": "_[UNIMOD:737]",
"_(tm)": "_[UNIMOD:737]-",
"K(tm)": "K[UNIMOD:737]",
"(i4)": "[UNIMOD:214]",
"_(i4)": "_[UNIMOD:214]",
"_(i4)": "_[UNIMOD:214]-",
"K(i4)": "K[UNIMOD:214]",
"(i8)": "[UNIMOD:730]",
"_(i8)": "_[UNIMOD:730]",
"_(i8)": "_[UNIMOD:730]-",
"K(i8)": "K[UNIMOD:730]",
"(tmp)": "[UNIMOD:2016]",
"_(tmp)": "_[UNIMOD:2016]",
"_(tmp)": "_[UNIMOD:2016]-",
"K(tmp)": "K[UNIMOD:2016]",
"(ph)": "[UNIMOD:21]",
"(Phospho (STY))": "[UNIMOD:21]",
Expand Down
7 changes: 4 additions & 3 deletions spectrum_fundamentals/fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@ def _get_modifications(peptide_sequence: str) -> Optional[Tuple[Dict[int, float]
modification_mass = constants.MOD_MASSES
# Handle terminal modifications here
for possible_tmt_mod in constants.TMT_MODS.values():
if peptide_sequence.startswith(possible_tmt_mod): # TMT_6
n_term_tmt = possible_tmt_mod + "-"
if peptide_sequence.startswith(n_term_tmt):
tmt_n_term = 2
modification_deltas.update({0: constants.MOD_MASSES[possible_tmt_mod]})
peptide_sequence = peptide_sequence[len(possible_tmt_mod) :]
peptide_sequence = peptide_sequence[len(n_term_tmt) :]
break

if "(" in peptide_sequence:
Expand Down Expand Up @@ -183,7 +184,7 @@ def initialize_peaks(
for ion_type in range(0, number_of_ion_types): # generate all ion types
# Check for neutral loss here
mass = (ion_type_masses[ion_type] + charge_delta) / charge
min_mass, max_mass = get_min_max_mass(mass_analyzer, mass)
min_mass, max_mass = get_min_max_mass(mass_analyzer, mass, mass_tolerance, unit_mass_tolerance)
fragments_meta_data.append(
{
"ion_type": ion_types[ion_type], # ion type
Expand Down
36 changes: 5 additions & 31 deletions spectrum_fundamentals/metrics/percolator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import enum
import hashlib
import logging
import re
import subprocess
from typing import Optional, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -60,26 +57,15 @@ def __init__(
all_features_flag: bool = False,
regression_method: str = "lowess",
fdr_cutoff: float = 0.01,
percolator_version: Optional[float] = 3.05,
):
"""Initialize a Percolator obj."""
self.metadata = metadata
self.input_type = input_type
self.all_features_flag = all_features_flag
self.regression_method = regression_method
self.fdr_cutoff = fdr_cutoff

self._resolve_percolator_compatibility(percolator_version)
super().__init__(pred_intensities, true_intensities, mz)

def _resolve_percolator_compatibility(self, percolator_version: Optional[float] = None):
if percolator_version is None:
result = subprocess.run(["percolator", "-h"], capture_output=True, text=True)
version_line = result.stderr.splitlines()[0].strip()
version = version_line.split("version ")[1]
percolator_version = float(re.sub(r"\.[^.]+$", "", version))
self.prot_col_name = "Proteins" if percolator_version >= 3.06 else "Protein"

@staticmethod
def sample_balanced_over_bins(retention_time_df: pd.DataFrame, sample_size: int = 5000) -> pd.Index:
"""
Expand Down Expand Up @@ -170,18 +156,6 @@ def get_aligned_predicted_retention_times(

return aligned_rts_predicted

@staticmethod
def get_scannr(metadata_subset: Union[pd.Series, Tuple[str, int]]) -> int:
"""
Creates a hash of the raw_file and scan number to use as a unique scan number in percolator.
:param metadata_subset: tuple of (raw_file, scan_number)
:return: hashed unique id
"""
raw_file, scan_number = metadata_subset
s = f"{raw_file}{scan_number}".encode()
return int(hashlib.sha224(s).hexdigest()[:6], 16)

@staticmethod
def get_delta_score(scores_df: pd.DataFrame, scoring_feature: str) -> np.ndarray:
"""
Expand Down Expand Up @@ -296,11 +270,11 @@ def add_percolator_metadata_columns(self):
spec_id_cols.append("SCAN_EVENT_NUMBER")
self.metrics_val["SpecId"] = self.metadata[spec_id_cols].apply(Percolator.get_specid, axis=1)
self.metrics_val["Label"] = self.target_decoy_labels
self.metrics_val["ScanNr"] = self.metadata[["RAW_FILE", "SCAN_NUMBER"]].apply(Percolator.get_scannr, axis=1)

self.metrics_val["ScanNr"] = self.metadata["SCAN_NUMBER"]
self.metrics_val["filename"] = self.metadata["RAW_FILE"]
self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._")

self.metrics_val[self.prot_col_name] = self.metadata[
self.metrics_val["Proteins"] = self.metadata[
"MODIFIED_SEQUENCE"
] # we don't need the protein ID to get PSM / peptide results, fill with peptide sequence

Expand Down Expand Up @@ -397,8 +371,8 @@ def fdrs_to_qvals(fdrs: np.ndarray) -> np.ndarray:

def _reorder_columns_for_percolator(self):
all_columns = self.metrics_val.columns
first_columns = ["SpecId", "Label", "ScanNr"]
last_columns = ["Peptide", "Protein"] if "Protein" in all_columns else ["Peptide", "Proteins"]
first_columns = ["SpecId", "Label", "ScanNr", "filename"]
last_columns = ["Peptide", "Proteins"]
mid_columns = list(set(all_columns) - set(first_columns) - set(last_columns))
new_columns = first_columns + sorted(mid_columns) + last_columns
self.metrics_val = self.metrics_val[new_columns]
Expand Down
2 changes: 1 addition & 1 deletion tests/unit_tests/data/perc_output.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
,SpecId,Label,ScanNr,CID,Charge1,Charge2,Charge3,Charge4,Charge5,Charge6,HCD,KR,Mass,RT,UnknownFragmentationMethod,abs_rt_diff,collision_energy_aligned,count_not_observed_and_not_predicted,count_not_observed_and_not_predicted_b,count_not_observed_and_not_predicted_y,count_not_observed_but_predicted,count_not_observed_but_predicted_b,count_not_observed_but_predicted_y,count_observed,count_observed_and_predicted,count_observed_and_predicted_b,count_observed_and_predicted_y,count_observed_b,count_observed_but_not_predicted,count_observed_but_not_predicted_b,count_observed_but_not_predicted_y,count_observed_y,count_predicted,count_predicted_b,count_predicted_y,fraction_not_observed_and_not_predicted,fraction_not_observed_and_not_predicted_b,fraction_not_observed_and_not_predicted_b_vs_predicted_b,fraction_not_observed_and_not_predicted_vs_predicted,fraction_not_observed_and_not_predicted_y,fraction_not_observed_and_not_predicted_y_vs_predicted_y,fraction_not_observed_but_predicted,fraction_not_observed_but_predicted_b,fraction_not_observed_but_predicted_b_vs_predicted,fraction_not_observed_but_predicted_vs_predicted,fraction_not_observed_but_predicted_y,fraction_not_observed_but_predicted_y_vs_predicted,fraction_observed,fraction_observed_and_predicted,fraction_observed_and_predicted_b,fraction_observed_and_predicted_b_vs_predicted_b,fraction_observed_and_predicted_vs_predicted,fraction_observed_and_predicted_y,fraction_observed_and_predicted_y_vs_predicted_y,fraction_observed_b,fraction_observed_but_not_predicted,fraction_observed_but_not_predicted_b,fraction_observed_but_not_predicted_b_vs_predicted_b,fraction_observed_but_not_predicted_vs_predicted,fraction_observed_but_not_predicted_y,fraction_observed_but_not_predicted_y_vs_predicted_y,fraction_observed_y,fraction_predicted,fraction_predicted_b,fraction_predicted_y,iRT,lda_scores,missedCleavages,modified_cosine,pearson_corr,pred_RT,sequence_length,spectral_angle,Peptide,Protein
,SpecId,Label,ScanNr,CID,Charge1,Charge2,Charge3,Charge4,Charge5,Charge6,HCD,KR,Mass,RT,UnknownFragmentationMethod,abs_rt_diff,collision_energy_aligned,count_not_observed_and_not_predicted,count_not_observed_and_not_predicted_b,count_not_observed_and_not_predicted_y,count_not_observed_but_predicted,count_not_observed_but_predicted_b,count_not_observed_but_predicted_y,count_observed,count_observed_and_predicted,count_observed_and_predicted_b,count_observed_and_predicted_y,count_observed_b,count_observed_but_not_predicted,count_observed_but_not_predicted_b,count_observed_but_not_predicted_y,count_observed_y,count_predicted,count_predicted_b,count_predicted_y,fraction_not_observed_and_not_predicted,fraction_not_observed_and_not_predicted_b,fraction_not_observed_and_not_predicted_b_vs_predicted_b,fraction_not_observed_and_not_predicted_vs_predicted,fraction_not_observed_and_not_predicted_y,fraction_not_observed_and_not_predicted_y_vs_predicted_y,fraction_not_observed_but_predicted,fraction_not_observed_but_predicted_b,fraction_not_observed_but_predicted_b_vs_predicted,fraction_not_observed_but_predicted_vs_predicted,fraction_not_observed_but_predicted_y,fraction_not_observed_but_predicted_y_vs_predicted,fraction_observed,fraction_observed_and_predicted,fraction_observed_and_predicted_b,fraction_observed_and_predicted_b_vs_predicted_b,fraction_observed_and_predicted_vs_predicted,fraction_observed_and_predicted_y,fraction_observed_and_predicted_y_vs_predicted_y,fraction_observed_b,fraction_observed_but_not_predicted,fraction_observed_but_not_predicted_b,fraction_observed_but_not_predicted_b_vs_predicted_b,fraction_observed_but_not_predicted_vs_predicted,fraction_observed_but_not_predicted_y,fraction_observed_but_not_predicted_y_vs_predicted_y,fraction_observed_y,fraction_predicted,fraction_predicted_b,fraction_predicted_y,iRT,lda_scores,missedCleavages,modified_cosine,pearson_corr,pred_RT,sequence_length,spectral_angle,Peptide,Proteins
0,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02-7978-AAIGEATRL-2-1,1,10203379,0,0,1,0,0,0,0,1,1,900.50288029264,0.5000000183883155,0,0.0,0.3,2.0,1,1,2.0,1,1,4.0,2.0,1,1,1,2.0,0,2,3,4.0,2,2,0.25,0.3333333333333333,0.5,0.5,0.2,0.5,0.25,0.3333333333333333,0.5,0.5,0.2,0.5,0.5,0.25,0.3333333333333333,0.5,0.5,0.2,0.5,0.3333333333333333,0.25,0.0,0.0,0.5,0.4,1.0,0.6,0.5,0.6666666666666666,0.4,0.5000000183883155,1.7027402478448157e+30,1,0.8959462788501606,0.8831116463997931,0.5000000323590892,9,0.7449534694193893,_.AAIGEATRL._,AAIGEATRL
1,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02-12304-AAVPRAAFL-2-2,-1,5831230,0,0,1,0,0,0,0,1,1,914.53379,1.000000038995633,0,2.533209730870567e-08,0.3,1.0,0,1,1.0,0,1,2.0,1.0,0,1,1,1.0,1,0,1,2.0,0,2,0.25,0.0,0.0,0.5,0.3333333333333333,0.5,0.25,0.0,0.0,0.5,0.3333333333333333,0.5,0.5,0.25,0.0,0.0,0.5,0.3333333333333333,0.5,1.0,0.25,1.0,1.0,0.5,0.0,0.0,0.3333333333333333,0.5,0.0,0.6666666666666666,1.0000000136635356,-1.7027402478448157e+30,1,1.0,0.0,1.5000000246189773,9,0.5903344918223923,_.AAVPRAAFL._,AAVPRAAFL
2,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02-12398-AAYFGVYDTAK-2-3,-1,16059765,0,0,1,0,0,0,0,1,1,1204.5764,1.5000000993570155,0,1.0185683851915428e-07,0.3,1.0,0,1,1.0,0,1,2.0,1.0,0,1,1,1.0,1,0,1,2.0,0,2,0.25,0.0,0.0,0.5,0.3333333333333333,0.5,0.25,0.0,0.0,0.5,0.3333333333333333,0.5,0.5,0.25,0.0,0.0,0.5,0.3333333333333333,0.5,1.0,0.25,1.0,1.0,0.5,0.0,0.0,0.3333333333333333,0.5,0.0,0.6666666666666666,1.499999997500177,-1.7027402478448157e+30,0,1.0,0.0,2.500000056694346,11,0.5903344918223923,_.AAYFGVYDTAK._,AAYFGVYDTAK
Expand Down
9 changes: 7 additions & 2 deletions tests/unit_tests/test_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ def test_get_modifications_carbamidomethylation(self):

def test_get_modifications_tmt_tag(self):
"""Test get_modifications."""
assert fragments._get_modifications("[UNIMOD:737]ABC[UNIMOD:4]") == ({0: 229.162932, 2: 57.02146}, 2, "ABC")
assert fragments._get_modifications("[UNIMOD:737]-ABC[UNIMOD:4]") == ({0: 229.162932, 2: 57.02146}, 2, "ABC")

def test_get_modifications_tmtpro_tag(self):
"""Test get_modifications."""
assert fragments._get_modifications("[UNIMOD:2016]ABC[UNIMOD:4]") == ({0: 304.207146, 2: 57.02146}, 2, "ABC")
assert fragments._get_modifications("[UNIMOD:2016]-ABC[UNIMOD:4]") == ({0: 304.207146, 2: 57.02146}, 2, "ABC")


class TestComputeMasses(unittest.TestCase):
Expand Down Expand Up @@ -63,6 +63,11 @@ def test_compute_peptide_masses(self):
seq = "SEQUENC[UNIMOD:4]E"
self.assertEqual(fragments.compute_peptide_mass(seq), 1045.2561516699998)

def test_compute_peptide_masses_tmtpro(self):
"""Test computation of peptide masses with valid input and tmt tag."""
seq = "[UNIMOD:737]-SEQUENC[UNIMOD:4]E"
self.assertEqual(fragments.compute_peptide_mass(seq), 1274.41908367)

def test_compute_peptide_masses_with_invalid_syntax(self):
"""Negative testing of computation of peptide mass with unsupported syntax of mod string."""
seq = "SEQUEM(Ox.)CE"
Expand Down
10 changes: 5 additions & 5 deletions tests/unit_tests/test_mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ def test_maxquant_to_internal_variable_dehydration_long(self):

def test_maxquant_to_internal_tmt(self):
"""Test maxquant_to_internal_tmt."""
fixed_mods = {"C": "C[UNIMOD:4]", "^_": "_[UNIMOD:737]", "K": "K[UNIMOD:737]"}
fixed_mods = {"C": "C[UNIMOD:4]", "^_": "_[UNIMOD:737]-", "K": "K[UNIMOD:737]"}
self.assertEqual(
mod.maxquant_to_internal(["_ABCDEFGHK_"], fixed_mods), ["[UNIMOD:737]ABC[UNIMOD:4]DEFGHK[UNIMOD:737]"]
mod.maxquant_to_internal(["_ABCDEFGHK_"], fixed_mods), ["[UNIMOD:737]-ABC[UNIMOD:4]DEFGHK[UNIMOD:737]"]
)

def test_maxquant_to_internal_silac(self):
Expand Down Expand Up @@ -79,13 +79,13 @@ class TestInternalTransformations(unittest.TestCase):

def test_internal_without_mods(self):
"""Test internal with mods to internal without_mods."""
self.assertEqual(mod.internal_without_mods(["[UNIMOD:737]ABC[UNIMOD:4]DEFGHK[UNIMOD:737]"]), ["ABCDEFGHK"])
self.assertEqual(mod.internal_without_mods(["[UNIMOD:737]-ABC[UNIMOD:4]DEFGHK[UNIMOD:737]"]), ["ABCDEFGHK"])

def test_internal_to_mod_masses(self):
"""Test internal with mods to internal with mod masses."""
self.assertEqual(
mod.internal_to_mod_mass(["[UNIMOD:737]ABC[UNIMOD:4]DEFGHK[UNIMOD:737]"]),
["[+229.162932]ABC[+57.02146]DEFGHK[+229.162932]"],
mod.internal_to_mod_mass(["[UNIMOD:737]-ABC[UNIMOD:4]DEFGHK[UNIMOD:737]"]),
["[+229.162932]-ABC[+57.02146]DEFGHK[+229.162932]"],
)

def test_proteomicsdb_to_internal(self):
Expand Down
11 changes: 3 additions & 8 deletions tests/unit_tests/test_percolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,12 +274,6 @@ def _create_noisy_logistic_data():
class TestPercolator:
"""Class to test percolator."""

def test_get_scannr(self):
"""Test get_scannr."""
np.testing.assert_equal(
perc.Percolator.get_scannr(("20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02", 7978)), 10203379
)

def test_get_specid(self):
"""Test get_specid."""
np.testing.assert_string_equal(
Expand Down Expand Up @@ -361,11 +355,12 @@ def test_calc(self):
percolator.metrics_val["SpecId"][0], "20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02-7978-AAIGEATRL-2-1"
)
np.testing.assert_equal(percolator.metrics_val["Label"][0], 1)
np.testing.assert_equal(percolator.metrics_val["ScanNr"][0], 10203379)
np.testing.assert_equal(percolator.metrics_val["ScanNr"][0], 7978)
np.testing.assert_equal(percolator.metrics_val["filename"][0], "20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02")
# np.testing.assert_almost_equal(percolator.metrics_val['ExpMass'][0], 900.50345678)
np.testing.assert_string_equal(percolator.metrics_val["Peptide"][0], "_.AAIGEATRL._")
np.testing.assert_string_equal(
percolator.metrics_val["Protein"][0], "AAIGEATRL"
percolator.metrics_val["Proteins"][0], "AAIGEATRL"
) # we don't need the protein ID to get PSM / peptide results

# features
Expand Down

0 comments on commit 9b90193

Please sign in to comment.