Skip to content

Commit

Permalink
Merge pull request #112 from wilhelm-lab/feature/add_new_iontypes
Browse files Browse the repository at this point in the history
Feature/add new iontypes
  • Loading branch information
picciama authored Jul 27, 2024
2 parents 664285a + 7139560 commit 5fb9de2
Show file tree
Hide file tree
Showing 21 changed files with 67,477 additions and 68,539 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
matrix:
os: [macos-latest, ubuntu-latest, windows-latest]
python: [3.8, 3.9]
python: ["3.9", "3.10"]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main_master_branch_protection.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
- name: Set up Python
uses: actions/[email protected]
with:
python-version: "3.8"
python-version: "3.9"
# PRs to the repository master branch are only ok if coming from any patch or release branch
- name: Install mlf-core
run: pip install mlf-core
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish_docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Setup Python
uses: actions/[email protected]
with:
python-version: 3.8
python-version: 3.9

- name: Install pip
run: |
Expand Down
22 changes: 11 additions & 11 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,47 +19,47 @@ jobs:
matrix:
include:
- {
python-version: 3.8,
python-version: 3.9,
os: ubuntu-latest,
session: "pre-commit",
}
- {
python-version: 3.8,
python-version: 3.9,
os: ubuntu-latest,
session: "safety",
}
- {
python-version: 3.8,
python-version: 3.9,
os: ubuntu-latest,
session: "mypy",
}
- {
python-version: 3.8,
python-version: 3.9,
os: ubuntu-latest,
session: "tests",
}
- {
python-version: 3.8,
python-version: 3.9,
os: windows-latest,
session: "tests",
}
- {
python-version: 3.8,
python-version: 3.9,
os: macos-latest,
session: "tests",
}
- {
python-version: 3.8,
python-version: 3.9,
os: ubuntu-latest,
session: "typeguard",
}
- {
python-version: 3.8,
python-version: 3.9,
os: ubuntu-latest,
session: "xdoctest",
}
- {
python-version: 3.8,
python-version: 3.9,
os: ubuntu-latest,
session: "docs-build",
}
Expand Down Expand Up @@ -136,10 +136,10 @@ jobs:
- name: Check out the repository
uses: actions/checkout@v4

- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/[email protected]
with:
python-version: 3.8
python-version: 3.9

- name: Install Poetry
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/sync_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
- name: Setup Python
uses: actions/[email protected]
with:
python-version: 3.8
python-version: 3.9

- name: Install cookietemple
run: pip install cookietemple
Expand Down
2 changes: 1 addition & 1 deletion docs/modules.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
spectrum_fundamentals
=================
=====================

.. toctree::
:maxdepth: 4
Expand Down
3 changes: 2 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
sys.exit(1)

package = "spectrum_fundamentals"
python_versions = ["3.8", "3.9"]
python_versions = ["3.9", "3.10"]
nox.options.sessions = (
"pre-commit",
"safety",
"mypy",
"tests",
"typeguard",
"xdoctest",
"docs-build",
)
Expand Down
1,340 changes: 768 additions & 572 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@ packages = [
{ include = "spectrum_fundamentals" },
]
classifiers = [
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
]


[tool.poetry.dependencies]
python = ">=3.8.0,<3.11.0"
python = ">=3.9.0,<3.11.0"
click = ">=8.0.0"
rich = ">=10.3.0"
PyYAML = ">=5.4.1"
Expand Down
71 changes: 45 additions & 26 deletions spectrum_fundamentals/annotation/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd

from spectrum_fundamentals import constants
from spectrum_fundamentals.fragments import initialize_peaks, initialize_peaks_xl
from spectrum_fundamentals.fragments import initialize_peaks, initialize_peaks_xl, retrieve_ion_types

logger = logging.getLogger(__name__)

Expand All @@ -31,11 +31,10 @@ def match_peaks(
"""
start_peak = 0
no_of_peaks = len(peaks_intensity)
max_intensity = 1.0
max_intensity = 0.0
row_list = []
temp_list = []
next_start_peak = 0
seq_len = len(unmod_sequence)
matched_peak = False
fragment_no: float
for fragment in fragments_meta_data:
Expand All @@ -55,7 +54,7 @@ def match_peaks(
start_peak += 1
continue
if (
not (fragment["ion_type"] == "b" and fragment_no == 1)
not (fragment["ion_type"][0] == "b" and fragment_no == 1)
or (unmod_sequence[0] == "R" or unmod_sequence[0] == "H" or unmod_sequence[0] == "K")
and (tmt_n_term == 1)
):
Expand All @@ -69,7 +68,7 @@ def match_peaks(
"intensity": peak_intensity,
}
)
if peak_intensity > max_intensity and fragment_no < seq_len:
if peak_intensity > max_intensity:
max_intensity = float(peak_intensity)
matched_peak = True
next_start_peak = start_peak
Expand Down Expand Up @@ -119,7 +118,10 @@ def handle_multiple_matches(


def annotate_spectra(
un_annot_spectra: pd.DataFrame, mass_tolerance: Optional[float] = None, unit_mass_tolerance: Optional[str] = None
un_annot_spectra: pd.DataFrame,
mass_tolerance: Optional[float] = None,
unit_mass_tolerance: Optional[str] = None,
fragmentation_method: str = "HCD",
) -> pd.DataFrame:
"""
Annotate a set of spectra.
Expand All @@ -138,12 +140,13 @@ def annotate_spectra(
:param un_annot_spectra: a Pandas DataFrame containing the raw peaks and metadata to be annotated
:param mass_tolerance: mass tolerance to calculate min and max mass
:param unit_mass_tolerance: unit for the mass tolerance (da or ppm)
:param fragmentation_method: fragmentation method that was used
:return: a Pandas DataFrame containing the annotated spectra with meta data
"""
raw_file_annotations = []
index_columns = {col: un_annot_spectra.columns.get_loc(col) for col in un_annot_spectra.columns}
for row in un_annot_spectra.values:
results = parallel_annotate(row, index_columns, mass_tolerance, unit_mass_tolerance)
results = parallel_annotate(row, index_columns, mass_tolerance, unit_mass_tolerance, fragmentation_method)
if not results:
continue
raw_file_annotations.append(results)
Expand Down Expand Up @@ -269,24 +272,29 @@ def generate_annotation_matrix_xl(


def generate_annotation_matrix(
matched_peaks: pd.DataFrame, unmod_seq: str, charge: int
matched_peaks: pd.DataFrame, unmod_seq: str, charge: int, fragmentation_method: str = "HCD"
) -> Tuple[np.ndarray, np.ndarray]:
"""
Generate the annotation matrix in the prosit format from matched peaks.
:param matched_peaks: matched peaks needed to be converted
:param unmod_seq: Unmodified peptide sequence
:param charge: Precursor charge
:param fragmentation_method: fragmentation method that was used
:return: numpy array of intensities and numpy array of masses
"""
intensity = np.full(constants.VEC_LENGTH, -1.0)
mass = np.full(constants.VEC_LENGTH, -1.0)
ion_types = retrieve_ion_types(fragmentation_method)
charge_const = 3
vec_length = (constants.SEQ_LEN - 1) * charge_const * len(ion_types)

intensity = np.full(vec_length, -1.0)
mass = np.full(vec_length, -1.0)

# change values to zeros
if len(unmod_seq) < constants.SEQ_LEN:
peaks_range = range(0, ((len(unmod_seq) - 1) * 6))
peaks_range = range(0, ((len(unmod_seq) - 1) * charge_const * len(ion_types)))
else:
peaks_range = range(0, ((constants.SEQ_LEN - 1) * 6))
peaks_range = range(0, ((constants.SEQ_LEN - 1) * charge_const * len(ion_types)))

if charge == 1:
available_peaks = [index for index in peaks_range if (index % 3 == 0)]
Expand All @@ -305,18 +313,18 @@ def generate_annotation_matrix(
exp_mass_col = matched_peaks.columns.get_loc("exp_mass")

for peak in matched_peaks.values:
if peak[ion_type].startswith("y"):
peak_pos = ((peak[no_col] - 1) * 6) + (peak[charge_col] - 1)
else:
peak_pos = ((peak[no_col] - 1) * 6) + (peak[charge_col] - 1) + 3
ion_type_index = ion_types.index(peak[ion_type][0])
peak_pos = ((peak[no_col] - 1) * charge_const * len(ion_types)) + (peak[charge_col] - 1) + 3 * ion_type_index

if peak_pos >= constants.VEC_LENGTH:
continue
intensity[peak_pos] = peak[intensity_col]
mass[peak_pos] = peak[exp_mass_col]

if len(unmod_seq) < constants.SEQ_LEN:
mask_peaks = range((len(unmod_seq) - 1) * 6, ((len(unmod_seq) - 1) * 6) + 6)
mask_peaks = range(
(len(unmod_seq) - 1) * charge_const * len(ion_types), ((len(unmod_seq)) * charge_const * len(ion_types))
)
intensity[mask_peaks] = -1.0
mass[mask_peaks] = -1.0

Expand All @@ -328,6 +336,7 @@ def parallel_annotate(
index_columns: Dict[str, int],
mass_tolerance: Optional[float] = None,
unit_mass_tolerance: Optional[str] = None,
fragmentation_method: str = "HCD",
) -> Optional[
Union[
Tuple[np.ndarray, np.ndarray, float, int],
Expand All @@ -348,14 +357,17 @@ def parallel_annotate(
:param index_columns: a dictionary that contains the index columns of the spectrum
:param mass_tolerance: mass tolerance to calculate min and max mass
:param unit_mass_tolerance: unit for the mass tolerance (da or ppm)
:param fragmentation_method: fragmentation method that was used
:return: a tuple containing intensity values (np.ndarray), masses (np.ndarray), calculated mass (float),
and any removed peaks (List[str])
"""
xl_type_col = index_columns.get("CROSSLINKER_TYPE")
if xl_type_col is None:
if spectrum[index_columns["PEPTIDE_LENGTH"]] > 30: # this was in initialize peaks but can be checked prior
return None
return _annotate_linear_spectrum(spectrum, index_columns, mass_tolerance, unit_mass_tolerance)
return _annotate_linear_spectrum(
spectrum, index_columns, mass_tolerance, unit_mass_tolerance, fragmentation_method
)

if (spectrum[index_columns["PEPTIDE_LENGTH_A"]] > 30) or (spectrum[index_columns["PEPTIDE_LENGTH_B"]] > 30):
return None
Expand All @@ -369,6 +381,7 @@ def _annotate_linear_spectrum(
index_columns: Dict[str, int],
mass_tolerance: Optional[float],
unit_mass_tolerance: Optional[str],
fragmentation_method: str = "HCD",
):
"""
Annotate a linear peptide spectrum.
Expand All @@ -377,17 +390,19 @@ def _annotate_linear_spectrum(
:param index_columns: Index columns of the spectrum
:param mass_tolerance: Mass tolerance for calculating min and max mass
:param unit_mass_tolerance: Unit for the mass tolerance (da or ppm)
:param fragmentation_method: fragmentation method that was used
:return: Annotated spectrum
"""
mod_seq_column = "MODIFIED_SEQUENCE"
if "MODIFIED_SEQUENCE_MSA" in index_columns:
mod_seq_column = "MODIFIED_SEQUENCE_MSA"
fragments_meta_data, tmt_n_term, unmod_sequence, calc_mass = initialize_peaks(
spectrum[index_columns[mod_seq_column]],
spectrum[index_columns["MASS_ANALYZER"]],
spectrum[index_columns["PRECURSOR_CHARGE"]],
mass_tolerance,
unit_mass_tolerance,
sequence=spectrum[index_columns[mod_seq_column]],
mass_analyzer=spectrum[index_columns["MASS_ANALYZER"]],
charge=spectrum[index_columns["PRECURSOR_CHARGE"]],
mass_tolerance=mass_tolerance,
unit_mass_tolerance=unit_mass_tolerance,
fragmentation_method=fragmentation_method,
)
matched_peaks = match_peaks(
fragments_meta_data,
Expand All @@ -398,14 +413,18 @@ def _annotate_linear_spectrum(
spectrum[index_columns["PRECURSOR_CHARGE"]],
)

ion_types = retrieve_ion_types(fragmentation_method)
charge_const = 3
vec_length = (constants.SEQ_LEN - 1) * charge_const * len(ion_types)

if len(matched_peaks) == 0:
intensity = np.full(174, 0.0)
mass = np.full(174, 0.0)
intensity = np.full(vec_length, 0.0)
mass = np.full(vec_length, 0.0)
return intensity, mass, calc_mass, 0

matched_peaks, removed_peaks = handle_multiple_matches(matched_peaks)
intensities, mass = generate_annotation_matrix(
matched_peaks, unmod_sequence, spectrum[index_columns["PRECURSOR_CHARGE"]]
matched_peaks, unmod_sequence, spectrum[index_columns["PRECURSOR_CHARGE"]], fragmentation_method
)
return intensities, mass, calc_mass, removed_peaks

Expand Down
Loading

0 comments on commit 5fb9de2

Please sign in to comment.