Skip to content

Commit

Permalink
Merge branch 'development' into feature/add_new_iontypes
Browse files Browse the repository at this point in the history
  • Loading branch information
picciama committed Aug 10, 2024
2 parents 30f9071 + 64aae2d commit 271d68b
Show file tree
Hide file tree
Showing 15 changed files with 181 additions and 143 deletions.
2 changes: 1 addition & 1 deletion .cookietemple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ full_name: Victor Giurcoiu
email: [email protected]
project_name: spectrum_fundamentals
project_short_description: Fundamentals public repo
version: 0.7.2
version: 0.7.3
license: MIT
4 changes: 2 additions & 2 deletions .github/release-drafter.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name-template: "0.7.2 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
tag-template: 0.7.2 # <<COOKIETEMPLE_FORCE_BUMP>>
name-template: "0.7.3 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
tag-template: 0.7.3 # <<COOKIETEMPLE_FORCE_BUMP>>
exclude-labels:
- "skip-changelog"

Expand Down
2 changes: 1 addition & 1 deletion cookietemple.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.7.2
current_version = 0.7.3

[bumpversion_files_whitelisted]
init_file = spectrum_fundamentals/__init__.py
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@
# the built documents.
#
# The short X.Y version.
version = "0.7.2"
version = "0.7.3"
# The full version, including alpha/beta/rc tags.
release = "0.7.2"
release = "0.7.3"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "spectrum_fundamentals"
version = "0.7.2" # <<COOKIETEMPLE_FORCE_BUMP>>
version = "0.7.3" # <<COOKIETEMPLE_FORCE_BUMP>>
description = "Fundamental functions, annotation pipeline and constants for oktoberfest"
authors = ["Wilhelmlab at Technical University of Munich"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion spectrum_fundamentals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

__author__ = "Mario Picciani"
__email__ = "[email protected]"
__version__ = "0.7.2"
__version__ = "0.7.3"

import logging
import logging.handlers
Expand Down
2 changes: 1 addition & 1 deletion spectrum_fundamentals/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@click.command()
@click.version_option(version="0.7.2", message=click.style("spectrum_fundamentals Version: 0.7.2"))
@click.version_option(version="0.7.3", message=click.style("spectrum_fundamentals Version: 0.7.3"))
def main() -> None:
"""spectrum_fundamentals."""

Expand Down
2 changes: 1 addition & 1 deletion spectrum_fundamentals/annotation/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def generate_annotation_matrix(
exp_mass_col = matched_peaks.columns.get_loc("exp_mass")

for peak in matched_peaks.values:
ion_type_index = ion_types.index(peak[ion_type][0])
ion_type_index = ion_types.index(peak[ion_type].split("-", 1)[0])
peak_pos = ((peak[no_col] - 1) * charge_const * len(ion_types)) + (peak[charge_col] - 1) + 3 * ion_type_index

if peak_pos >= constants.VEC_LENGTH:
Expand Down
49 changes: 48 additions & 1 deletion spectrum_fundamentals/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
VEC_LENGTH_CMS2 = (SEQ_LEN - 1) * 2 * 3 * 2
# peptide of length 30 can have 29 b, y, b_short, y_short, b_long and y_long ions, each with charge 1+, 2+ and 3+
# we do not annotate fragments with charge 3+. All fragments with charge 3+ are converted to -1


#############
# ALPHABETS #
#############
Expand Down Expand Up @@ -398,7 +400,7 @@

# Every ion type label must be its own list element: adjacent string literals without a
# separating comma are concatenated by Python ("z●" "a" would become the single entry "z●a").
IONS = ["x", "y", "z", "z●", "a", "b", "c"]
HCD_IONS = ["y", "b"]
ETD_IONS = ["z●", "c"]
ETD_IONS = ["z_r", "c"]
ETCID_IONS = ["y", "z", "b", "c"]
UVPD_IONS = ["x", "y", "z", "a", "b", "c"]

Expand Down Expand Up @@ -428,3 +430,48 @@ class RescoreType(Enum):

PROSIT = "prosit"
ANDROMEDA = "andromeda"


#############
# ION TYPES #
#############
# Ion type labels split into the forward and the backward series.
# "z_r" is the label for the ion that was previously written as "z●".
FORWARD_IONS = ["a", "b", "c"]
BACKWARDS_IONS = ["x", "y", "z", "z_r"]
IONS = FORWARD_IONS + BACKWARDS_IONS

# Ion types per (upper-case) fragmentation method, alternating backward / forward ions.
# The lists are spelled out explicitly so that the order is visible at a glance and does
# not silently change when FORWARD_IONS or BACKWARDS_IONS are extended.
# NOTE(review): the hard-coded lists this mapping replaced were y,b,z,c for ETHCD/ETCID and
# y,b,z,c,x,a for UVPD. The values below (z_r instead of z; UVPD starting with x,a) differ
# from that - confirm this is intended, since callers may derive positions from list order.
FRAGMENTATION_TO_IONS_BY_PAIRS = {
    "HCD": ["y", "b"],
    "CID": ["y", "b"],
    "ETD": ["z_r", "c"],
    "ECD": ["z_r", "c"],
    "ETHCD": ["y", "b", "z_r", "c"],
    "ETCID": ["y", "b", "z_r", "c"],
    "UVPD": ["x", "a", "y", "b", "z", "c"],
}

# Ion types per (upper-case) fragmentation method, grouped by direction:
# all backward ions first, followed by all forward ions.
FRAGMENTATION_TO_IONS_BY_DIRECTION = {
    "HCD": ["y", "b"],
    "CID": ["y", "b"],
    "ETD": ["z_r", "c"],
    "ECD": ["z_r", "c"],
    "ETHCD": ["y", "z_r", "b", "c"],
    "ETCID": ["y", "z_r", "b", "c"],
    "UVPD": ["x", "y", "z", "a", "b", "c"],
}

# Mass offset per ion type label, built from the element masses in ATOM_MASSES.
# "b" is the reference with an offset of 0.0; "z_r" differs from "z" by one hydrogen mass.
# fragments.get_ion_delta looks these values up by ion type label.
ION_DELTAS = {
"a": -ATOM_MASSES["O"] - ATOM_MASSES["C"],
"b": 0.0,
"c": 3 * ATOM_MASSES["H"] + ATOM_MASSES["N"],
"x": 2 * ATOM_MASSES["O"] + ATOM_MASSES["C"],
"y": ATOM_MASSES["O"] + 2 * ATOM_MASSES["H"],
"z": ATOM_MASSES["O"] - ATOM_MASSES["N"] - ATOM_MASSES["H"],
"z_r": ATOM_MASSES["O"] - ATOM_MASSES["N"],
}
39 changes: 7 additions & 32 deletions spectrum_fundamentals/fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,10 @@ def retrieve_ion_types(fragmentation_method: str) -> List[str]:
:return: list of possible ion types
"""
fragmentation_method = fragmentation_method.upper()
if fragmentation_method == "HCD" or fragmentation_method == "CID":
return ["y", "b"]
elif fragmentation_method == "ETD" or fragmentation_method == "ECD":
return ["z●", "c"]
elif fragmentation_method == "ETCID" or fragmentation_method == "ETHCD":
return ["y", "b", "z", "c"]
elif fragmentation_method == "UVPD":
return ["y", "b", "z", "c", "x", "a"]
else:
ions = c.FRAGMENTATION_TO_IONS_BY_PAIRS.get(fragmentation_method, [])
if not ions:
raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}")
return ions


def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> List[str]:
Expand All @@ -120,17 +114,10 @@ def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> Lis
:return: list of possible ion types
"""
fragmentation_method = fragmentation_method.upper()
if fragmentation_method == "HCD" or fragmentation_method == "CID":
return c.HCD_IONS
elif fragmentation_method == "ETD" or fragmentation_method == "ECD":
return c.ETD_IONS
elif fragmentation_method == "ETCID" or fragmentation_method == "ETHCD":
return c.ETCID_IONS
elif fragmentation_method == "UVPD":
return c.UVPD_IONS
return ["x", "y", "z", "a", "b", "c"]
else:
ions = c.FRAGMENTATION_TO_IONS_BY_DIRECTION.get(fragmentation_method, [])
if not ions:
raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}")
return ions


def get_ion_delta(ion_types: List[str]) -> np.ndarray:
Expand All @@ -140,19 +127,7 @@ def get_ion_delta(ion_types: List[str]) -> np.ndarray:
:param ion_types: type of ions for which mass should be calculated
:return: numpy array with masses of the ions
"""
ion_type_offsets = {
"a": -c.ATOM_MASSES["O"] - c.ATOM_MASSES["C"],
"b": 0.0,
"c": 3 * c.ATOM_MASSES["H"] + c.ATOM_MASSES["N"],
"x": 2 * c.ATOM_MASSES["O"] + c.ATOM_MASSES["C"],
"y": c.ATOM_MASSES["O"] + 2 * c.ATOM_MASSES["H"],
"z": c.ATOM_MASSES["O"] - c.ATOM_MASSES["N"] - c.ATOM_MASSES["H"],
"z●": c.ATOM_MASSES["O"] - c.ATOM_MASSES["N"],
}

deltas = np.array([ion_type_offsets[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1)

return deltas
return np.array([c.ION_DELTAS[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1)


def initialize_peaks(
Expand Down
11 changes: 10 additions & 1 deletion spectrum_fundamentals/mod_string.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import difflib
import re
from itertools import combinations, repeat
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Set, Tuple, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -342,6 +342,15 @@ def split_modstring(sequence: str, r_pattern):
return map(split_modstring, sequences, repeat(regex_pattern))


def get_all_tokens(sequences: List[str]) -> Set[str]:
    """
    Parse given sequences in UNIMOD ProForma standard into a set of all tokens.

    A token is either a single amino acid letter (one of ACDEFGHIKLMNPQRSTVWY), optionally
    followed directly by one modification tag of the form [UNIMOD:<digits>], or a
    modification tag that is not attached to a preceding residue (for example a terminal
    modification such as "[UNIMOD:737]-"), including the dash that joins it to the sequence.
    Characters that do not belong to any such token are skipped.

    :param sequences: peptide sequences to tokenize
    :return: set of the distinct tokens found across all sequences, empty if nothing matches
    """
    # The first alternative consumes stand-alone / terminal tags as a whole. Without it the
    # letters N, I, M and D inside "UNIMOD" would be reported as amino acid tokens whenever a
    # tag does not directly follow a residue. The second alternative is the residue pattern.
    pattern = re.compile(r"-?\[UNIMOD:\d+\]-?|[ACDEFGHIKLMNPQRSTVWY](\[UNIMOD:\d+\])?")
    return {match.group() for seq in sequences for match in pattern.finditer(seq)}


def add_permutations(modified_sequence: str, unimod_id: int, residues: List[str]):
"""
Generate different peptide sequences with moving the modification to all possible residues.
Expand Down
36 changes: 18 additions & 18 deletions tests/unit_tests/data/fragments_meta_data_etd_ecd.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"max_mass": 39.034494842950046
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 1,
"charge": 3,
"mass": 44.68542100033333,
Expand All @@ -24,7 +24,7 @@
"max_mass": 58.048093958160386
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 1,
"charge": 2,
"mass": 66.524493267,
Expand All @@ -40,7 +40,7 @@
"max_mass": 82.04955279357003
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 2,
"charge": 3,
"mass": 83.02773533366667,
Expand All @@ -64,7 +64,7 @@
"max_mass": 115.08889130379143
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 3,
"charge": 3,
"mass": 120.72242333366667,
Expand All @@ -80,15 +80,15 @@
"max_mass": 122.57068088409038
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 2,
"charge": 2,
"mass": 124.037964767,
"min_mass": 124.03548400770467,
"max_mass": 124.04044552629534
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 1,
"charge": 1,
"mass": 132.041710067,
Expand All @@ -104,7 +104,7 @@
"max_mass": 148.08435446319004
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 4,
"charge": 3,
"mass": 154.40498300033335,
Expand All @@ -120,7 +120,7 @@
"max_mass": 171.0980334117304
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 3,
"charge": 2,
"mass": 180.579996767,
Expand All @@ -136,7 +136,7 @@
"max_mass": 185.77979635695002
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 5,
"charge": 3,
"mass": 186.75590433366668,
Expand All @@ -160,15 +160,15 @@
"max_mass": 224.12287753657
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 6,
"charge": 3,
"mass": 229.77010200033337,
"min_mass": 229.76550659829337,
"max_mass": 229.77469740237336
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 4,
"charge": 2,
"mass": 231.10383626700002,
Expand All @@ -184,7 +184,7 @@
"max_mass": 244.13406515565143
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 2,
"charge": 1,
"mass": 247.068653067,
Expand All @@ -200,7 +200,7 @@
"max_mass": 278.1660462291604
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 5,
"charge": 2,
"mass": 279.63021826700003,
Expand All @@ -224,15 +224,15 @@
"max_mass": 341.18877021093147
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 6,
"charge": 2,
"mass": 344.15151476700004,
"min_mass": 344.1446317367047,
"max_mass": 344.1583977972954
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 3,
"charge": 1,
"mass": 360.152717067,
Expand All @@ -248,7 +248,7 @@
"max_mass": 442.23847016451145
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 4,
"charge": 1,
"mass": 461.20039606700004,
Expand All @@ -264,7 +264,7 @@
"max_mass": 555.3247958457914
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 5,
"charge": 1,
"mass": 558.2531600670001,
Expand All @@ -280,7 +280,7 @@
"max_mass": 670.3540393846513
},
{
"ion_type": "z●",
"ion_type": "z_r",
"no": 6,
"charge": 1,
"mass": 687.2957530670001,
Expand Down
Loading

0 comments on commit 271d68b

Please sign in to comment.