diff --git a/.cookietemple.yml b/.cookietemple.yml index 9e72d00..9100977 100644 --- a/.cookietemple.yml +++ b/.cookietemple.yml @@ -15,5 +15,5 @@ full_name: Victor Giurcoiu email: victor.giurcoiu@tum.de project_name: spectrum_fundamentals project_short_description: Fundamentals public repo -version: 0.7.2 +version: 0.7.3 license: MIT diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 744ea52..95b0d48 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,5 +1,5 @@ -name-template: "0.7.2 🌈" # <> -tag-template: 0.7.2 # <> +name-template: "0.7.3 🌈" # <> +tag-template: 0.7.3 # <> exclude-labels: - "skip-changelog" diff --git a/cookietemple.cfg b/cookietemple.cfg index 4bb82fa..2740c15 100644 --- a/cookietemple.cfg +++ b/cookietemple.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.7.2 +current_version = 0.7.3 [bumpversion_files_whitelisted] init_file = spectrum_fundamentals/__init__.py diff --git a/docs/conf.py b/docs/conf.py index 68b8850..559b2f1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,9 +52,9 @@ # the built documents. # # The short X.Y version. -version = "0.7.2" +version = "0.7.3" # The full version, including alpha/beta/rc tags. -release = "0.7.2" +release = "0.7.3" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/pyproject.toml b/pyproject.toml index e7fbe19..ed14b87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "spectrum_fundamentals" -version = "0.7.2" # <> +version = "0.7.3" # <> description = "Fundamental functions, annotation pipeline and constants for oktoberfest" authors = ["Wilhelmlab at Technical University of Munich"] license = "MIT" diff --git a/spectrum_fundamentals/__init__.py b/spectrum_fundamentals/__init__.py index ffa224b..a533669 100644 --- a/spectrum_fundamentals/__init__.py +++ b/spectrum_fundamentals/__init__.py @@ -2,7 +2,7 @@ __author__ = "Mario Picciani" __email__ = "mario.picciani@tum.de" -__version__ = "0.7.2" +__version__ = "0.7.3" import logging import logging.handlers diff --git a/spectrum_fundamentals/__main__.py b/spectrum_fundamentals/__main__.py index bc9579f..3825a50 100644 --- a/spectrum_fundamentals/__main__.py +++ b/spectrum_fundamentals/__main__.py @@ -5,7 +5,7 @@ @click.command() -@click.version_option(version="0.7.2", message=click.style("spectrum_fundamentals Version: 0.7.2")) +@click.version_option(version="0.7.3", message=click.style("spectrum_fundamentals Version: 0.7.3")) def main() -> None: """spectrum_fundamentals.""" diff --git a/spectrum_fundamentals/annotation/annotation.py b/spectrum_fundamentals/annotation/annotation.py index 8be5407..1d42d31 100644 --- a/spectrum_fundamentals/annotation/annotation.py +++ b/spectrum_fundamentals/annotation/annotation.py @@ -322,7 +322,7 @@ def generate_annotation_matrix( exp_mass_col = matched_peaks.columns.get_loc("exp_mass") for peak in matched_peaks.values: - ion_type_index = ion_types.index(peak[ion_type][0]) + ion_type_index = ion_types.index(peak[ion_type].split("-", 1)[0]) peak_pos = ((peak[no_col] - 1) * charge_const * len(ion_types)) + (peak[charge_col] - 1) + 3 * ion_type_index if peak_pos >= constants.VEC_LENGTH: diff --git a/spectrum_fundamentals/constants.py b/spectrum_fundamentals/constants.py index b31e0a2..a24f5b5 100644 
--- a/spectrum_fundamentals/constants.py +++ b/spectrum_fundamentals/constants.py @@ -17,6 +17,8 @@ VEC_LENGTH_CMS2 = (SEQ_LEN - 1) * 2 * 3 * 2 # peptide of length 30 can have 29 b, y, b_short, y_short, b_long and y_long ions, each with charge 1+, 2+ and 3+ # we do not annotate fragments wth charge 3+. All fragmets with charge 3+ convert to -1 + + ############# # ALPHABETS # ############# @@ -398,7 +400,7 @@ IONS = ["x", "y", "z", "zā—" "a", "b", "c"] HCD_IONS = ["y", "b"] -ETD_IONS = ["zā—", "c"] +ETD_IONS = ["z_r", "c"] ETCID_IONS = ["y", "z", "b", "c"] UVPD_IONS = ["x", "y", "z", "a", "b", "c"] @@ -428,3 +430,48 @@ class RescoreType(Enum): PROSIT = "prosit" ANDROMEDA = "andromeda" + + +############# +# ION TYPES # +############# +FORWARD_IONS = ["a", "b", "c"] +BACKWARDS_IONS = ["x", "y", "z", "z_r"] # +IONS = FORWARD_IONS + BACKWARDS_IONS + +FRAGMENTATION_TO_IONS_BY_PAIRS = { + "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ETHCD": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z_r,c + "ETCID": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z_r,c + "UVPD": [ + BACKWARDS_IONS[0], + FORWARD_IONS[0], + BACKWARDS_IONS[1], + FORWARD_IONS[1], + BACKWARDS_IONS[2], + FORWARD_IONS[2], + ], # x,a,y,b,z,c +} + +FRAGMENTATION_TO_IONS_BY_DIRECTION = { + "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ETHCD": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z_r,b,c + "ETCID": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z_r,b,c + "UVPD": BACKWARDS_IONS[:-1] + FORWARD_IONS, # x,y,z,a,b,c +} + +ION_DELTAS = { + "a": -ATOM_MASSES["O"] - 
ATOM_MASSES["C"], + "b": 0.0, + "c": 3 * ATOM_MASSES["H"] + ATOM_MASSES["N"], + "x": 2 * ATOM_MASSES["O"] + ATOM_MASSES["C"], + "y": ATOM_MASSES["O"] + 2 * ATOM_MASSES["H"], + "z": ATOM_MASSES["O"] - ATOM_MASSES["N"] - ATOM_MASSES["H"], + "z_r": ATOM_MASSES["O"] - ATOM_MASSES["N"], +} diff --git a/spectrum_fundamentals/fragments.py b/spectrum_fundamentals/fragments.py index 44a4e40..8623aa5 100644 --- a/spectrum_fundamentals/fragments.py +++ b/spectrum_fundamentals/fragments.py @@ -97,16 +97,10 @@ def retrieve_ion_types(fragmentation_method: str) -> List[str]: :return: list of possible ion types """ fragmentation_method = fragmentation_method.upper() - if fragmentation_method == "HCD" or fragmentation_method == "CID": - return ["y", "b"] - elif fragmentation_method == "ETD" or fragmentation_method == "ECD": - return ["zā—", "c"] - elif fragmentation_method == "ETCID" or fragmentation_method == "ETHCD": - return ["y", "b", "z", "c"] - elif fragmentation_method == "UVPD": - return ["y", "b", "z", "c", "x", "a"] - else: + ions = c.FRAGMENTATION_TO_IONS_BY_PAIRS.get(fragmentation_method, []) + if not ions: raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}") + return ions def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> List[str]: @@ -120,17 +114,10 @@ def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> Lis :return: list of possible ion types """ fragmentation_method = fragmentation_method.upper() - if fragmentation_method == "HCD" or fragmentation_method == "CID": - return c.HCD_IONS - elif fragmentation_method == "ETD" or fragmentation_method == "ECD": - return c.ETD_IONS - elif fragmentation_method == "ETCID" or fragmentation_method == "ETHCD": - return c.ETCID_IONS - elif fragmentation_method == "UVPD": - return c.UVPD_IONS - return ["x", "y", "z", "a", "b", "c"] - else: + ions = c.FRAGMENTATION_TO_IONS_BY_DIRECTION.get(fragmentation_method, []) + if not ions: raise 
ValueError(f"Unknown fragmentation method provided: {fragmentation_method}") + return ions def get_ion_delta(ion_types: List[str]) -> np.ndarray: @@ -140,19 +127,7 @@ def get_ion_delta(ion_types: List[str]) -> np.ndarray: :param ion_types: type of ions for which mass should be calculated :return: numpy array with masses of the ions """ - ion_type_offsets = { - "a": -c.ATOM_MASSES["O"] - c.ATOM_MASSES["C"], - "b": 0.0, - "c": 3 * c.ATOM_MASSES["H"] + c.ATOM_MASSES["N"], - "x": 2 * c.ATOM_MASSES["O"] + c.ATOM_MASSES["C"], - "y": c.ATOM_MASSES["O"] + 2 * c.ATOM_MASSES["H"], - "z": c.ATOM_MASSES["O"] - c.ATOM_MASSES["N"] - c.ATOM_MASSES["H"], - "zā—": c.ATOM_MASSES["O"] - c.ATOM_MASSES["N"], - } - - deltas = np.array([ion_type_offsets[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1) - - return deltas + return np.array([c.ION_DELTAS[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1) def initialize_peaks( diff --git a/spectrum_fundamentals/mod_string.py b/spectrum_fundamentals/mod_string.py index 47a839e..cbbe4f6 100644 --- a/spectrum_fundamentals/mod_string.py +++ b/spectrum_fundamentals/mod_string.py @@ -1,7 +1,7 @@ import difflib import re from itertools import combinations, repeat -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Set, Tuple, Union import numpy as np import pandas as pd @@ -342,6 +342,15 @@ def split_modstring(sequence: str, r_pattern): return map(split_modstring, sequences, repeat(regex_pattern)) +def get_all_tokens(sequences: List[str]) -> Set[str]: + """Parse given sequences in UNIMOD ProForma standard into a set of all tokens.""" + pattern = r"[ACDEFGHIKLMNPQRSTVWY](\[UNIMOD:\d+\])?" 
+ tokens = set() + for seq in sequences: + tokens |= {match.group() for match in re.finditer(pattern, seq)} + return tokens + + def add_permutations(modified_sequence: str, unimod_id: int, residues: List[str]): """ Generate different peptide sequences with moving the modification to all possible residues. diff --git a/tests/unit_tests/data/fragments_meta_data_etd_ecd.json b/tests/unit_tests/data/fragments_meta_data_etd_ecd.json index 1139aca..e19ebd9 100644 --- a/tests/unit_tests/data/fragments_meta_data_etd_ecd.json +++ b/tests/unit_tests/data/fragments_meta_data_etd_ecd.json @@ -8,7 +8,7 @@ "max_mass": 39.034494842950046 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 1, "charge": 3, "mass": 44.68542100033333, @@ -24,7 +24,7 @@ "max_mass": 58.048093958160386 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 1, "charge": 2, "mass": 66.524493267, @@ -40,7 +40,7 @@ "max_mass": 82.04955279357003 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 2, "charge": 3, "mass": 83.02773533366667, @@ -64,7 +64,7 @@ "max_mass": 115.08889130379143 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 3, "charge": 3, "mass": 120.72242333366667, @@ -80,7 +80,7 @@ "max_mass": 122.57068088409038 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 2, "charge": 2, "mass": 124.037964767, @@ -88,7 +88,7 @@ "max_mass": 124.04044552629534 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 1, "charge": 1, "mass": 132.041710067, @@ -104,7 +104,7 @@ "max_mass": 148.08435446319004 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 4, "charge": 3, "mass": 154.40498300033335, @@ -120,7 +120,7 @@ "max_mass": 171.0980334117304 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 3, "charge": 2, "mass": 180.579996767, @@ -136,7 +136,7 @@ "max_mass": 185.77979635695002 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 5, "charge": 3, "mass": 186.75590433366668, @@ -160,7 +160,7 @@ "max_mass": 224.12287753657 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 6, 
"charge": 3, "mass": 229.77010200033337, @@ -168,7 +168,7 @@ "max_mass": 229.77469740237336 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 4, "charge": 2, "mass": 231.10383626700002, @@ -184,7 +184,7 @@ "max_mass": 244.13406515565143 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 2, "charge": 1, "mass": 247.068653067, @@ -200,7 +200,7 @@ "max_mass": 278.1660462291604 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 5, "charge": 2, "mass": 279.63021826700003, @@ -224,7 +224,7 @@ "max_mass": 341.18877021093147 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 6, "charge": 2, "mass": 344.15151476700004, @@ -232,7 +232,7 @@ "max_mass": 344.1583977972954 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 3, "charge": 1, "mass": 360.152717067, @@ -248,7 +248,7 @@ "max_mass": 442.23847016451145 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 4, "charge": 1, "mass": 461.20039606700004, @@ -264,7 +264,7 @@ "max_mass": 555.3247958457914 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 5, "charge": 1, "mass": 558.2531600670001, @@ -280,7 +280,7 @@ "max_mass": 670.3540393846513 }, { - "ion_type": "zā—", + "ion_type": "z_r", "no": 6, "charge": 1, "mass": 687.2957530670001, diff --git a/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json b/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json index 38e58ea..bc48f11 100644 --- a/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json +++ b/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json @@ -16,12 +16,12 @@ "max_mass": 39.034494842950046 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 1, "charge": 3, - "mass": 44.349479322, - "min_mass": 44.34859233241356, - "max_mass": 44.35036631158644 + "mass": 44.68542100033333, + "min_mass": 44.68452729191332, + "max_mass": 44.686314708753336 }, { "ion_type": "b", @@ -48,12 +48,12 @@ "max_mass": 58.048093958160386 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 1, "charge": 2, - "mass": 66.0205807495, - "min_mass": 
66.01926033788502, - "max_mass": 66.02190116111498 + "mass": 66.524493267, + "min_mass": 66.52316277713466, + "max_mass": 66.52582375686534 }, { "ion_type": "y", @@ -80,12 +80,12 @@ "max_mass": 82.04955279357003 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 2, "charge": 3, - "mass": 82.69179365533334, - "min_mass": 82.69013981946024, - "max_mass": 82.69344749120644 + "mass": 83.02773533366667, + "min_mass": 83.02607477896, + "max_mass": 83.02939588837334 }, { "ion_type": "y", @@ -136,12 +136,12 @@ "max_mass": 115.08889130379143 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 3, "charge": 3, - "mass": 120.38648165533334, - "min_mass": 120.38407392570024, - "max_mass": 120.38888938496645 + "mass": 120.72242333366667, + "min_mass": 120.7200088852, + "max_mass": 120.72483778213333 }, { "ion_type": "c", @@ -152,12 +152,12 @@ "max_mass": 122.57068088409038 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 2, "charge": 2, - "mass": 123.53405224950001, - "min_mass": 123.53158156845501, - "max_mass": 123.536522930545 + "mass": 124.037964767, + "min_mass": 124.03548400770467, + "max_mass": 124.04044552629534 }, { "ion_type": "y", @@ -168,12 +168,12 @@ "max_mass": 126.06451926362715 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 1, "charge": 1, - "mass": 131.033885032, - "min_mass": 131.03126435429937, - "max_mass": 131.03650570970063 + "mass": 132.041710067, + "min_mass": 132.03906923279865, + "max_mass": 132.04435090120134 }, { "ion_type": "y", @@ -208,12 +208,12 @@ "max_mass": 148.08435446319004 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 4, "charge": 3, - "mass": 154.069041322, - "min_mass": 154.06595994117356, - "max_mass": 154.07212270282645 + "mass": 154.40498300033335, + "min_mass": 154.40189490067334, + "max_mass": 154.40807109999335 }, { "ion_type": "y", @@ -239,14 +239,6 @@ "min_mass": 171.0911896272696, "max_mass": 171.0980334117304 }, - { - "ion_type": "z", - "no": 3, - "charge": 2, - "mass": 180.0760842495, - "min_mass": 180.07248272781501, - 
"max_mass": 180.079685771185 - }, { "ion_type": "b", "no": 5, @@ -255,6 +247,14 @@ "min_mass": 180.09696245571067, "max_mass": 180.10416647828933 }, + { + "ion_type": "z_r", + "no": 3, + "charge": 2, + "mass": 180.579996767, + "min_mass": 180.57638516706467, + "max_mass": 180.58360836693535 + }, { "ion_type": "c", "no": 5, @@ -264,12 +264,12 @@ "max_mass": 185.77979635695002 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 5, "charge": 3, - "mass": 186.41996265533336, - "min_mass": 186.41623425608026, - "max_mass": 186.42369105458647 + "mass": 186.75590433366668, + "min_mass": 186.75216921558, + "max_mass": 186.75963945175334 }, { "ion_type": "y", @@ -328,20 +328,20 @@ "max_mass": 227.10717551966934 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 6, "charge": 3, - "mass": 229.43416032200003, - "min_mass": 229.4295716387936, - "max_mass": 229.43874900520646 + "mass": 229.77010200033337, + "min_mass": 229.76550659829337, + "max_mass": 229.77469740237336 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 4, "charge": 2, - "mass": 230.59992374950002, - "min_mass": 230.59531175102504, - "max_mass": 230.604535747975 + "mass": 231.10383626700002, + "min_mass": 231.0992141902747, + "max_mass": 231.10845834372535 }, { "ion_type": "y", @@ -368,12 +368,12 @@ "max_mass": 244.13406515565143 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 2, "charge": 1, - "mass": 246.06082803200002, - "min_mass": 246.05590681543939, - "max_mass": 246.06574924856065 + "mass": 247.068653067, + "min_mass": 247.06371169393867, + "max_mass": 247.07359444006136 }, { "ion_type": "y", @@ -400,12 +400,12 @@ "max_mass": 278.1660462291604 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 5, "charge": 2, - "mass": 279.12630574950003, - "min_mass": 279.120723223385, - "max_mass": 279.13188827561504 + "mass": 279.63021826700003, + "min_mass": 279.6246256626347, + "max_mass": 279.6358108713654 }, { "ion_type": "y", @@ -448,12 +448,12 @@ "max_mass": 341.18877021093147 }, { - "ion_type": "z", + 
"ion_type": "z_r", "no": 6, "charge": 2, - "mass": 343.64760224950004, - "min_mass": 343.64072929745504, - "max_mass": 343.65447520154504 + "mass": 344.15151476700004, + "min_mass": 344.1446317367047, + "max_mass": 344.1583977972954 }, { "ion_type": "y", @@ -464,12 +464,12 @@ "max_mass": 352.16792001953604 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 3, "charge": 1, - "mass": 359.14489203200003, - "min_mass": 359.13770913415937, - "max_mass": 359.1520749298407 + "mass": 360.152717067, + "min_mass": 360.1455140126587, + "max_mass": 360.1599201213414 }, { "ion_type": "y", @@ -496,12 +496,12 @@ "max_mass": 442.23847016451145 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 4, "charge": 1, - "mass": 460.19257103200005, - "min_mass": 460.1833671805794, - "max_mass": 460.2017748834207 + "mass": 461.20039606700004, + "min_mass": 461.1911720590787, + "max_mass": 461.20962007492136 }, { "ion_type": "y", @@ -528,12 +528,12 @@ "max_mass": 555.3247958457914 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 5, "charge": 1, - "mass": 557.2453350320001, - "min_mass": 557.2341901252994, - "max_mass": 557.2564799387007 + "mass": 558.2531600670001, + "min_mass": 558.2419950037987, + "max_mass": 558.2643251302014 }, { "ion_type": "y", @@ -560,12 +560,12 @@ "max_mass": 670.3540393846513 }, { - "ion_type": "z", + "ion_type": "z_r", "no": 6, "charge": 1, - "mass": 686.2879280320001, - "min_mass": 686.2742022734394, - "max_mass": 686.3016537905607 + "mass": 687.2957530670001, + "min_mass": 687.2820071519387, + "max_mass": 687.3094989820614 }, { "ion_type": "y", diff --git a/tests/unit_tests/test_fragments.py b/tests/unit_tests/test_fragments.py index d1cbf9b..6b5f095 100644 --- a/tests/unit_tests/test_fragments.py +++ b/tests/unit_tests/test_fragments.py @@ -107,15 +107,15 @@ def test_get_ion_types_hcd(self): def test_get_ion_types_etd(self): """Test retrieving ion types for ETD.""" - assert fragments.retrieve_ion_types("ETD") == ["zā—", "c"] + assert 
fragments.retrieve_ion_types("ETD") == ["z_r", "c"] def test_get_ion_types_etcid(self): """Test retrieving ion types for ETCID.""" - assert fragments.retrieve_ion_types("ETCID") == ["y", "b", "z", "c"] + assert fragments.retrieve_ion_types("ETCID") == ["y", "b", "z_r", "c"] def test_get_ion_types_lower_case(self): """Test lower case fragmentation method.""" - assert fragments.retrieve_ion_types("uvpd") == ["y", "b", "z", "c", "x", "a"] + assert fragments.retrieve_ion_types("uvpd") == ["x", "a", "y", "b", "z", "c"] def test_invalid_fragmentation_method(self): """Test if error is raised for invalid fragmentation method.""" @@ -131,11 +131,11 @@ def test_get_ion_types_hcd(self): def test_get_ion_types_etd(self): """Test retrieving ion types for ETD.""" - assert fragments.retrieve_ion_types_for_peak_initialization("ETD") == ["zā—", "c"] + assert fragments.retrieve_ion_types_for_peak_initialization("ETD") == ["z_r", "c"] def test_get_ion_types_etcid(self): """Test retrieving ion types for ETCID.""" - assert fragments.retrieve_ion_types_for_peak_initialization("ETCID") == ["y", "z", "b", "c"] + assert fragments.retrieve_ion_types_for_peak_initialization("ETCID") == ["y", "z_r", "b", "c"] def test_get_ion_types_lower_case(self): """Test lower case fragmentation method.""" diff --git a/tests/unit_tests/test_mod_string.py b/tests/unit_tests/test_mod_string.py index cee6c5c..d03c736 100644 --- a/tests/unit_tests/test_mod_string.py +++ b/tests/unit_tests/test_mod_string.py @@ -309,6 +309,13 @@ def test_parse_modstrings_invalid_with_filtering(self): invalid_seq = "testing" self.assertEqual(next(mod.parse_modstrings([invalid_seq], alphabet=c.ALPHABET, filter=True)), [0]) + def test_get_all_tokens(self): + """Test parsing of any UNIMOD sequence into tokens.""" + seqs = ["ACKC[UNIMOD:4]AD", "PEPTIDE", "PEM[UNIMOD:35]"] + + result = mod.get_all_tokens(seqs) + self.assertEqual(result, {"A", "C", "C[UNIMOD:4]", "D", "E", "I", "K", "M[UNIMOD:35]", "P", "T"}) + class 
TestCustomToInternal(unittest.TestCase): """Class to test custom to internal."""