From 839b5142872a26aaac5d5470052da4be280f9de5 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Fri, 9 Aug 2024 17:16:45 +0200 Subject: [PATCH 1/4] =?UTF-8?q?simplify=20code=20and=20add=20z=E2=97=8F=20?= =?UTF-8?q?to=20ethcd/etcid?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- spectrum_fundamentals/constants.py | 36 +++++ spectrum_fundamentals/fragments.py | 33 ++-- .../data/fragments_meta_data_ethcd_etcid.json | 152 +++++++++--------- tests/unit_tests/test_fragments.py | 6 +- 4 files changed, 129 insertions(+), 98 deletions(-) diff --git a/spectrum_fundamentals/constants.py b/spectrum_fundamentals/constants.py index bc72e3f..b5c7ec1 100644 --- a/spectrum_fundamentals/constants.py +++ b/spectrum_fundamentals/constants.py @@ -17,6 +17,42 @@ VEC_LENGTH_CMS2 = (SEQ_LEN - 1) * 2 * 3 * 2 # peptide of length 30 can have 29 b, y, b_short, y_short, b_long and y_long ions, each with charge 1+, 2+ and 3+ # we do not annotate fragments wth charge 3+. 
All fragmets with charge 3+ convert to -1 + + +############# +# ION TYPES # +############# +FORWARD_IONS = ["a", "b", "c"] +BACKWARDS_IONS = ["x", "y", "z", "z●"] +IONS = FORWARD_IONS + BACKWARDS_IONS + +FRAGMENTATION_TO_IONS_BY_PAIRS = { + "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c + "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c + "ETHCD": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z●,c + "ETCID": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z●,c + "UVPD": [ + BACKWARDS_IONS[0], + FORWARD_IONS[0], + BACKWARDS_IONS[1], + FORWARD_IONS[1], + BACKWARDS_IONS[2], + FORWARD_IONS[2], + ], # y,b,z,c,x,a +} + +FRAGMENTATION_TO_IONS_BY_DIRECTION = { + "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c + "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c + "ETHCD": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z●,b,c + "ETCID": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z●,b,c + "UVPD": BACKWARDS_IONS[:-1] + FORWARD_IONS, # y,z,x,b,c,a +} + ############# # ALPHABETS # ############# diff --git a/spectrum_fundamentals/fragments.py b/spectrum_fundamentals/fragments.py index f91a943..524251e 100644 --- a/spectrum_fundamentals/fragments.py +++ b/spectrum_fundamentals/fragments.py @@ -6,7 +6,14 @@ import numpy as np import pandas as pd -from .constants import AA_MASSES, ATOM_MASSES, MOD_MASSES, PARTICLE_MASSES +from .constants import ( + AA_MASSES, + ATOM_MASSES, + FRAGMENTATION_TO_IONS_BY_DIRECTION, + FRAGMENTATION_TO_IONS_BY_PAIRS, + MOD_MASSES, + PARTICLE_MASSES, +) from .mod_string import internal_without_mods logger = logging.getLogger(__name__) @@ -95,16 +102,10 @@ def retrieve_ion_types(fragmentation_method: str) -> List[str]: : 
return: list of possible ion types """ fragmentation_method = fragmentation_method.upper() - if fragmentation_method == "HCD" or fragmentation_method == "CID": - return ["y", "b"] - elif fragmentation_method == "ETD" or fragmentation_method == "ECD": - return ["z●", "c"] - elif fragmentation_method == "ETCID" or fragmentation_method == "ETHCD": - return ["y", "b", "z", "c"] - elif fragmentation_method == "UVPD": - return ["y", "b", "z", "c", "x", "a"] - else: + ions = FRAGMENTATION_TO_IONS_BY_PAIRS.get(fragmentation_method, []) + if not ions: raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}") + return ions def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> List[str]: @@ -118,16 +119,10 @@ def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> Lis : return: list of possible ion types """ fragmentation_method = fragmentation_method.upper() - if fragmentation_method == "HCD" or fragmentation_method == "CID": - return ["y", "b"] - elif fragmentation_method == "ETD" or fragmentation_method == "ECD": - return ["z●", "c"] - elif fragmentation_method == "ETCID" or fragmentation_method == "ETHCD": - return ["y", "z", "b", "c"] - elif fragmentation_method == "UVPD": - return ["x", "y", "z", "a", "b", "c"] - else: + ions = FRAGMENTATION_TO_IONS_BY_DIRECTION.get(fragmentation_method, []) + if not ions: raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}") + return ions def get_ion_delta(ion_types: List[str]) -> np.ndarray: diff --git a/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json b/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json index 38e58ea..de130c0 100644 --- a/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json +++ b/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json @@ -16,12 +16,12 @@ "max_mass": 39.034494842950046 }, { - "ion_type": "z", + "ion_type": "z●", "no": 1, "charge": 3, - "mass": 44.349479322, - "min_mass": 
44.34859233241356, - "max_mass": 44.35036631158644 + "mass": 44.68542100033333, + "min_mass": 44.68452729191332, + "max_mass": 44.686314708753336 }, { "ion_type": "b", @@ -48,12 +48,12 @@ "max_mass": 58.048093958160386 }, { - "ion_type": "z", + "ion_type": "z●", "no": 1, "charge": 2, - "mass": 66.0205807495, - "min_mass": 66.01926033788502, - "max_mass": 66.02190116111498 + "mass": 66.524493267, + "min_mass": 66.52316277713466, + "max_mass": 66.52582375686534 }, { "ion_type": "y", @@ -80,12 +80,12 @@ "max_mass": 82.04955279357003 }, { - "ion_type": "z", + "ion_type": "z●", "no": 2, "charge": 3, - "mass": 82.69179365533334, - "min_mass": 82.69013981946024, - "max_mass": 82.69344749120644 + "mass": 83.02773533366667, + "min_mass": 83.02607477896, + "max_mass": 83.02939588837334 }, { "ion_type": "y", @@ -136,12 +136,12 @@ "max_mass": 115.08889130379143 }, { - "ion_type": "z", + "ion_type": "z●", "no": 3, "charge": 3, - "mass": 120.38648165533334, - "min_mass": 120.38407392570024, - "max_mass": 120.38888938496645 + "mass": 120.72242333366667, + "min_mass": 120.7200088852, + "max_mass": 120.72483778213333 }, { "ion_type": "c", @@ -152,12 +152,12 @@ "max_mass": 122.57068088409038 }, { - "ion_type": "z", + "ion_type": "z●", "no": 2, "charge": 2, - "mass": 123.53405224950001, - "min_mass": 123.53158156845501, - "max_mass": 123.536522930545 + "mass": 124.037964767, + "min_mass": 124.03548400770467, + "max_mass": 124.04044552629534 }, { "ion_type": "y", @@ -168,12 +168,12 @@ "max_mass": 126.06451926362715 }, { - "ion_type": "z", + "ion_type": "z●", "no": 1, "charge": 1, - "mass": 131.033885032, - "min_mass": 131.03126435429937, - "max_mass": 131.03650570970063 + "mass": 132.041710067, + "min_mass": 132.03906923279865, + "max_mass": 132.04435090120134 }, { "ion_type": "y", @@ -208,12 +208,12 @@ "max_mass": 148.08435446319004 }, { - "ion_type": "z", + "ion_type": "z●", "no": 4, "charge": 3, - "mass": 154.069041322, - "min_mass": 154.06595994117356, - "max_mass": 
154.07212270282645 + "mass": 154.40498300033335, + "min_mass": 154.40189490067334, + "max_mass": 154.40807109999335 }, { "ion_type": "y", @@ -239,14 +239,6 @@ "min_mass": 171.0911896272696, "max_mass": 171.0980334117304 }, - { - "ion_type": "z", - "no": 3, - "charge": 2, - "mass": 180.0760842495, - "min_mass": 180.07248272781501, - "max_mass": 180.079685771185 - }, { "ion_type": "b", "no": 5, @@ -255,6 +247,14 @@ "min_mass": 180.09696245571067, "max_mass": 180.10416647828933 }, + { + "ion_type": "z●", + "no": 3, + "charge": 2, + "mass": 180.579996767, + "min_mass": 180.57638516706467, + "max_mass": 180.58360836693535 + }, { "ion_type": "c", "no": 5, @@ -264,12 +264,12 @@ "max_mass": 185.77979635695002 }, { - "ion_type": "z", + "ion_type": "z●", "no": 5, "charge": 3, - "mass": 186.41996265533336, - "min_mass": 186.41623425608026, - "max_mass": 186.42369105458647 + "mass": 186.75590433366668, + "min_mass": 186.75216921558, + "max_mass": 186.75963945175334 }, { "ion_type": "y", @@ -328,20 +328,20 @@ "max_mass": 227.10717551966934 }, { - "ion_type": "z", + "ion_type": "z●", "no": 6, "charge": 3, - "mass": 229.43416032200003, - "min_mass": 229.4295716387936, - "max_mass": 229.43874900520646 + "mass": 229.77010200033337, + "min_mass": 229.76550659829337, + "max_mass": 229.77469740237336 }, { - "ion_type": "z", + "ion_type": "z●", "no": 4, "charge": 2, - "mass": 230.59992374950002, - "min_mass": 230.59531175102504, - "max_mass": 230.604535747975 + "mass": 231.10383626700002, + "min_mass": 231.0992141902747, + "max_mass": 231.10845834372535 }, { "ion_type": "y", @@ -368,12 +368,12 @@ "max_mass": 244.13406515565143 }, { - "ion_type": "z", + "ion_type": "z●", "no": 2, "charge": 1, - "mass": 246.06082803200002, - "min_mass": 246.05590681543939, - "max_mass": 246.06574924856065 + "mass": 247.068653067, + "min_mass": 247.06371169393867, + "max_mass": 247.07359444006136 }, { "ion_type": "y", @@ -400,12 +400,12 @@ "max_mass": 278.1660462291604 }, { - "ion_type": "z", + 
"ion_type": "z●", "no": 5, "charge": 2, - "mass": 279.12630574950003, - "min_mass": 279.120723223385, - "max_mass": 279.13188827561504 + "mass": 279.63021826700003, + "min_mass": 279.6246256626347, + "max_mass": 279.6358108713654 }, { "ion_type": "y", @@ -448,12 +448,12 @@ "max_mass": 341.18877021093147 }, { - "ion_type": "z", + "ion_type": "z●", "no": 6, "charge": 2, - "mass": 343.64760224950004, - "min_mass": 343.64072929745504, - "max_mass": 343.65447520154504 + "mass": 344.15151476700004, + "min_mass": 344.1446317367047, + "max_mass": 344.1583977972954 }, { "ion_type": "y", @@ -464,12 +464,12 @@ "max_mass": 352.16792001953604 }, { - "ion_type": "z", + "ion_type": "z●", "no": 3, "charge": 1, - "mass": 359.14489203200003, - "min_mass": 359.13770913415937, - "max_mass": 359.1520749298407 + "mass": 360.152717067, + "min_mass": 360.1455140126587, + "max_mass": 360.1599201213414 }, { "ion_type": "y", @@ -496,12 +496,12 @@ "max_mass": 442.23847016451145 }, { - "ion_type": "z", + "ion_type": "z●", "no": 4, "charge": 1, - "mass": 460.19257103200005, - "min_mass": 460.1833671805794, - "max_mass": 460.2017748834207 + "mass": 461.20039606700004, + "min_mass": 461.1911720590787, + "max_mass": 461.20962007492136 }, { "ion_type": "y", @@ -528,12 +528,12 @@ "max_mass": 555.3247958457914 }, { - "ion_type": "z", + "ion_type": "z●", "no": 5, "charge": 1, - "mass": 557.2453350320001, - "min_mass": 557.2341901252994, - "max_mass": 557.2564799387007 + "mass": 558.2531600670001, + "min_mass": 558.2419950037987, + "max_mass": 558.2643251302014 }, { "ion_type": "y", @@ -560,12 +560,12 @@ "max_mass": 670.3540393846513 }, { - "ion_type": "z", + "ion_type": "z●", "no": 6, "charge": 1, - "mass": 686.2879280320001, - "min_mass": 686.2742022734394, - "max_mass": 686.3016537905607 + "mass": 687.2957530670001, + "min_mass": 687.2820071519387, + "max_mass": 687.3094989820614 }, { "ion_type": "y", diff --git a/tests/unit_tests/test_fragments.py b/tests/unit_tests/test_fragments.py index 
330f2d2..a486b1d 100644 --- a/tests/unit_tests/test_fragments.py +++ b/tests/unit_tests/test_fragments.py @@ -110,11 +110,11 @@ def test_get_ion_types_etd(self): def test_get_ion_types_etcid(self): """Test retrieving ion types for ETCID.""" - assert fragments.retrieve_ion_types("ETCID") == ["y", "b", "z", "c"] + assert fragments.retrieve_ion_types("ETCID") == ["y", "b", "z●", "c"] def test_get_ion_types_lower_case(self): """Test lower case fragmentation method.""" - assert fragments.retrieve_ion_types("uvpd") == ["y", "b", "z", "c", "x", "a"] + assert fragments.retrieve_ion_types("uvpd") == ["x", "a", "y", "b", "z", "c"] def test_invalid_fragmentation_method(self): """Test if error is raised for invalid fragmentation method.""" @@ -134,7 +134,7 @@ def test_get_ion_types_etd(self): def test_get_ion_types_etcid(self): """Test retrieving ion types for ETCID.""" - assert fragments.retrieve_ion_types_for_peak_initialization("ETCID") == ["y", "z", "b", "c"] + assert fragments.retrieve_ion_types_for_peak_initialization("ETCID") == ["y", "z●", "b", "c"] def test_get_ion_types_lower_case(self): """Test lower case fragmentation method.""" From 5b5eb204cf9810870c1bb7feeef25d1bad33d251 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Fri, 9 Aug 2024 18:39:34 +0200 Subject: [PATCH 2/4] fix encoding problems: using z_r everywhere --- spectrum_fundamentals/constants.py | 79 +++++++++++-------- spectrum_fundamentals/fragments.py | 15 +--- .../data/fragments_meta_data_etd_ecd.json | 36 ++++----- .../data/fragments_meta_data_ethcd_etcid.json | 36 ++++----- tests/unit_tests/test_fragments.py | 8 +- 5 files changed, 87 insertions(+), 87 deletions(-) diff --git a/spectrum_fundamentals/constants.py b/spectrum_fundamentals/constants.py index b5c7ec1..e0d1a3d 100644 --- a/spectrum_fundamentals/constants.py +++ b/spectrum_fundamentals/constants.py @@ -19,40 +19,6 @@ # we do not annotate fragments wth charge 3+. 
All fragmets with charge 3+ convert to -1 -############# -# ION TYPES # -############# -FORWARD_IONS = ["a", "b", "c"] -BACKWARDS_IONS = ["x", "y", "z", "z●"] -IONS = FORWARD_IONS + BACKWARDS_IONS - -FRAGMENTATION_TO_IONS_BY_PAIRS = { - "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b - "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b - "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c - "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c - "ETHCD": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z●,c - "ETCID": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z●,c - "UVPD": [ - BACKWARDS_IONS[0], - FORWARD_IONS[0], - BACKWARDS_IONS[1], - FORWARD_IONS[1], - BACKWARDS_IONS[2], - FORWARD_IONS[2], - ], # y,b,z,c,x,a -} - -FRAGMENTATION_TO_IONS_BY_DIRECTION = { - "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b - "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b - "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c - "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c - "ETHCD": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z●,b,c - "ETCID": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z●,b,c - "UVPD": BACKWARDS_IONS[:-1] + FORWARD_IONS, # y,z,x,b,c,a -} - ############# # ALPHABETS # ############# @@ -449,3 +415,48 @@ class RescoreType(Enum): PROSIT = "prosit" ANDROMEDA = "andromeda" + + +############# +# ION TYPES # +############# +FORWARD_IONS = ["a", "b", "c"] +BACKWARDS_IONS = ["x", "y", "z", "z_r"] # +IONS = FORWARD_IONS + BACKWARDS_IONS + +FRAGMENTATION_TO_IONS_BY_PAIRS = { + "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ETHCD": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z_r,c + "ETCID": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], 
FORWARD_IONS[2]], # y,b,z_r,c + "UVPD": [ + BACKWARDS_IONS[0], + FORWARD_IONS[0], + BACKWARDS_IONS[1], + FORWARD_IONS[1], + BACKWARDS_IONS[2], + FORWARD_IONS[2], + ], # x,a,y,b,z,c +} + +FRAGMENTATION_TO_IONS_BY_DIRECTION = { + "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ETHCD": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z_r,b,c + "ETCID": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z_r,b,c + "UVPD": BACKWARDS_IONS[:-1] + FORWARD_IONS, # x,y,z,a,b,c +} + +ION_DELTAS = { + "a": -ATOM_MASSES["O"] - ATOM_MASSES["C"], + "b": 0.0, + "c": 3 * ATOM_MASSES["H"] + ATOM_MASSES["N"], + "x": 2 * ATOM_MASSES["O"] + ATOM_MASSES["C"], + "y": ATOM_MASSES["O"] + 2 * ATOM_MASSES["H"], + "z": ATOM_MASSES["O"] - ATOM_MASSES["N"] - ATOM_MASSES["H"], + "z_r": ATOM_MASSES["O"] - ATOM_MASSES["N"], +} diff --git a/spectrum_fundamentals/fragments.py b/spectrum_fundamentals/fragments.py index 524251e..3c2233e 100644 --- a/spectrum_fundamentals/fragments.py +++ b/spectrum_fundamentals/fragments.py @@ -11,6 +11,7 @@ ATOM_MASSES, FRAGMENTATION_TO_IONS_BY_DIRECTION, FRAGMENTATION_TO_IONS_BY_PAIRS, + ION_DELTAS, MOD_MASSES, PARTICLE_MASSES, ) @@ -132,19 +133,7 @@ def get_ion_delta(ion_types: List[str]) -> np.ndarray: :param ion_types: type of ions for which mass should be calculated :return: numpy array with masses of the ions """ - ion_type_offsets = { - "a": -ATOM_MASSES["O"] - ATOM_MASSES["C"], - "b": 0.0, - "c": 3 * ATOM_MASSES["H"] + ATOM_MASSES["N"], - "x": 2 * ATOM_MASSES["O"] + ATOM_MASSES["C"], - "y": ATOM_MASSES["O"] + 2 * ATOM_MASSES["H"], - "z": ATOM_MASSES["O"] - ATOM_MASSES["N"] - ATOM_MASSES["H"], - "z●": ATOM_MASSES["O"] - ATOM_MASSES["N"], - } - - deltas = np.array([ion_type_offsets[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1) - - return deltas + 
return np.array([ION_DELTAS[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1) def initialize_peaks( diff --git a/tests/unit_tests/data/fragments_meta_data_etd_ecd.json b/tests/unit_tests/data/fragments_meta_data_etd_ecd.json index 1139aca..e19ebd9 100644 --- a/tests/unit_tests/data/fragments_meta_data_etd_ecd.json +++ b/tests/unit_tests/data/fragments_meta_data_etd_ecd.json @@ -8,7 +8,7 @@ "max_mass": 39.034494842950046 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 3, "mass": 44.68542100033333, @@ -24,7 +24,7 @@ "max_mass": 58.048093958160386 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 2, "mass": 66.524493267, @@ -40,7 +40,7 @@ "max_mass": 82.04955279357003 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 3, "mass": 83.02773533366667, @@ -64,7 +64,7 @@ "max_mass": 115.08889130379143 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 3, "mass": 120.72242333366667, @@ -80,7 +80,7 @@ "max_mass": 122.57068088409038 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 2, "mass": 124.037964767, @@ -88,7 +88,7 @@ "max_mass": 124.04044552629534 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 1, "mass": 132.041710067, @@ -104,7 +104,7 @@ "max_mass": 148.08435446319004 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 3, "mass": 154.40498300033335, @@ -120,7 +120,7 @@ "max_mass": 171.0980334117304 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 2, "mass": 180.579996767, @@ -136,7 +136,7 @@ "max_mass": 185.77979635695002 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 3, "mass": 186.75590433366668, @@ -160,7 +160,7 @@ "max_mass": 224.12287753657 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 3, "mass": 229.77010200033337, @@ -168,7 +168,7 @@ "max_mass": 229.77469740237336 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 2, "mass": 231.10383626700002, @@ -184,7 +184,7 @@ "max_mass": 
244.13406515565143 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 1, "mass": 247.068653067, @@ -200,7 +200,7 @@ "max_mass": 278.1660462291604 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 2, "mass": 279.63021826700003, @@ -224,7 +224,7 @@ "max_mass": 341.18877021093147 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 2, "mass": 344.15151476700004, @@ -232,7 +232,7 @@ "max_mass": 344.1583977972954 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 1, "mass": 360.152717067, @@ -248,7 +248,7 @@ "max_mass": 442.23847016451145 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 1, "mass": 461.20039606700004, @@ -264,7 +264,7 @@ "max_mass": 555.3247958457914 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 1, "mass": 558.2531600670001, @@ -280,7 +280,7 @@ "max_mass": 670.3540393846513 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 1, "mass": 687.2957530670001, diff --git a/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json b/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json index de130c0..bc48f11 100644 --- a/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json +++ b/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json @@ -16,7 +16,7 @@ "max_mass": 39.034494842950046 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 3, "mass": 44.68542100033333, @@ -48,7 +48,7 @@ "max_mass": 58.048093958160386 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 2, "mass": 66.524493267, @@ -80,7 +80,7 @@ "max_mass": 82.04955279357003 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 3, "mass": 83.02773533366667, @@ -136,7 +136,7 @@ "max_mass": 115.08889130379143 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 3, "mass": 120.72242333366667, @@ -152,7 +152,7 @@ "max_mass": 122.57068088409038 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 2, "mass": 124.037964767, @@ -168,7 
+168,7 @@ "max_mass": 126.06451926362715 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 1, "mass": 132.041710067, @@ -208,7 +208,7 @@ "max_mass": 148.08435446319004 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 3, "mass": 154.40498300033335, @@ -248,7 +248,7 @@ "max_mass": 180.10416647828933 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 2, "mass": 180.579996767, @@ -264,7 +264,7 @@ "max_mass": 185.77979635695002 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 3, "mass": 186.75590433366668, @@ -328,7 +328,7 @@ "max_mass": 227.10717551966934 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 3, "mass": 229.77010200033337, @@ -336,7 +336,7 @@ "max_mass": 229.77469740237336 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 2, "mass": 231.10383626700002, @@ -368,7 +368,7 @@ "max_mass": 244.13406515565143 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 1, "mass": 247.068653067, @@ -400,7 +400,7 @@ "max_mass": 278.1660462291604 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 2, "mass": 279.63021826700003, @@ -448,7 +448,7 @@ "max_mass": 341.18877021093147 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 2, "mass": 344.15151476700004, @@ -464,7 +464,7 @@ "max_mass": 352.16792001953604 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 1, "mass": 360.152717067, @@ -496,7 +496,7 @@ "max_mass": 442.23847016451145 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 1, "mass": 461.20039606700004, @@ -528,7 +528,7 @@ "max_mass": 555.3247958457914 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 1, "mass": 558.2531600670001, @@ -560,7 +560,7 @@ "max_mass": 670.3540393846513 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 1, "mass": 687.2957530670001, diff --git a/tests/unit_tests/test_fragments.py b/tests/unit_tests/test_fragments.py index a486b1d..ff6551b 100644 --- 
a/tests/unit_tests/test_fragments.py +++ b/tests/unit_tests/test_fragments.py @@ -106,11 +106,11 @@ def test_get_ion_types_hcd(self): def test_get_ion_types_etd(self): """Test retrieving ion types for ETD.""" - assert fragments.retrieve_ion_types("ETD") == ["z●", "c"] + assert fragments.retrieve_ion_types("ETD") == ["z_r", "c"] def test_get_ion_types_etcid(self): """Test retrieving ion types for ETCID.""" - assert fragments.retrieve_ion_types("ETCID") == ["y", "b", "z●", "c"] + assert fragments.retrieve_ion_types("ETCID") == ["y", "b", "z_r", "c"] def test_get_ion_types_lower_case(self): """Test lower case fragmentation method.""" @@ -130,11 +130,11 @@ def test_get_ion_types_hcd(self): def test_get_ion_types_etd(self): """Test retrieving ion types for ETD.""" - assert fragments.retrieve_ion_types_for_peak_initialization("ETD") == ["z●", "c"] + assert fragments.retrieve_ion_types_for_peak_initialization("ETD") == ["z_r", "c"] def test_get_ion_types_etcid(self): """Test retrieving ion types for ETCID.""" - assert fragments.retrieve_ion_types_for_peak_initialization("ETCID") == ["y", "z●", "b", "c"] + assert fragments.retrieve_ion_types_for_peak_initialization("ETCID") == ["y", "z_r", "b", "c"] def test_get_ion_types_lower_case(self): """Test lower case fragmentation method.""" From 3babf78f1fda12a43e4604e3697bcf98aca59d44 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Fri, 9 Aug 2024 18:52:21 +0200 Subject: [PATCH 3/4] added get_all_token method --- spectrum_fundamentals/mod_string.py | 11 ++++++++++- tests/unit_tests/test_mod_string.py | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/spectrum_fundamentals/mod_string.py b/spectrum_fundamentals/mod_string.py index 47a839e..cbbe4f6 100644 --- a/spectrum_fundamentals/mod_string.py +++ b/spectrum_fundamentals/mod_string.py @@ -1,7 +1,7 @@ import difflib import re from itertools import combinations, repeat -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, 
Optional, Set, Tuple, Union import numpy as np import pandas as pd @@ -342,6 +342,15 @@ def split_modstring(sequence: str, r_pattern): return map(split_modstring, sequences, repeat(regex_pattern)) +def get_all_tokens(sequences: List[str]) -> Set[str]: + """Parse given sequences in UNIMOD ProForma standard into a set of all tokens.""" + pattern = r"[ACDEFGHIKLMNPQRSTVWY](\[UNIMOD:\d+\])?" + tokens = set() + for seq in sequences: + tokens |= {match.group() for match in re.finditer(pattern, seq)} + return tokens + + def add_permutations(modified_sequence: str, unimod_id: int, residues: List[str]): """ Generate different peptide sequences with moving the modification to all possible residues. diff --git a/tests/unit_tests/test_mod_string.py b/tests/unit_tests/test_mod_string.py index cee6c5c..d03c736 100644 --- a/tests/unit_tests/test_mod_string.py +++ b/tests/unit_tests/test_mod_string.py @@ -309,6 +309,13 @@ def test_parse_modstrings_invalid_with_filtering(self): invalid_seq = "testing" self.assertEqual(next(mod.parse_modstrings([invalid_seq], alphabet=c.ALPHABET, filter=True)), [0]) + def test_get_all_tokens(self): + """Test parsing of any UNIMOD sequence into tokens.""" + seqs = ["ACKC[UNIMOD:4]AD", "PEPTIDE", "PEM[UNIMOD:35]"] + + result = mod.get_all_tokens(seqs) + self.assertEqual(result, {"A", "C", "C[UNIMOD:4]", "D", "E", "I", "K", "M[UNIMOD:35]", "P", "T"}) + class TestCustomToInternal(unittest.TestCase): """Class to test custom to internal.""" From 64aae2d4cfdcfc93757c82684267a3610f6764f1 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Fri, 9 Aug 2024 18:56:49 +0200 Subject: [PATCH 4/4] Bump version from 0.7.2 to 0.7.3 --- .cookietemple.yml | 2 +- .github/release-drafter.yml | 4 ++-- cookietemple.cfg | 2 +- docs/conf.py | 4 ++-- pyproject.toml | 2 +- spectrum_fundamentals/__init__.py | 2 +- spectrum_fundamentals/__main__.py | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.cookietemple.yml b/.cookietemple.yml index 9e72d00..9100977 
100644 --- a/.cookietemple.yml +++ b/.cookietemple.yml @@ -15,5 +15,5 @@ full_name: Victor Giurcoiu email: victor.giurcoiu@tum.de project_name: spectrum_fundamentals project_short_description: Fundamentals public repo -version: 0.7.2 +version: 0.7.3 license: MIT diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 744ea52..95b0d48 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,5 +1,5 @@ -name-template: "0.7.2 🌈" # <> -tag-template: 0.7.2 # <> +name-template: "0.7.3 🌈" # <> +tag-template: 0.7.3 # <> exclude-labels: - "skip-changelog" diff --git a/cookietemple.cfg b/cookietemple.cfg index 4bb82fa..2740c15 100644 --- a/cookietemple.cfg +++ b/cookietemple.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.7.2 +current_version = 0.7.3 [bumpversion_files_whitelisted] init_file = spectrum_fundamentals/__init__.py diff --git a/docs/conf.py b/docs/conf.py index 68b8850..559b2f1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,9 +52,9 @@ # the built documents. # # The short X.Y version. -version = "0.7.2" +version = "0.7.3" # The full version, including alpha/beta/rc tags. -release = "0.7.2" +release = "0.7.3" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/pyproject.toml b/pyproject.toml index e7fbe19..ed14b87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "spectrum_fundamentals" -version = "0.7.2" # <> +version = "0.7.3" # <> description = "Fundamental functions, annotation pipeline and constants for oktoberfest" authors = ["Wilhelmlab at Technical University of Munich"] license = "MIT" diff --git a/spectrum_fundamentals/__init__.py b/spectrum_fundamentals/__init__.py index ffa224b..a533669 100644 --- a/spectrum_fundamentals/__init__.py +++ b/spectrum_fundamentals/__init__.py @@ -2,7 +2,7 @@ __author__ = "Mario Picciani" __email__ = "mario.picciani@tum.de" -__version__ = "0.7.2" +__version__ = "0.7.3" import logging import logging.handlers diff --git a/spectrum_fundamentals/__main__.py b/spectrum_fundamentals/__main__.py index bc9579f..3825a50 100644 --- a/spectrum_fundamentals/__main__.py +++ b/spectrum_fundamentals/__main__.py @@ -5,7 +5,7 @@ @click.command() -@click.version_option(version="0.7.2", message=click.style("spectrum_fundamentals Version: 0.7.2")) +@click.version_option(version="0.7.3", message=click.style("spectrum_fundamentals Version: 0.7.3")) def main() -> None: """spectrum_fundamentals."""