From 839b5142872a26aaac5d5470052da4be280f9de5 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Fri, 9 Aug 2024 17:16:45 +0200 Subject: [PATCH] =?UTF-8?q?simplify=20code=20and=20add=20z=E2=97=8F=20to?= =?UTF-8?q?=20ethcd/etcid?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- spectrum_fundamentals/constants.py | 36 +++++ spectrum_fundamentals/fragments.py | 33 ++-- .../data/fragments_meta_data_ethcd_etcid.json | 152 +++++++++--------- tests/unit_tests/test_fragments.py | 6 +- 4 files changed, 129 insertions(+), 98 deletions(-) diff --git a/spectrum_fundamentals/constants.py b/spectrum_fundamentals/constants.py index bc72e3f..b5c7ec1 100644 --- a/spectrum_fundamentals/constants.py +++ b/spectrum_fundamentals/constants.py @@ -17,6 +17,42 @@ VEC_LENGTH_CMS2 = (SEQ_LEN - 1) * 2 * 3 * 2 # peptide of length 30 can have 29 b, y, b_short, y_short, b_long and y_long ions, each with charge 1+, 2+ and 3+ # we do not annotate fragments wth charge 3+. All fragmets with charge 3+ convert to -1 + + +############# +# ION TYPES # +############# +FORWARD_IONS = ["a", "b", "c"] +BACKWARDS_IONS = ["x", "y", "z", "z●"] +IONS = FORWARD_IONS + BACKWARDS_IONS + +FRAGMENTATION_TO_IONS_BY_PAIRS = { + "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c + "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c + "ETHCD": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z●,c + "ETCID": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z●,c + "UVPD": [ + BACKWARDS_IONS[0], + FORWARD_IONS[0], + BACKWARDS_IONS[1], + FORWARD_IONS[1], + BACKWARDS_IONS[2], + FORWARD_IONS[2], + ], # x,a,y,b,z,c +} + +FRAGMENTATION_TO_IONS_BY_DIRECTION = { + "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], 
# z●,c + "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c + "ETHCD": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z●,b,c + "ETCID": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z●,b,c + "UVPD": BACKWARDS_IONS[:-1] + FORWARD_IONS, # x,y,z,a,b,c +} + ############# # ALPHABETS # ############# diff --git a/spectrum_fundamentals/fragments.py b/spectrum_fundamentals/fragments.py index f91a943..524251e 100644 --- a/spectrum_fundamentals/fragments.py +++ b/spectrum_fundamentals/fragments.py @@ -6,7 +6,14 @@ import numpy as np import pandas as pd -from .constants import AA_MASSES, ATOM_MASSES, MOD_MASSES, PARTICLE_MASSES +from .constants import ( + AA_MASSES, + ATOM_MASSES, + FRAGMENTATION_TO_IONS_BY_DIRECTION, + FRAGMENTATION_TO_IONS_BY_PAIRS, + MOD_MASSES, + PARTICLE_MASSES, +) from .mod_string import internal_without_mods logger = logging.getLogger(__name__) @@ -95,16 +102,10 @@ def retrieve_ion_types(fragmentation_method: str) -> List[str]: : return: list of possible ion types """ fragmentation_method = fragmentation_method.upper() - if fragmentation_method == "HCD" or fragmentation_method == "CID": - return ["y", "b"] - elif fragmentation_method == "ETD" or fragmentation_method == "ECD": - return ["z●", "c"] - elif fragmentation_method == "ETCID" or fragmentation_method == "ETHCD": - return ["y", "b", "z", "c"] - elif fragmentation_method == "UVPD": - return ["y", "b", "z", "c", "x", "a"] - else: + ions = FRAGMENTATION_TO_IONS_BY_PAIRS.get(fragmentation_method, []) + if not ions: raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}") + return ions def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> List[str]: @@ -118,16 +119,10 @@ def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> Lis : return: list of possible ion types """ fragmentation_method = fragmentation_method.upper() - if fragmentation_method == "HCD" or fragmentation_method == "CID": - 
return ["y", "b"] - elif fragmentation_method == "ETD" or fragmentation_method == "ECD": - return ["z●", "c"] - elif fragmentation_method == "ETCID" or fragmentation_method == "ETHCD": - return ["y", "z", "b", "c"] - elif fragmentation_method == "UVPD": - return ["x", "y", "z", "a", "b", "c"] - else: + ions = FRAGMENTATION_TO_IONS_BY_DIRECTION.get(fragmentation_method, []) + if not ions: raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}") + return ions def get_ion_delta(ion_types: List[str]) -> np.ndarray: diff --git a/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json b/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json index 38e58ea..de130c0 100644 --- a/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json +++ b/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json @@ -16,12 +16,12 @@ "max_mass": 39.034494842950046 }, { - "ion_type": "z", + "ion_type": "z●", "no": 1, "charge": 3, - "mass": 44.349479322, - "min_mass": 44.34859233241356, - "max_mass": 44.35036631158644 + "mass": 44.68542100033333, + "min_mass": 44.68452729191332, + "max_mass": 44.686314708753336 }, { "ion_type": "b", @@ -48,12 +48,12 @@ "max_mass": 58.048093958160386 }, { - "ion_type": "z", + "ion_type": "z●", "no": 1, "charge": 2, - "mass": 66.0205807495, - "min_mass": 66.01926033788502, - "max_mass": 66.02190116111498 + "mass": 66.524493267, + "min_mass": 66.52316277713466, + "max_mass": 66.52582375686534 }, { "ion_type": "y", @@ -80,12 +80,12 @@ "max_mass": 82.04955279357003 }, { - "ion_type": "z", + "ion_type": "z●", "no": 2, "charge": 3, - "mass": 82.69179365533334, - "min_mass": 82.69013981946024, - "max_mass": 82.69344749120644 + "mass": 83.02773533366667, + "min_mass": 83.02607477896, + "max_mass": 83.02939588837334 }, { "ion_type": "y", @@ -136,12 +136,12 @@ "max_mass": 115.08889130379143 }, { - "ion_type": "z", + "ion_type": "z●", "no": 3, "charge": 3, - "mass": 120.38648165533334, - "min_mass": 120.38407392570024, - "max_mass": 
120.38888938496645 + "mass": 120.72242333366667, + "min_mass": 120.7200088852, + "max_mass": 120.72483778213333 }, { "ion_type": "c", @@ -152,12 +152,12 @@ "max_mass": 122.57068088409038 }, { - "ion_type": "z", + "ion_type": "z●", "no": 2, "charge": 2, - "mass": 123.53405224950001, - "min_mass": 123.53158156845501, - "max_mass": 123.536522930545 + "mass": 124.037964767, + "min_mass": 124.03548400770467, + "max_mass": 124.04044552629534 }, { "ion_type": "y", @@ -168,12 +168,12 @@ "max_mass": 126.06451926362715 }, { - "ion_type": "z", + "ion_type": "z●", "no": 1, "charge": 1, - "mass": 131.033885032, - "min_mass": 131.03126435429937, - "max_mass": 131.03650570970063 + "mass": 132.041710067, + "min_mass": 132.03906923279865, + "max_mass": 132.04435090120134 }, { "ion_type": "y", @@ -208,12 +208,12 @@ "max_mass": 148.08435446319004 }, { - "ion_type": "z", + "ion_type": "z●", "no": 4, "charge": 3, - "mass": 154.069041322, - "min_mass": 154.06595994117356, - "max_mass": 154.07212270282645 + "mass": 154.40498300033335, + "min_mass": 154.40189490067334, + "max_mass": 154.40807109999335 }, { "ion_type": "y", @@ -239,14 +239,6 @@ "min_mass": 171.0911896272696, "max_mass": 171.0980334117304 }, - { - "ion_type": "z", - "no": 3, - "charge": 2, - "mass": 180.0760842495, - "min_mass": 180.07248272781501, - "max_mass": 180.079685771185 - }, { "ion_type": "b", "no": 5, @@ -255,6 +247,14 @@ "min_mass": 180.09696245571067, "max_mass": 180.10416647828933 }, + { + "ion_type": "z●", + "no": 3, + "charge": 2, + "mass": 180.579996767, + "min_mass": 180.57638516706467, + "max_mass": 180.58360836693535 + }, { "ion_type": "c", "no": 5, @@ -264,12 +264,12 @@ "max_mass": 185.77979635695002 }, { - "ion_type": "z", + "ion_type": "z●", "no": 5, "charge": 3, - "mass": 186.41996265533336, - "min_mass": 186.41623425608026, - "max_mass": 186.42369105458647 + "mass": 186.75590433366668, + "min_mass": 186.75216921558, + "max_mass": 186.75963945175334 }, { "ion_type": "y", @@ -328,20 +328,20 @@ 
"max_mass": 227.10717551966934 }, { - "ion_type": "z", + "ion_type": "z●", "no": 6, "charge": 3, - "mass": 229.43416032200003, - "min_mass": 229.4295716387936, - "max_mass": 229.43874900520646 + "mass": 229.77010200033337, + "min_mass": 229.76550659829337, + "max_mass": 229.77469740237336 }, { - "ion_type": "z", + "ion_type": "z●", "no": 4, "charge": 2, - "mass": 230.59992374950002, - "min_mass": 230.59531175102504, - "max_mass": 230.604535747975 + "mass": 231.10383626700002, + "min_mass": 231.0992141902747, + "max_mass": 231.10845834372535 }, { "ion_type": "y", @@ -368,12 +368,12 @@ "max_mass": 244.13406515565143 }, { - "ion_type": "z", + "ion_type": "z●", "no": 2, "charge": 1, - "mass": 246.06082803200002, - "min_mass": 246.05590681543939, - "max_mass": 246.06574924856065 + "mass": 247.068653067, + "min_mass": 247.06371169393867, + "max_mass": 247.07359444006136 }, { "ion_type": "y", @@ -400,12 +400,12 @@ "max_mass": 278.1660462291604 }, { - "ion_type": "z", + "ion_type": "z●", "no": 5, "charge": 2, - "mass": 279.12630574950003, - "min_mass": 279.120723223385, - "max_mass": 279.13188827561504 + "mass": 279.63021826700003, + "min_mass": 279.6246256626347, + "max_mass": 279.6358108713654 }, { "ion_type": "y", @@ -448,12 +448,12 @@ "max_mass": 341.18877021093147 }, { - "ion_type": "z", + "ion_type": "z●", "no": 6, "charge": 2, - "mass": 343.64760224950004, - "min_mass": 343.64072929745504, - "max_mass": 343.65447520154504 + "mass": 344.15151476700004, + "min_mass": 344.1446317367047, + "max_mass": 344.1583977972954 }, { "ion_type": "y", @@ -464,12 +464,12 @@ "max_mass": 352.16792001953604 }, { - "ion_type": "z", + "ion_type": "z●", "no": 3, "charge": 1, - "mass": 359.14489203200003, - "min_mass": 359.13770913415937, - "max_mass": 359.1520749298407 + "mass": 360.152717067, + "min_mass": 360.1455140126587, + "max_mass": 360.1599201213414 }, { "ion_type": "y", @@ -496,12 +496,12 @@ "max_mass": 442.23847016451145 }, { - "ion_type": "z", + "ion_type": "z●", "no": 4, 
"charge": 1, - "mass": 460.19257103200005, - "min_mass": 460.1833671805794, - "max_mass": 460.2017748834207 + "mass": 461.20039606700004, + "min_mass": 461.1911720590787, + "max_mass": 461.20962007492136 }, { "ion_type": "y", @@ -528,12 +528,12 @@ "max_mass": 555.3247958457914 }, { - "ion_type": "z", + "ion_type": "z●", "no": 5, "charge": 1, - "mass": 557.2453350320001, - "min_mass": 557.2341901252994, - "max_mass": 557.2564799387007 + "mass": 558.2531600670001, + "min_mass": 558.2419950037987, + "max_mass": 558.2643251302014 }, { "ion_type": "y", @@ -560,12 +560,12 @@ "max_mass": 670.3540393846513 }, { - "ion_type": "z", + "ion_type": "z●", "no": 6, "charge": 1, - "mass": 686.2879280320001, - "min_mass": 686.2742022734394, - "max_mass": 686.3016537905607 + "mass": 687.2957530670001, + "min_mass": 687.2820071519387, + "max_mass": 687.3094989820614 }, { "ion_type": "y", diff --git a/tests/unit_tests/test_fragments.py b/tests/unit_tests/test_fragments.py index 330f2d2..a486b1d 100644 --- a/tests/unit_tests/test_fragments.py +++ b/tests/unit_tests/test_fragments.py @@ -110,11 +110,11 @@ def test_get_ion_types_etd(self): def test_get_ion_types_etcid(self): """Test retrieving ion types for ETCID.""" - assert fragments.retrieve_ion_types("ETCID") == ["y", "b", "z", "c"] + assert fragments.retrieve_ion_types("ETCID") == ["y", "b", "z●", "c"] def test_get_ion_types_lower_case(self): """Test lower case fragmentation method.""" - assert fragments.retrieve_ion_types("uvpd") == ["y", "b", "z", "c", "x", "a"] + assert fragments.retrieve_ion_types("uvpd") == ["x", "a", "y", "b", "z", "c"] def test_invalid_fragmentation_method(self): """Test if error is raised for invalid fragmentation method.""" @@ -134,7 +134,7 @@ def test_get_ion_types_etd(self): def test_get_ion_types_etcid(self): """Test retrieving ion types for ETCID.""" - assert fragments.retrieve_ion_types_for_peak_initialization("ETCID") == ["y", "z", "b", "c"] + assert 
fragments.retrieve_ion_types_for_peak_initialization("ETCID") == ["y", "z●", "b", "c"] def test_get_ion_types_lower_case(self): """Test lower case fragmentation method."""