From 5b5eb204cf9810870c1bb7feeef25d1bad33d251 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Fri, 9 Aug 2024 18:39:34 +0200 Subject: [PATCH] fix encoding problems: using z_r everywhere --- spectrum_fundamentals/constants.py | 79 +++++++++++-------- spectrum_fundamentals/fragments.py | 15 +--- .../data/fragments_meta_data_etd_ecd.json | 36 ++++----- .../data/fragments_meta_data_ethcd_etcid.json | 36 ++++----- tests/unit_tests/test_fragments.py | 8 +- 5 files changed, 87 insertions(+), 87 deletions(-) diff --git a/spectrum_fundamentals/constants.py b/spectrum_fundamentals/constants.py index b5c7ec1..e0d1a3d 100644 --- a/spectrum_fundamentals/constants.py +++ b/spectrum_fundamentals/constants.py @@ -19,40 +19,6 @@ # we do not annotate fragments wth charge 3+. All fragmets with charge 3+ convert to -1 -############# -# ION TYPES # -############# -FORWARD_IONS = ["a", "b", "c"] -BACKWARDS_IONS = ["x", "y", "z", "z●"] -IONS = FORWARD_IONS + BACKWARDS_IONS - -FRAGMENTATION_TO_IONS_BY_PAIRS = { - "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b - "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b - "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c - "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c - "ETHCD": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z●,c - "ETCID": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z●,c - "UVPD": [ - BACKWARDS_IONS[0], - FORWARD_IONS[0], - BACKWARDS_IONS[1], - FORWARD_IONS[1], - BACKWARDS_IONS[2], - FORWARD_IONS[2], - ], # y,b,z,c,x,a -} - -FRAGMENTATION_TO_IONS_BY_DIRECTION = { - "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b - "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b - "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c - "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z●,c - "ETHCD": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z●,b,c - "ETCID": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z●,b,c - "UVPD": BACKWARDS_IONS[:-1] + FORWARD_IONS, # y,z,x,b,c,a -} - ############# # ALPHABETS # ############# @@ -449,3 +415,48 @@ class RescoreType(Enum): PROSIT = "prosit" ANDROMEDA = "andromeda" + + +############# +# ION TYPES # +############# +FORWARD_IONS = ["a", "b", "c"] +BACKWARDS_IONS = ["x", "y", "z", "z_r"] # +IONS = FORWARD_IONS + BACKWARDS_IONS + +FRAGMENTATION_TO_IONS_BY_PAIRS = { + "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ETHCD": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z_r,c + "ETCID": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z_r,c + "UVPD": [ + BACKWARDS_IONS[0], + FORWARD_IONS[0], + BACKWARDS_IONS[1], + FORWARD_IONS[1], + BACKWARDS_IONS[2], + FORWARD_IONS[2], + ], # y,b,z,c,x,a +} + +FRAGMENTATION_TO_IONS_BY_DIRECTION = { + "HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b + "ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c + "ETHCD": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z_r,b,c + "ETCID": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z_r,b,c + "UVPD": BACKWARDS_IONS[:-1] + FORWARD_IONS, # y,z,x,b,c,a +} + +ION_DELTAS = { + "a": -ATOM_MASSES["O"] - ATOM_MASSES["C"], + "b": 0.0, + "c": 3 * ATOM_MASSES["H"] + ATOM_MASSES["N"], + "x": 2 * ATOM_MASSES["O"] + ATOM_MASSES["C"], + "y": ATOM_MASSES["O"] + 2 * ATOM_MASSES["H"], + "z": ATOM_MASSES["O"] - ATOM_MASSES["N"] - ATOM_MASSES["H"], + "z_r": ATOM_MASSES["O"] - ATOM_MASSES["N"], +} diff --git a/spectrum_fundamentals/fragments.py b/spectrum_fundamentals/fragments.py index 524251e..3c2233e 100644 --- a/spectrum_fundamentals/fragments.py +++ b/spectrum_fundamentals/fragments.py @@ -11,6 +11,7 @@ ATOM_MASSES, FRAGMENTATION_TO_IONS_BY_DIRECTION, FRAGMENTATION_TO_IONS_BY_PAIRS, + ION_DELTAS, MOD_MASSES, PARTICLE_MASSES, ) @@ -132,19 +133,7 @@ def get_ion_delta(ion_types: List[str]) -> np.ndarray: :param ion_types: type of ions for which mass should be calculated :return: numpy array with masses of the ions """ - ion_type_offsets = { - "a": -ATOM_MASSES["O"] - ATOM_MASSES["C"], - "b": 0.0, - "c": 3 * ATOM_MASSES["H"] + ATOM_MASSES["N"], - "x": 2 * ATOM_MASSES["O"] + ATOM_MASSES["C"], - "y": ATOM_MASSES["O"] + 2 * ATOM_MASSES["H"], - "z": ATOM_MASSES["O"] - ATOM_MASSES["N"] - ATOM_MASSES["H"], - "z●": ATOM_MASSES["O"] - ATOM_MASSES["N"], - } - - deltas = np.array([ion_type_offsets[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1) - - return deltas + return np.array([ION_DELTAS[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1) def initialize_peaks( diff --git a/tests/unit_tests/data/fragments_meta_data_etd_ecd.json b/tests/unit_tests/data/fragments_meta_data_etd_ecd.json index 1139aca..e19ebd9 100644 --- a/tests/unit_tests/data/fragments_meta_data_etd_ecd.json +++ b/tests/unit_tests/data/fragments_meta_data_etd_ecd.json @@ -8,7 +8,7 @@ "max_mass": 39.034494842950046 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 3, "mass": 44.68542100033333, @@ -24,7 +24,7 @@ "max_mass": 58.048093958160386 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 2, "mass": 66.524493267, @@ -40,7 +40,7 @@ "max_mass": 82.04955279357003 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 3, "mass": 83.02773533366667, @@ -64,7 +64,7 @@ "max_mass": 115.08889130379143 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 3, "mass": 120.72242333366667, @@ -80,7 +80,7 @@ "max_mass": 122.57068088409038 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 2, "mass": 124.037964767, @@ -88,7 +88,7 @@ "max_mass": 124.04044552629534 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 1, "mass": 132.041710067, @@ -104,7 +104,7 @@ "max_mass": 148.08435446319004 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 3, "mass": 154.40498300033335, @@ -120,7 +120,7 @@ "max_mass": 171.0980334117304 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 2, "mass": 180.579996767, @@ -136,7 +136,7 @@ "max_mass": 185.77979635695002 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 3, "mass": 186.75590433366668, @@ -160,7 +160,7 @@ "max_mass": 224.12287753657 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 3, "mass": 229.77010200033337, @@ -168,7 +168,7 @@ "max_mass": 229.77469740237336 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 2, "mass": 231.10383626700002, @@ -184,7 +184,7 @@ "max_mass": 244.13406515565143 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 1, "mass": 247.068653067, @@ -200,7 +200,7 @@ "max_mass": 278.1660462291604 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 2, "mass": 279.63021826700003, @@ -224,7 +224,7 @@ "max_mass": 341.18877021093147 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 2, "mass": 344.15151476700004, @@ -232,7 +232,7 @@ "max_mass": 344.1583977972954 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 1, "mass": 360.152717067, @@ -248,7 +248,7 @@ "max_mass": 442.23847016451145 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 1, "mass": 461.20039606700004, @@ -264,7 +264,7 @@ "max_mass": 555.3247958457914 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 1, "mass": 558.2531600670001, @@ -280,7 +280,7 @@ "max_mass": 670.3540393846513 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 1, "mass": 687.2957530670001, diff --git a/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json b/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json index de130c0..bc48f11 100644 --- a/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json +++ b/tests/unit_tests/data/fragments_meta_data_ethcd_etcid.json @@ -16,7 +16,7 @@ "max_mass": 39.034494842950046 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 3, "mass": 44.68542100033333, @@ -48,7 +48,7 @@ "max_mass": 58.048093958160386 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 2, "mass": 66.524493267, @@ -80,7 +80,7 @@ "max_mass": 82.04955279357003 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 3, "mass": 83.02773533366667, @@ -136,7 +136,7 @@ "max_mass": 115.08889130379143 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 3, "mass": 120.72242333366667, @@ -152,7 +152,7 @@ "max_mass": 122.57068088409038 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 2, "mass": 124.037964767, @@ -168,7 +168,7 @@ "max_mass": 126.06451926362715 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 1, "charge": 1, "mass": 132.041710067, @@ -208,7 +208,7 @@ "max_mass": 148.08435446319004 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 3, "mass": 154.40498300033335, @@ -248,7 +248,7 @@ "max_mass": 180.10416647828933 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 2, "mass": 180.579996767, @@ -264,7 +264,7 @@ "max_mass": 185.77979635695002 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 3, "mass": 186.75590433366668, @@ -328,7 +328,7 @@ "max_mass": 227.10717551966934 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 3, "mass": 229.77010200033337, @@ -336,7 +336,7 @@ "max_mass": 229.77469740237336 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 2, "mass": 231.10383626700002, @@ -368,7 +368,7 @@ "max_mass": 244.13406515565143 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 2, "charge": 1, "mass": 247.068653067, @@ -400,7 +400,7 @@ "max_mass": 278.1660462291604 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 2, "mass": 279.63021826700003, @@ -448,7 +448,7 @@ "max_mass": 341.18877021093147 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 2, "mass": 344.15151476700004, @@ -464,7 +464,7 @@ "max_mass": 352.16792001953604 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 3, "charge": 1, "mass": 360.152717067, @@ -496,7 +496,7 @@ "max_mass": 442.23847016451145 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 4, "charge": 1, "mass": 461.20039606700004, @@ -528,7 +528,7 @@ "max_mass": 555.3247958457914 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 5, "charge": 1, "mass": 558.2531600670001, @@ -560,7 +560,7 @@ "max_mass": 670.3540393846513 }, { - "ion_type": "z●", + "ion_type": "z_r", "no": 6, "charge": 1, "mass": 687.2957530670001, diff --git a/tests/unit_tests/test_fragments.py b/tests/unit_tests/test_fragments.py index a486b1d..ff6551b 100644 --- a/tests/unit_tests/test_fragments.py +++ b/tests/unit_tests/test_fragments.py @@ -106,11 +106,11 @@ def test_get_ion_types_hcd(self): def test_get_ion_types_etd(self): """Test retrieving ion types for ETD.""" - assert fragments.retrieve_ion_types("ETD") == ["z●", "c"] + assert fragments.retrieve_ion_types("ETD") == ["z_r", "c"] def test_get_ion_types_etcid(self): """Test retrieving ion types for ETCID.""" - assert fragments.retrieve_ion_types("ETCID") == ["y", "b", "z●", "c"] + assert fragments.retrieve_ion_types("ETCID") == ["y", "b", "z_r", "c"] def test_get_ion_types_lower_case(self): """Test lower case fragmentation method.""" @@ -130,11 +130,11 @@ def test_get_ion_types_hcd(self): def test_get_ion_types_etd(self): """Test retrieving ion types for ETD.""" - assert fragments.retrieve_ion_types_for_peak_initialization("ETD") == ["z●", "c"] + assert fragments.retrieve_ion_types_for_peak_initialization("ETD") == ["z_r", "c"] def test_get_ion_types_etcid(self): """Test retrieving ion types for ETCID.""" - assert fragments.retrieve_ion_types_for_peak_initialization("ETCID") == ["y", "z●", "b", "c"] + assert fragments.retrieve_ion_types_for_peak_initialization("ETCID") == ["y", "z_r", "b", "c"] def test_get_ion_types_lower_case(self): """Test lower case fragmentation method."""