From 9143d1d85ef4e5cef65006cb1a1fc76a6e6bbbdb Mon Sep 17 00:00:00 2001 From: Fabian Basso Date: Wed, 31 Jul 2024 12:02:07 +0000 Subject: [PATCH] tests passed spectrum_io --- spectrum_io/search_result/mascot.py | 34 ++-- spectrum_io/search_result/maxquant.py | 53 +++--- spectrum_io/search_result/msfragger.py | 35 ++-- spectrum_io/search_result/sage.py | 27 ++- spectrum_io/search_result/search_results.py | 30 +-- spectrum_io/search_result/xisearch.py | 10 +- spectrum_io/spectral_library/dlib.py | 9 +- spectrum_io/spectral_library/msp.py | 5 +- .../spectral_library/spectral_library.py | 13 +- spectrum_io/spectral_library/spectronaut.py | 5 +- tests/unit_tests/data/psm_mods.pepXML | 175 ++++++++++++++++++ .../unit_tests/data/psm_tmt_internal_mods.csv | 5 + tests/unit_tests/test_msfragger.py | 17 +- 13 files changed, 334 insertions(+), 84 deletions(-) create mode 100644 tests/unit_tests/data/psm_mods.pepXML create mode 100644 tests/unit_tests/data/psm_tmt_internal_mods.csv diff --git a/spectrum_io/search_result/mascot.py b/spectrum_io/search_result/mascot.py index e3f7adc..7e6f2eb 100644 --- a/spectrum_io/search_result/mascot.py +++ b/spectrum_io/search_result/mascot.py @@ -2,11 +2,11 @@ import re import sqlite3 from pathlib import Path -from typing import Optional, Union, Dict, Tuple +from typing import Dict, Optional, Tuple, Union import pandas as pd import spectrum_fundamentals.constants as c -from spectrum_fundamentals.mod_string import internal_without_mods, custom_regex_escape +from spectrum_fundamentals.mod_string import custom_regex_escape, internal_without_mods from .search_results import SearchResults, filter_valid_prosit_sequences @@ -16,14 +16,16 @@ class Mascot(SearchResults): """Handle search results from Mascot.""" - def read_result(self, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = None, - var_mods: Optional[Dict[str, str]] = None) -> pd.DataFrame: + def read_result( + self, + tmt_labeled: str, + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None, + ) -> pd.DataFrame: """ Function to read a mascot msf file and perform some basic formatting. :param tmt_labeled: tmt label as str - :param var_mods: dict with custom variable identifier and respecitve internal equivalent - :param stat_mods: dict with custom static identifier and respecitve internal equivalent + :param custom_mods: dict with custom variable and static identifier and respective internal equivalent and mass :return: pd.DataFrame with the formatted data """ logger.info("Reading mascot msf file") @@ -78,10 +80,10 @@ def read_result(self, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = No ["SCAN_NUMBER", "PRECURSOR_CHARGE", "SCORE", "RAW_FILE", "SEQUENCE", "REVERSE"], as_index=False, ).agg({"MODIFICATIONS": "|".join}) - MOD_MASSES = c.update_mod_masses() - mod_masses_reverse = {round(float(v), 3): k for k, v in MOD_MASSES.items()} - - def find_replacement(match: re.Match, sequence: str) -> str: + mod_masses = c.update_mod_masses() + mod_masses_reverse = {round(float(v), 3): k for k, v in mod_masses.items()} + + def find_replacement(match: re.Match) -> str: """ Subfunction to find the corresponding substitution for a match.
@@ -90,7 +92,13 @@ def find_replacement(match: re.Match, sequence: str) -> str: """ key = match.string[match.start() : match.end()] return mods[key] - + + stat_mods: Dict[str, str] = {} + var_mods: Dict[str, str] = {} + + if custom_mods is not None: + stat_mods = {key: value[0] for key, value in (custom_mods.get("stat_mods") or {}).items()} + var_mods = {key: value[0] for key, value in (custom_mods.get("var_mods") or {}).items()} mods = {} @@ -107,11 +115,11 @@ def find_replacement(match: re.Match, sequence: str) -> str: modifications = row["MODIFICATIONS"].split("|") sequence = row["SEQUENCE"] if mods: - sequence = regex.sub(lambda match: find_replacement(match, sequence), sequence) + sequence = regex.sub(lambda match: find_replacement(match), sequence) if len(modifications) == 0: sequences.append(sequence) - else: + else: skip = 0 for mod in modifications: pos, mass = mod.split("$") diff --git a/spectrum_io/search_result/maxquant.py b/spectrum_io/search_result/maxquant.py index 2180979..410102e 100644 --- a/spectrum_io/search_result/maxquant.py +++ b/spectrum_io/search_result/maxquant.py @@ -1,11 +1,10 @@ import logging from pathlib import Path -from typing import Optional, Union, Dict, Tuple +from typing import Dict, Optional, Tuple, Union import pandas as pd import spectrum_fundamentals.constants as c from spectrum_fundamentals.mod_string import internal_without_mods, maxquant_to_internal -from spectrum_fundamentals.constants import MAXQUANT_VAR_MODS from .search_results import SearchResults, filter_valid_prosit_sequences @@ -42,14 +41,16 @@ def add_tmt_mod(mass: float, seq: str, unimod_tag: str) -> float: mass += num_of_tmt * c.MOD_MASSES[f"{unimod_tag}"] return mass - def read_result(self, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = None, - var_mods: Optional[Dict[str, str]] = None) -> pd.DataFrame: + def read_result( + self, + tmt_labeled: str, + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None, + ) -> pd.DataFrame: """ Function to read a msms txt and perform some basic formatting. :param tmt_labeled: tmt label as str - :param var_mods: dict with custom variable identifier and respecitve internal equivalent - :param stat_mods: dict with custom static identifier and respecitve internal equivalent + :param custom_mods: dict with custom variable and static identifier and respecitve internal equivalent and mass :return: pd.DataFrame with the formatted data """ logger.info("Reading msms.txt file") @@ -76,24 +77,35 @@ def read_result(self, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = No df.columns = df.columns.str.upper() df.columns = df.columns.str.replace(" ", "_") + stat_mods: Dict[str, str] = {} + var_mods: Dict[str, str] = {} + + if custom_mods is not None: + stat_mods = {key: value[0] for key, value in (custom_mods.get("stat_mods") or {}).items()} + var_mods = {key: value[0] for key, value in (custom_mods.get("var_mods") or {}).items()} + df = MaxQuant.update_columns_for_prosit(df, tmt_labeled, stat_mods, var_mods) return filter_valid_prosit_sequences(df) @staticmethod - def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = None, - var_mods: Optional[Dict[str, str]] = None) -> pd.DataFrame: + def update_columns_for_prosit( + df: pd.DataFrame, + tmt_labeled: str, + stat_mods: Optional[Dict[str, str]] = None, + var_mods: Optional[Dict[str, str]] = None, + ) -> pd.DataFrame: """ Update columns of df to work with Prosit. 
:param df: df to modify :param tmt_labeled: True if tmt labeled - :param var_mods: dict with custom variable identifier and respecitve internal equivalent + :param var_mods: dict with custom variable identifier and respecitve internal equivalent :param stat_mods: dict with custom static identifier and respecitve internal equivalent :return: modified df as pd.DataFrame """ df.rename(columns={"CHARGE": "PRECURSOR_CHARGE"}, inplace=True) - mods = {**(MAXQUANT_VAR_MODS), **(stat_mods or {}), **(var_mods or {})} + mods = {**(c.MAXQUANT_VAR_MODS), **(stat_mods or {}), **(var_mods or {})} df["REVERSE"].fillna(False, inplace=True) df["REVERSE"].replace("+", True, inplace=True) @@ -102,33 +114,32 @@ def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str, stat_mods: Opt unimod_tag = c.TMT_MODS[tmt_labeled] logger.info("Adding TMT fixed modifications") df["MODIFIED_SEQUENCE"] = maxquant_to_internal( - df["MODIFIED_SEQUENCE"].to_numpy(), mods= - {**{"C": "C[UNIMOD:4]", "^_": f"_{unimod_tag}-", "K": f"K{unimod_tag}"}, **mods} - ) + df["MODIFIED_SEQUENCE"].to_numpy(), + mods={**{"C": "C[UNIMOD:4]", "^_": f"_{unimod_tag}-", "K": f"K{unimod_tag}"}, **mods}, + ) df["MASS"] = df.apply(lambda x: MaxQuant.add_tmt_mod(x.MASS, x.MODIFIED_SEQUENCE, unimod_tag), axis=1) if "msa" in tmt_labeled: logger.info("Replacing phospho by dehydration for Phospho-MSA") df["MODIFIED_SEQUENCE_MSA"] = df["MODIFIED_SEQUENCE"].str.replace( "[UNIMOD:21]", "[UNIMOD:23]", regex=False ) - fixed_mods = {"C": "C[UNIMOD:4]"} elif "LABELING_STATE" in df.columns: logger.info("Adding SILAC fixed modifications") - + df.loc[df["LABELING_STATE"] == 1, "MODIFIED_SEQUENCE"] = maxquant_to_internal( - df[df["LABELING_STATE"] == 1]["MODIFIED_SEQUENCE"].to_numpy(), mods = - {**{"C": "C[UNIMOD:4]", "K": "K[UNIMOD:259]", "R": "R[UNIMOD:267]"}, **mods} + df[df["LABELING_STATE"] == 1]["MODIFIED_SEQUENCE"].to_numpy(), + mods={**{"C": "C[UNIMOD:4]", "K": "K[UNIMOD:259]", "R": "R[UNIMOD:267]"}, **mods}, ) df.loc[df["LABELING_STATE"] != 1, "MODIFIED_SEQUENCE"] = maxquant_to_internal( - df[df["LABELING_STATE"] != 1]["MODIFIED_SEQUENCE"].to_numpy(), mods= - {**{"C": "C[UNIMOD:4]"}, **mods} + df[df["LABELING_STATE"] != 1]["MODIFIED_SEQUENCE"].to_numpy(), mods={**{"C": "C[UNIMOD:4]"}, **mods} ) df["MASS"] = df.apply(lambda x: MaxQuant.add_tmt_mod(x.MASS, x.MODIFIED_SEQUENCE, "[UNIMOD:259]"), axis=1) df["MASS"] = df.apply(lambda x: MaxQuant.add_tmt_mod(x.MASS, x.MODIFIED_SEQUENCE, "[UNIMOD:267]"), axis=1) df.drop(columns=["LABELING_STATE"], inplace=True) else: - df["MODIFIED_SEQUENCE"] = maxquant_to_internal(df["MODIFIED_SEQUENCE"].to_numpy(), mods= - {**{"C": "C[UNIMOD:4]"}, **mods}) + df["MODIFIED_SEQUENCE"] = maxquant_to_internal( + df["MODIFIED_SEQUENCE"].to_numpy(), mods={**{"C": "C[UNIMOD:4]"}, **mods} + ) df["SEQUENCE"] = internal_without_mods(df["MODIFIED_SEQUENCE"]) df["PEPTIDE_LENGTH"] = df["SEQUENCE"].apply(lambda x: len(x)) df["PROTEINS"].fillna("UNKNOWN", inplace=True) diff --git a/spectrum_io/search_result/msfragger.py b/spectrum_io/search_result/msfragger.py index df98d79..3911700 100644 --- a/spectrum_io/search_result/msfragger.py +++ b/spectrum_io/search_result/msfragger.py @@ -1,12 +1,12 @@ import logging from pathlib import Path -from typing import Optional, Union, Dict, Tuple +from typing import Dict, Optional, Tuple, Union import pandas as pd import spectrum_fundamentals.constants as c from pyteomics import pepxml -from spectrum_fundamentals.mod_string import internal_without_mods, msfragger_to_internal from 
spectrum_fundamentals.constants import MSFRAGGER_VAR_MODS +from spectrum_fundamentals.mod_string import internal_without_mods, msfragger_to_internal from tqdm import tqdm from .search_results import SearchResults, filter_valid_prosit_sequences @@ -17,14 +17,17 @@ class MSFragger(SearchResults): """Handle search results from MSFragger.""" - def read_result(self, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = None, - var_mods: Optional[Dict[str, str]] = None) -> pd.DataFrame: + def read_result( + self, + tmt_labeled: str, + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None, + ) -> pd.DataFrame: """ Function to read a msms txt and perform some basic formatting. :param tmt_labeled: tmt label as str - :param var_mods: dict with custom variable identifier and respecitve internal equivalent - :param stat_mods: dict with custom static identifier and respecitve internal equivalent:raises FileNotFoundError: in case the given path is neither a file, nor a directory. + :param custom_mods: dict with custom variable and static identifier and respecitve internal equivalent and mass + :raises FileNotFoundError: in case the given path is neither a file, nor a directory. :return: pd.DataFrame with the formatted data """ if self.path.is_file(): @@ -40,18 +43,26 @@ def read_result(self, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = No df = pd.concat(ms_frag_results) + stat_mods: Dict[str, str] = {} + var_mods: Dict[str, str] = {} + + if custom_mods is not None: + stat_mods = {key: value[0] for key, value in (custom_mods.get("stat_mods") or {}).items()} + var_mods = {key: value[0] for key, value in (custom_mods.get("var_mods") or {}).items()} + df = update_columns_for_prosit(df, tmt_labeled, stat_mods=stat_mods, var_mods=var_mods) return filter_valid_prosit_sequences(df) -def update_columns_for_prosit(df, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = None, - var_mods: Optional[Dict[str, str]] = None) -> pd.DataFrame: +def update_columns_for_prosit( + df, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = None, var_mods: Optional[Dict[str, str]] = None +) -> pd.DataFrame: """ Update columns of df to work with Prosit. :param df: df to modify :param tmt_labeled: True if tmt labeled - :param var_mods: dict with custom variable identifier and respecitve internal equivalent + :param var_mods: dict with custom variable identifier and respecitve internal equivalent :param stat_mods: dict with custom static identifier and respecitve internal equivalent :return: modified df as pd.DataFrame """ @@ -64,15 +75,13 @@ def update_columns_for_prosit(df, tmt_labeled: str, stat_mods: Optional[Dict[str mods = {**(MSFRAGGER_VAR_MODS), **(stat_mods or {}), **(var_mods or {})} - if tmt_labeled != "": unimod_tag = c.TMT_MODS[tmt_labeled] logger.info("Adding TMT fixed modifications") mods = {**{"C": "C[UNIMOD:4]", r"n[\d+]": f"{unimod_tag}-", "K": f"K{unimod_tag}"}, **mods} - df["MODIFIED_SEQUENCE"] = msfragger_to_internal( - df["modified_peptide"].to_list(), mods=mods) + df["MODIFIED_SEQUENCE"] = msfragger_to_internal(df["modified_peptide"].to_list(), mods=mods) else: - #By default, i.e. if nothing is supplied to fixed_mods, carbamidomethylation on cystein will be included + # By default, i.e. if nothing is supplied to fixed_mods, carbamidomethylation on cystein will be included # in the fixed modifications. 
If you want to have no fixed modifictions at all, supply fixed_mods={} mods = {**{"C": "C[UNIMOD:4]"}, **mods} df["MODIFIED_SEQUENCE"] = msfragger_to_internal(df["modified_peptide"].to_list(), mods=mods) diff --git a/spectrum_io/search_result/sage.py b/spectrum_io/search_result/sage.py index 10be092..50438b9 100644 --- a/spectrum_io/search_result/sage.py +++ b/spectrum_io/search_result/sage.py @@ -1,6 +1,6 @@ import logging from pathlib import Path -from typing import Optional, Union, Dict, Tuple +from typing import Dict, Optional, Tuple, Union import pandas as pd from spectrum_fundamentals.constants import MOD_MASSES_SAGE @@ -14,14 +14,16 @@ class Sage(SearchResults): """Handle search results from Sage.""" - def read_result(self, tmt_labeled: str = "", stat_mods: Optional[Dict[str, str]] = None, - var_mods: Optional[Dict[str, str]] = None) -> pd.DataFrame: + def read_result( + self, + tmt_labeled: str = "", + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None, + ) -> pd.DataFrame: """ Function to read a msms tsv and perform some basic formatting. :param tmt_labeled: tmt label as str - :param var_mods: Variable modifications with custom identifiers and their respective internal equivalents - :param stat_mods: Static modifications with custom identifiers and their respective internal equivalents + :param custom_mods: dict with custom variable and static identifier and respecitve internal equivalent and mass :return: pd.DataFrame with the formatted data """ logger.info(f"Reading {self.path}") @@ -36,12 +38,23 @@ def read_result(self, tmt_labeled: str = "", stat_mods: Optional[Dict[str, str]] df.columns = df.columns.str.upper() df.columns = df.columns.str.replace(" ", "_") + stat_mods: Dict[str, str] = {} + var_mods: Dict[str, str] = {} + + if custom_mods is not None: + stat_mods = {key: value[0] for key, value in (custom_mods.get("stat_mods") or {}).items()} + var_mods = {key: value[0] for key, value in (custom_mods.get("var_mods") or {}).items()} + df = Sage.update_columns_for_prosit(df, tmt_labeled, stat_mods=stat_mods, var_mods=var_mods) return filter_valid_prosit_sequences(df) @staticmethod - def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = None, - var_mods: Optional[Dict[str, str]] = None) -> pd.DataFrame: + def update_columns_for_prosit( + df: pd.DataFrame, + tmt_labeled: str, + stat_mods: Optional[Dict[str, str]] = None, + var_mods: Optional[Dict[str, str]] = None, + ) -> pd.DataFrame: """ Update columns of df to work with Prosit. diff --git a/spectrum_io/search_result/search_results.py b/spectrum_io/search_result/search_results.py index 9bccdfc..0d9c476 100644 --- a/spectrum_io/search_result/search_results.py +++ b/spectrum_io/search_result/search_results.py @@ -2,7 +2,7 @@ import re from abc import abstractmethod from pathlib import Path -from typing import Optional, Union, Dict, Tuple +from typing import Dict, Optional, Tuple, Union import pandas as pd @@ -51,33 +51,35 @@ def __init__(self, path: Union[str, Path]): self.path = path @abstractmethod - def read_result(self, tmt_labeled: str, stat_mods: Optional[Dict[str, str]] = None, - var_mods: Optional[Dict[str, str]] = None): + def read_result( + self, + tmt_labeled: str, + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None, + ): """Read result. 
:param tmt_labeled: tmt label as str - :param var_mods: variable modifications with custom identifier and respecitve internal equivalent - :param stat_mods: static modifications with custom identifier and respecitve internal equivalent - + :param custom_mods: dict with custom variable and static identifier and respecitve internal equivalent and mass """ raise NotImplementedError - def generate_internal(self, tmt_labeled: str, out_path: Optional[Union[str, Path]] = None, custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None) -> pd.DataFrame: + def generate_internal( + self, + tmt_labeled: str, + out_path: Optional[Union[str, Path]] = None, + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None, + ) -> pd.DataFrame: """ Generate df and save to out_path if provided. :param out_path: path to output :param tmt_labeled: tmt label as str :param custom_mods: dict with static and variable custom modifications, their internal identifier and mass - :raises AssertionError: if custom modification with illegal mass was provided :return: path to output file """ - stat_mods: Dict[str, str] = {key: value[0] for key, value in (custom_mods.get("stat_mods") or {}).items()} - var_mods: Dict[str, str] = {key: value[0] for key, value in (custom_mods.get("var_mods") or {}).items()} - if out_path is None: # convert and return - return self.read_result(tmt_labeled, stat_mods=stat_mods, var_mods=var_mods) + return self.read_result(tmt_labeled, custom_mods=custom_mods) if isinstance(out_path, str): out_path = Path(out_path) @@ -85,11 +87,11 @@ def generate_internal(self, tmt_labeled: str, out_path: Optional[Union[str, Path if out_path.is_file(): # only read converted and return logger.info(f"Found search results in internal format at {out_path}, skipping conversion") - #TODO: internal_to_unimod + # TODO: internal_to_unimod return csv.read_file(out_path) # convert, save and return - df = self.read_result(tmt_labeled, stat_mods=stat_mods, var_mods=var_mods) + df = self.read_result(tmt_labeled, custom_mods=custom_mods) csv.write_file(df, out_path) return df diff --git a/spectrum_io/search_result/xisearch.py b/spectrum_io/search_result/xisearch.py index 5e1bbd9..ffeb5ef 100644 --- a/spectrum_io/search_result/xisearch.py +++ b/spectrum_io/search_result/xisearch.py @@ -3,9 +3,8 @@ import os import re from pathlib import Path -from typing import Union +from typing import Dict, Optional, Tuple, Union -import numpy as np import pandas as pd import spectrum_fundamentals.constants as c from spectrum_fundamentals.mod_string import xisearch_to_internal @@ -18,11 +17,16 @@ class Xisearch(SearchResults): """Handle search results from xisearch.""" - def read_result(self, tmt_labeled: str = "") -> pd.DataFrame: + def read_result( + self, + tmt_labeled: str = "", + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None, + ) -> pd.DataFrame: """ Function to read a csv of CSMs and perform some basic formatting. 
:param tmt_labeled: tmt label as str + :param custom_mods: dict with custom variable and static identifier and respecitve internal equivalent and mass :raises NotImplementedError: if a tmt label is provided :return: pd.DataFrame with the formatted data """ diff --git a/spectrum_io/spectral_library/dlib.py b/spectrum_io/spectral_library/dlib.py index 703926f..ba9558c 100644 --- a/spectrum_io/spectral_library/dlib.py +++ b/spectrum_io/spectral_library/dlib.py @@ -1,7 +1,7 @@ import sqlite3 import zlib from pathlib import Path -from typing import IO, Dict, Union +from typing import IO, Dict, Union, Tuple, Optional import numpy as np import pandas as pd @@ -125,12 +125,13 @@ def _create_database(conn: sqlite3.Connection): c.execute(sql_insert_meta, ["staleProteinMapping", "true"]) conn.commit() - def _write(self, out: Union[IO, sqlite3.Connection], data: Dict[str, np.ndarray], metadata: pd.DataFrame): + def _write(self, out: Union[IO, sqlite3.Connection], data: Dict[str, np.ndarray], metadata: pd.DataFrame, + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None): if isinstance(out, IO): raise TypeError("Not supported. Use msp/spectronaut if you want to write a text file.") seqs = metadata["SEQUENCE"] - modseqs = metadata["MODIFIED_SEQUENCE"] - mass_mod_sequences = internal_to_mod_mass(modseqs) + modseqs = metadata["MODIFIED_SEQUENCE"] + mass_mod_sequences = internal_to_mod_mass(modseqs, custom_mods) p_charges = metadata["PRECURSOR_CHARGE"] p_mzs = (metadata["MASS"] + (p_charges * PARTICLE_MASSES["PROTON"])) / p_charges diff --git a/spectrum_io/spectral_library/msp.py b/spectrum_io/spectral_library/msp.py index ffe42b4..1412375 100644 --- a/spectrum_io/spectral_library/msp.py +++ b/spectrum_io/spectral_library/msp.py @@ -1,5 +1,5 @@ from sqlite3 import Connection -from typing import IO, Dict, Union +from typing import IO, Dict, Union, Tuple, Optional import numpy as np import pandas as pd @@ -17,7 +17,8 @@ def _assemble_fragment_string(f_mz: float, f_int: float, f_a: bytes): annot = f_a[:-2].decode() if f_a.endswith(b"1") else f_a.replace(b"+", b"^").decode() return f'{f_mz:.8f}\t{f_int:.4f}\t"{annot}/0.0ppm"\n' - def _write(self, out: Union[IO, Connection], data: Dict[str, np.ndarray], metadata: pd.DataFrame): + def _write(self, out: Union[IO, Connection], data: Dict[str, np.ndarray], metadata: pd.DataFrame, + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None): # prepare metadata if isinstance(out, Connection): raise TypeError("Not supported. Use DLib if you want to write a database file.") diff --git a/spectrum_io/spectral_library/spectral_library.py b/spectrum_io/spectral_library/spectral_library.py index 1d8bbc9..b037c14 100644 --- a/spectrum_io/spectral_library/spectral_library.py +++ b/spectrum_io/spectral_library/spectral_library.py @@ -3,7 +3,7 @@ from multiprocessing.managers import ValueProxy from pathlib import Path from sqlite3 import Connection -from typing import IO, Dict, Optional, Union +from typing import IO, Dict, Optional, Union, Tuple import numpy as np import pandas as pd @@ -52,12 +52,14 @@ def write(self, *args, **kwargs): def _get_handle(self): return open(self.out_path, self.mode) - def async_write(self, queue: Queue, progress: ValueProxy): + def async_write(self, queue: Queue, progress: ValueProxy, + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None): """ Asynchronously write content to the output file from a queue. :param queue: A queue from which content will be retrieved for writing. 
:param progress: An integer value representing the progress of the writing process. + :param custom_mods: dict with custom variable and static identifier and respecitve internal equivalent and mass """ with self._get_handle() as out: self._initialize(out) @@ -65,7 +67,7 @@ def async_write(self, queue: Queue, progress: ValueProxy): content = queue.get() if content is None: break - self._write(out, *content) + self._write(out, *content, custom_mods=custom_mods) progress.value += 1 def _fragment_filter_passed( @@ -86,7 +88,8 @@ def _fragment_filter_passed( return (f_mz != -1) & (f_int >= self.min_intensity_threshold) @abstractmethod - def _write(self, out: Union[IO, Connection], data: Dict[str, np.ndarray], metadata: pd.DataFrame): + def _write(self, out: Union[IO, Connection], data: Dict[str, np.ndarray], metadata: pd.DataFrame, + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None): """ Internal writer function. @@ -97,6 +100,8 @@ def _write(self, out: Union[IO, Connection], data: Dict[str, np.ndarray], metada :param out: file handle accepting the data to be written to disk :param data: Dictionary containing TODO keys and corresponding values as numpy array :param metadata: a dataframe that contains the columns TODO + :param custom_mods: dict with custom variable and static identifier and respecitve internal equivalent and mass + """ pass diff --git a/spectrum_io/spectral_library/spectronaut.py b/spectrum_io/spectral_library/spectronaut.py index 4adb612..c4632f3 100644 --- a/spectrum_io/spectral_library/spectronaut.py +++ b/spectrum_io/spectral_library/spectronaut.py @@ -2,7 +2,7 @@ import re from itertools import chain, cycle from sqlite3 import Connection -from typing import IO, Dict, Tuple, Union +from typing import IO, Dict, Tuple, Union, Optional import numpy as np import pandas as pd @@ -26,7 +26,8 @@ def _assemble_fragment_string(f_int: float, f_mz: float, f_annot: bytes): f"{f_int:.4f},{f_mz:.8f},{m.group(2)},{m.group(1)},{m.group(3)},{m.group(4) if m.group(4) else 'noloss'}\n" ) - def _write(self, out: Union[IO, Connection], data: Dict[str, np.ndarray], metadata: pd.DataFrame): + def _write(self, out: Union[IO, Connection], data: Dict[str, np.ndarray], metadata: pd.DataFrame, + custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None): # prepare metadata if isinstance(out, Connection): raise TypeError("Not supported. 
Use DLib if you want to write a database file.") diff --git a/tests/unit_tests/data/psm_mods.pepXML b/tests/unit_tests/data/psm_mods.pepXML new file mode 100644 index 0000000..b2a5ca5 --- /dev/null +++ b/tests/unit_tests/data/psm_mods.pepXML @@ -0,0 +1,175 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/unit_tests/data/psm_tmt_internal_mods.csv b/tests/unit_tests/data/psm_tmt_internal_mods.csv new file mode 100644 index 0000000..166040e --- /dev/null +++ b/tests/unit_tests/data/psm_tmt_internal_mods.csv @@ -0,0 +1,5 @@ +,RAW_FILE,SCAN_NUMBER,MODIFIED_SEQUENCE,PRECURSOR_CHARGE,SCAN_EVENT_NUMBER,MASS,SCORE,REVERSE,SEQUENCE,PEPTIDE_LENGTH,PROTEINS +0,Ecl1_0277_R0096-01_S004436_D_H01_TMT18_01,2459,[UNIMOD:2016]-GQAVLAFQEQVGTGR,5,34,1863.023,8.221,True,GQAVLAFQEQVGTGR,15,rev_tr|E9Q8J5|E9Q8J5_MOUSE +1,Ecl1_0277_R0096-01_S004436_D_H01_TMT18_01,2486,[UNIMOD:2016]-TEVPM[UNIMOD:35]GLSLRTTSAR,5,42,1937.0531,7.083,False,TEVPMGLSLRTTSAR,15,tr|A0A0N4SW17|A0A0N4SW17_MOUSE +2,Ecl1_0277_R0096-01_S004436_D_H01_TMT18_01,2980,[UNIMOD:2016]-YSGN[UNIMOD:41]C[UNIMOD:4]DRQSVER,3,193,1773.8123,3.932,False,YSGNCDRQSVER,12,sp|Q9D413-2|SH2D6_MOUSE;sp|Q9D413|SH2D6_MOUSE;tr|A0A3Q4EBW9|A0A3Q4EBW9_MOUSE;tr|A0A3Q4ECA8|A0A3Q4ECA8_MOUSE;tr|A0A3Q4EGG3|A0A3Q4EGG3_MOUSE;tr|E0CYY5|E0CYY5_MOUSE;tr|E9QJU1|E9QJU1_MOUSE +3,Ecl1_0277_R0096-01_S004436_D_H01_TMT18_01,3945,[UNIMOD:2016]-ESTK[UNIMOD:2016]SAAER,3,647,1586.8887,10.839,True,ESTKSAAER,9,rev_sp|Q3TLH4-5|PRC2C_MOUSE;rev_sp|Q3TLH4|PRC2C_MOUSE;rev_tr|A0A0A0MQ79|A0A0A0MQ79_MOUSE;rev_tr|S4R209|S4R209_MOUSE;rev_tr|S4R294|S4R294_MOUSE;rev_tr|S4R2J9|S4R2J9_MOUSE diff --git a/tests/unit_tests/test_msfragger.py b/tests/unit_tests/test_msfragger.py index 2eac2b7..86be885 100644 --- a/tests/unit_tests/test_msfragger.py +++ b/tests/unit_tests/test_msfragger.py @@ -27,7 +27,7 @@ def test_read_result(self): self.assertTrue("PROTEINS" in df.columns) def test_read_msfragger(self): - """Test function for reading sage results and transforming to Prosit format.""" + """Test function for reading msfragger results and transforming to Prosit format.""" expected_msfragger_internal_path = Path(__file__).parent / "data" / "psm_tmt_internal.csv" internal_search_results_df = MSFragger(Path(__file__).parent / "data" / "psm_tmt.pepXML").read_result( @@ -38,3 +38,18 @@ def test_read_msfragger(self): print("Expected Columns:", expected_df.columns) pd.testing.assert_frame_equal(internal_search_results_df, expected_df) + + def test_read_msfragger_mods(self): + """Test function for reading msfragger results and transforming to Prosit format with custom mods.""" + expected_msfragger_internal_path = Path(__file__).parent / "data" / "psm_tmt_internal_mods.csv" + # mass values are placeholders; read_result only uses the internal identifier (first tuple element) + custom_mods = {"stat_mods": {"M[35]": ("[UNIMOD:35]", 15.994915)}, "var_mods": {"[41]": ("[UNIMOD:41]", 162.052824)}} + + internal_search_results_df = MSFragger(Path(__file__).parent / "data" / "psm_mods.pepXML").read_result( + tmt_labeled="tmtpro", custom_mods=custom_mods + ) + expected_df = pd.read_csv(expected_msfragger_internal_path, index_col=0) + print("Internal Search Results Columns:", internal_search_results_df.columns) + print("Expected Columns:", expected_df.columns) + + pd.testing.assert_frame_equal(internal_search_results_df, expected_df)
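
The refactoring above replaces the separate stat_mods/var_mods arguments of read_result with a single custom_mods dictionary: its "stat_mods" and "var_mods" entries each map a search-engine modification token to a tuple of the internal UNIMOD identifier and its mass, and the readers extract the identifier (first tuple element) themselves. A minimal usage sketch under that assumption follows; the token/identifier pairs are taken from the new unit test, while the output path and the mass values are illustrative placeholders, not values from this patch.

from pathlib import Path

from spectrum_io.search_result.msfragger import MSFragger

# custom_mods maps "stat_mods"/"var_mods" to {search-engine token: (internal identifier, mass)}.
# The mass values below are placeholders; read_result only uses the internal identifier.
custom_mods = {
    "stat_mods": {"M[35]": ("[UNIMOD:35]", 15.994915)},
    "var_mods": {"[41]": ("[UNIMOD:41]", 162.052824)},
}

search_results = MSFragger(Path("tests/unit_tests/data/psm_mods.pepXML"))

# Convert directly to the internal Prosit format ...
df = search_results.read_result(tmt_labeled="tmtpro", custom_mods=custom_mods)

# ... or convert once and cache the internal CSV (illustrative output path).
df = search_results.generate_internal(
    tmt_labeled="tmtpro",
    out_path=Path("psm_mods_internal.csv"),
    custom_mods=custom_mods,
)

The other readers (Mascot, MaxQuant, Sage, Xisearch) accept the same custom_mods structure, so generate_internal simply forwards the dictionary instead of splitting it into stat_mods and var_mods itself.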