From 6c71a98fb856a143a830549a77d137e042aa9303 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CKarim?= <“karimwael48@gmail.com”>
Date: Mon, 22 Jan 2024 14:22:33 +0100
Subject: [PATCH 1/3] added and rearranged protein column in the end for both
 sage and mq

---
 spectrum_io/search_result/maxquant.py | 4 ++++
 spectrum_io/search_result/sage.py     | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/spectrum_io/search_result/maxquant.py b/spectrum_io/search_result/maxquant.py
index e165e71..bd1e42d 100644
--- a/spectrum_io/search_result/maxquant.py
+++ b/spectrum_io/search_result/maxquant.py
@@ -51,6 +51,7 @@ def read_result(path: Union[str, Path], tmt_labeled: str) -> pd.DataFrame:
                 "MASS",  # = Calculated Precursor mass; TODO get column with experimental Precursor mass instead
                 "SCORE",
                 "REVERSE",
+                "PROTEINS",
             ],
             sep="\t",
         )
@@ -106,5 +107,8 @@ def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str) -> pd.DataFram
             df["MODIFIED_SEQUENCE"] = maxquant_to_internal(df["MODIFIED_SEQUENCE"].to_numpy())
         df["SEQUENCE"] = internal_without_mods(df["MODIFIED_SEQUENCE"])
         df["PEPTIDE_LENGTH"] = df["SEQUENCE"].apply(lambda x: len(x))
+        # adding protein column in the end
+        df['PROTEIN'] = df.pop('PROTEINS')
+
 
         return df
diff --git a/spectrum_io/search_result/sage.py b/spectrum_io/search_result/sage.py
index 920a33a..b77754c 100644
--- a/spectrum_io/search_result/sage.py
+++ b/spectrum_io/search_result/sage.py
@@ -73,6 +73,7 @@ def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str) -> pd.DataFram
         # converting proforma to unimode
         print(df)
         df["MODIFIED_SEQUENCE"] = sage_to_internal(df["MODIFIED_SEQUENCE"])
-
+        #adding protein column in the end
+        df['PROTEIN'] = df.pop('PROTEINS')
         print(df.columns)
         return df

From 3cc2395a47a2c782382da1a3a464dea1c77e12a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CKarim?= <“karimwael48@gmail.com”>
Date: Mon, 22 Jan 2024 14:33:11 +0100
Subject: [PATCH 2/3] minor fix: corrected typo PROTEIN to PROTEINS

---
 spectrum_io/search_result/maxquant.py | 2 +-
 spectrum_io/search_result/sage.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/spectrum_io/search_result/maxquant.py b/spectrum_io/search_result/maxquant.py
index bd1e42d..d82fa72 100644
--- a/spectrum_io/search_result/maxquant.py
+++ b/spectrum_io/search_result/maxquant.py
@@ -108,7 +108,7 @@ def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str) -> pd.DataFram
         df["SEQUENCE"] = internal_without_mods(df["MODIFIED_SEQUENCE"])
         df["PEPTIDE_LENGTH"] = df["SEQUENCE"].apply(lambda x: len(x))
         # adding protein column in the end
-        df['PROTEIN'] = df.pop('PROTEINS')
+        df['PROTEINS'] = df.pop('PROTEINS')
 
 
         return df
diff --git a/spectrum_io/search_result/sage.py b/spectrum_io/search_result/sage.py
index b77754c..26a430c 100644
--- a/spectrum_io/search_result/sage.py
+++ b/spectrum_io/search_result/sage.py
@@ -74,6 +74,6 @@ def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str) -> pd.DataFram
         print(df)
         df["MODIFIED_SEQUENCE"] = sage_to_internal(df["MODIFIED_SEQUENCE"])
         #adding protein column in the end
-        df['PROTEIN'] = df.pop('PROTEINS')
+        df['PROTEINS'] = df.pop('PROTEINS')
         print(df.columns)
         return df

From a56a26daa6b2b7023da2e4fbc30657c3a95c9e99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CKarim?= <“karimwael48@gmail.com”>
Date: Mon, 29 Jan 2024 10:06:33 +0100
Subject: [PATCH 3/3] added protein column to maxquant and sage, added a
 replacement function for the empty protein in mq

---
 spectrum_io/search_result/maxquant.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/spectrum_io/search_result/maxquant.py b/spectrum_io/search_result/maxquant.py
index d82fa72..1f3aa5f 100644
--- a/spectrum_io/search_result/maxquant.py
+++ b/spectrum_io/search_result/maxquant.py
@@ -63,7 +63,13 @@ def read_result(path: Union[str, Path], tmt_labeled: str) -> pd.DataFrame:
 
         df = MaxQuant.update_columns_for_prosit(df, tmt_labeled)
         return filter_valid_prosit_sequences(df)
+    # replaces missing (NA) protein entries with the placeholder 'missing_protein'
+    @staticmethod
+    def sanity_check(PROTEINS: pd.Series) -> pd.Series:
+        return PROTEINS.apply(lambda x: 'missing_protein' if pd.isna(x) else x)
 
+    
+    
     @staticmethod
     def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str) -> pd.DataFrame:
         """
@@ -109,6 +115,7 @@ def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str) -> pd.DataFram
         df["PEPTIDE_LENGTH"] = df["SEQUENCE"].apply(lambda x: len(x))
         # adding protein column in the end
         df['PROTEINS'] = df.pop('PROTEINS')
-
-
+        # calling the static method to replace unknown proteins
+        df['PROTEINS'] = MaxQuant.sanity_check(df['PROTEINS'])
+        
         return df