fix bugs for reading spectra and metadata

wilhelm-lab · Jan 24, 2024 · 104015e
1 parent 4bd53b4
commit 104015e
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 6 deletions.
diff --git a/spectrum_io/d/bruker.py b/spectrum_io/d/bruker.py
@@ -126,7 +126,8 @@ def read_timstof(d_path, scan_to_precursor_map):
     )
 
     # load filtered stuff
-    data = alphatims.bruker.TimsTOF(str(d_path))
+    data = alphatims.bruker.TimsTOF(str(d_path), slice_as_dataframe=False)
+    df.columns = ["FRAME", "SCAN", "PRECURSOR", "RETENTION_TIME", "INV_ION_MOBILITY", "MZ", "INTENSITY"]
 
     raw_idx = []
     for frames, precursors in zip(df_frame_group["FRAME"], df_frame_group["PRECURSOR"]):
@@ -154,21 +155,21 @@ def read_timstof(d_path, scan_to_precursor_map):
     # aggregation
     df_combined_grouped = (
         df.merge(scan_to_precursor_map)
-        .query("SCANNUMBEGIN <= SCAN <= SCANNUMEND")  # can probably be skipped
+        .query("SCAN_NUM_BEGIN <= SCAN <= SCAN_NUM_END")  # can probably be skipped
         .groupby(["PRECURSOR", "FRAME"], as_index=False)  # aggregate fragments per precursor in FRAME
         .agg(
             {
                 "INTENSITY": list,
                 "MZ": list,
                 "RETENTION_TIME": "first",
-                "COLLISIONENERGY": "first",
+                "COLLISION_ENERGY": "first",
                 "INV_ION_MOBILITY": "first",
             }
         )
         .merge(scan_to_precursor_map.reset_index())
         .groupby("SCAN_NUMBER", as_index=False)  # aggregate PRECURSORS for same SCAN_NUMBER
         .agg(
-            median_CE=("COLLISIONENERGY", "median"),
+            median_CE=("COLLISION_ENERGY", "median"),
             combined_INTENSITIES=("INTENSITY", lambda x: [item for sublist in x for item in sublist]),
             combined_MZ=("MZ", lambda x: [item for sublist in x for item in sublist]),
             median_RETENTION_TIME=("RETENTION_TIME", "median"),
@@ -184,7 +185,7 @@ def convert_d_hdf(
     output_path: Optional[Union[Path, str]] = None,
 ):
     data = alphatims.bruker.TimsTOF(str(input_path))
-    data.save_to_hdf(directory=str(output_path.parent), filename=str(output_path.name))
+    data.save_as_hdf(directory=str(output_path.parent), filename=str(output_path.name))
 
 
 def read_and_aggregate_timstof(source: Path, scan_to_precursor_map: Path):

diff --git a/spectrum_io/search_result/maxquant.py b/spectrum_io/search_result/maxquant.py
@@ -141,7 +141,7 @@ def read_metadata_for_timstof(self):
         df_pasef = pd.read_csv(
             self.path / "pasefMsmsScans.txt",
             sep="\t",
-            usecol=["Raw file", "Precursor", "Frame", "ScanNumBegin", "ScanNumEnd", "CollisionEnergy"],
+            usecols=["Raw file", "Precursor", "Frame", "ScanNumBegin", "ScanNumEnd", "CollisionEnergy"],
         )
         df_pasef.columns = ["RAW_FILE", "PRECURSOR", "FRAME", "SCAN_NUM_BEGIN", "SCAN_NUM_END", "COLLISION_ENERGY"]