Skip to content

Commit

Permalink
Added protein ids for dlib format
Browse files: browse the repository at this point in the history
  • Loading branch information
victorgiurcoiu committed Apr 28, 2024
1 parent 89216d1 commit 5eeec06
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions spectrum_io/spectral_library/dlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"PeptideSeq",
"RTInSeconds",
"PrecursorMz",
"ProteinIds"
]


Expand Down Expand Up @@ -102,7 +103,8 @@ def _create_database(conn: sqlite3.Connection):
RTInSecondsStop REAL,
MedianChromatogramEncodedLength INTEGER,
MedianChromatogramArray BLOB,
SourceFile TEXT NOT NULL DEFAULT 'Oktoberfest'
SourceFile TEXT NOT NULL DEFAULT 'Oktoberfest',
ProteinIds TEXT NOT NULL
)
"""
sql_create_p2p = """
Expand Down Expand Up @@ -136,6 +138,8 @@ def _write(self, out: Union[IO, sqlite3.Connection], data: Dict[str, np.ndarray]
p_mzs = (metadata["MASS"] + (p_charges * PARTICLE_MASSES["PROTON"])) / p_charges
# ces = metadata["COLLISION_ENERGY"]

pr_ids = metadata["PROTEINS"]

# prepare spectra
irts = data["irt"][:, 0] # should create a 1D view of the (n_peptides, 1) shaped array
f_mzss = data["mz"]
Expand All @@ -144,7 +148,7 @@ def _write(self, out: Union[IO, sqlite3.Connection], data: Dict[str, np.ndarray]

masked_values = self._calculate_masked_values(f_mzss, f_intss)

data_list = [*masked_values, p_charges, mass_mod_sequences, seqs, irts, p_mzs]
data_list = [*masked_values, p_charges, mass_mod_sequences, seqs, irts, p_mzs, pr_ids]
entries = pd.DataFrame(dict(zip(DLIB_COL_NAMES, data_list)))
p2p = pd.DataFrame({"PeptideSeq": seqs})

Expand Down

0 comments on commit 5eeec06

Please sign in to comment.