diff --git a/spectrum_io/spectral_library/dlib.py b/spectrum_io/spectral_library/dlib.py index 3abf10d..1d0ecb0 100644 --- a/spectrum_io/spectral_library/dlib.py +++ b/spectrum_io/spectral_library/dlib.py @@ -20,6 +20,7 @@ "PeptideSeq", "RTInSeconds", "PrecursorMz", + "ProteinIds" ] @@ -102,7 +103,8 @@ def _create_database(conn: sqlite3.Connection): RTInSecondsStop REAL, MedianChromatogramEncodedLength INTEGER, MedianChromatogramArray BLOB, - SourceFile TEXT NOT NULL DEFAULT 'Oktoberfest' + SourceFile TEXT NOT NULL DEFAULT 'Oktoberfest', + ProteinIds TEXT NOT NULL ) """ sql_create_p2p = """ @@ -136,6 +138,8 @@ def _write(self, out: Union[IO, sqlite3.Connection], data: Dict[str, np.ndarray] p_mzs = (metadata["MASS"] + (p_charges * PARTICLE_MASSES["PROTON"])) / p_charges # ces = metadata["COLLISION_ENERGY"] + pr_ids = metadata["PROTEINS"] + # prepare spectra irts = data["irt"][:, 0] # should create a 1D view of the (n_peptides, 1) shaped array f_mzss = data["mz"] @@ -144,7 +148,7 @@ def _write(self, out: Union[IO, sqlite3.Connection], data: Dict[str, np.ndarray] masked_values = self._calculate_masked_values(f_mzss, f_intss) - data_list = [*masked_values, p_charges, mass_mod_sequences, seqs, irts, p_mzs] + data_list = [*masked_values, p_charges, mass_mod_sequences, seqs, irts, p_mzs, pr_ids] entries = pd.DataFrame(dict(zip(DLIB_COL_NAMES, data_list))) p2p = pd.DataFrame({"PeptideSeq": seqs})