From 5d1b879fb7c2372176ef7225bc3d717c24951be5 Mon Sep 17 00:00:00 2001 From: WassimG Date: Fri, 11 Oct 2024 09:56:41 +0000 Subject: [PATCH] fix documentation and cleanup tutorial --- docs/_key_contributors.rst | 2 +- docs/config.rst | 2 +- tutorials/Prosit_cit_tutorial.ipynb | 188 ++-------------------------- 3 files changed, 11 insertions(+), 181 deletions(-) diff --git a/docs/_key_contributors.rst b/docs/_key_contributors.rst index 054dd48..b32e462 100644 --- a/docs/_key_contributors.rst +++ b/docs/_key_contributors.rst @@ -6,7 +6,7 @@ * `Wassim Gabriel `_ * `Victor-George Giurcoiu `_ * `Firas Hamood `_ - * `Cecilia Jensen` + * Cecilia Jensen * `Mostafa Kalhor `_ * `Ludwig Lautenbacher `_ * `Julian Mueller `_ diff --git a/docs/config.rst b/docs/config.rst index 72b6921..8875f89 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -164,7 +164,7 @@ Applicable to PTM pipeline +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | possible_sites | List of Possible sites where the PTM can happen. e.g (['R','N','Q'] for citrullination/deamidation) | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | neutral_loss | Flag to annotate neutral loss peaks or not. | + | neutral_loss | Flag to annotate neutral loss peaks and use it as a feature in percolator. 
| +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Applicable to local intensity prediction diff --git a/tutorials/Prosit_cit_tutorial.ipynb b/tutorials/Prosit_cit_tutorial.ipynb index ade8436..556cf35 100755 --- a/tutorials/Prosit_cit_tutorial.ipynb +++ b/tutorials/Prosit_cit_tutorial.ipynb @@ -32,20 +32,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/cmnfs/home/w.gabriel/miniconda3/envs/new_oktoberfest/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "/cmnfs/home/w.gabriel/miniconda3/envs/new_oktoberfest/lib/python3.10/site-packages/tritonclient/grpc/service_pb2_grpc.py:21: RuntimeWarning: The grpc package installed is at version 1.63.0, but the generated code in grpc_service_pb2_grpc.py depends on grpcio>=1.64.1. Please upgrade your grpc module to grpcio>=1.64.1 or downgrade your generated code using grpcio-tools<=1.63.0. 
This warning will become an error in 1.65.0, scheduled for release on June 25, 2024.\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "from oktoberfest.runner import run_job\n", "from oktoberfest import __version__ as version\n", @@ -140,137 +129,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-10-10 12:01:35,122 - INFO - oktoberfest.utils.config::read Reading configuration from ./rescoring_config.json\n", - "2024-10-10 12:01:35,127 - INFO - oktoberfest.runner::run_job Oktoberfest version 0.8.1\n", - "Copyright 2024, Wilhelmlab at Technical University of Munich\n", - "2024-10-10 12:01:35,128 - INFO - oktoberfest.runner::run_job Job executed with the following config:\n", - "2024-10-10 12:01:35,130 - INFO - oktoberfest.runner::run_job {\n", - " \"type\": \"Rescoring\",\n", - " \"tag\": \"\",\n", - " \"inputs\": {\n", - " \"search_results\": \"/cmnfs/proj/prosit_cit/Test_oktoberfest/\",\n", - " \"search_results_type\": \"MSFragger\",\n", - " \"spectra\": \"/cmnfs/proj/prosit_cit/Test_oktoberfest/\",\n", - " \"spectra_type\": \"raw\"\n", - " },\n", - " \"output\": \"/cmnfs/proj/prosit_cit/Test_oktoberfest/output/\",\n", - " \"models\": {\n", - " \"intensity\": \"Prosit_2024_intensity_cit\",\n", - " \"irt\": \"Prosit_2024_irt_cit\"\n", - " },\n", - " \"prediction_server\": \"koina.wilhelmlab.org:443\",\n", - " \"ssl\": true,\n", - " \"thermoExe\": \"ThermoRawFileParser/ThermoRawFileParser.exe\",\n", - " \"numThreads\": 1,\n", - " \"fdr_estimation_method\": \"percolator\",\n", - " \"regressionMethod\": \"spline\",\n", - " \"allFeatures\": false,\n", - " \"pipeline\": \"cit\",\n", - " \"ptm_localization\": true,\n", - " \"ptmLocalizationOptions\": {\n", - " \"unimod_id\": 7,\n", - " \"possible_sites\": [\n", - " \"R\",\n", - " \"N\",\n", - " \"Q\"\n", - " ],\n", - " \"neutral_loss\": true\n", - " }\n", - "}\n", - 
"2024-10-10 12:01:35,131 - INFO - oktoberfest.utils.config::read Reading configuration from ./rescoring_config.json\n", - "2024-10-10 12:01:35,134 - INFO - oktoberfest.preprocessing.preprocessing::list_spectra Found 1 raw file in the spectra input directory.\n", - "2024-10-10 12:01:35,190 - INFO - oktoberfest.runner::_preprocess Converting search results from /cmnfs/proj/prosit_cit/Test_oktoberfest to internal search result.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:25<00:00, 25.22s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-10-10 12:02:01,827 - INFO - spectrum_io.search_result.msfragger::filter_valid_prosit_sequences #sequences before filtering for valid prosit sequences: 99769\n", - "2024-10-10 12:02:01,963 - INFO - spectrum_io.search_result.msfragger::filter_valid_prosit_sequences #sequences after filtering for valid prosit sequences: 96891\n", - "2024-10-10 12:02:02,578 - INFO - oktoberfest.runner::_preprocess Read 96891 PSMs from /cmnfs/proj/prosit_cit/Test_oktoberfest/output/msms/msms.prosit\n", - "2024-10-10 12:02:02,663 - INFO - oktoberfest.preprocessing.preprocessing::split_search Creating split search results file /cmnfs/proj/prosit_cit/Test_oktoberfest/output/msms/YIG_244_L009_04_01_U01_R2.rescore\n", - "2024-10-10 12:02:03,247 - INFO - spectrum_io.raw.thermo_raw::convert_raw_mzml Found converted file at /cmnfs/proj/prosit_cit/Test_oktoberfest/output/spectra/YIG_244_L009_04_01_U01_R2.mzML, skipping conversion\n", - "2024-10-10 12:02:03,380 - INFO - spectrum_io.raw.msraw::_read_mzml_pyteomics Reading mzML file: /cmnfs/proj/prosit_cit/Test_oktoberfest/output/spectra/YIG_244_L009_04_01_U01_R2.mzML\n", - "2024-10-10 12:02:49,752 - INFO - oktoberfest.preprocessing.preprocessing::merge_spectra_and_peptides Merging rawfile and search result\n", - "2024-10-10 12:02:49,824 - INFO - oktoberfest.preprocessing.preprocessing::annotate_spectral_library Annotating 
spectra...\n", - "2024-10-10 12:06:46,369 - INFO - oktoberfest.preprocessing.preprocessing::annotate_spectral_library Finished annotating.\n", - "2024-10-10 12:06:48,460 - INFO - oktoberfest.predict.predictor::from_config Using model Prosit_2024_intensity_cit via Koina\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Prosit_2024_intensity_cit:: 100%|██████████| 32/32 [00:08<00:00, 3.97it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-10-10 12:07:08,093 - INFO - oktoberfest.utils.process_step::is_done Skipping ce_calib.YIG_244_L009_04_01_U01_R2 step because /cmnfs/proj/prosit_cit/Test_oktoberfest/output/proc/ce_calib.YIG_244_L009_04_01_U01_R2.done was found.\n", - "2024-10-10 12:07:08,921 - INFO - oktoberfest.predict.predictor::from_config Using model Prosit_2024_intensity_cit via Koina\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Prosit_2024_intensity_cit:: 100%|██████████| 97/97 [00:22<00:00, 4.28it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-10-10 12:07:56,155 - INFO - oktoberfest.predict.predictor::from_config Using model Prosit_2024_irt_cit via Koina\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Prosit_2024_irt_cit:: 100%|██████████| 97/97 [00:05<00:00, 18.38it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-10-10 12:09:46,440 - INFO - spectrum_fundamentals.metrics.percolator::get_indices_below_fdr Found 29024 (out of 69348) targets below 0.01 FDR using spectral_angle as feature\n", - "2024-10-10 12:09:46,444 - INFO - spectrum_fundamentals.metrics.percolator::apply_lda_and_get_indices_below_fdr Found 29024 targets and 27543 decoys as input for the LDA model\n", - "2024-10-10 12:09:46,985 - INFO - spectrum_fundamentals.metrics.percolator::get_indices_below_fdr Found 35861 (out of 69348) targets below 0.01 FDR using lda_scores as feature\n", - 
"2024-10-10 12:09:47,101 - INFO - spectrum_fundamentals.metrics.percolator::calc Median absolute error predicted vs observed retention time on targets < 1% FDR: 0.6327482475378723\n", - "2024-10-10 12:09:52,159 - INFO - oktoberfest.runner::run_rescoring Merging input tab files for rescoring without peptide property prediction\n", - "2024-10-10 12:09:54,077 - INFO - oktoberfest.runner::run_rescoring Merging input tab files for rescoring with peptide property prediction\n", - "2024-10-10 12:10:01,507 - INFO - oktoberfest.rescore.rescore::rescore_with_percolator Starting percolator with command percolator --weights /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/original.percolator.weights.csv --num-threads 3 --subset-max-train 500000 --post-processing-tdc --testFDR 0.01 --trainFDR 0.01 --results-psms /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/original.percolator.psms.txt --decoy-results-psms /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/original.percolator.decoy.psms.txt --results-peptides /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/original.percolator.peptides.txt --decoy-results-peptides /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/original.percolator.decoy.peptides.txt /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/original.tab 2> /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/original.log\n", - "2024-10-10 12:10:14,105 - INFO - oktoberfest.rescore.rescore::rescore_with_percolator Finished rescoring using percolator.\n", - "2024-10-10 12:10:14,108 - INFO - oktoberfest.runner::_rescore Start percolator rescoring\n", - "2024-10-10 12:10:14,109 - INFO - oktoberfest.runner::_rescore True\n", - "2024-10-10 12:10:18,210 - INFO - oktoberfest.rescore.rescore::rescore_with_percolator Starting percolator with command percolator --weights /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/rescore.percolator.weights.csv 
--num-threads 3 --subset-max-train 500000 --post-processing-tdc --testFDR 0.01 --trainFDR 0.01 --results-psms /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/rescore.percolator.psms.txt --decoy-results-psms /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/rescore.percolator.decoy.psms.txt --results-peptides /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/rescore.percolator.peptides.txt --decoy-results-peptides /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/rescore.percolator.decoy.peptides.txt /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/rescore.tab 2> /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/rescore.log\n", - "2024-10-10 12:10:35,804 - INFO - oktoberfest.rescore.rescore::rescore_with_percolator Finished rescoring using percolator.\n", - "2024-10-10 12:10:36,466 - INFO - oktoberfest.rescore.rescore::rescore_with_percolator Starting percolator with command percolator --weights /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/localize_mod/rescore.percolator.weights.csv --num-threads 3 --subset-max-train 500000 --post-processing-tdc --testFDR 0.01 --trainFDR 0.01 --results-psms /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/localize_mod/rescore.percolator.psms.txt --decoy-results-psms /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/localize_mod/rescore.percolator.decoy.psms.txt --results-peptides /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/localize_mod/rescore.percolator.peptides.txt --decoy-results-peptides /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/localize_mod/rescore.percolator.decoy.peptides.txt /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/localize_mod/rescore.tab 2> /cmnfs/proj/prosit_cit/Test_oktoberfest/output/results/percolator/localize_mod/rescore.log\n", - "2024-10-10 12:10:41,506 - INFO - 
oktoberfest.rescore.rescore::rescore_with_percolator Finished rescoring using percolator.\n", - "2024-10-10 12:10:41,512 - INFO - oktoberfest.runner::run_rescoring Generating summary plots...\n", - "2024-10-10 12:10:41,513 - INFO - oktoberfest.runner::run_rescoring Finished rescoring.\n" - ] - } - ], + "outputs": [], "source": [ "run_job(\"./rescoring_config.json\")" ] @@ -302,20 +163,9 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_118297/3295614738.py:15: FutureWarning: The default value of regex will change from True to False in a future version.\n", - " df_prosit_psms['peptide'] = df_prosit_psms['peptide'].str.replace('._','')\n", - "/tmp/ipykernel_118297/3295614738.py:16: FutureWarning: The default value of regex will change from True to False in a future version.\n", - " df_prosit_psms['peptide'] = df_prosit_psms['peptide'].str.replace('_.','')\n" - ] - } - ], + "outputs": [], "source": [ "# Load spectral_angle and SpecId from the RESCORE TAB file\n", "combined_df = pd.read_csv( \n", @@ -331,8 +181,7 @@ ")\n", "\n", "#Remove _ appended to the peptide sequence\n", - "df_prosit_psms['peptide'] = df_prosit_psms['peptide'].str.replace('._','')\n", - "df_prosit_psms['peptide'] = df_prosit_psms['peptide'].str.replace('_.','')\n", + "df_prosit_psms['peptide'] = df_prosit_psms['peptide'].str.replace(r'\\._|_\\.', '', regex=True)\n", "\n", "# Rename the first column to \"SpecId\"\n", "df_prosit_psms.rename(columns={\"PSMId\": \"SpecId\"}, inplace=True)\n", @@ -365,28 +214,9 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "OSError", - "evalue": "Cannot save file into a non-existent directory: 'X:/internal_projects/L009_cit_temp_MS_Fragger'", - "output_type": "error", - "traceback": [ - 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[57], line 32\u001b[0m\n\u001b[1;32m 30\u001b[0m dt \u001b[38;5;241m=\u001b[39m pa\u001b[38;5;241m.\u001b[39maddPeptideAndPsitePositions(dt, CustomFasta, mod_dict\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mR[UNIMOD:7]\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m})\n\u001b[1;32m 31\u001b[0m dt \u001b[38;5;241m=\u001b[39m dt[dt[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSite positions\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m---> 32\u001b[0m \u001b[43mdt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mX:/internal_projects/L009_cit_temp_MS_Fragger/Cit_rescore_site_mapping.txt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;130;43;01m\\t\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/new_oktoberfest/lib/python3.10/site-packages/pandas/util/_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.._deprecate_kwarg..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 210\u001b[0m kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/new_oktoberfest/lib/python3.10/site-packages/pandas/core/generic.py:3720\u001b[0m, in \u001b[0;36mNDFrame.to_csv\u001b[0;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)\u001b[0m\n\u001b[1;32m 3709\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m, ABCDataFrame) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_frame()\n\u001b[1;32m 3711\u001b[0m formatter \u001b[38;5;241m=\u001b[39m DataFrameFormatter(\n\u001b[1;32m 3712\u001b[0m frame\u001b[38;5;241m=\u001b[39mdf,\n\u001b[1;32m 3713\u001b[0m header\u001b[38;5;241m=\u001b[39mheader,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 3717\u001b[0m decimal\u001b[38;5;241m=\u001b[39mdecimal,\n\u001b[1;32m 3718\u001b[0m )\n\u001b[0;32m-> 3720\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mDataFrameRenderer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mformatter\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_csv\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3721\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_or_buf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3722\u001b[0m \u001b[43m \u001b[49m\u001b[43mlineterminator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlineterminator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3723\u001b[0m \u001b[43m 
\u001b[49m\u001b[43msep\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msep\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3724\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3725\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3726\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompression\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3727\u001b[0m \u001b[43m \u001b[49m\u001b[43mquoting\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquoting\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3728\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3729\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex_label\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindex_label\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3730\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3731\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunksize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunksize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3732\u001b[0m \u001b[43m \u001b[49m\u001b[43mquotechar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquotechar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3733\u001b[0m \u001b[43m \u001b[49m\u001b[43mdate_format\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdate_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3734\u001b[0m \u001b[43m \u001b[49m\u001b[43mdoublequote\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdoublequote\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3735\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mescapechar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mescapechar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3736\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3737\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/new_oktoberfest/lib/python3.10/site-packages/pandas/util/_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.._deprecate_kwarg..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 210\u001b[0m kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/new_oktoberfest/lib/python3.10/site-packages/pandas/io/formats/format.py:1189\u001b[0m, in \u001b[0;36mDataFrameRenderer.to_csv\u001b[0;34m(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)\u001b[0m\n\u001b[1;32m 1168\u001b[0m created_buffer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 1170\u001b[0m csv_formatter \u001b[38;5;241m=\u001b[39m CSVFormatter(\n\u001b[1;32m 1171\u001b[0m path_or_buf\u001b[38;5;241m=\u001b[39mpath_or_buf,\n\u001b[1;32m 1172\u001b[0m lineterminator\u001b[38;5;241m=\u001b[39mlineterminator,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1187\u001b[0m formatter\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfmt,\n\u001b[1;32m 1188\u001b[0m 
)\n\u001b[0;32m-> 1189\u001b[0m \u001b[43mcsv_formatter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1191\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m created_buffer:\n\u001b[1;32m 1192\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(path_or_buf, StringIO)\n", - "File \u001b[0;32m~/miniconda3/envs/new_oktoberfest/lib/python3.10/site-packages/pandas/io/formats/csvs.py:241\u001b[0m, in \u001b[0;36mCSVFormatter.save\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;124;03mCreate the writer & save.\u001b[39;00m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;66;03m# apply compression and byte/text conversion\u001b[39;00m\n\u001b[0;32m--> 241\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 242\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 243\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 244\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 245\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 246\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompression\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 247\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 248\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m handles:\n\u001b[1;32m 249\u001b[0m \n\u001b[1;32m 250\u001b[0m \u001b[38;5;66;03m# Note: self.encoding is irrelevant here\u001b[39;00m\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwriter \u001b[38;5;241m=\u001b[39m csvlib\u001b[38;5;241m.\u001b[39mwriter(\n\u001b[1;32m 252\u001b[0m handles\u001b[38;5;241m.\u001b[39mhandle,\n\u001b[1;32m 253\u001b[0m lineterminator\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlineterminator,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 258\u001b[0m quotechar\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquotechar,\n\u001b[1;32m 259\u001b[0m )\n\u001b[1;32m 261\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_save()\n", - "File \u001b[0;32m~/miniconda3/envs/new_oktoberfest/lib/python3.10/site-packages/pandas/io/common.py:734\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 732\u001b[0m \u001b[38;5;66;03m# Only for write methods\u001b[39;00m\n\u001b[1;32m 733\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode \u001b[38;5;129;01mand\u001b[39;00m is_path:\n\u001b[0;32m--> 734\u001b[0m 
\u001b[43mcheck_parent_directory\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 736\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compression:\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compression \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mzstd\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 738\u001b[0m \u001b[38;5;66;03m# compression libraries do not like an explicit text-mode\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/new_oktoberfest/lib/python3.10/site-packages/pandas/io/common.py:597\u001b[0m, in \u001b[0;36mcheck_parent_directory\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 595\u001b[0m parent \u001b[38;5;241m=\u001b[39m Path(path)\u001b[38;5;241m.\u001b[39mparent\n\u001b[1;32m 596\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m parent\u001b[38;5;241m.\u001b[39mis_dir():\n\u001b[0;32m--> 597\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[38;5;124mrf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot save file into a non-existent directory: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mOSError\u001b[0m: Cannot save file into a non-existent directory: 'X:/internal_projects/L009_cit_temp_MS_Fragger'" - ] - } - ], + "outputs": [], "source": [ "# Remove contaminants proteins if existing\n", "merged_df['Organism'] = merged_df['proteinIds'].apply(lambda x: x.split(':')[0].split('_')[-1])\n",