From 5ae75c6c93a05e2ecf195f569424c96a8304c443 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Tue, 26 Nov 2024 10:48:59 +0100 Subject: [PATCH 1/3] added custom modifications documentation --- docs/config.rst | 4 +++ docs/custom_mods.rst | 65 ++++++++++++++++++++++++++++++++++++++++++++ docs/usage.rst | 1 + 3 files changed, 70 insertions(+) create mode 100644 docs/custom_mods.rst diff --git a/docs/config.rst b/docs/config.rst index 8875f89..35b37ce 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -26,6 +26,10 @@ Always applicable +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | instrument_type | The type of mass spectrometer used to measure the spectra. Superseeds the value read from the mzML file (default). When predicting intensities with AlphaPept, choose one of ["QE", "LUMOS", "TIMSTOF", "SCIEXTOF"], if the instrument type of your data is not supported. | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | static_mods | Custom static modifications in the format "": [, ], e.g. "C": [4, 57.0215] where is search engine specific. Overwrites default modifications used. See `Custom modification <./custom_mods.html>`_ for detailed information. 
| + +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | var_mods | Custom variable modifications in the format "": [, ], e.g. "M(ox)": [35, 15.9949] where is search engine specific. Overwrites default modifications used. See `Custom modification <./custom_mods.html>`_ for detailed information. | + +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | numThreads | Number of raw/mzml files processed in parallel (parallelisation on file level); more processes than files has no effect and should be avoided; for spectral library generation, the number of parallel prediction processes, needs to be balanced with batchsize in this case; default = 1 | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | prediction_server | Server and port for obtaining peptide property predictions; default: "koina.wilhelmlab.org:443" | diff --git a/docs/custom_mods.rst b/docs/custom_mods.rst new file mode 100644 index 0000000..f84fdce --- /dev/null +++ b/docs/custom_mods.rst @@ -0,0 +1,65 @@ +Custom modifications +==================== + +By default, Oktoberfest uses only static carbamidomethylation and variable methionine oxidation. 
+Since it is impractical to ship the UNIMOD ID and monoisotopic modification mass for every modification one could think of, users can provide them manually via the configuration file. + +Important: TMT modifications must be provided using the "tag" flag in the configuration file; they are not custom modifications! + + +Required information +-------------------- + +To create the internal file format, Oktoberfest needs to map the search engine specific output to UNIMOD format, and it needs the monoisotopic modification mass. +The configuration file accepts two flags, "static_mods" and "var_mods", which both expect key-value mappings of the form: + +"<search_engine_specific_key>": [<UNIMOD_ID>, <monoisotopic_mass>] + +The UNIMOD_ID and the corresponding monoisotopic modification mass can be retrieved from `unimod.org <https://www.unimod.org/>`_ (click on "Login as Guest" if prompted for credentials). +Search for your desired modification, then find the UNIMOD ID in the "Accession #" column and the modification mass in the "Monoisotopic mass" column. + +The format of the key depends on the search engine. Please consult the documentation for the search engine of your choice. Below are a few examples for common modifications for the search engines that Oktoberfest supports out of the box. + +Each mapping translates the key to "[UNIMOD:<UNIMOD_ID>]" in the internal format (ProForma standard) and uses <monoisotopic_mass> when calculating the peptide mass. +Important: If you want to provide N-terminal or C-terminal modifications, you have to add a "^" or "$", respectively, in front of the key, since Oktoberfest automatically adds a "-" after (N-terminal) or before (C-terminal) the modification identifier (ProForma standard). This also means that "-" is part of the key if the search engine already uses this notation! + + +Example configuration +--------------------- + +Oktoberfest supports two configuration flags, one for static and one for variable modifications, containing the search engine specific format as a key, and the UNIMOD ID and monoisotopic modification mass as a value: + +.. 
code-block:: json + + "static_mods": { + "^(Lactylation (Lac))": [2114, 72.021129], + "C": [4, 57.021464] + }, + "var_mods": { + "M(ox)": [35, 15.994915], + "(Deamidation (NQ))": [7, 0.984016] + }, + + +The above example is a MaxQuant specific mapping. It converts any carbamidomethylated "C" to "C[UNIMOD:4]", variable oxidation of "M" into "M[UNIMOD:35]", and, since no specific amino acid was provided, any deamidation on any amino acid to "[UNIMOD:7]". Additionally, any N-terminal lactylation is converted to "[UNIMOD:2114]-" (note the added "-"). + + +Table of default modifications +------------------------------ + +.. table:: + :class: fixed-table + + +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+ + | MaxQuant | Sage | MSFragger | Modification | UNIMOD ID | Monoisotopic Mass | Internal representation | + +=========================+=============+===========+===========================+===========+===================+=========================+ + | C | C[+57.0215] | C[160] | carbamidomethylation of C | 4 | 57.0215 | C[UNIMOD:4] | + +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+ + | M(ox); M(Oxidation (M)) | M[+15.9949] | M[147] | oxidation of M | 35 | 15.9949 | M[UNIMOD:35] | + +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+ + | R(Citrullination) | R[+0.98402] | R[157] | citrullination of R | 7 | 0.984016 | R[UNIMOD:7] | + +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+ + | Q(Deamidation (NQ)) | Q[+0.98402] | Q[129] | deamidation of Q | 7 | 0.984016 | Q[UNIMOD:7] | + +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+ + | 
N(Deamidation (NQ)) | N[+0.98402] | N[115] | deamidation of N | 7 | 0.984016 | N[UNIMOD:7] | + +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+ diff --git a/docs/usage.rst b/docs/usage.rst index 851ab0f..70d81d3 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -11,6 +11,7 @@ The following outlines how to run a job with the high-level API, the three types config predictions internal_format + custom_mods peptides_format outputs svm_features From 519e087fcb2039a6b9a86f1c27e25cfe184fb8e2 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Tue, 26 Nov 2024 12:54:49 +0100 Subject: [PATCH 2/3] added all classes and fixed docs conf --- docs/API.rst | 31 ++++++++++++++++----------- docs/_templates/autosummary/class.rst | 31 +++++++++++++++++++++++++++ docs/conf.py | 18 ++++++++++++++++ docs/contributing.rst | 1 + docs/peptides_format.rst | 16 +++++++------- docs/predictions.rst | 2 +- docs/requirements.txt | 5 ++++- oktoberfest/data/spectra.py | 8 +++++-- oktoberfest/predict/koina.py | 2 ++ 9 files changed, 89 insertions(+), 25 deletions(-) create mode 100644 docs/_templates/autosummary/class.rst diff --git a/docs/API.rst b/docs/API.rst index 30786d6..5f2ed1f 100644 --- a/docs/API.rst +++ b/docs/API.rst @@ -12,6 +12,22 @@ Import Oktoberfest using import oktoberfest as ok +Data: :code:`data` +------------------ + +The data submodule provides access to PSMs, predictions, and metadata. + +.. module:: oktoberfest.data + +.. currentmodule:: oktoberfest + +.. autosummary:: + :toctree: api/data + + data.Spectra + data.Spectra.add_column + + Preprocessing: :code:`pp` ------------------------- @@ -60,8 +76,6 @@ Peptide preprocessing Predicting: :code:`pr` ---------------------- -.. TODO - add full class documentation through autosummary .. module:: oktoberfest.pr @@ -73,7 +87,6 @@ High-level prediction runner ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autosummary:: - :recursive: :toctree: api/pr pr.Predictor @@ -82,7 +95,6 @@ Koina interface ~~~~~~~~~~~~~~~ .. autosummary:: - :recursive: :toctree: api/pr pr.Koina @@ -91,7 +103,6 @@ DLomix interface ~~~~~~~~~~~~~~~~ .. autosummary:: - :recursive: :toctree: api/pr pr.DLomix @@ -105,9 +116,6 @@ Rescoring: :code:`re` .. currentmodule:: oktoberfest -General -~~~~~~~ - .. autosummary:: :toctree: api/re @@ -118,15 +126,12 @@ General Plotting: :code:`pl` ---------------------- +-------------------- .. module:: oktoberfest.pl .. currentmodule:: oktoberfest -General -~~~~~~~ - .. autosummary:: :toctree: api/pl @@ -136,4 +141,4 @@ General pl.plot_mean_sa_ce pl.plot_violin_sa_ce pl.plot_pred_rt_vs_irt - pl.plot_all + pl.plot_all \ No newline at end of file diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst new file mode 100644 index 0000000..77557d1 --- /dev/null +++ b/docs/_templates/autosummary/class.rst @@ -0,0 +1,31 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block methods %} + .. automethod:: __init__ + :no-index: + + {% if methods %} + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + :toctree: + {% for item in methods if item != "__init__" %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + .. 
autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/docs/conf.py b/docs/conf.py index 3d5753a..cb471ab 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -164,6 +164,22 @@ autodoc_typehints = "description" +autodoc_mock_imports = [ + "matplotlib", + "numpy", + "pandas", + "seaborn", + "scipy", + "koinapy", + "anndata", + "spectrum_fundamentals", + "spectrum_io", + "tqdm", + "picked_group_fdr", + "mokapot", + "dlomix", + "tensorflow", +] # -- Options for Texinfo output ---------------------------------------- @@ -254,3 +270,5 @@ def modurl(qualname): # -- Options for autosectionlabel mappings ----------------------------- autosectionlabel_prefix_document = True + +suppress_warnings = ["footnote.reference"] diff --git a/docs/contributing.rst b/docs/contributing.rst index 2640c0d..a71a217 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -135,3 +135,4 @@ It is recommended to open an issue before starting work on anything. This will allow a chance to talk it over with the owners and validate your approach. .. _pull request: https://github.com/wilhelm-lab/oktoberfest/pulls +.. _Code of Conduct: CODE_OF_CONDUCT.rst diff --git a/docs/peptides_format.rst b/docs/peptides_format.rst index 12b3ba6..f92fca7 100644 --- a/docs/peptides_format.rst +++ b/docs/peptides_format.rst @@ -14,8 +14,8 @@ In this case, you need to have the following parameter in your config file: Oktoberfest will then create the table of peptides with associated metadata in internal format (see below) based on the configuration in the spectralLibraryOptions of your configuration file. For a list of these options, check the `configuration options <./config.html>`_. -Format -~~~~~~ +Description of peptide list columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Oktoberfest expects a csv formatted file, where each row represent a peptide and optional mappings to proteins. 
@@ -29,8 +29,8 @@ Oktoberfest expects a csv formatted file, where each row represent a peptide and | proteins | An optional list of protein ids separated by ';'. If this column is left out, or if no protein is provided, the string "unknown" will be used as a proteinID in the spectral library. | +-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -Example -~~~~~~~ +Example of peptide list +~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: @@ -54,8 +54,8 @@ If you want to have full control, you can provide the table in internal format d Oktoberfest will then read the table directly. -Format -~~~~~~ +Description of internal file columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Oktoberfest expects a csv formatted file where each row represents a peptide with given metadata. The following table provides the file format specification. @@ -79,8 +79,8 @@ Oktoberfest expects a csv formatted file where each row represents a peptide wit | proteins | An optional list of protein ids separated by ';' | +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -Example -~~~~~~~ +Example of internal file +~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code-block:: diff --git a/docs/predictions.rst b/docs/predictions.rst index 715ef16..f0b0057 100644 --- a/docs/predictions.rst +++ b/docs/predictions.rst @@ -73,4 +73,4 @@ Importantly, this also gives you the option to refinement-learn the pre-trained For local intensity prediction and refinement learning, you need to provide either a path to a pre-trained model or the keyword `baseline` (for the runner to automatically download a model for you) as the intensity model in the config, and specify `localPredictionOptions` as well as optional `refinementLearningOptions`. -For more details, refer to the :ref:`job ` and :ref:`configuration ` docs. +For more details, refer to the :ref:`job ` and :ref:`configuration ` docs. diff --git a/docs/requirements.txt b/docs/requirements.txt index 3c8d7e7..8200efa 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1,4 @@ --r ../requirements.txt +sphinx-autobuild==2024.10.3 ; python_version >= "3.9" and python_full_version < "3.11.0" +sphinx-autodoc-typehints==2.3.0 ; python_version >= "3.9" and python_full_version < "3.11.0" +sphinx-click==6.0.0 ; python_full_version >= "3.9.0" and python_full_version < "3.11.0" +sphinx-rtd-theme==3.0.1 ; python_full_version >= "3.9.0" and python_full_version < "3.11.0" diff --git a/oktoberfest/data/spectra.py b/oktoberfest/data/spectra.py index 9a34cb6..a1668f8 100644 --- a/oktoberfest/data/spectra.py +++ b/oktoberfest/data/spectra.py @@ -247,7 +247,7 @@ def add_intensities_without_mapping(self, intensities: np.ndarray, fragment_type Add predicted intensities and convert to sparse matrix. This function takes a numpy array, containing intensities. - The intensitz arraz is aexpected to have the same shape as this object and will be added to + The intensity array is expected to have the same shape as this object and will be added to the respective lazer without checking the order of fragment annotations. 
:param intensities: intensity numpy array to add with shapes (n x m) @@ -360,7 +360,11 @@ def remove_decoys(self) -> None: self.__dict__ = Spectra(self[~self.obs.REVERSE].copy()).__dict__ def filter_by_score(self, threshold: float) -> None: - """Filter out peptides with search engine score below threshold in-place.""" + """ + Filter out peptides with search engine score below threshold in-place. + + :param threshold: The threshold to use below which peptides are filtered out. + """ self.__dict__ = Spectra(self[self.obs.SCORE >= threshold].copy()).__dict__ def remove_duplicates(self, num_duplicates: int) -> None: diff --git a/oktoberfest/predict/koina.py b/oktoberfest/predict/koina.py index 4ed9aea..a3a74a2 100644 --- a/oktoberfest/predict/koina.py +++ b/oktoberfest/predict/koina.py @@ -64,6 +64,7 @@ def predict(self, data: dict[str, np.ndarray] | pd.DataFrame | Spectra, **kwargs representing the model's output. Example:: + model = Koina("Prosit_2019_intensity") input_data = { "peptide_sequences": np.array(["PEPTIDEK" for _ in range(size)]), @@ -104,6 +105,7 @@ def predict_xl( :raises ValueError: If `data` is not of type `Spectra`, `pd.DataFrame`, or a dictionary. Example:: + model = Koina("Prosit_XL_CMS2_intensity") input_data = { "peptide_sequences_1": np.array(["PEPTIDEK" for _ in range(size)]), From 639151d3babc3ed57df3adb50bdfc9bd6ea55176 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Tue, 26 Nov 2024 13:00:23 +0100 Subject: [PATCH 3/3] removed duplicate add_column documentation in API --- docs/API.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/API.rst b/docs/API.rst index 5f2ed1f..6f57273 100644 --- a/docs/API.rst +++ b/docs/API.rst @@ -25,8 +25,6 @@ The data submodule provides access to PSMs, predictions, and metadata. :toctree: api/data data.Spectra - data.Spectra.add_column - Preprocessing: :code:`pp` -------------------------