wilhelm-lab · picciama · Nov 29, 2024 · Nov 26, 2024 · Nov 26, 2024 · Nov 26, 2024
diff --git a/docs/API.rst b/docs/API.rst
@@ -12,6 +12,20 @@ Import Oktoberfest using
 
    import oktoberfest as ok
 
+Data: :code:`data`
+------------------
+
+The data submodule provides access to PSMs, predictions, and metadata.
+
+.. module:: oktoberfest.data
+
+.. currentmodule:: oktoberfest
+
+.. autosummary::
+   :toctree: api/data
+
+   data.Spectra
+
 Preprocessing: :code:`pp`
 -------------------------
 
@@ -60,8 +74,6 @@ Peptide preprocessing
 
 Predicting: :code:`pr`
 ----------------------
-.. TODO
-    add full class documentation through autosummary
 
 .. module:: oktoberfest.pr
 
@@ -73,7 +85,6 @@ High-level prediction runner
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. autosummary::
-    :recursive:
     :toctree: api/pr
 
     pr.Predictor
@@ -82,7 +93,6 @@ Koina interface
 ~~~~~~~~~~~~~~~
 
 .. autosummary::
-    :recursive:
     :toctree: api/pr
 
     pr.Koina
@@ -91,7 +101,6 @@ DLomix interface
 ~~~~~~~~~~~~~~~~
 
 .. autosummary::
-    :recursive:
     :toctree: api/pr
 
     pr.DLomix
@@ -105,9 +114,6 @@ Rescoring: :code:`re`
 
 .. currentmodule:: oktoberfest
 
-General
-~~~~~~~
-
 .. autosummary::
    :toctree: api/re
 
@@ -118,15 +124,12 @@ General
 
 
 Plotting: :code:`pl`
----------------------
+--------------------
 
 .. module:: oktoberfest.pl
 
 .. currentmodule:: oktoberfest
 
-General
-~~~~~~~
-
 .. autosummary::
    :toctree: api/pl
 
@@ -136,4 +139,4 @@ General
    pl.plot_mean_sa_ce
    pl.plot_violin_sa_ce
    pl.plot_pred_rt_vs_irt
-   pl.plot_all
+   pl.plot_all
diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst
@@ -0,0 +1,31 @@
+{{ fullname | escape | underline}}
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
+
+   {% block methods %}
+   .. automethod:: __init__
+      :no-index:
+
+   {% if methods %}
+   .. rubric:: {{ _('Methods') }}
+
+   .. autosummary::
+      :toctree:
+      {% for item in methods if item != "__init__" %}
+         ~{{ name }}.{{ item }}
+      {%- endfor %}
+   {% endif %}
+   {% endblock %}
+
+   {% block attributes %}
+   {% if attributes %}
+   .. rubric:: {{ _('Attributes') }}
+
+   .. autosummary::
+   {% for item in attributes %}
+      ~{{ name }}.{{ item }}
+   {%- endfor %}
+   {% endif %}
+   {% endblock %}
diff --git a/docs/conf.py b/docs/conf.py
@@ -164,6 +164,22 @@
 
 autodoc_typehints = "description"
 
+autodoc_mock_imports = [
+    "matplotlib",
+    "numpy",
+    "pandas",
+    "seaborn",
+    "scipy",
+    "koinapy",
+    "anndata",
+    "spectrum_fundamentals",
+    "spectrum_io",
+    "tqdm",
+    "picked_group_fdr",
+    "mokapot",
+    "dlomix",
+    "tensorflow",
+]
 
 # -- Options for Texinfo output ----------------------------------------
 
@@ -254,3 +270,5 @@ def modurl(qualname):
 
 # -- Options for autosectionlabel mappings -----------------------------
 autosectionlabel_prefix_document = True
+
+suppress_warnings = ["footnote.reference"]
diff --git a/docs/config.rst b/docs/config.rst
@@ -26,6 +26,10 @@ Always applicable
    +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
    |     instrument_type        | The type of mass spectrometer used to measure the spectra. Superseeds the value read from the mzML file (default). When predicting intensities with AlphaPept, choose one of ["QE", "LUMOS", "TIMSTOF", "SCIEXTOF"], if the instrument type of your data is not supported.                 |
    +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+   | static_mods                | Custom static modifications in the format "<key>": [<UNIMOD_ID>, <mod_mass>], e.g. "C": [4, 57.0215] where <key> is search engine specific. Overwrites default modifications used. See `Custom modification <./custom_mods.html>`_ for detailed information.                               |
+   +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+   | var_mods                   | Custom variable modifications in the format "<key>": [<UNIMOD_ID>, <mod_mass>], e.g. "M(ox)": [35, 15.9949] where <key> is search engine specific. Overwrites default modifications used. See `Custom modification <./custom_mods.html>`_ for detailed information.                        |
+   +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
    | numThreads                 | Number of raw/mzml files processed in parallel (parallelisation on file level); more processes than files has no effect and should be avoided; for spectral library generation, the number of parallel prediction processes, needs to be balanced with batchsize in this case; default = 1 |
    +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
    | prediction_server          | Server and port for obtaining peptide property predictions; default: "koina.wilhelmlab.org:443"                                                                                                                                                                                            |

diff --git a/docs/contributing.rst b/docs/contributing.rst
@@ -135,3 +135,4 @@ It is recommended to open an issue before starting work on anything.
 This will allow a chance to talk it over with the owners and validate your approach.
 
 .. _pull request: https://github.com/wilhelm-lab/oktoberfest/pulls
+.. _Code of Conduct: CODE_OF_CONDUCT.rst
diff --git a/docs/custom_mods.rst b/docs/custom_mods.rst
@@ -0,0 +1,65 @@
+Custom modifications
+====================
+
+By default, Oktoberfest uses only static carbamidomethylation and variable methionine oxidation.
+Since adding the required UNIMOD id and monoisotopic modification mass for any modification one could think of is quite cumbersome, the user can provide them manually, via the configuration file.
+
+Important: TMT modifications must be provided using the "tag" flag in the configuration file; they are not custom modifications!
+
+
+Required information
+--------------------
+
+To create the internal file format, Oktoberfest needs to map the search engine specific output to UNIMOD format and needs to know the monoisotopic modification mass.
+The configuration file accepts two flags, "static_mods", and "var_mods", which both expect key-value mappings of the form:
+
+"<key>": [<UNIMOD_ID>, <mod_mass>]
+
+The UNIMOD_ID and the corresponding monoisotopic modification mass can be retrieved from `unimod.org <https://unimod.org/>`_ (click on "Login as Guest" if prompted for credentials).
+Search for your desired modification, then find the UNIMOD ID in the "Accession #" column and the modification mass in the "Monoisotopic mass" column.
+
+The format of the key depends on the search engine. Please consult the documentation for the search engine of your choice. Below are a few examples for common modifications for the search engines that Oktoberfest supports out of the box.
+
+Oktoberfest translates <key> to "[UNIMOD:<UNIMOD_ID>]" in the internal format (ProForma standard) and adds <mod_mass> to the peptide mass.
+Important: If you want to provide N-terminal or C-terminal modifications, you have to add a "^" or "$", respectively, in front of the key, since Oktoberfest automatically appends or prepends a "-" to the modification identifier (ProForma standard). This also means that "-" is part of the key if the search engine already uses this notation!
+
+
+Example configuration
+---------------------
+
+Oktoberfest supports two configuration flags, one for static and one for variable modifications, containing the search engine specific format as a key, and the UNIMOD ID and monoisotopic modification mass as a value:
+
+.. code-block:: json
+
+    "static_mods": {
+        "^(Lactylation (Lac))": [2114, 72.021129],
+        "C": [4, 57.021464]
+    },
+    "var_mods": {
+        "M(ox)": [35, 15.994915],
+        "(Deamidation (NQ))": [7, 0.984016]
+    },
+
+
+The above example is a MaxQuant-specific mapping. It converts any carbamidomethylated "C" to "C[UNIMOD:4]", variable oxidation of "M" to "M[UNIMOD:35]", and, since no specific amino acid was provided, any deamidation on any amino acid to "[UNIMOD:7]". Additionally, any N-terminal lactylation is converted to "[UNIMOD:2114]-" (note the added "-").
+
+
+Table of default modifications
+------------------------------
+
+.. table::
+   :class: fixed-table
+
+   +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+
+   | MaxQuant                | Sage        | MSFragger | Modification              | UNIMOD ID | Monoisotopic Mass | Internal representation |
+   +=========================+=============+===========+===========================+===========+===================+=========================+
+   | C                       | C[+57.0215] | C[160]    | carbamidomethylation of C | 4         | 57.0215           | C[UNIMOD:4]             |
+   +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+
+   | M(ox); M(Oxidation (M)) | M[+15.9949] | M[147]    | oxidation of M            | 35        | 15.9949           | M[UNIMOD:35]            |
+   +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+
+   | R(Citrullination)       | R[+0.98402] | R[157]    | citrullination of R       | 7         | 0.984016          | R[UNIMOD:7]             |
+   +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+
+   | Q(Deamidation (NQ))     | Q[+0.98402] | Q[129]    | deamidation of Q          | 7         | 0.984016          | Q[UNIMOD:7]             |
+   +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+
+   | N(Deamidation (NQ))     | N[+0.98402] | N[115]    | deamidation of N          | 7         | 0.984016          | N[UNIMOD:7]             |
+   +-------------------------+-------------+-----------+---------------------------+-----------+-------------------+-------------------------+
diff --git a/docs/peptides_format.rst b/docs/peptides_format.rst
@@ -14,8 +14,8 @@ In this case, you need to have the following parameter in your config file:
 
 Oktoberfest will then create the table of peptides with associated metadata in internal format (see below) based on the configuration in the spectralLibraryOptions of your configuration file. For a list of these options, check the `configuration options <./config.html>`_.
 
-Format
-~~~~~~
+Description of peptide list columns
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Oktoberfest expects a csv formatted file, where each row represent a peptide and optional mappings to proteins.
 
@@ -29,8 +29,8 @@ Oktoberfest expects a csv formatted file, where each row represent a peptide and
     | proteins          | An optional list of protein ids separated by ';'. If this column is left out, or if no protein is provided, the string "unknown" will be used as a proteinID in the spectral library.                        |
     +-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
-Example
-~~~~~~~
+Example of peptide list
+~~~~~~~~~~~~~~~~~~~~~~~
 
 .. code-block::
 
@@ -54,8 +54,8 @@ If you want to have full control, you can provide the table in internal format d
 
 Oktoberfest will then read the table directly.
 
-Format
-~~~~~~
+Description of internal file columns
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Oktoberfest expects a csv formatted file where each row represents a peptide with given metadata. The following table provides the file format specification.
 
@@ -79,8 +79,8 @@ Oktoberfest expects a csv formatted file where each row represents a peptide wit
     | proteins          | An optional list of protein ids separated by ';'                                                                                                                                                                                                                                    |
     +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
-Example
-~~~~~~~
+Example of internal file
+~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. code-block::
 

diff --git a/docs/predictions.rst b/docs/predictions.rst
@@ -73,4 +73,4 @@ Importantly, this also gives you the option to refinement-learn the pre-trained
 
 For local intensity prediction and refinement learning, you need to provide either a path to a pre-trained model or the keyword `baseline`
 (for the runner to automatically download a model for you) as the intensity model in the config, and specify `localPredictionOptions` as well as optional `refinementLearningOptions`.
-For more details, refer to the :ref:`job <jobs:b) with refinement>` and :ref:`configuration <config:applicable to local intensity prediction and transfer learning>` docs.
+For more details, refer to the :ref:`job <jobs:b) with refinement>` and :ref:`configuration <config:Applicable to local intensity prediction>` docs.
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1 +1,4 @@
--r ../requirements.txt
+sphinx-autobuild==2024.10.3 ; python_version >= "3.9" and python_full_version < "3.11.0"
+sphinx-autodoc-typehints==2.3.0 ; python_version >= "3.9" and python_full_version < "3.11.0"
+sphinx-click==6.0.0 ; python_full_version >= "3.9.0" and python_full_version < "3.11.0"
+sphinx-rtd-theme==3.0.1 ; python_full_version >= "3.9.0" and python_full_version < "3.11.0"
diff --git a/docs/usage.rst b/docs/usage.rst
@@ -11,6 +11,7 @@ The following outlines how to run a job with the high-level API, the three types
     config
     predictions
     internal_format
+    custom_mods
     peptides_format
     outputs
     svm_features
diff --git a/oktoberfest/data/spectra.py b/oktoberfest/data/spectra.py
@@ -247,7 +247,7 @@ def add_intensities_without_mapping(self, intensities: np.ndarray, fragment_type
         Add predicted intensities and convert to sparse matrix.
 
         This function takes a numpy array, containing intensities.
-        The intensitz arraz is aexpected to have the same shape as this object and will be added to
+        The intensity array is expected to have the same shape as this object and will be added to
         the respective lazer without checking the order of fragment annotations.
 
         :param intensities: intensity numpy array to add with shapes (n x m)
@@ -360,7 +360,11 @@ def remove_decoys(self) -> None:
         self.__dict__ = Spectra(self[~self.obs.REVERSE].copy()).__dict__
 
     def filter_by_score(self, threshold: float) -> None:
-        """Filter out peptides with search engine score below threshold in-place."""
+        """
+        Filter out peptides with search engine score below threshold in-place.
+
+        :param threshold: Minimum search engine score; peptides with a score below this value are removed.
+        """
         self.__dict__ = Spectra(self[self.obs.SCORE >= threshold].copy()).__dict__
 
     def remove_duplicates(self, num_duplicates: int) -> None:

diff --git a/oktoberfest/predict/koina.py b/oktoberfest/predict/koina.py
@@ -64,6 +64,7 @@ def predict(self, data: dict[str, np.ndarray] | pd.DataFrame | Spectra, **kwargs
             representing the model's output.
 
         Example::
+
             model = Koina("Prosit_2019_intensity")
             input_data = {
                 "peptide_sequences": np.array(["PEPTIDEK" for _ in range(size)]),
@@ -104,6 +105,7 @@ def predict_xl(
         :raises ValueError: If `data` is not of type `Spectra`, `pd.DataFrame`, or a dictionary.
 
         Example::
+
             model = Koina("Prosit_XL_CMS2_intensity")
             input_data = {
                 "peptide_sequences_1": np.array(["PEPTIDEK" for _ in range(size)]),
Original file line number	Diff line number	Diff line change
Expand Up		@@ -135,3 +135,4 @@ It is recommended to open an issue before starting work on anything.
		This will allow a chance to talk it over with the owners and validate your approach.

		.. _pull request: https://github.com/wilhelm-lab/oktoberfest/pulls
		.. _Code of Conduct: CODE_OF_CONDUCT.rst