Merge pull request #116 from wilhelm-lab/release/0.6.0

Release/0.6.0
wilhelm-lab · Jul 10, 2024 · b1e1e59
2 parents 1360da5 + ac24187
commit b1e1e59
Show file tree

Hide file tree

Showing 16 changed files with 525 additions and 533 deletions.
diff --git a/.cookietemple.yml b/.cookietemple.yml
@@ -15,5 +15,5 @@ full_name: Victor Giurcoiu
 email: [email protected]
 project_name: spectrum_fundamentals
 project_short_description: Fundamentals public repo
-version: 0.5.4
+version: 0.6.0
 license: MIT
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
@@ -1,5 +1,5 @@
-name-template: "0.5.4 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
-tag-template: 0.5.4 # <<COOKIETEMPLE_FORCE_BUMP>>
+name-template: "0.6.0 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
+tag-template: 0.6.0 # <<COOKIETEMPLE_FORCE_BUMP>>
 exclude-labels:
     - "skip-changelog"
 

diff --git a/.github/workflows/publish_package.yml b/.github/workflows/publish_package.yml
@@ -29,7 +29,7 @@ jobs:
                   poetry build --ansi
 
             - name: Publish package on PyPI
-              uses: pypa/gh-action-pypi-publish@v1.8.14
+              uses: pypa/gh-action-pypi-publish@v1.9.0
               with:
                   # TODO COOKIETEMPLE: Configure your PyPI Token to enable automatic deployment to PyPi on releases
                   # https://help.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets

diff --git a/.github/workflows/release_drafter.yml b/.github/workflows/release_drafter.yml
@@ -14,6 +14,6 @@ jobs:
     update_release_draft:
         runs-on: ubuntu-latest
         steps:
-            - uses: release-drafter/release-drafter@v5
+            - uses: release-drafter/release-drafter@v6
               env:
                   GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
@@ -165,4 +165,4 @@ jobs:
               run: nox --force-color --session=coverage -- xml -i
 
             - name: Upload coverage report
-              uses: codecov/codecov-action@v3.1.1
+              uses: codecov/codecov-action@v4.5.0
diff --git a/cookietemple.cfg b/cookietemple.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.5.4
+current_version = 0.6.0
 
 [bumpversion_files_whitelisted]
 init_file = spectrum_fundamentals/__init__.py

diff --git a/docs/conf.py b/docs/conf.py
@@ -54,9 +54,9 @@
 # the built documents.
 #
 # The short X.Y version.
-version = "0.5.4"
+version = "0.6.0"
 # The full version, including alpha/beta/rc tags.
-release = "0.5.4"
+release = "0.6.0"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/noxfile.py b/noxfile.py
@@ -126,7 +126,7 @@ def safety(session: Session) -> None:
     """Scan dependencies for insecure packages."""
     requirements = session.poetry.export_requirements()
     session.install("safety")
-    session.run("safety", "check", "--full-report", f"--file={requirements}", "--ignore=51457")
+    session.run("safety", "check", "--full-report", f"--file={requirements}", "--ignore=51457,70612")
 
 
 @session(python=python_versions)

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "spectrum_fundamentals"
-version = "0.5.4"  # <<COOKIETEMPLE_FORCE_BUMP>>
+version = "0.6.0"  # <<COOKIETEMPLE_FORCE_BUMP>>
 description = "Fundamental functions, annotation pipeline and constants for oktoberfest"
 authors = ["Wilhelmlab at Technical University of Munich"]
 license = "MIT"

diff --git a/setup.py b/setup.py
diff --git a/spectrum_fundamentals/__init__.py b/spectrum_fundamentals/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Mario Picciani"
 __email__ = "[email protected]"
-__version__ = "0.5.4"
+__version__ = "0.6.0"
 
 import logging
 import logging.handlers

diff --git a/spectrum_fundamentals/__main__.py b/spectrum_fundamentals/__main__.py
@@ -5,7 +5,7 @@
 
 
 @click.command()
-@click.version_option(version="0.5.4", message=click.style("spectrum_fundamentals Version: 0.5.4"))
+@click.version_option(version="0.6.0", message=click.style("spectrum_fundamentals Version: 0.6.0"))
 def main() -> None:
     """spectrum_fundamentals."""
 

diff --git a/spectrum_fundamentals/constants.py b/spectrum_fundamentals/constants.py
@@ -331,6 +331,7 @@
     "PRECURSOR_MASS_EXP",
     "SCORE",
     "REVERSE",
+    "PROTEINS",
 ]
 META_DATA_COLUMNS = SHARED_DATA_COLUMNS + META_DATA_ONLY_COLUMNS
 MZML_ONLY_DATA_COLUMNS = [

diff --git a/spectrum_fundamentals/metrics/percolator.py b/spectrum_fundamentals/metrics/percolator.py
@@ -140,7 +140,7 @@ def get_aligned_predicted_retention_times(
                 observed_rts = observed_rts[keep_idxs[0]]
                 predicted_rts = predicted_rts[keep_idxs[0]]
 
-                discard_percentage *= 1.5
+                discard_percentage *= 1.2
 
         logger.debug(f"Observed RT anchor points:\n{observed_retention_times_fdr_filtered}")
         logger.debug(f"Predicted RT anchor points:\n{predicted_retention_times_fdr_filtered}")
@@ -348,8 +348,9 @@ def get_indices_below_fdr(self, feature_name: str, fdr_cutoff: float = 0.01) ->
 
         accepted_indices = scores_df.index[scores_df["fdr"] < fdr_cutoff]
         if len(accepted_indices) == 0:
-            logger.error(
-                f"Could not find any targets below {fdr_cutoff} out of {len(scores_df.index)} targets in total"
+            logger.warning(
+                f"Could not find any targets below {fdr_cutoff} out of {len(scores_df.index)} "
+                "targets in total. Retrying with higher FDR cutoff..."
             )
             return np.array([])
 
@@ -418,44 +419,42 @@ def calc(self):
                 lda_failed = False
                 idxs_below_lda_fdr = self.apply_lda_and_get_indices_below_fdr(fdr_cutoff=self.fdr_cutoff)
                 current_fdr = self.fdr_cutoff
-                while len(idxs_below_lda_fdr) == 0:
+                while len(idxs_below_lda_fdr) <= 500:
                     current_fdr += 0.01
                     idxs_below_lda_fdr = self.apply_lda_and_get_indices_below_fdr(fdr_cutoff=current_fdr)
                     if current_fdr >= 0.1:
                         lda_failed = True
                         break
+                if lda_failed:
+                    sampled_idxs = Percolator.sample_balanced_over_bins(
+                        self.metadata[["RETENTION_TIME", "PREDICTED_IRT"]]
+                    )
                 else:
-                    if lda_failed:
-                        sampled_idxs = Percolator.sample_balanced_over_bins(
-                            self.metadata[["RETENTION_TIME", "PREDICTED_IRT"]]
-                        )
-                    else:
-                        sampled_idxs = Percolator.sample_balanced_over_bins(
-                            self.metadata[["RETENTION_TIME", "PREDICTED_IRT"]].iloc[idxs_below_lda_fdr, :]
-                        )
-
-                    file_sample = self.metadata.iloc[sampled_idxs].sort_values("PREDICTED_IRT")
-                    aligned_predicted_rts = Percolator.get_aligned_predicted_retention_times(
-                        file_sample["RETENTION_TIME"],
-                        file_sample["PREDICTED_IRT"],
-                        self.metadata["PREDICTED_IRT"],
-                        self.regression_method,
+                    sampled_idxs = Percolator.sample_balanced_over_bins(
+                        self.metadata[["RETENTION_TIME", "PREDICTED_IRT"]].iloc[idxs_below_lda_fdr, :]
                     )
 
-                    self.metrics_val["RT"] = self.metadata["RETENTION_TIME"]
-                    self.metrics_val["pred_RT"] = self.metadata["PREDICTED_IRT"]
-                    self.metrics_val["iRT"] = aligned_predicted_rts
-                    self.metrics_val["collision_energy_aligned"] = self.metadata["COLLISION_ENERGY"] / 100.0
-                    self.metrics_val["abs_rt_diff"] = np.abs(self.metadata["RETENTION_TIME"] - aligned_predicted_rts)
-
-                    median_abs_error_lda_targets = np.median(self.metrics_val["abs_rt_diff"].iloc[idxs_below_lda_fdr])
-                    logger.info(
-                        "Median absolute error predicted vs observed retention time on targets < 1% FDR: "
-                        f"{median_abs_error_lda_targets}"
-                    )
-                    logger.debug(
-                        self.metrics_val[["RT", "pred_RT", "abs_rt_diff", "lda_scores"]].iloc[idxs_below_lda_fdr, :]
-                    )
+                file_sample = self.metadata.iloc[sampled_idxs].sort_values("PREDICTED_IRT")
+                aligned_predicted_rts = Percolator.get_aligned_predicted_retention_times(
+                    file_sample["RETENTION_TIME"],
+                    file_sample["PREDICTED_IRT"],
+                    self.metadata["PREDICTED_IRT"],
+                    self.regression_method,
+                )
+
+                self.metrics_val["RT"] = self.metadata["RETENTION_TIME"]
+                self.metrics_val["pred_RT"] = self.metadata["PREDICTED_IRT"]
+                self.metrics_val["iRT"] = aligned_predicted_rts
+                self.metrics_val["collision_energy_aligned"] = self.metadata["COLLISION_ENERGY"] / 100.0
+                self.metrics_val["abs_rt_diff"] = np.abs(self.metadata["RETENTION_TIME"] - aligned_predicted_rts)
+                if lda_failed:
+                    median_abs_error = np.median(self.metrics_val["abs_rt_diff"])
+                else:
+                    median_abs_error = np.median(self.metrics_val["abs_rt_diff"].iloc[idxs_below_lda_fdr])
+                logger.info(
+                    "Median absolute error predicted vs observed retention time on targets < 1% FDR: "
+                    f"{median_abs_error}"
+                )
 
         else:
             self.metrics_val["andromeda"] = self.metadata["SCORE"]
@@ -497,6 +496,10 @@ def spline(knots: int, x: np.ndarray, y: np.ndarray):
     """Calculates spline fitting."""
     x_new = np.linspace(0, 1, knots + 2)[1:-1]
     q_knots = np.quantile(x, x_new)
+    if q_knots[0] == 0:
+        q_knots[0] += 1e-6
+    if q_knots[0] == q_knots[1]:
+        q_knots[1] += 1e-6
     t, c, k = interpolate.splrep(x, y, t=q_knots, s=2)
     yfit = interpolate.BSpline(t, c, k)(x)
     return yfit, t, c, k

diff --git a/tests/unit_tests/test_percolator.py b/tests/unit_tests/test_percolator.py
@@ -186,7 +186,7 @@ def test_linear_not_sorted(self):
 
     def test_noisy_logistic(self):
         """Test get_aligned_predicted_retention_times for a more realistic, similar to logistic case."""
-        methods = ["lowess", "spline", "logistic"]
+        methods = ["spline", "logistic"]
         x, y, correct_y = _create_noisy_logistic_data()
         for method in methods:
             self._get_aligned_predicted_retention_times_noisy_logistic_error(x, y, correct_y, method)