From a55297838ee0c657bf181c70cc748ed068fb87e4 Mon Sep 17 00:00:00 2001 From: Jonathan de Bruin Date: Sat, 28 Sep 2024 21:16:01 +0200 Subject: [PATCH] Update linter config and format code base --- .pre-commit-config.yaml | 23 ++++++++++------------- LICENSE | 2 +- README.md | 20 ++++++++++---------- asreviewcontrib/insights/plot.py | 15 +++++++++++---- docs/stats_explainer.py | 1 - pyproject.toml | 4 ++-- tests/README.md | 2 +- 7 files changed, 35 insertions(+), 32 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3815e0e..c4f00b2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,16 +1,13 @@ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: check-added-large-files -- repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.272 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 hooks: - - id: ruff -- repo: https://github.com/psf/black - rev: 23.3.0 + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.8 hooks: - - id: black + - id: ruff + - id: ruff-format diff --git a/LICENSE b/LICENSE index 4a70711..92a02ae 100644 --- a/LICENSE +++ b/LICENSE @@ -200,4 +200,4 @@ Copyright 2020 The ASReview Authors. All rights reserved. distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file + limitations under the License. diff --git a/README.md b/README.md index 95af45c..50c642a 100644 --- a/README.md +++ b/README.md @@ -41,9 +41,9 @@ extension can plot or compute the values for such metrics from ASReview project files. [O'Mara-Eves et al. (2015)](https://doi.org/10.1186/2046-4053-4-5) provides a comprehensive overview of different metrics used in the field of actrive learning. Below we -describe the metrics available in the software. +describe the metrics available in the software. -### Recall +### Recall The recall is the proportion of relevant records that have been found at a certain point during the screening phase. It is sometimes also called the @@ -58,12 +58,12 @@ The confusion matrix consist of the True Positives (TP), False Positives (FP), True Negatives (TN), and False Negatives (FN). Definitions are provided in the following table retrieved at a certain recall (r%). -| | Definition | Calculation | +| | Definition | Calculation | |----------------------|----------------------------------------------------------------------------------------|---------------------------------| -| True Positives (TP) | The number of relevant records found at recall level | Relevant Records * r% | +| True Positives (TP) | The number of relevant records found at recall level | Relevant Records * r% | | False Positives (FP) | The number of irrelevant records reviewed at recall level | Records Reviewed – TP | -| True Negatives (TN) | The number of irrelevant records correctly not reviewed at recall level | Irrelevant Records – FP | -| False Negatives (FN) | The number of relevant records not reviewed at recall level (missing relevant records) | Relevant Records – TP | +| True Negatives (TN) | The number of irrelevant records correctly not reviewed at recall level | Irrelevant Records – FP | +| False Negatives (FN) | The number of relevant records not reviewed at recall level (missing relevant records) | Relevant Records – TP | ### Work saved over sampling @@ -81,7 +81,7 @@ normalize the WSS for class imbalance (denoted as the nWSS). Moreover, Kusa et al. showed that nWSS is equal to the True Negative Rate (TNR). The TNR is the proportion of irrelevant records that were correctly not reviewed at level of recall. The nWSS is useful to compare performance in terms of work saved -across datasets and models while controlling for dataset class imbalance. +across datasets and models while controlling for dataset class imbalance. The following table provides a hypothetical dataset example: @@ -262,11 +262,11 @@ related to the steep recall curve. Optional arguments for the command line are `--priors` to include prior knowledge, `--x_absolute` and `--y_absolute` to use absolute axes. -See `asreview plot -h` for all command line arguments. +See `asreview plot -h` for all command line arguments. ### Plotting multiple files -It is possible to show the curves of multiple files in one plot. Use this -syntax (replace `YOUR_ASREVIEW_FILE_1` and `YOUR_ASREVIEW_FILE_2` by the +It is possible to show the curves of multiple files in one plot. Use this +syntax (replace `YOUR_ASREVIEW_FILE_1` and `YOUR_ASREVIEW_FILE_2` by the asreview_files that you want to include in the plot): ```bash diff --git a/asreviewcontrib/insights/plot.py b/asreviewcontrib/insights/plot.py index 9dd8600..661532b 100644 --- a/asreviewcontrib/insights/plot.py +++ b/asreviewcontrib/insights/plot.py @@ -263,7 +263,7 @@ def _plot_recall( ax = _add_recall_info(ax, labels, x_absolute, y_absolute) if show_random: - ax = _add_random_curve(ax, labels, x_absolute, y_absolute) + ax = _add_random_curve(ax, labels, x_absolute, y_absolute) if show_perfect: ax = _add_perfect_curve(ax, labels, x_absolute, y_absolute) @@ -423,8 +423,16 @@ def _add_perfect_curve(ax, labels, x_absolute, y_absolute): n_docs = len(labels) # Create x and y arrays for step plot - x = np.arange(0, n_pos_docs + 1) if x_absolute else np.arange(0, n_pos_docs + 1) / n_docs # noqa: E501 - y = np.arange(0, n_pos_docs + 1) if y_absolute else np.arange(0, n_pos_docs + 1) / n_pos_docs # noqa: E501 + x = ( + np.arange(0, n_pos_docs + 1) + if x_absolute + else np.arange(0, n_pos_docs + 1) / n_docs + ) + y = ( + np.arange(0, n_pos_docs + 1) + if y_absolute + else np.arange(0, n_pos_docs + 1) / n_pos_docs + ) # Plot the stepwise perfect curve ax.step(x, y, color="grey", where="post") @@ -432,7 +440,6 @@ def _add_perfect_curve(ax, labels, x_absolute, y_absolute): return ax - def _add_wss_curve(ax, labels, x_absolute=False, y_absolute=False, legend_label=None): x, y = _wss_values(labels, x_absolute=x_absolute, y_absolute=y_absolute) ax.step(x, y, where="post", label=legend_label) diff --git a/docs/stats_explainer.py b/docs/stats_explainer.py index 893091b..2fb14fa 100644 --- a/docs/stats_explainer.py +++ b/docs/stats_explainer.py @@ -6,7 +6,6 @@ python docs/stats_explainer.py """ - import matplotlib.pyplot as plt import numpy as np diff --git a/pyproject.toml b/pyproject.toml index 09212cc..a687cfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,8 +41,8 @@ packages = ["asreviewcontrib"] [tool.setuptools_scm] write_to = "asreviewcontrib/insights/_version.py" -[tool.ruff] +[tool.ruff.lint] select = ["E", "F", "UP", "I", "B"] -[tool.ruff.isort] +[tool.ruff.lint.isort] force-single-line = true diff --git a/tests/README.md b/tests/README.md index 445d476..1ca34b5 100644 --- a/tests/README.md +++ b/tests/README.md @@ -5,4 +5,4 @@ ``` asreview simulate benchmark:van_de_schoot_2017 -s sim_van_de_schoot_2017_stop_if_min.asreview --init_seed 535 --seed 400 --stop_if min asreview simulate benchmark:van_de_schoot_2017 -s sim_van_de_schoot_2017_stop_if_full.asreview --init_seed 535 --seed 400 --stop_if -1 -``` \ No newline at end of file +```