centre-for-humanities-computing · KennethEnevoldsen · Oct 30, 2024 · Oct 30, 2024 · coderabbitai · Oct 30, 2024
diff --git a/.cruft.json b/.cruft.json
@@ -1,6 +1,6 @@
 {
   "template": "https://github.com/KennethEnevoldsen/swift-python-cookiecutter",
-  "commit": "e02068889310225ea4f65ea0b203c2949e1597a9",
+  "commit": "85413085032f305896da8bad287a83d53fb0b196",
   "checkout": null,
   "context": {
     "cookiecutter": {

diff --git a/.github/workflows/static_type_checks.yml b/.github/workflows/static_type_checks.yml
@@ -1,3 +1,10 @@
+# THIS ACTION WILL:
+  # 1. Install dependencies
+  # 2. Run static type checker
+
+# SETUP:
+  # None required except for the Makefile
+
 name: static_type_checks
 
 on:

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -1,17 +1,19 @@
-# This workflow will:
-# 1) install Python dependencies
-# 2) run make test
+# THIS ACTION WILL:
+  # 1) install Python dependencies
+  # 2) run make test
 
+# SETUP:
+  # None required except for the Makefile
 
-name: Tests
+name: test
 on:
   push:
     branches: [main]
   pull_request:
     branches: [main]
 
 jobs:
-  pytest:
+  test:
     runs-on: ${{ matrix.os }}
     permissions:
       contents: read

diff --git a/makefile b/makefile
@@ -9,9 +9,9 @@ static-type-check:
 lint:
 	@echo "--- 🧹 Running linters ---"
 	ruff format .  								# running ruff formatting
-	ruff src/ --fix  						    # running ruff linting
-	ruff tests/ --fix
-	ruff docs/conf.py --fix
+	ruff check src/ --fix  						    # running ruff linting
+	ruff check tests/ --fix
+	ruff check docs/conf.py --fix
 
 test:
 	@echo "--- 🧪 Running tests ---"

diff --git a/pyproject.toml b/pyproject.toml
@@ -48,11 +48,7 @@ repository = "https://github.com/centre-for-humanities-computing/DaCy"
 file = "LICENSE"
 name = "Apache License 2.0"
 [project.optional-dependencies]
-dev = [
-  "cruft>=2.0.0",
-  "pyright>=1.1.339",
-  "ruff>=0.0.270",
-]
+dev = ["cruft>=2.0.0", "pyright>=1.1.339", "ruff==0.7.1"]
 tests = ["pytest>=7.1.2", "pytest-cov>=3.0.0", "pytest-instafail>=0.4.2"]
 docs = [
   "sphinx==5.3.0",
@@ -110,6 +106,42 @@ pythonPlatform = "Darwin"
 
 [tool.ruff]
 # extend-include = ["*.ipynb"]
+
+# Exclude a variety of commonly ignored directories.
+exclude = [
+  ".bzr",
+  ".direnv",
+  ".eggs",
+  ".git",
+  ".hg",
+  ".nox",
+  ".pants.d",
+  ".pytype",
+  ".ruff_cache",
+  ".svn",
+  ".tox",
+  ".venv",
+  "__pypackages__",
+  "_build",
+  "buck-out",
+  "build",
+  "dist",
+  "node_modules",
+  "venv",
+  "__init__.py",
+  ".venv",
+  ".env",
+  ".git",
+  "__pycache__",
+  "dev/**",
+  "training/main/**",
+  "training/ner_fine_grained/**",
+  "papers/DaCy-A-Unified-Framework-for-Danish-NLP/**",
+  "docs/performance_testing_utils/**",
+]
-exclude = [
-  ".bzr",
-  ".direnv",
-  ".eggs",
-  ".git",
-  ".hg",
-  ".nox",
-  ".pants.d",
-  ".pytype",
-  ".ruff_cache",
-  ".svn",
-  ".tox",
-  ".venv",
-  "__pypackages__",
-  "_build",
-  "buck-out",
-  "build",
-  "dist",
-  "node_modules",
-  "venv",
-  "__init__.py",
-  ".venv",
-  ".env",
-  ".git",
-  "__pycache__",
-  "dev/**",
-  "training/main/**",
-  "training/ner_fine_grained/**",
-  "papers/DaCy-A-Unified-Framework-for-Danish-NLP/**",
-  "docs/performance_testing_utils/**",
-]
+exclude = [
+  ".bzr",
+  ".direnv",
+  ".eggs",
+  ".git",
+  ".hg",
+  ".nox",
+  ".pants.d",
+  ".pytype",
+  ".ruff_cache",
+  ".svn",
+  ".tox",
+  ".venv",
+  "__pypackages__",
+  "_build",
+  "buck-out",
+  "build",
+  "dist",
+  "node_modules",
+  "venv",
+  "__init__.py",
+  ".env",
+  "__pycache__",
+  "dev/**",
+  "training/main/**",
+  "training/ner_fine_grained/**",
+  "papers/DaCy-A-Unified-Framework-for-Danish-NLP/**",
+  "docs/performance_testing_utils/**",
+]
-exclude = [
-  ".bzr",
-  ".direnv",
-  ".eggs",
-  ".git",
-  ".hg",
-  ".nox",
-  ".pants.d",
-  ".pytype",
-  ".ruff_cache",
-  ".svn",
-  ".tox",
-  ".venv",
-  "__pypackages__",
-  "_build",
-  "buck-out",
-  "build",
-  "dist",
-  "node_modules",
-  "venv",
-  "__init__.py",
-  ".venv",
-  ".env",
-  ".git",
-  "__pycache__",
-  "dev/**",
-  "training/main/**",
-  "training/ner_fine_grained/**",
-  "papers/DaCy-A-Unified-Framework-for-Danish-NLP/**",
-  "docs/performance_testing_utils/**",
-]
+exclude = [
+  ".bzr",
+  ".direnv",
+  ".eggs",
+  ".git",
+  ".hg",
+  ".nox",
+  ".pants.d",
+  ".pytype",
+  ".ruff_cache",
+  ".svn",
+  ".tox",
+  ".venv",
+  "__pypackages__",
+  "_build",
+  "buck-out",
+  "build",
+  "dist",
+  "node_modules",
+  "venv",
+  "__init__.py",
+  ".env",
+  "__pycache__",
+  "dev/**",
+  "training/main/**",
+  "training/ner_fine_grained/**",
+  "papers/DaCy-A-Unified-Framework-for-Danish-NLP/**",
+  "docs/performance_testing_utils/**",
+]
+target-version = "py39"
+
+[tool.ruff.lint]
 # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
 select = [
   "A",
@@ -150,54 +182,23 @@ ignore = [
   "ANN202",
   "COM812",
 ]
-ignore-init-module-imports = true
 # Allow autofix for all enabled rules (when `--fix` is provided).
 unfixable = ["ERA"]
-# Exclude a variety of commonly ignored directories.
-exclude = [
-  ".bzr",
-  ".direnv",
-  ".eggs",
-  ".git",
-  ".hg",
-  ".nox",
-  ".pants.d",
-  ".pytype",
-  ".ruff_cache",
-  ".svn",
-  ".tox",
-  ".venv",
-  "__pypackages__",
-  "_build",
-  "buck-out",
-  "build",
-  "dist",
-  "node_modules",
-  "venv",
-  "__init__.py",
-  ".venv",
-  ".env",
-  ".git",
-  "__pycache__",
-  "dev/**",
-  "training/main/**",
-  "training/ner_fine_grained/**",
-  "papers/DaCy-A-Unified-Framework-for-Danish-NLP/**",
-  "docs/performance_testing_utils/**",
-]
+
+
 # Allow unused variables when underscore-prefixed.
 dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
-target-version = "py38"
+
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
 
-[tool.ruff.flake8-annotations]
+[tool.ruff.lint.flake8-annotations]
 mypy-init-return = true
 suppress-none-returning = true
 
 
-[tool.ruff.mccabe]
+[tool.ruff.lint.mccabe]
 # Unlike Flake8, default to a complexity level of 10.
 max-complexity = 10
 

diff --git a/src/dacy/download.py b/src/dacy/download.py
@@ -1,4 +1,5 @@
 """Functions for downloading DaCy models."""
+
 import os
 from importlib.metadata import version
 from pathlib import Path

diff --git a/src/dacy/load.py b/src/dacy/load.py
@@ -1,4 +1,5 @@
 """Functionality for loading and locating DaCy models."""
+
 import warnings
 from pathlib import Path
 from typing import Any, Union

diff --git a/src/dacy/score/input_length.py b/src/dacy/score/input_length.py
@@ -1,5 +1,6 @@
 """Contains functions for testing the performance of models on varying input
 length."""
+
 from typing import Callable, List, Union
 
 import pandas as pd

diff --git a/src/dacy/score/score.py b/src/dacy/score/score.py
@@ -1,4 +1,5 @@
 """This includes functions for scoring models applied to a SpaCy corpus."""
+
 from __future__ import annotations
 
 from copy import copy
@@ -163,7 +164,7 @@ def __score(augmenter):  # noqa: ANN001
 
         # and collapse list to dict
         for key in scores:  # type: ignore
-            scores[key] = [s[key] if key in s else None for s in scores_ls]  # type: ignore
+            scores[key] = [s.get(key, None) for s in scores_ls]  # type: ignore
 
         scores["k"] = list(range(k))  # type: ignore
 

diff --git a/tests/test_datasets.py b/tests/test_datasets.py
@@ -1,9 +1,10 @@
-import dacy
-from dacy.datasets import dane, female_names, male_names, muslim_names
 from spacy.lang.da import Danish
 from spacy.training import Example
 from spacy.training.corpus import Corpus
 
+import dacy
+from dacy.datasets import dane, female_names, male_names, muslim_names
+
 
 def test_dane():
     train, dev, test = dane(open_unverified_connection=True)  # type: ignore

diff --git a/tests/test_download.py b/tests/test_download.py
@@ -1,5 +1,6 @@
-import dacy
 import pytest
+
+import dacy
 from dacy.load import load
 
 

diff --git a/tests/test_hate_speech.py b/tests/test_hate_speech.py
@@ -1,6 +1,7 @@
-import dacy
 import spacy
 
+import dacy
+
 
 def test_add_hate_speech_detection():
     nlp = spacy.blank("da")

diff --git a/tests/test_score.py b/tests/test_score.py
@@ -1,9 +1,10 @@
 import pandas as pd
-from dacy.datasets import dane
-from dacy.score import n_sents_score, score
 from spacy.lang.da import Danish
 from spacy.training.augment import create_lower_casing_augmenter
 
+from dacy.datasets import dane
+from dacy.score import n_sents_score, score
+
 
 def test_score():
     nlp = Danish()

diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py
@@ -1,6 +1,7 @@
-import dacy
 import spacy
 
+import dacy
+
 
 def test_add_subjectivity():
     nlp = spacy.blank("da")