
Updated cruft template #278

Merged (6 commits, Dec 6, 2023)

Changes from 1 commit
changed type checker to pyright
KennethEnevoldsen committed Dec 6, 2023
commit cac4d92fc50569f5658dfae2039584c1c5ddbbff
makefile (1 addition & 1 deletion)
@@ -4,7 +4,7 @@ install:

static-type-check:
@echo "--- 🔍 Running static type check ---"
-pyright .
+pyright src/

lint:
@echo "--- 🧹 Running linters ---"
pyproject.toml (1 addition & 1 deletion)
@@ -50,7 +50,7 @@ name = "Apache License 2.0"
[project.optional-dependencies]
dev = [
"cruft>=2.0.0",
"pyright>=1.1.328",
"pyright>=1.1.339",
"ruff>=0.0.270",
]
tests = ["pytest>=7.1.2", "pytest-cov>=3.0.0", "pytest-instafail>=0.4.2"]
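Note: the dev pin moves from 1.1.328 to 1.1.339, presumably to match the updated cruft template. A quick sketch to confirm a dev environment satisfies the new lower bound (assuming the `packaging` distribution is installed alongside the dev extras):

    from importlib.metadata import version
    from packaging.version import Version

    # Fails if the installed pyright wrapper predates the new pin.
    assert Version(version("pyright")) >= Version("1.1.339")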
src/dacy/datasets/dane.py (4 additions & 4 deletions)
@@ -14,13 +14,13 @@


def dane( # noqa
-save_path: Optional[PathLike] = None,
-splits: List[str] = ["train", "dev", "test"], # noqa
+save_path: Optional[PathLike] = None, # type: ignore
+splits: List[str] = ["train", "dev", "test"], # noqa # type: ignore
redownload: bool = False,
n_sents: int = 1,
open_unverified_connection: bool = False,
**kwargs, # noqa
-) -> Union[List[Corpus], Corpus]:
+) -> Union[List[Corpus], Corpus]: # type: ignore
"""Reads the DaNE dataset as a spacy Corpus.

Args:
@@ -110,5 +110,5 @@ def dane( # noqa
for split in splits:
corpora.append(Corpus(save_path / paths[split])) # type: ignore
if len(corpora) == 1:
-return corpora[0]
+return corpora[0] # type: ignore
return corpora
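Note on the ignore at the return: `dane()` yields a bare `Corpus` when exactly one split is requested and a `List[Corpus]` otherwise, a union pyright presumably cannot narrow at the `return corpora[0]` site. A hedged usage sketch (assuming `dane` is re-exported from `dacy.datasets`):

    from dacy.datasets import dane

    # One split -> a single spacy Corpus; several splits -> a list, in the order given.
    test = dane(splits=["test"])
    train, dev = dane(splits=["train", "dev"])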
src/dacy/datasets/names.py (7 additions & 7 deletions)
@@ -8,10 +8,10 @@

def load_names(
min_count: int = 0,
-ethnicity: Optional[str] = None,
-gender: Optional[str] = None,
+ethnicity: Optional[str] = None, # type: ignore
+gender: Optional[str] = None, # type: ignore
min_prop_gender: float = 0,
-) -> Dict[str, List[str]]:
+) -> Dict[str, List[str]]: # type: ignore
"""Loads the names lookup table. Danish are from Danmarks statistik (2021).
Muslim names are from Meldgaard (2005),
https://nors.ku.dk/publikationer/webpublikationer/muslimske_fornavne/.
@@ -64,7 +64,7 @@ def load_names(
}


-def muslim_names() -> Dict[str, List[str]]:
+def muslim_names() -> Dict[str, List[str]]: # type: ignore
"""Returns a dictionary of Muslim names.

Returns:
@@ -81,7 +81,7 @@ def muslim_names() -> Dict[str, List[str]]:
return load_names(ethnicity="muslim")


-def danish_names() -> Dict[str, List[str]]:
+def danish_names() -> Dict[str, List[str]]: # type: ignore
"""Returns a dictionary of Danish names.

Returns:
@@ -98,7 +98,7 @@ def danish_names() -> Dict[str, List[str]]:
return load_names(ethnicity="danish")


-def female_names() -> Dict[str, List[str]]:
+def female_names() -> Dict[str, List[str]]: # type: ignore
"""Returns a dictionary of Danish female names.

Returns:
@@ -114,7 +114,7 @@ def female_names() -> Dict[str, List[str]]:
return load_names(ethnicity="danish", gender="female", min_prop_gender=0.5)


-def male_names() -> Dict[str, List[str]]:
+def male_names() -> Dict[str, List[str]]: # type: ignore
"""Returns a dictionary of Danish male names.

Returns:
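Note: all of these helpers funnel into `load_names`, which is why the same ignore repeats on each wrapper's `Dict[str, List[str]]` return. A hedged usage sketch (assuming the helpers are re-exported from `dacy.datasets`):

    from dacy.datasets import female_names, load_names

    # Convenience wrapper with the thresholds shown above ...
    names = female_names()
    # ... or the underlying loader with explicit filters.
    names = load_names(ethnicity="danish", gender="female", min_prop_gender=0.5)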
src/dacy/download.py (6 additions & 6 deletions)
@@ -4,7 +4,7 @@
from pathlib import Path

from spacy.util import get_installed_models
-from tqdm import tqdm
+from tqdm import tqdm # type: ignore

DACY_DEFAULT_PATH = Path.home() / ".dacy"

@@ -40,10 +40,10 @@ def get_latest_version(model: str) -> str:
versions = [mdl.split("-")[-1] for mdl in models_url if mdl.startswith(model)]
versions = sorted(
versions,
-key=lambda s: [int(u) for u in s.split(".")],
+key=lambda s: [int(u) for u in s.split(".")], # type: ignore
reverse=True,
)
-return versions[0]
+return versions[0] # type: ignore


def models() -> list[str]:
@@ -69,7 +69,7 @@ def download_url(url: str, output_path: str) -> None:
unit="B",
unit_scale=True,
miniters=1,
-desc=url.split("/")[-1],
+desc=url.split("/")[-1], # type: ignore
) as t:
urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to)

@@ -104,15 +104,15 @@ def download_model(
if model in {"small", "medium", "large"}:
latest_version = get_latest_version(model)
model = f"da_dacy_{model}_trf-{latest_version}"
-mdl_version = model.split("-")[-1]
+mdl_version = model.split("-")[-1] # type: ignore

if model not in models_url:
raise ValueError(
f"The model '{model}' is not available in DaCy. Please use dacy.models() to see a"
+ " list of all models",
)

-mdl = model.split("-")[0]
+mdl = model.split("-")[0] # type: ignore
if mdl in get_installed_models() and not force and version(mdl) == mdl_version:
return mdl
install(models_url[model])
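Note on `get_latest_version`: the `key` lambda orders dotted version strings numerically rather than lexicographically (the ignore presumably silences an inference issue on the list elements). The pattern in isolation:

    versions = ["0.1.0", "0.2.0", "0.10.0"]
    # Numeric ordering: "0.10.0" outranks "0.2.0" (a plain string sort would not).
    latest = sorted(versions, key=lambda s: [int(u) for u in s.split(".")], reverse=True)[0]
    assert latest == "0.10.0"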
src/dacy/hate_speech/wrapped_models.py (5 additions & 5 deletions)
@@ -76,11 +76,11 @@ def make_offensive_transformer(
nlp: Language,
name: str,
model: Model[List[Doc], FullTransformerBatch],
-set_extra_annotations: Callable[[List[Doc], FullTransformerBatch], None],
+set_extra_annotations: Callable[[List[Doc], FullTransformerBatch], None], # type: ignore
max_batch_items: int,
doc_extension_trf_data: str,
doc_extension_prediction: str,
-labels: List[str],
+labels: List[str], # type: ignore
) -> SequenceClassificationTransformer:
if not Doc.has_extension("is_offensive"):
warn(
@@ -107,11 +107,11 @@ def make_offensive_transformer(
# offensive
if Doc.has_extension("is_offensive"):

-def label_getter(doc) -> Optional[str]: # noqa
+def label_getter(doc) -> Optional[str]: # noqa # type: ignore
if doc._.is_offensive == "offensive":
-prob = getattr(doc._, f"{doc_extension_prediction}_prob")
+prob = getattr(doc._, f"{doc_extension_prediction}_prob") # type: ignore
if prob["prob"] is not None:
-return labels[int(prob["prob"].argmax())]
+return labels[int(prob["prob"].argmax())] # type: ignore
return doc._.is_offensive

Doc.set_extension(doc_extension_prediction, getter=label_getter, force=True)
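Note: the getter above resolves a label lazily from a probability vector. The argmax-to-label step it relies on, sketched standalone (the label names here are placeholders, not the component's actual label set):

    import numpy as np

    labels = ["label_a", "label_b"]  # placeholder names
    prob = np.array([0.2, 0.8])
    # Mirrors `labels[int(prob["prob"].argmax())]` in the getter above.
    assert labels[int(prob.argmax())] == "label_b"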
src/dacy/load.py (2 additions & 2 deletions)
@@ -12,7 +12,7 @@
def load(
model: str,
force: bool = False,
-**kwargs: Any,
+**kwargs: Any, # type: ignore
) -> Language:
"""Load a DaCy model as a SpaCy text processing pipeline. If the model is
not downloaded it will also download the model.
@@ -38,7 +38,7 @@ def load(
return spacy.load(path, **kwargs)


-def where_is_my_dacy(verbose: bool = True) -> Union[str, Path]:
+def where_is_my_dacy(verbose: bool = True) -> Union[str, Path]: # type: ignore
"""Returns a path to where DaCy models are located. The default the model
location can be configured with the environmental variable
`DACY_CACHE_DIR`.
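Note: together with `download_model` above, the two functions touched here are the package's main entry point. A hedged usage sketch (assuming both are re-exported at the package root, and using the size-alias resolution shown in download.py):

    import dacy

    nlp = dacy.load("small")  # resolves to the latest da_dacy_small_trf, downloading if needed
    cache_dir = dacy.where_is_my_dacy(verbose=False)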
src/dacy/ner/fine_grained.py (3 additions & 3 deletions)
@@ -18,10 +18,10 @@
def create_finegrained_ner_component(
nlp: Language,
name: str,
-size: Literal["small", "medium", "large"],
+size: Literal["small", "medium", "large"], # type: ignore
transformer_name: str,
-version: Optional[str],
-) -> Callable[[Doc], Doc]:
+version: Optional[str], # type: ignore
+) -> Callable[[Doc], Doc]: # type: ignore
"""Create a fine grained NER component using the dacy models.

Args:
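Note: the factory's return annotation, `Callable[[Doc], Doc]`, is the contract spaCy expects of a pipeline component. A minimal standalone illustration of that shape (hypothetical component, not the DaCy one):

    from spacy.tokens import Doc

    def my_component(doc: Doc) -> Doc:
        # A component receives a Doc, annotates it in place, and returns it.
        return doc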
src/dacy/score/input_length.py (3 additions & 3 deletions)
@@ -10,11 +10,11 @@


def n_sents_score(
-n_sents: Union[int, List[int]],
-apply_fn: Callable,
+n_sents: Union[int, List[int]], # type: ignore
+apply_fn: Callable, # type: ignore
dataset: str = "dane",
split: str = "test",
-score_fn: List[Union[str, Callable]] = ["token", "pos", "ents", "dep"], # noqa
+score_fn: List[Union[str, Callable]] = ["token", "pos", "ents", "dep"], # noqa # type: ignore
verbose: bool = True,
**kwargs, # noqa
) -> pd.DataFrame:
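Note: `n_sents_score` benchmarks a pipeline on DaNE inputs re-chunked to a given number of sentences and returns a `pd.DataFrame` of scores. A hedged usage sketch (assuming it is re-exported from `dacy.score`; exactly what `apply_fn` receives is defined by score.py below, so the pipeline argument here is an assumption):

    import dacy
    from dacy.score import n_sents_score

    nlp = dacy.load("small")
    # Assumed: apply_fn is the callable applied to the corpus examples.
    df = n_sents_score(n_sents=[1, 3, 5], apply_fn=nlp, dataset="dane", split="test")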
src/dacy/score/score.py (5 additions & 5 deletions)
@@ -3,7 +3,7 @@

from copy import copy
from functools import partial
-from time import time
+from time import time # type: ignore
from typing import Callable, Iterable

import pandas as pd
@@ -17,7 +17,7 @@
from ..utils import flatten_dict


-def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]:
+def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]: # type: ignore
"""A utility getter for scoring entities without including MISC.

Args:
@@ -27,15 +27,15 @@ def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]:
Returns:
Iterable[Span]
"""
-spans = getattr(doc, attr)
+spans = getattr(doc, attr) # type: ignore
for span in spans:
if span.label_ == "MISC":
continue
yield span


def dep_getter(token, attr): # noqa
-dep = getattr(token, attr)
+dep = getattr(token, attr) # type: ignore
dep = token.vocab.strings.as_string(dep).lower()
return dep

@@ -149,7 +149,7 @@ def __score(augmenter): # noqa: ANN001
corpus_ = copy(corpus)
corpus_.augmenter = augmenter
scores_ls = []
-for _i in range(k):
+for _i in range(k): # type: ignore
s = time()
examples = apply_fn(corpus_(nlp)) # type: ignore
speed = time() - s
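Note: `no_misc_getter` narrows entity scoring to the non-MISC labels. Its filtering behavior in isolation (assuming the getter is importable from `dacy.score`):

    import spacy
    from spacy.tokens import Span
    from dacy.score import no_misc_getter

    nlp = spacy.blank("da")
    doc = nlp("Anders kommer fra Danmark")
    doc.ents = [Span(doc, 0, 1, label="PER"), Span(doc, 3, 4, label="MISC")]
    # MISC spans are skipped, so only the PER span is yielded.
    assert [span.label_ for span in no_misc_getter(doc, "ents")] == ["PER"]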
src/dacy/sentiment/wrapped_models.py (5 additions & 5 deletions)
@@ -133,11 +133,11 @@ def make_emotion_transformer(
nlp: Language,
name: str,
model: Model[List[Doc], FullTransformerBatch],
-set_extra_annotations: Callable[[List[Doc], FullTransformerBatch], None],
+set_extra_annotations: Callable[[List[Doc], FullTransformerBatch], None], # type: ignore
max_batch_items: int,
doc_extension_trf_data: str,
doc_extension_prediction: str,
-labels: List[str],
+labels: List[str], # type: ignore
) -> SequenceClassificationTransformer:
if not Doc.has_extension("dacy/emotionally_laden"):
warn(
@@ -164,11 +164,11 @@ def make_emotion_transformer(
# an emotion
if Doc.has_extension("dacy/emotionally_laden"):

-def label_getter(doc) -> Optional[str]: # noqa: ANN001
+def label_getter(doc) -> Optional[str]: # noqa: ANN001 # type: ignore
if doc._.emotionally_laden == "emotional":
-prob = getattr(doc._, f"{doc_extension_prediction}_prob")
+prob = getattr(doc._, f"{doc_extension_prediction}_prob") # type: ignore
if prob["prob"] is not None:
-return labels[int(prob["prob"].argmax())]
+return labels[int(prob["prob"].argmax())] # type: ignore
return doc._.emotionally_laden

Doc.set_extension(doc_extension_prediction, getter=label_getter, force=True)
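Note: as in the hate-speech factory, the label is exposed through a lazy `Doc` extension rather than stored eagerly. The registration pattern in isolation (hypothetical extension name):

    import spacy
    from spacy.tokens import Doc

    nlp = spacy.blank("da")
    # `force=True` overwrites an existing extension, as the factory above does on reload.
    Doc.set_extension("shout", getter=lambda doc: doc.text.upper(), force=True)
    assert nlp("hej")._.shout == "HEJ"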