
Updated cruft template #278

Merged (6 commits, Dec 6, 2023)

Changes from 1 commit
changed type checker to pyright
KennethEnevoldsen committed Dec 6, 2023
commit cac4d92fc50569f5658dfae2039584c1c5ddbbff
makefile (1 addition & 1 deletion)
@@ -4,7 +4,7 @@ install:

static-type-check:
@echo "--- 🔍 Running static type check ---"
-pyright .
+pyright src/

lint:
@echo "--- 🧹 Running linters ---"
pyproject.toml (1 addition & 1 deletion)
@@ -50,7 +50,7 @@ name = "Apache License 2.0"
[project.optional-dependencies]
dev = [
"cruft>=2.0.0",
"pyright>=1.1.328",
"pyright>=1.1.339",
"ruff>=0.0.270",
]
tests = ["pytest>=7.1.2", "pytest-cov>=3.0.0", "pytest-instafail>=0.4.2"]
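Note: the dev pin moves from 1.1.328 to 1.1.339, presumably to match the updated cruft template. A quick sketch to confirm a dev environment satisfies the new lower bound (assuming the `packaging` distribution is installed alongside the dev extras):

    from importlib.metadata import version
    from packaging.version import Version

    # Fails if the installed pyright wrapper predates the new pin.
    assert Version(version("pyright")) >= Version("1.1.339")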
src/dacy/datasets/dane.py (4 additions & 4 deletions)
@@ -14,13 +14,13 @@


def dane( # noqa
-save_path: Optional[PathLike] = None,
-splits: List[str] = ["train", "dev", "test"], # noqa
+save_path: Optional[PathLike] = None, # type: ignore
+splits: List[str] = ["train", "dev", "test"], # noqa # type: ignore
redownload: bool = False,
n_sents: int = 1,
open_unverified_connection: bool = False,
**kwargs, # noqa
-) -> Union[List[Corpus], Corpus]:
+) -> Union[List[Corpus], Corpus]: # type: ignore
"""Reads the DaNE dataset as a spacy Corpus.

Args:
@@ -110,5 +110,5 @@ def dane( # noqa
for split in splits:
corpora.append(Corpus(save_path / paths[split])) # type: ignore
if len(corpora) == 1:
-return corpora[0]
+return corpora[0] # type: ignore
return corpora
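Note on the ignore at the return: `dane()` yields a bare `Corpus` when exactly one split is requested and a `List[Corpus]` otherwise, a union pyright presumably cannot narrow at the `return corpora[0]` site. A hedged usage sketch (assuming `dane` is re-exported from `dacy.datasets`):

    from dacy.datasets import dane

    # One split -> a single spacy Corpus; several splits -> a list, in the order given.
    test = dane(splits=["test"])
    train, dev = dane(splits=["train", "dev"])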
src/dacy/datasets/names.py (7 additions & 7 deletions)
@@ -8,10 +8,10 @@

def load_names(
min_count: int = 0,
-ethnicity: Optional[str] = None,
-gender: Optional[str] = None,
+ethnicity: Optional[str] = None, # type: ignore
+gender: Optional[str] = None, # type: ignore
min_prop_gender: float = 0,
-) -> Dict[str, List[str]]:
+) -> Dict[str, List[str]]: # type: ignore
"""Loads the names lookup table. Danish are from Danmarks statistik (2021).
Muslim names are from Meldgaard (2005),
https://nors.ku.dk/publikationer/webpublikationer/muslimske_fornavne/.
@@ -64,7 +64,7 @@ def load_names(
}


-def muslim_names() -> Dict[str, List[str]]:
+def muslim_names() -> Dict[str, List[str]]: # type: ignore
"""Returns a dictionary of Muslim names.

Returns:
@@ -81,7 +81,7 @@ def muslim_names() -> Dict[str, List[str]]:
return load_names(ethnicity="muslim")


-def danish_names() -> Dict[str, List[str]]:
+def danish_names() -> Dict[str, List[str]]: # type: ignore
"""Returns a dictionary of Danish names.

Returns:
@@ -98,7 +98,7 @@ def danish_names() -> Dict[str, List[str]]:
return load_names(ethnicity="danish")


-def female_names() -> Dict[str, List[str]]:
+def female_names() -> Dict[str, List[str]]: # type: ignore
"""Returns a dictionary of Danish female names.

Returns:
@@ -114,7 +114,7 @@ def female_names() -> Dict[str, List[str]]:
return load_names(ethnicity="danish", gender="female", min_prop_gender=0.5)


-def male_names() -> Dict[str, List[str]]:
+def male_names() -> Dict[str, List[str]]: # type: ignore
"""Returns a dictionary of Danish male names.

Returns:
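Note: all of these helpers funnel into `load_names`, which is why the same ignore repeats on each wrapper's `Dict[str, List[str]]` return. A hedged usage sketch (assuming the helpers are re-exported from `dacy.datasets`):

    from dacy.datasets import female_names, load_names

    # Convenience wrapper with the thresholds shown above ...
    names = female_names()
    # ... or the underlying loader with explicit filters.
    names = load_names(ethnicity="danish", gender="female", min_prop_gender=0.5)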
src/dacy/download.py (6 additions & 6 deletions)
@@ -4,7 +4,7 @@
from pathlib import Path

from spacy.util import get_installed_models
-from tqdm import tqdm
+from tqdm import tqdm # type: ignore

DACY_DEFAULT_PATH = Path.home() / ".dacy"

@@ -40,10 +40,10 @@ def get_latest_version(model: str) -> str:
versions = [mdl.split("-")[-1] for mdl in models_url if mdl.startswith(model)]
versions = sorted(
versions,
-key=lambda s: [int(u) for u in s.split(".")],
+key=lambda s: [int(u) for u in s.split(".")], # type: ignore
reverse=True,
)
-return versions[0]
+return versions[0] # type: ignore


def models() -> list[str]:
@@ -69,7 +69,7 @@ def download_url(url: str, output_path: str) -> None:
unit="B",
unit_scale=True,
miniters=1,
-desc=url.split("/")[-1],
+desc=url.split("/")[-1], # type: ignore
) as t:
urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to)

@@ -104,15 +104,15 @@ def download_model(
if model in {"small", "medium", "large"}:
latest_version = get_latest_version(model)
model = f"da_dacy_{model}_trf-{latest_version}"
-mdl_version = model.split("-")[-1]
+mdl_version = model.split("-")[-1] # type: ignore

if model not in models_url:
raise ValueError(
f"The model '{model}' is not available in DaCy. Please use dacy.models() to see a"
+ " list of all models",
)

-mdl = model.split("-")[0]
+mdl = model.split("-")[0] # type: ignore
if mdl in get_installed_models() and not force and version(mdl) == mdl_version:
return mdl
install(models_url[model])
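Note on `get_latest_version`: the `key` lambda orders dotted version strings numerically rather than lexicographically (the ignore presumably silences an inference issue on the list elements). The pattern in isolation:

    versions = ["0.1.0", "0.2.0", "0.10.0"]
    # Numeric ordering: "0.10.0" outranks "0.2.0" (a plain string sort would not).
    latest = sorted(versions, key=lambda s: [int(u) for u in s.split(".")], reverse=True)[0]
    assert latest == "0.10.0"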
src/dacy/hate_speech/wrapped_models.py (5 additions & 5 deletions)
@@ -76,11 +76,11 @@ def make_offensive_transformer(
nlp: Language,
name: str,
model: Model[List[Doc], FullTransformerBatch],
-set_extra_annotations: Callable[[List[Doc], FullTransformerBatch], None],
+set_extra_annotations: Callable[[List[Doc], FullTransformerBatch], None], # type: ignore
max_batch_items: int,
doc_extension_trf_data: str,
doc_extension_prediction: str,
-labels: List[str],
+labels: List[str], # type: ignore
) -> SequenceClassificationTransformer:
if not Doc.has_extension("is_offensive"):
warn(
@@ -107,11 +107,11 @@ def make_offensive_transformer(
# offensive
if Doc.has_extension("is_offensive"):

-def label_getter(doc) -> Optional[str]: # noqa
+def label_getter(doc) -> Optional[str]: # noqa # type: ignore
if doc._.is_offensive == "offensive":
-prob = getattr(doc._, f"{doc_extension_prediction}_prob")
+prob = getattr(doc._, f"{doc_extension_prediction}_prob") # type: ignore
if prob["prob"] is not None:
-return labels[int(prob["prob"].argmax())]
+return labels[int(prob["prob"].argmax())] # type: ignore
return doc._.is_offensive

Doc.set_extension(doc_extension_prediction, getter=label_getter, force=True)
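Note: the getter above resolves a label lazily from a probability vector. The argmax-to-label step it relies on, sketched standalone (the label names here are placeholders, not the component's actual label set):

    import numpy as np

    labels = ["label_a", "label_b"]  # placeholder names
    prob = np.array([0.2, 0.8])
    # Mirrors `labels[int(prob["prob"].argmax())]` in the getter above.
    assert labels[int(prob.argmax())] == "label_b"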
src/dacy/load.py (2 additions & 2 deletions)
@@ -12,7 +12,7 @@
def load(
model: str,
force: bool = False,
-**kwargs: Any,
+**kwargs: Any, # type: ignore
) -> Language:
"""Load a DaCy model as a SpaCy text processing pipeline. If the model is
not downloaded it will also download the model.
@@ -38,7 +38,7 @@ def load(
return spacy.load(path, **kwargs)


-def where_is_my_dacy(verbose: bool = True) -> Union[str, Path]:
+def where_is_my_dacy(verbose: bool = True) -> Union[str, Path]: # type: ignore
"""Returns a path to where DaCy models are located. The default the model
location can be configured with the environmental variable
`DACY_CACHE_DIR`.
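Note: together with `download_model` above, the two functions touched here are the package's main entry point. A hedged usage sketch (assuming both are re-exported at the package root, and using the size-alias resolution shown in download.py):

    import dacy

    nlp = dacy.load("small")  # resolves to the latest da_dacy_small_trf, downloading if needed
    cache_dir = dacy.where_is_my_dacy(verbose=False)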
src/dacy/ner/fine_grained.py (3 additions & 3 deletions)
@@ -18,10 +18,10 @@
def create_finegrained_ner_component(
nlp: Language,
name: str,
-size: Literal["small", "medium", "large"],
+size: Literal["small", "medium", "large"], # type: ignore
transformer_name: str,
-version: Optional[str],
-) -> Callable[[Doc], Doc]:
+version: Optional[str], # type: ignore
+) -> Callable[[Doc], Doc]: # type: ignore
"""Create a fine grained NER component using the dacy models.

Args:
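Note: the factory's return annotation, `Callable[[Doc], Doc]`, is the contract spaCy expects of a pipeline component. A minimal standalone illustration of that shape (hypothetical component, not the DaCy one):

    from spacy.tokens import Doc

    def my_component(doc: Doc) -> Doc:
        # A component receives a Doc, annotates it in place, and returns it.
        return doc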
src/dacy/score/input_length.py (3 additions & 3 deletions)
@@ -10,11 +10,11 @@


def n_sents_score(
-n_sents: Union[int, List[int]],
-apply_fn: Callable,
+n_sents: Union[int, List[int]], # type: ignore
+apply_fn: Callable, # type: ignore
dataset: str = "dane",
split: str = "test",
-score_fn: List[Union[str, Callable]] = ["token", "pos", "ents", "dep"], # noqa
+score_fn: List[Union[str, Callable]] = ["token", "pos", "ents", "dep"], # noqa # type: ignore
verbose: bool = True,
**kwargs, # noqa
) -> pd.DataFrame:
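Note: `n_sents_score` benchmarks a pipeline on DaNE inputs re-chunked to a given number of sentences and returns a `pd.DataFrame` of scores. A hedged usage sketch (assuming it is re-exported from `dacy.score`; exactly what `apply_fn` receives is defined by score.py below, so the pipeline argument here is an assumption):

    import dacy
    from dacy.score import n_sents_score

    nlp = dacy.load("small")
    # Assumed: apply_fn is the callable applied to the corpus examples.
    df = n_sents_score(n_sents=[1, 3, 5], apply_fn=nlp, dataset="dane", split="test")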
src/dacy/score/score.py (5 additions & 5 deletions)
@@ -3,7 +3,7 @@

from copy import copy
from functools import partial
-from time import time
+from time import time # type: ignore
from typing import Callable, Iterable

import pandas as pd
@@ -17,7 +17,7 @@
from ..utils import flatten_dict


-def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]:
+def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]: # type: ignore
"""A utility getter for scoring entities without including MISC.

Args:
@@ -27,15 +27,15 @@ def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]:
Returns:
Iterable[Span]
"""
-spans = getattr(doc, attr)
+spans = getattr(doc, attr) # type: ignore
for span in spans:
if span.label_ == "MISC":
continue
yield span


def dep_getter(token, attr): # noqa
-dep = getattr(token, attr)
+dep = getattr(token, attr) # type: ignore
dep = token.vocab.strings.as_string(dep).lower()
return dep

@@ -149,7 +149,7 @@ def __score(augmenter): # noqa: ANN001
corpus_ = copy(corpus)
corpus_.augmenter = augmenter
scores_ls = []
-for _i in range(k):
+for _i in range(k): # type: ignore
s = time()
examples = apply_fn(corpus_(nlp)) # type: ignore
speed = time() - s
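Note: `no_misc_getter` narrows entity scoring to the non-MISC labels. Its filtering behavior in isolation (assuming the getter is importable from `dacy.score`):

    import spacy
    from spacy.tokens import Span
    from dacy.score import no_misc_getter

    nlp = spacy.blank("da")
    doc = nlp("Anders kommer fra Danmark")
    doc.ents = [Span(doc, 0, 1, label="PER"), Span(doc, 3, 4, label="MISC")]
    # MISC spans are skipped, so only the PER span is yielded.
    assert [span.label_ for span in no_misc_getter(doc, "ents")] == ["PER"]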
src/dacy/sentiment/wrapped_models.py (5 additions & 5 deletions)
@@ -133,11 +133,11 @@ def make_emotion_transformer(
nlp: Language,
name: str,
model: Model[List[Doc], FullTransformerBatch],
-set_extra_annotations: Callable[[List[Doc], FullTransformerBatch], None],
+set_extra_annotations: Callable[[List[Doc], FullTransformerBatch], None], # type: ignore
max_batch_items: int,
doc_extension_trf_data: str,
doc_extension_prediction: str,
-labels: List[str],
+labels: List[str], # type: ignore
) -> SequenceClassificationTransformer:
if not Doc.has_extension("dacy/emotionally_laden"):
warn(
@@ -164,11 +164,11 @@ def make_emotion_transformer(
# an emotion
if Doc.has_extension("dacy/emotionally_laden"):

-def label_getter(doc) -> Optional[str]: # noqa: ANN001
+def label_getter(doc) -> Optional[str]: # noqa: ANN001 # type: ignore
if doc._.emotionally_laden == "emotional":
-prob = getattr(doc._, f"{doc_extension_prediction}_prob")
+prob = getattr(doc._, f"{doc_extension_prediction}_prob") # type: ignore
if prob["prob"] is not None:
-return labels[int(prob["prob"].argmax())]
+return labels[int(prob["prob"].argmax())] # type: ignore
return doc._.emotionally_laden

Doc.set_extension(doc_extension_prediction, getter=label_getter, force=True)
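Note: as in the hate-speech factory, the label is exposed through a lazy `Doc` extension rather than stored eagerly. The registration pattern in isolation (hypothetical extension name):

    import spacy
    from spacy.tokens import Doc

    nlp = spacy.blank("da")
    # `force=True` overwrites an existing extension, as the factory above does on reload.
    Doc.set_extension("shout", getter=lambda doc: doc.text.upper(), force=True)
    assert nlp("hej")._.shout == "HEJ"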