Skip to content

Commit

Permalink
Merge pull request #1375 from Sage-Bionetworks/develop-FDS-241-mypy-utils
Browse files Browse the repository at this point in the history

FDS-241 mypy utils files
  • Loading branch information
andrewelamb authored Feb 26, 2024
2 parents 3a6ff92 + 22b2172 commit 6462e63
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 52 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ jobs:
# poetry run mypy --install-types --non-interactive
# add here when enforced
poetry run mypy --disallow-untyped-defs --install-types --non-interactive schematic/configuration/*.py schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py schematic/visualization
poetry run mypy --disallow-untyped-defs --install-types --non-interactive schematic/utils/cli_utils.py schematic/utils/curie_utils.py schematic/utils/df_utils.py schematic/utils/general.py schematic/utils/google_api_utils.py schematic/utils/validate_rules_utils.py schematic/utils/viz_utils.py
#----------------------------------------------
# linting
Expand Down
2 changes: 1 addition & 1 deletion schematic/utils/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def extract(dictionary: Any, key: Any) -> Union[Any, None]:
return None
return dictionary.get(key)

return reduce(extract, keys, dictionary)
return reduce(extract, keys, dictionary) # type: ignore


def log_value_from_config(arg_name: str, config_value: Any) -> None:
Expand Down
27 changes: 16 additions & 11 deletions schematic/utils/curie_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"""Curie utils"""

import logging
from typing import Any, Union

Context = dict[str, str]
Record = dict[str, Union[str, list, dict, None]]
Graph = list[Record]
Schema = dict[str, Any]

logger = logging.getLogger(__name__)

Expand All @@ -15,7 +20,7 @@ def extract_name_from_uri_or_curie(item: str) -> str:
raise ValueError("Error extracting name from URI or Curie.")


def expand_curie_to_uri(curie: str, context_info: dict[str, str]) -> str:
def expand_curie_to_uri(curie: str, context_info: Context) -> str:
"""Expand curie to uri based on the context given
parmas
Expand All @@ -36,27 +41,27 @@ def expand_curie_to_uri(curie: str, context_info: dict[str, str]) -> str:
return curie


def expand_curies_in_schema(schema):
def expand_curies_in_schema(schema: Schema) -> Schema:
"""Expand all curies in a SchemaOrg JSON-LD file into URI"""
context = schema["@context"]
graph = schema["@graph"]
context: Context = schema["@context"]
graph: Graph = schema["@graph"]
new_schema = {"@context": context, "@graph": [], "@id": schema["@id"]}
for record in graph:
new_record = {}
new_record: Record = {}
for key, value in record.items():
if isinstance(value, str):
new_record[expand_curie_to_uri(key, context)] = expand_curie_to_uri(
value, context
)
elif isinstance(value, list):
uri = expand_curie_to_uri(key, context)
if isinstance(value[0], dict):
new_record[expand_curie_to_uri(key, context)] = []
lst: list[dict[str, str]] = []
new_record[uri] = lst
for _item in value:
new_record[expand_curie_to_uri(key, context)].append(
{"@id": expand_curie_to_uri(_item["@id"], context)}
)
lst.append({"@id": expand_curie_to_uri(_item["@id"], context)})
else:
new_record[expand_curie_to_uri(key, context)] = [
new_record[uri] = [
expand_curie_to_uri(_item, context) for _item in value
]
elif isinstance(value, dict) and "@id" in value:
Expand All @@ -69,7 +74,7 @@ def expand_curies_in_schema(schema):
return new_schema


def uri2label(uri, schema):
def uri2label(uri: str, schema: Schema) -> list:
"""Given a URI, return the label"""
return [
record["rdfs:label"] for record in schema["@graph"] if record["@id"] == uri
Expand Down
9 changes: 6 additions & 3 deletions schematic/utils/df_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,18 +92,21 @@ def find_and_convert_ints(dataframe: pd.DataFrame) -> tuple[pd.DataFrame, pd.Dat
if (
dataframe.size < large_manifest_cutoff_size
): # If small manifest, iterate as normal for improved performance
ints = dataframe.map(
ints = dataframe.map( # type:ignore
lambda cell: convert_ints(cell), na_action="ignore"
).fillna(False)

else: # parallelize iterations for large manifests
pandarallel.initialize(verbose=1)
ints = dataframe.parallel_applymap(
ints = dataframe.parallel_applymap( # type:ignore
lambda cell: convert_ints(cell), na_action="ignore"
).fillna(False)

# Identify cells converted to integers
is_int = ints.map(pd.api.types.is_integer)
is_int = ints.map(pd.api.types.is_integer) # type:ignore

assert isinstance(ints, pd.DataFrame)
assert isinstance(is_int, pd.DataFrame)

return ints, is_int

Expand Down
19 changes: 11 additions & 8 deletions schematic/utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,17 @@ def str2list(item: Any) -> Optional[list]:
return None


def unlist(seq: Sequence) -> Any:
X = TypeVar("X")


def unlist(seq: Sequence[X]) -> Union[Sequence[X], X]:
"""Returns the first item of a sequence
Args:
seq (Sequence): Any sequence
seq (Sequence[X]): A Sequence of any type
Returns:
Any:
Union[Sequence[X], X]:
if sequence is length one, return the first item
otherwise return the sequence
"""
Expand Down Expand Up @@ -151,12 +154,12 @@ def check_synapse_cache_size(
size_in_mb = float(size.rstrip("M"))
byte_size = size_in_mb * 1000000
elif "G" in size:
size_in_gb = float(size.rstrip("G"))
size_in_gb = int(size.rstrip("G"))
byte_size = convert_gb_to_bytes(size_in_gb)
elif "B" in size:
byte_size = float(size.rstrip("B"))
else:
logger.error("Cannot recongize the file size unit")
logger.error("Cannot recognize the file size unit")
return byte_size


Expand Down Expand Up @@ -275,9 +278,9 @@ def profile(
Callable: Profile of the decorated function
"""

def inner(func):
def inner(func: Callable) -> Callable:
@wraps(func)
def wrapper(*args, **kwargs):
def wrapper(*args: Any, **kwargs: Any) -> Callable:
_output_file = output_file or func.__name__ + ".prof"
profiler = Profile()
profiler.enable()
Expand All @@ -299,7 +302,7 @@ def wrapper(*args, **kwargs):
p_stats.sort_stats(*sort_by)
else:
p_stats.sort_stats(sort_by)
p_stats.print_stats(lines_to_print)
p_stats.print_stats(lines_to_print) # type: ignore
return retval

return wrapper
Expand Down
25 changes: 17 additions & 8 deletions schematic/utils/google_api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import os
import logging
import json
from typing import Any, Union
from typing import Any, Union, no_type_check, TypedDict

import pandas as pd
from googleapiclient.discovery import build # type: ignore
from googleapiclient.discovery import build, Resource # type: ignore
from google.oauth2 import service_account # type: ignore
from schematic.configuration.configuration import CONFIG
from schematic.store.synapse import SynapseStorage
Expand All @@ -23,11 +23,18 @@
]


def build_service_account_creds() -> dict[str, Any]:
class GoogleServiceAcountCreds(TypedDict):
"Service account credentials for Google sheets"
sheet_service: Resource
drive_service: Resource
creds: service_account.Credentials


def build_service_account_creds() -> GoogleServiceAcountCreds:
"""Build Google service account credentials
Returns:
dict[str, Any]: The credentials
GoogleServiceAcountCreds: The credentials
"""
if "SERVICE_ACCOUNT_CREDS" in os.environ:
dict_creds = json.loads(os.environ["SERVICE_ACCOUNT_CREDS"])
Expand All @@ -48,15 +55,16 @@ def build_service_account_creds() -> dict[str, Any]:
)

# get a Google Sheet API service
sheet_service = build("sheets", "v4", credentials=credentials)
sheet_service: Resource = build("sheets", "v4", credentials=credentials)
# get a Google Drive API service
drive_service = build("drive", "v3", credentials=credentials)
drive_service: Resource = build("drive", "v3", credentials=credentials)

return {
creds: GoogleServiceAcountCreds = {
"sheet_service": sheet_service,
"drive_service": drive_service,
"creds": credentials,
}
return creds


def download_creds_file() -> None:
Expand Down Expand Up @@ -92,7 +100,8 @@ def download_creds_file() -> None:
)


def execute_google_api_requests(service: Any, requests_body: Any, **kwargs) -> Any:
@no_type_check
def execute_google_api_requests(service, requests_body, **kwargs) -> Any:
"""
Execute google API requests batch; attempt to execute in parallel.
Expand Down
Loading

0 comments on commit 6462e63

Please sign in to comment.