Skip to content

Commit

Permalink
add ruff, run formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
glass-ships committed Oct 24, 2023
1 parent 6cb89b3 commit 773e959
Show file tree
Hide file tree
Showing 35 changed files with 1,010 additions and 860 deletions.
21 changes: 7 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ build:
poetry build

.PHONY: test
test: install
$(RUN) python -m pytest
test:
$(RUN) pytest tests

.PHONY: docs
docs: install
docs:
$(RUN) typer src/koza/main.py utils docs --name koza --output docs/Usage/CLI.md
$(RUN) mkdocs build

Expand All @@ -43,17 +43,10 @@ clean:

.PHONY: lint
lint:
$(RUN) flake8 --exit-zero --max-line-length 120 koza/ tests/ examples/
$(RUN) black --check --diff koza tests
$(RUN) isort --check-only --diff koza tests
$(RUN) ruff check --diff --exit-zero src/ tests/ examples/
$(RUN) black --check --diff -l 120 src/ tests/ examples/

.PHONY: format
format:
$(RUN) autoflake \
--recursive \
--remove-all-unused-imports \
--remove-unused-variables \
--ignore-init-module-imports \
--in-place koza tests examples
$(RUN) isort koza tests examples
$(RUN) black koza tests examples
$(RUN) ruff check --fix --exit-zero src/ tests/ examples/
$(RUN) black -l 120 src/ tests/ examples/
4 changes: 0 additions & 4 deletions examples/maps/custom-entrez-2-string.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
from pathlib import Path
import yaml

from koza.model.config.source_config import FormatType, OutputFormat, PrimaryFileConfig
from koza.cli_runner import get_koza_app

source_name = 'custom-map-protein-links-detailed'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@
gene_b = Gene(id='NCBIGene:' + entrez_2_string[row['protein2']]['entrez'])

pairwise_gene_to_gene_interaction = PairwiseGeneToGeneInteraction(
id="uuid:" + str(uuid.uuid1()),
subject=gene_a.id,
object=gene_b.id,
predicate="biolink:interacts_with"
id="uuid:" + str(uuid.uuid1()), subject=gene_a.id, object=gene_b.id, predicate="biolink:interacts_with"
)

koza_app.write(gene_a, gene_b, pairwise_gene_to_gene_interaction)
6 changes: 2 additions & 4 deletions examples/string-w-map/map-protein-links-detailed.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,14 @@
koza_map = koza_app.get_map(map_name)

from loguru import logger

logger.info(koza_map)

gene_a = Gene(id='NCBIGene:' + koza_map[row['protein1']]['entrez'])
gene_b = Gene(id='NCBIGene:' + koza_map[row['protein2']]['entrez'])

pairwise_gene_to_gene_interaction = PairwiseGeneToGeneInteraction(
id="uuid:" + str(uuid.uuid1()),
subject=gene_a.id,
object=gene_b.id,
predicate="biolink:interacts_with"
id="uuid:" + str(uuid.uuid1()), subject=gene_a.id, object=gene_b.id, predicate="biolink:interacts_with"
)

koza_app.write(gene_a, gene_b, pairwise_gene_to_gene_interaction)
1 change: 0 additions & 1 deletion examples/string/protein-links-detailed.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
koza_app = get_koza_app('protein-links-detailed')

for row in koza_app.source:

protein_a = Protein(id='ENSEMBL:' + re.sub(r'\d+\.', '', row['protein1']))
protein_b = Protein(id='ENSEMBL:' + re.sub(r'\d+\.', '', row['protein2']))

Expand Down
1,523 changes: 877 additions & 646 deletions poetry.lock

Large diffs are not rendered by default.

16 changes: 7 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ packages = [

[tool.poetry.dependencies]
python = "^3.8"
linkml-validator = ">=0.4.4"
pydantic = "^1.0.0"
# linkml-validator = ">=0.4.4"
pydantic = "^2.4"
pyyaml = ">=5.0.0"
requests = "^2.24.0"
ordered-set = ">=4.1.0"
Expand All @@ -27,16 +27,14 @@ loguru = "*"
sssom = "^0.3.41"

[tool.poetry.dev-dependencies]
black = "^23.10.0"
ruff = "*"
pytest = ">=6.0.0"
biolink-model = ">=3.0.1"
autoflake = "^1.3.1"
black = "22.3.0"
isort = "^5.0.6"
dask = ">=2022.5.2"
mkdocs = ">=1.3.0"
mkdocs-material = ">=8.3.4"
mkdocstrings = "^0.19.0"
mkdocstrings-python = "*"
mkdocs = ">=1.4.2"
mkdocs-material = ">=9.1.16"
mkdocstrings = {extras = ["python"], version = ">=0.22.0"}

[tool.poetry.scripts]
koza = "koza.main:typer_app"
Expand Down
3 changes: 2 additions & 1 deletion src/koza/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from importlib import metadata
__version__ = metadata.version("koza")

__version__ = metadata.version("koza")
5 changes: 2 additions & 3 deletions src/koza/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def __init__(
output_dir: str = './output',
output_format: OutputFormat = OutputFormat('jsonl'),
schema: str = None,
logger = None,
logger=None,
):
self.source = source
self.translation_table = translation_table
Expand All @@ -50,7 +50,7 @@ def __init__(
for map_file in source.config.depends_on:
with open(map_file, 'r') as map_file_fh:
map_file_config = MapFileConfig(**yaml.load(map_file_fh, Loader=UniqueIncludeLoader))
map_file_config.transform_code = (str(Path(map_file).parent / Path(map_file).stem) + '.py')
map_file_config.transform_code = str(Path(map_file).parent / Path(map_file).stem) + '.py'
self._map_registry[map_file_config.name] = Source(map_file_config)

def get_map(self, map_name: str):
Expand Down Expand Up @@ -169,7 +169,6 @@ def _get_writer(self) -> Union[TSVWriter, JSONLWriter]:
return JSONLWriter(*writer_params)

def _load_map(self, map_file: Source):

if not isinstance(map_file.config, MapFileConfig):
raise ValueError(f"Error loading map: {map_file.config.name} is not a MapFileConfig")

Expand Down
20 changes: 9 additions & 11 deletions src/koza/cli_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

from pathlib import Path
from typing import Dict, List, Optional, Union
from typing import Dict, Optional, Union
import yaml

from koza.app import KozaApp
Expand Down Expand Up @@ -45,7 +45,7 @@ def transform_source(
log: bool = False,
):
"""Create a KozaApp object, process maps, and run the transform
Args:
source (str): Path to source metadata file
output_dir (str): Path to output directory
Expand All @@ -57,11 +57,11 @@ def transform_source(
verbose (bool, optional): Verbose logging. Defaults to None.
log (bool, optional): Log to file. Defaults to False.
"""
logger = get_logger(name = Path(source).name if log else None, verbose = verbose)
logger = get_logger(name=Path(source).name if log else None, verbose=verbose)

with open(source, 'r') as source_fh:
source_config = PrimaryFileConfig(**yaml.load(source_fh, Loader=UniqueIncludeLoader))

if not source_config.name:
source_config.name = Path(source).stem

Expand All @@ -74,7 +74,7 @@ def transform_source(
translation_table = get_translation_table(
global_table if global_table else source_config.global_table,
local_table if local_table else source_config.local_table,
logger
logger,
)

koza_app = _set_koza_app(koza_source, translation_table, output_dir, output_format, schema, logger)
Expand All @@ -97,7 +97,6 @@ def validate_file(
"""

with open_resource(file) as resource_io:

if format == FormatType.csv:
reader = CSVReader(
resource_io,
Expand All @@ -117,9 +116,9 @@ def validate_file(


def get_translation_table(
global_table: Union[str, Dict] = None,
global_table: Union[str, Dict] = None,
local_table: Union[str, Dict] = None,
logger = None,
logger=None,
) -> TranslationTable:
"""Create a translation table object from two file paths
Expand All @@ -140,7 +139,6 @@ def get_translation_table(
else:
logger.debug("No global table used for transform")
else:

if isinstance(global_table, str):
with open(global_table, 'r') as global_tt_fh:
global_tt = yaml.safe_load(global_tt_fh)
Expand All @@ -166,7 +164,7 @@ def _set_koza_app(
output_dir: str = './output',
output_format: OutputFormat = OutputFormat('tsv'),
schema: str = None,
logger = None,
logger=None,
) -> KozaApp:
"""Create a KozaApp object for a given source"""

Expand Down
11 changes: 4 additions & 7 deletions src/koza/converter/kgx_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Iterable, Tuple
from pydantic import BaseModel


class KGXConverter:
"""
Converts the biolink model to the KGX format, which splits
Expand All @@ -15,28 +16,24 @@ class KGXConverter:
"""

def convert(self, entities: Iterable) -> Tuple[list, list]:

nodes = []
edges = []

for entity in entities:

# if entity has subject + object + predicate, treat as edge
if all(hasattr(entity, attr) for attr in ["subject", "object", "predicate"]):
edges.append(self.convert_association(entity))

# if entity has id and name, but not subject/object/predicate, treat as node
elif (
all(hasattr(entity, attr) for attr in ["id", "name"])
and not all(hasattr(entity, attr) for attr in ["subject", "object", "predicate"])
):
elif all(hasattr(entity, attr) for attr in ["id", "name"]) and not all(
hasattr(entity, attr) for attr in ["subject", "object", "predicate"]
):
nodes.append(self.convert_node(entity))

# otherwise, not a valid entity
else:
raise ValueError(
f"Cannot convert {entity}: Can only convert NamedThing or Association entities to KGX compatible dictionaries"

)

return nodes, edges
Expand Down
27 changes: 7 additions & 20 deletions src/koza/io/reader/csv_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ def __iter__(self) -> Iterator:
return self

def __next__(self) -> Dict[str, Any]:

if not self._header:
self._set_header()

Expand Down Expand Up @@ -148,14 +147,10 @@ def __next__(self) -> Dict[str, Any]:
field_map = dict(zip(self._header, stripped_row))

if fields_len > row_len:
raise ValueError(
f"CSV file {self.name} has {fields_len - row_len} fewer columns at {self.reader.line_num}"
)
raise ValueError(f"CSV file {self.name} has {fields_len - row_len} fewer columns at {self.reader.line_num}")

elif fields_len < row_len:
logger.warning(
f"CSV file {self.name} has {row_len - fields_len} extra columns at {self.reader.line_num}"
)
logger.warning(f"CSV file {self.name} has {row_len - fields_len} extra columns at {self.reader.line_num}")
# Not sure if this would serve a purpose
#
# if not 'extra_cols' in self.field_type_map:
Expand Down Expand Up @@ -201,27 +196,19 @@ def _set_header(self):
self._header = list(self.field_type_map.keys())
else:
raise ValueError(
f"there is no header and columns have not been supplied\n"
f"configure the 'columns' property in the source yaml"
"there is no header and columns have not been supplied\n"
"configure the 'columns' property in the source yaml"
)

def _parse_header_line(self, skip_blank_or_commented_lines: bool = False) -> List[str]:
"""
Parse the header line and return a list of headers
"""
fieldnames = next(
reader(self.io_str, **{'delimiter': self.header_delimiter, 'dialect': self.dialect})
)
fieldnames = next(reader(self.io_str, **{'delimiter': self.header_delimiter, 'dialect': self.dialect}))
if skip_blank_or_commented_lines:
# there has to be a cleaner way to do this
while not fieldnames or (
self.comment_char is not None and fieldnames[0].startswith(self.comment_char)
):
fieldnames = next(
reader(
self.io_str, **{'delimiter': self.header_delimiter, 'dialect': self.dialect}
)
)
while not fieldnames or (self.comment_char is not None and fieldnames[0].startswith(self.comment_char)):
fieldnames = next(reader(self.io_str, **{'delimiter': self.header_delimiter, 'dialect': self.dialect}))

fieldnames[0] = fieldnames[0].lstrip(self.comment_char)
return [f.strip() for f in fieldnames]
Expand Down
8 changes: 4 additions & 4 deletions src/koza/io/reader/json_reader.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import json, yaml
from typing import IO, Any, Dict, Iterator, List, Union
#from xmlrpc.client import Boolean

# from xmlrpc.client import Boolean

from koza.io.utils import check_data

# from koza.utils.log_utils import get_logger
# logger = get_logger(__name__)
# import logging
# logger = logging.getLogger(__name__)
from loguru import logger


class JSONReader:
"""
A JSON reader that optionally iterates over a json list
Expand Down Expand Up @@ -36,8 +39,6 @@ def __init__(
self.json_path = json_path
self.name = name



if self.json_path:
if is_yaml:
self.json_obj = yaml.safe_load(self.io_str)
Expand Down Expand Up @@ -68,7 +69,6 @@ def __iter__(self) -> Iterator:
return self

def __next__(self) -> Dict[str, Any]:

if self._line_num == self._line_limit:
logger.info(f"Finished processing {self._line_num} rows for {self.name} from file {self.io_str.name}")
raise StopIteration
Expand Down
Loading

0 comments on commit 773e959

Please sign in to comment.