add ruff, run formatting

monarch-initiative · Oct 24, 2023 · 773e959 · 773e959
1 parent 6cb89b3
commit 773e959
Show file tree

Hide file tree

Showing 35 changed files with 1,010 additions and 860 deletions.
diff --git a/Makefile b/Makefile
@@ -25,11 +25,11 @@ build:
 	poetry build
 
 .PHONY: test
-test: install
-	$(RUN) python -m pytest
+test:
+	$(RUN) pytest tests
 
 .PHONY: docs
-docs: install
+docs:
 	$(RUN) typer src/koza/main.py utils docs --name koza --output docs/Usage/CLI.md
 	$(RUN) mkdocs build
 
@@ -43,17 +43,10 @@ clean:
 
 .PHONY: lint
 lint:
-	$(RUN) flake8 --exit-zero --max-line-length 120 koza/ tests/ examples/
-	$(RUN) black --check --diff koza tests
-	$(RUN) isort --check-only --diff koza tests
+	$(RUN) ruff check --diff --exit-zero src/ tests/ examples/
+	$(RUN) black --check --diff -l 120 src/ tests/ examples/
 
 .PHONY: format
 format:
-	$(RUN) autoflake \
-		--recursive \
-		--remove-all-unused-imports \
-		--remove-unused-variables \
-		--ignore-init-module-imports \
-		--in-place koza tests examples
-	$(RUN) isort koza tests examples
-	$(RUN) black koza tests examples
+	$(RUN) ruff check --fix --exit-zero src/ tests/ examples/
+	$(RUN) black -l 120 src/ tests/ examples/
diff --git a/examples/maps/custom-entrez-2-string.py b/examples/maps/custom-entrez-2-string.py
@@ -1,7 +1,3 @@
-from pathlib import Path
-import yaml
-
-from koza.model.config.source_config import FormatType, OutputFormat, PrimaryFileConfig
 from koza.cli_runner import get_koza_app
 
 source_name = 'custom-map-protein-links-detailed'

diff --git a/examples/string-w-custom-map/custom-map-protein-links-detailed.py b/examples/string-w-custom-map/custom-map-protein-links-detailed.py
@@ -13,10 +13,7 @@
 gene_b = Gene(id='NCBIGene:' + entrez_2_string[row['protein2']]['entrez'])
 
 pairwise_gene_to_gene_interaction = PairwiseGeneToGeneInteraction(
-    id="uuid:" + str(uuid.uuid1()),
-    subject=gene_a.id,
-    object=gene_b.id,
-    predicate="biolink:interacts_with"
+    id="uuid:" + str(uuid.uuid1()), subject=gene_a.id, object=gene_b.id, predicate="biolink:interacts_with"
 )
 
 koza_app.write(gene_a, gene_b, pairwise_gene_to_gene_interaction)
diff --git a/examples/string-w-map/map-protein-links-detailed.py b/examples/string-w-map/map-protein-links-detailed.py
@@ -12,16 +12,14 @@
 koza_map = koza_app.get_map(map_name)
 
 from loguru import logger
+
 logger.info(koza_map)
 
 gene_a = Gene(id='NCBIGene:' + koza_map[row['protein1']]['entrez'])
 gene_b = Gene(id='NCBIGene:' + koza_map[row['protein2']]['entrez'])
 
 pairwise_gene_to_gene_interaction = PairwiseGeneToGeneInteraction(
-    id="uuid:" + str(uuid.uuid1()),
-    subject=gene_a.id,
-    object=gene_b.id,
-    predicate="biolink:interacts_with"
+    id="uuid:" + str(uuid.uuid1()), subject=gene_a.id, object=gene_b.id, predicate="biolink:interacts_with"
 )
 
 koza_app.write(gene_a, gene_b, pairwise_gene_to_gene_interaction)
diff --git a/examples/string/protein-links-detailed.py b/examples/string/protein-links-detailed.py
@@ -8,7 +8,6 @@
 koza_app = get_koza_app('protein-links-detailed')
 
 for row in koza_app.source:
-
     protein_a = Protein(id='ENSEMBL:' + re.sub(r'\d+\.', '', row['protein1']))
     protein_b = Protein(id='ENSEMBL:' + re.sub(r'\d+\.', '', row['protein2']))
 

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -16,8 +16,8 @@ packages = [
 
 [tool.poetry.dependencies]
 python = "^3.8"
-linkml-validator = ">=0.4.4"
-pydantic = "^1.0.0"
+# linkml-validator = ">=0.4.4"
+pydantic = "^2.4"
 pyyaml = ">=5.0.0"
 requests = "^2.24.0"
 ordered-set = ">=4.1.0"
@@ -27,16 +27,14 @@ loguru = "*"
 sssom = "^0.3.41"
 
 [tool.poetry.dev-dependencies]
+black = "^23.10.0"
+ruff = "*"
 pytest = ">=6.0.0"
 biolink-model = ">=3.0.1"
-autoflake = "^1.3.1"
-black = "22.3.0"
-isort = "^5.0.6"
 dask = ">=2022.5.2"
-mkdocs = ">=1.3.0"
-mkdocs-material = ">=8.3.4"
-mkdocstrings = "^0.19.0"
-mkdocstrings-python = "*"
+mkdocs = ">=1.4.2"
+mkdocs-material = ">=9.1.16"
+mkdocstrings = {extras = ["python"], version = ">=0.22.0"}
 
 [tool.poetry.scripts]
 koza = "koza.main:typer_app"

diff --git a/src/koza/__init__.py b/src/koza/__init__.py
@@ -1,2 +1,3 @@
 from importlib import metadata
-__version__ = metadata.version("koza")
+
+__version__ = metadata.version("koza")
diff --git a/src/koza/app.py b/src/koza/app.py
@@ -30,7 +30,7 @@ def __init__(
         output_dir: str = './output',
         output_format: OutputFormat = OutputFormat('jsonl'),
         schema: str = None,
-        logger = None,
+        logger=None,
     ):
         self.source = source
         self.translation_table = translation_table
@@ -50,7 +50,7 @@ def __init__(
             for map_file in source.config.depends_on:
                 with open(map_file, 'r') as map_file_fh:
                     map_file_config = MapFileConfig(**yaml.load(map_file_fh, Loader=UniqueIncludeLoader))
-                    map_file_config.transform_code = (str(Path(map_file).parent / Path(map_file).stem) + '.py')
+                    map_file_config.transform_code = str(Path(map_file).parent / Path(map_file).stem) + '.py'
                 self._map_registry[map_file_config.name] = Source(map_file_config)
 
     def get_map(self, map_name: str):
@@ -169,7 +169,6 @@ def _get_writer(self) -> Union[TSVWriter, JSONLWriter]:
             return JSONLWriter(*writer_params)
 
     def _load_map(self, map_file: Source):
-
         if not isinstance(map_file.config, MapFileConfig):
             raise ValueError(f"Error loading map: {map_file.config.name} is not a MapFileConfig")
 

diff --git a/src/koza/cli_runner.py b/src/koza/cli_runner.py
@@ -3,7 +3,7 @@
 """
 
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Dict, Optional, Union
 import yaml
 
 from koza.app import KozaApp
@@ -45,7 +45,7 @@ def transform_source(
     log: bool = False,
 ):
     """Create a KozaApp object, process maps, and run the transform
-    
+
     Args:
         source (str): Path to source metadata file
         output_dir (str): Path to output directory
@@ -57,11 +57,11 @@ def transform_source(
         verbose (bool, optional): Verbose logging. Defaults to None.
         log (bool, optional): Log to file. Defaults to False.
     """
-    logger = get_logger(name = Path(source).name if log else None, verbose = verbose)
-    
+    logger = get_logger(name=Path(source).name if log else None, verbose=verbose)
+
     with open(source, 'r') as source_fh:
         source_config = PrimaryFileConfig(**yaml.load(source_fh, Loader=UniqueIncludeLoader))
-        
+
     if not source_config.name:
         source_config.name = Path(source).stem
 
@@ -74,7 +74,7 @@ def transform_source(
     translation_table = get_translation_table(
         global_table if global_table else source_config.global_table,
         local_table if local_table else source_config.local_table,
-        logger
+        logger,
     )
 
     koza_app = _set_koza_app(koza_source, translation_table, output_dir, output_format, schema, logger)
@@ -97,7 +97,6 @@ def validate_file(
     """
 
     with open_resource(file) as resource_io:
-
         if format == FormatType.csv:
             reader = CSVReader(
                 resource_io,
@@ -117,9 +116,9 @@ def validate_file(
 
 
 def get_translation_table(
-    global_table: Union[str, Dict] = None, 
+    global_table: Union[str, Dict] = None,
     local_table: Union[str, Dict] = None,
-    logger = None,
+    logger=None,
 ) -> TranslationTable:
     """Create a translation table object from two file paths
 
@@ -140,7 +139,6 @@ def get_translation_table(
         else:
             logger.debug("No global table used for transform")
     else:
-
         if isinstance(global_table, str):
             with open(global_table, 'r') as global_tt_fh:
                 global_tt = yaml.safe_load(global_tt_fh)
@@ -166,7 +164,7 @@ def _set_koza_app(
     output_dir: str = './output',
     output_format: OutputFormat = OutputFormat('tsv'),
     schema: str = None,
-    logger = None,
+    logger=None,
 ) -> KozaApp:
     """Create a KozaApp object for a given source"""
 

diff --git a/src/koza/converter/kgx_converter.py b/src/koza/converter/kgx_converter.py
@@ -2,6 +2,7 @@
 from typing import Iterable, Tuple
 from pydantic import BaseModel
 
+
 class KGXConverter:
     """
     Converts the biolink model to the KGX format, which splits
@@ -15,28 +16,24 @@ class KGXConverter:
     """
 
     def convert(self, entities: Iterable) -> Tuple[list, list]:
-
         nodes = []
         edges = []
 
         for entity in entities:
-
             # if entity has subject + object + predicate, treat as edge
             if all(hasattr(entity, attr) for attr in ["subject", "object", "predicate"]):
                 edges.append(self.convert_association(entity))
 
             # if entity has id and name, but not subject/object/predicate, treat as node
-            elif (
-                    all(hasattr(entity, attr) for attr in ["id", "name"])
-                    and not all(hasattr(entity, attr) for attr in ["subject", "object", "predicate"])
-                ):
+            elif all(hasattr(entity, attr) for attr in ["id", "name"]) and not all(
+                hasattr(entity, attr) for attr in ["subject", "object", "predicate"]
+            ):
                 nodes.append(self.convert_node(entity))
 
             # otherwise, not a valid entity
             else:
                 raise ValueError(
                     f"Cannot convert {entity}: Can only convert NamedThing or Association entities to KGX compatible dictionaries"
-
                 )
 
         return nodes, edges

diff --git a/src/koza/io/reader/csv_reader.py b/src/koza/io/reader/csv_reader.py
@@ -106,7 +106,6 @@ def __iter__(self) -> Iterator:
         return self
 
     def __next__(self) -> Dict[str, Any]:
-
         if not self._header:
             self._set_header()
 
@@ -148,14 +147,10 @@ def __next__(self) -> Dict[str, Any]:
         field_map = dict(zip(self._header, stripped_row))
 
         if fields_len > row_len:
-            raise ValueError(
-                f"CSV file {self.name} has {fields_len - row_len} fewer columns at {self.reader.line_num}"
-            )
+            raise ValueError(f"CSV file {self.name} has {fields_len - row_len} fewer columns at {self.reader.line_num}")
 
         elif fields_len < row_len:
-            logger.warning(
-                f"CSV file {self.name} has {row_len - fields_len} extra columns at {self.reader.line_num}"
-            )
+            logger.warning(f"CSV file {self.name} has {row_len - fields_len} extra columns at {self.reader.line_num}")
             # Not sure if this would serve a purpose
             #
             # if not 'extra_cols' in self.field_type_map:
@@ -201,27 +196,19 @@ def _set_header(self):
                 self._header = list(self.field_type_map.keys())
             else:
                 raise ValueError(
-                    f"there is no header and columns have not been supplied\n"
-                    f"configure the 'columns' property in the source yaml"
+                    "there is no header and columns have not been supplied\n"
+                    "configure the 'columns' property in the source yaml"
                 )
 
     def _parse_header_line(self, skip_blank_or_commented_lines: bool = False) -> List[str]:
         """
         Parse the header line and return a list of headers
         """
-        fieldnames = next(
-            reader(self.io_str, **{'delimiter': self.header_delimiter, 'dialect': self.dialect})
-        )
+        fieldnames = next(reader(self.io_str, **{'delimiter': self.header_delimiter, 'dialect': self.dialect}))
         if skip_blank_or_commented_lines:
             # there has to be a cleaner way to do this
-            while not fieldnames or (
-                self.comment_char is not None and fieldnames[0].startswith(self.comment_char)
-            ):
-                fieldnames = next(
-                    reader(
-                        self.io_str, **{'delimiter': self.header_delimiter, 'dialect': self.dialect}
-                    )
-                )
+            while not fieldnames or (self.comment_char is not None and fieldnames[0].startswith(self.comment_char)):
+                fieldnames = next(reader(self.io_str, **{'delimiter': self.header_delimiter, 'dialect': self.dialect}))
 
         fieldnames[0] = fieldnames[0].lstrip(self.comment_char)
         return [f.strip() for f in fieldnames]

diff --git a/src/koza/io/reader/json_reader.py b/src/koza/io/reader/json_reader.py
@@ -1,14 +1,17 @@
 import json, yaml
 from typing import IO, Any, Dict, Iterator, List, Union
-#from xmlrpc.client import Boolean
+
+# from xmlrpc.client import Boolean
 
 from koza.io.utils import check_data
+
 # from koza.utils.log_utils import get_logger
 # logger = get_logger(__name__)
 # import logging
 # logger = logging.getLogger(__name__)
 from loguru import logger
 
+
 class JSONReader:
     """
     A JSON reader that optionally iterates over a json list
@@ -36,8 +39,6 @@ def __init__(
         self.json_path = json_path
         self.name = name
 
-
-
         if self.json_path:
             if is_yaml:
                 self.json_obj = yaml.safe_load(self.io_str)
@@ -68,7 +69,6 @@ def __iter__(self) -> Iterator:
         return self
 
     def __next__(self) -> Dict[str, Any]:
-
         if self._line_num == self._line_limit:
             logger.info(f"Finished processing {self._line_num} rows for {self.name} from file {self.io_str.name}")
             raise StopIteration