diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..9581b95 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,3 @@ +# .git-blame-ignore-revs +# Initial formatting with Ruff +7c35332e15e8923992d77231fbcf2f19706bf625 diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index db2f204..a9084c4 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -33,9 +33,9 @@ jobs: run: | isort --check src - - name: Format check with black + - name: Format check with ruff run: | - black --check src + ruff format --check src - name: Security check with bandit run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 384b910..eb0eb87 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,8 +16,8 @@ repos: entry: isort language: system types: [python] - - id: black - name: black - entry: black - language: system - types: [python] +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.4 + hooks: + - id: ruff-format + args: [ --check ] diff --git a/Makefile b/Makefile index 8464cfa..1474805 100644 --- a/Makefile +++ b/Makefile @@ -68,7 +68,7 @@ build: %: cqa: flake8 src --count --select=E9,F63,F7,F82 --show-source --statistics isort --profile black --check src - black --check src + ruff format --check src tests bandit -ll -r src #=> test: execute tests @@ -97,7 +97,7 @@ cqa: flake8 src --show-source --statistics pyright isort --check src --profile black - black --check src + ruff format --check src bandit -ll -r src #=> reformat: reformat code @@ -108,13 +108,13 @@ reformat: ############################################################################ #= UTILITY TARGETS -#=> reformat: reformat code with yapf and commit +#=> reformat: reformat code and commit .PHONY: reformat reformat: @if ! 
git diff --cached --exit-code >/dev/null; then echo "Repository not clean" 1>&2; exit 1; fi - black src tests + ruff src tests isort src tests - git commit -a -m "reformatted with black and isort" + git commit -a -m "reformatted with ruff and isort" #=> rename: rename files and substitute content for new repo name .PHONY: rename diff --git a/docs/store.rst b/docs/store.rst index 4ddee91..6ad8389 100644 --- a/docs/store.rst +++ b/docs/store.rst @@ -24,9 +24,12 @@ To add a new sequence and/or aliases in a Python environment, construct a ``SeqR n_seqs_added, n_aliases_added = sr.store(sequence, nsaliases) print(n_seqs_added, n_aliases_added) # (1, 1) + sr.commit() ``store()`` returns a tuple containing the number of new sequences and aliases that were successfully added (the sha512t24u sequence hash is not counted as a new alias, because it is automatically added with a new sequence as the main sequence identifier). +Note that the ``commit()`` method MUST be called before the end of an interpreter session for data to be durably committed to the database. ``store()`` only stages pending database additions; for performance reasons, it calls ``commit()`` itself only when the number of staged changes exceeds a (relatively large) threshold. + .. 
and this should be a "tip" admonition or something of that nature Load FASTA files diff --git a/misc/docker/seqrepo.df b/misc/docker/seqrepo.df index 08f8a0f..8e10ede 100644 --- a/misc/docker/seqrepo.df +++ b/misc/docker/seqrepo.df @@ -37,7 +37,6 @@ RUN pip3 install --upgrade \ pygments \ pysam \ simplegeneric \ - six \ tqdm \ traitlets \ wcwidth \ diff --git a/pyproject.toml b/pyproject.toml index 0cd0dd9..2644064 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", - "License :: OSI Approved :: MIT License", + "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", ] dynamic = ["version"] @@ -22,7 +22,6 @@ dependencies = [ "ipython ~= 8.4", "pysam ~= 0.22", "requests ~= 2.31", - "six ~= 1.16", "tqdm ~= 4.66", "yoyo-migrations ~= 8.2", "setuptools", # pin until yoyo-migrations doesn't use pkg_resources @@ -31,7 +30,6 @@ dependencies = [ [project.optional-dependencies] dev = [ "bandit ~= 1.7", - "black ~= 22.3", "build ~= 0.8", "flake8 ~= 4.0", "ipython ~= 8.4", @@ -44,6 +42,7 @@ dev = [ "pytest ~= 7.1", "pyright~=1.1", "requests_html ~= 0.10", + "ruff == 0.4.4", "tox ~= 3.25", "vcrpy", ] @@ -118,9 +117,6 @@ exclude_lines = [ "if __name__ == .__main__.:", ] -[tool.black] -line-length = 100 - [tool.isort] profile = "black" src_paths = ["src", "tests"] @@ -135,3 +131,7 @@ disable = "R0913" [tool.pylint.format] max-line-length = 100 + +[tool.ruff] +src = ["src", "tests"] +line-length = 100 diff --git a/src/biocommons/seqrepo/cli.py b/src/biocommons/seqrepo/cli.py index 063b142..a767d34 100644 --- a/src/biocommons/seqrepo/cli.py +++ b/src/biocommons/seqrepo/cli.py @@ -11,8 +11,6 @@ """ -from __future__ import division, print_function - import argparse import datetime import gzip @@ -527,7 +525,10 @@ def load(opts: argparse.Namespace) -> None: fh = io.open(fn, mode="rt", 
encoding="ascii") _logger.info("Opened " + fn) seq_bar = tqdm.tqdm( - FastaIter(fh), unit=" seqs", disable=disable_bar, leave=False # type: ignore noqa: E501 + FastaIter(fh), # type: ignore + unit=" seqs", + disable=disable_bar, + leave=False, ) for defline, seq in seq_bar: # type: ignore n_seqs_seen += 1 diff --git a/src/biocommons/seqrepo/fastadir/bases.py b/src/biocommons/seqrepo/fastadir/bases.py index 5804a1f..3ee2a27 100644 --- a/src/biocommons/seqrepo/fastadir/bases.py +++ b/src/biocommons/seqrepo/fastadir/bases.py @@ -1,11 +1,8 @@ import abc from typing import Optional -import six - -@six.add_metaclass(abc.ABCMeta) -class BaseReader: +class BaseReader(metaclass=abc.ABCMeta): @abc.abstractmethod def fetch(self, seq_id: str, start: Optional[int] = None, end: Optional[int] = None) -> str: raise NotImplementedError @@ -14,8 +11,7 @@ def __getitem__(self, ac: str) -> str: return self.fetch(ac) -@six.add_metaclass(abc.ABCMeta) -class BaseWriter: +class BaseWriter(metaclass=abc.ABCMeta): @abc.abstractmethod def store(self, seq_id: str, seq: str) -> str: pass # pragma: no cover diff --git a/src/biocommons/seqrepo/fastadir/fabgz.py b/src/biocommons/seqrepo/fastadir/fabgz.py index 936b9d1..10f9a40 100644 --- a/src/biocommons/seqrepo/fastadir/fabgz.py +++ b/src/biocommons/seqrepo/fastadir/fabgz.py @@ -6,8 +6,6 @@ """ -from __future__ import unicode_literals - import io import logging import os @@ -19,7 +17,6 @@ from types import TracebackType from typing import Optional, Type -import six from pysam import FastaFile from typing_extensions import Self @@ -48,7 +45,6 @@ def _get_bgzip_version(exe: str) -> str: def _find_bgzip() -> str: """return path to bgzip if found and meets version requirements, else exception""" - missing_file_exception = OSError if six.PY2 else FileNotFoundError min_bgzip_version = ".".join(map(str, min_bgzip_version_info)) exe = os.environ.get("SEQREPO_BGZIP_PATH", shutil.which("bgzip") or "/usr/bin/bgzip") @@ -56,7 +52,7 @@ def _find_bgzip() -> 
str: bgzip_version = _get_bgzip_version(exe) except AttributeError: raise RuntimeError("Didn't find version string in bgzip executable ({exe})".format(exe=exe)) - except missing_file_exception: + except FileNotFoundError: raise RuntimeError( "{exe} doesn't exist; you need to install htslib and tabix " "(See https://github.com/biocommons/biocommons.seqrepo#requirements)".format(exe=exe) @@ -119,7 +115,7 @@ def __init__(self, filename: str) -> None: super(FabgzWriter, self).__init__() self.filename = filename - self.fh = None + self._fh = None self._basepath, suffix = os.path.splitext(self.filename) if suffix != ".bgz": raise RuntimeError("Path must end with .bgz") diff --git a/src/biocommons/seqrepo/fastadir/fastadir.py b/src/biocommons/seqrepo/fastadir/fastadir.py index 83b3851..1336a3c 100644 --- a/src/biocommons/seqrepo/fastadir/fastadir.py +++ b/src/biocommons/seqrepo/fastadir/fastadir.py @@ -81,9 +81,7 @@ def __init__( if schema_version != expected_schema_version: raise RuntimeError( """Upgrade required: Database schema - version is {} and code expects {}""".format( - schema_version, expected_schema_version - ) + version is {} and code expects {}""".format(schema_version, expected_schema_version) ) if fd_cache_size == 0: @@ -142,9 +140,7 @@ def fetch(self, seq_id: str, start: Optional[int] = None, end: Optional[int] = N if self._writing and self._writing["relpath"] == rec["relpath"]: _logger.warning( """Fetching from file opened for writing; - closing first ({})""".format( - rec["relpath"] - ) + closing first ({})""".format(rec["relpath"]) ) self.commit() diff --git a/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py b/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py index 916106d..2a26ba8 100644 --- a/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py +++ b/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py @@ -53,8 +53,9 @@ def __init__( # if we're not at the expected schema version for this code, bail if schema_version != expected_schema_version: # pragma: no cover 
raise RuntimeError( - "Upgrade required: Database schema" - "version is {} and code expects {}".format(schema_version, expected_schema_version) + "Upgrade required: Database schema" "version is {} and code expects {}".format( + schema_version, expected_schema_version + ) ) # ############################################################################ diff --git a/tests/.DS_Store b/tests/.DS_Store new file mode 100644 index 0000000..2b44c08 Binary files /dev/null and b/tests/.DS_Store differ diff --git a/tests/test_cli.py b/tests/test_cli.py index 6250f1a..5abbdbe 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals - import io import os import tempfile diff --git a/tests/test_fabgz.py b/tests/test_fabgz.py index 56cfd3b..89d8214 100644 --- a/tests/test_fabgz.py +++ b/tests/test_fabgz.py @@ -3,7 +3,6 @@ import tempfile import pytest -import six from biocommons.seqrepo.fastadir.fabgz import FabgzReader, FabgzWriter @@ -19,18 +18,18 @@ def test_write_reread(): # write sequences faw = FabgzWriter(fabgz_fn) - for seq_id, seq in six.iteritems(sequences): + for seq_id, seq in sequences.items(): faw.store(seq_id, seq) # add twice to demonstrate non-redundancy - for seq_id, seq in six.iteritems(sequences): + for seq_id, seq in sequences.items(): faw.store(seq_id, seq) faw.close() # now read them back far = FabgzReader(fabgz_fn) - assert far.filename.startswith(tmpdir.encode()) + assert far.filename.startswith(tmpdir.encode()) # type: ignore assert set(far.keys()) == set(sequences.keys()) - assert 5 == len(far), "expected 5 sequences" + assert 5 == len(far), "expected 5 sequences" # type: ignore assert "l10" in far.keys() assert far["l10"] == seed * 10 for seq_id in far.keys(): diff --git a/tests/test_fastaiter.py b/tests/test_fastaiter.py index 0d31310..93a0659 100644 --- a/tests/test_fastaiter.py +++ b/tests/test_fastaiter.py @@ -1,6 +1,6 @@ +from io import StringIO + import pytest -import 
six -from six.moves import StringIO from biocommons.seqrepo.fastaiter import FastaIter @@ -12,7 +12,7 @@ def test_empty(): # should return an empty generator with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_noheader(): @@ -22,7 +22,7 @@ def test_noheader(): # should return an empty generator with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_single(): @@ -30,13 +30,13 @@ def test_single(): iterator = FastaIter(data) - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq1" assert seq == "ACGT" # should be empty now with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_multiple(): @@ -44,21 +44,21 @@ def test_multiple(): iterator = FastaIter(data) - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq1" assert seq == "ACGT" - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq2" assert seq == "TGCA" - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq3" assert seq == "TTTT" # should be empty now with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_multiline(): @@ -66,10 +66,10 @@ def test_multiline(): iterator = FastaIter(data) - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq1" assert seq == "ACGTTGCA" # should be empty now with pytest.raises(StopIteration): - six.next(iterator) + next(iterator)