From 79a16f9f22ace061f7d6f124ae7078fedd8b8121 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Wed, 5 Jun 2024 08:05:26 -0400 Subject: [PATCH 1/7] fix: remove unneeded __future__ imports (#153) --- src/biocommons/seqrepo/cli.py | 3 --- src/biocommons/seqrepo/fastadir/fabgz.py | 3 --- tests/.DS_Store | Bin 0 -> 6148 bytes tests/test_cli.py | 2 -- 4 files changed, 8 deletions(-) create mode 100644 tests/.DS_Store diff --git a/src/biocommons/seqrepo/cli.py b/src/biocommons/seqrepo/cli.py index 063b142..d2b76a6 100644 --- a/src/biocommons/seqrepo/cli.py +++ b/src/biocommons/seqrepo/cli.py @@ -10,9 +10,6 @@ $ seqrepo --help """ - -from __future__ import division, print_function - import argparse import datetime import gzip diff --git a/src/biocommons/seqrepo/fastadir/fabgz.py b/src/biocommons/seqrepo/fastadir/fabgz.py index 936b9d1..cfa20a6 100644 --- a/src/biocommons/seqrepo/fastadir/fabgz.py +++ b/src/biocommons/seqrepo/fastadir/fabgz.py @@ -5,9 +5,6 @@ Files must be named as .fa.bgz to be recognized as blocked gzip compressed """ - -from __future__ import unicode_literals - import io import logging import os diff --git a/tests/.DS_Store b/tests/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2b44c08c419ab4421dd39e2265f42a7113c18362 GIT binary patch literal 6148 zcmeHKJx;?=4EAd?q_A{EVtL$v-XK(AVnOQ4K-*MdNR&!##hQgXaR4?>!vzqZpRJH6 zsl{PW}PSY-Cc$n8se=UXly zq73!zX<6iL-}PJD*RJL1p{b?GEvw%OhWy=qA9-IBW55_N2F{fM^lX;+QqWdoz!)$F z8V30L;Gv9(Vl5ax9azE$0E}P`f^(iFIL9j{inSm-5GScXNp(76I7x@yYh0pO3radU z9X_1y>~unLVRzi$hj4OSNZMO_ w568XML!Y5699IiADL4tO7_r=nub@F-_k03O6l+0PAoe2=Xt2c?_*Vvg0GdKi_W%F@ literal 0 HcmV?d00001 diff --git a/tests/test_cli.py b/tests/test_cli.py index 6250f1a..5abbdbe 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals - import io import os import tempfile From 9633d0c78b7b43f2f59e168080f9bb6097d77c69 Mon Sep 17 00:00:00 2001 From: James Stevenson 
Date: Wed, 5 Jun 2024 08:05:48 -0400 Subject: [PATCH 2/7] chore: fix license classifier (#154) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e6cc30c..863f2ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", - "License :: OSI Approved :: MIT License", + "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", ] dynamic = ["version"] From c4e620b0ead9270562231144a996ef0b367356f1 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Wed, 5 Jun 2024 08:06:05 -0400 Subject: [PATCH 3/7] docs: add note about commit() (#157) --- docs/store.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/store.rst b/docs/store.rst index 4ddee91..6ad8389 100644 --- a/docs/store.rst +++ b/docs/store.rst @@ -24,9 +24,12 @@ To add a new sequence and/or aliases in a Python environment, construct a ``SeqR n_seqs_added, n_aliases_added = sr.store(sequence, nsaliases) print(n_seqs_added, n_aliases_added) # (1, 1) + sr.commit() ``store()`` returns a tuple containing the number of new sequences and aliases that were successfully added (the sha512t24u sequence hash is not counted as a new alias, because it is automatically added with a new sequence as the main sequence identifier). +Note that the ``commit()`` method MUST be called before the end of an interpreter session for data to be durably committed to the database. ``store()`` stages pending database additions and, for performance reasons, calls ``commit()`` itself only when the number of staged changes exceeds a (relatively large) threshold. + .. 
and this should be a "tip" admonition or something of that nature Load FASTA files From da4ca241666700b68481c779646d20bceba92707 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 13 Jun 2024 11:54:09 -0400 Subject: [PATCH 4/7] fix: properly initialize file handler (#160) --- src/biocommons/seqrepo/fastadir/fabgz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/biocommons/seqrepo/fastadir/fabgz.py b/src/biocommons/seqrepo/fastadir/fabgz.py index cfa20a6..1948dd1 100644 --- a/src/biocommons/seqrepo/fastadir/fabgz.py +++ b/src/biocommons/seqrepo/fastadir/fabgz.py @@ -116,7 +116,7 @@ def __init__(self, filename: str) -> None: super(FabgzWriter, self).__init__() self.filename = filename - self.fh = None + self._fh = None self._basepath, suffix = os.path.splitext(self.filename) if suffix != ".bgz": raise RuntimeError("Path must end with .bgz") From 0730c629efd665c4a5d07380ead89af7ae7ef8a8 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 13 Jun 2024 11:54:50 -0400 Subject: [PATCH 5/7] refactor: use Python 3 stdlib instead of six (#164) --- misc/docker/seqrepo.df | 1 - pyproject.toml | 1 - src/biocommons/seqrepo/fastadir/bases.py | 8 ++------ src/biocommons/seqrepo/fastadir/fabgz.py | 4 +--- tests/test_fabgz.py | 9 ++++----- tests/test_fastaiter.py | 24 ++++++++++++------------ 6 files changed, 19 insertions(+), 28 deletions(-) diff --git a/misc/docker/seqrepo.df b/misc/docker/seqrepo.df index 08f8a0f..8e10ede 100644 --- a/misc/docker/seqrepo.df +++ b/misc/docker/seqrepo.df @@ -37,7 +37,6 @@ RUN pip3 install --upgrade \ pygments \ pysam \ simplegeneric \ - six \ tqdm \ traitlets \ wcwidth \ diff --git a/pyproject.toml b/pyproject.toml index 863f2ff..cc89bd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,6 @@ dependencies = [ "ipython ~= 8.4", "pysam ~= 0.22", "requests ~= 2.31", - "six ~= 1.16", "tqdm ~= 4.66", "yoyo-migrations ~= 8.2", ] diff --git a/src/biocommons/seqrepo/fastadir/bases.py 
b/src/biocommons/seqrepo/fastadir/bases.py index 5804a1f..3ee2a27 100644 --- a/src/biocommons/seqrepo/fastadir/bases.py +++ b/src/biocommons/seqrepo/fastadir/bases.py @@ -1,11 +1,8 @@ import abc from typing import Optional -import six - -@six.add_metaclass(abc.ABCMeta) -class BaseReader: +class BaseReader(metaclass=abc.ABCMeta): @abc.abstractmethod def fetch(self, seq_id: str, start: Optional[int] = None, end: Optional[int] = None) -> str: raise NotImplementedError @@ -14,8 +11,7 @@ def __getitem__(self, ac: str) -> str: return self.fetch(ac) -@six.add_metaclass(abc.ABCMeta) -class BaseWriter: +class BaseWriter(metaclass=abc.ABCMeta): @abc.abstractmethod def store(self, seq_id: str, seq: str) -> str: pass # pragma: no cover diff --git a/src/biocommons/seqrepo/fastadir/fabgz.py b/src/biocommons/seqrepo/fastadir/fabgz.py index 1948dd1..987a131 100644 --- a/src/biocommons/seqrepo/fastadir/fabgz.py +++ b/src/biocommons/seqrepo/fastadir/fabgz.py @@ -16,7 +16,6 @@ from types import TracebackType from typing import Optional, Type -import six from pysam import FastaFile from typing_extensions import Self @@ -45,7 +44,6 @@ def _get_bgzip_version(exe: str) -> str: def _find_bgzip() -> str: """return path to bgzip if found and meets version requirements, else exception""" - missing_file_exception = OSError if six.PY2 else FileNotFoundError min_bgzip_version = ".".join(map(str, min_bgzip_version_info)) exe = os.environ.get("SEQREPO_BGZIP_PATH", shutil.which("bgzip") or "/usr/bin/bgzip") @@ -53,7 +51,7 @@ def _find_bgzip() -> str: bgzip_version = _get_bgzip_version(exe) except AttributeError: raise RuntimeError("Didn't find version string in bgzip executable ({exe})".format(exe=exe)) - except missing_file_exception: + except FileNotFoundError: raise RuntimeError( "{exe} doesn't exist; you need to install htslib and tabix " "(See https://github.com/biocommons/biocommons.seqrepo#requirements)".format(exe=exe) diff --git a/tests/test_fabgz.py b/tests/test_fabgz.py index 
56cfd3b..89d8214 100644 --- a/tests/test_fabgz.py +++ b/tests/test_fabgz.py @@ -3,7 +3,6 @@ import tempfile import pytest -import six from biocommons.seqrepo.fastadir.fabgz import FabgzReader, FabgzWriter @@ -19,18 +18,18 @@ def test_write_reread(): # write sequences faw = FabgzWriter(fabgz_fn) - for seq_id, seq in six.iteritems(sequences): + for seq_id, seq in sequences.items(): faw.store(seq_id, seq) # add twice to demonstrate non-redundancy - for seq_id, seq in six.iteritems(sequences): + for seq_id, seq in sequences.items(): faw.store(seq_id, seq) faw.close() # now read them back far = FabgzReader(fabgz_fn) - assert far.filename.startswith(tmpdir.encode()) + assert far.filename.startswith(tmpdir.encode()) # type: ignore assert set(far.keys()) == set(sequences.keys()) - assert 5 == len(far), "expected 5 sequences" + assert 5 == len(far), "expected 5 sequences" # type: ignore assert "l10" in far.keys() assert far["l10"] == seed * 10 for seq_id in far.keys(): diff --git a/tests/test_fastaiter.py b/tests/test_fastaiter.py index 0d31310..93a0659 100644 --- a/tests/test_fastaiter.py +++ b/tests/test_fastaiter.py @@ -1,6 +1,6 @@ +from io import StringIO + import pytest -import six -from six.moves import StringIO from biocommons.seqrepo.fastaiter import FastaIter @@ -12,7 +12,7 @@ def test_empty(): # should return an empty generator with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_noheader(): @@ -22,7 +22,7 @@ def test_noheader(): # should return an empty generator with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_single(): @@ -30,13 +30,13 @@ def test_single(): iterator = FastaIter(data) - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq1" assert seq == "ACGT" # should be empty now with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_multiple(): @@ -44,21 +44,21 @@ def test_multiple(): iterator = FastaIter(data) - header, seq = 
six.next(iterator) + header, seq = next(iterator) assert header == "seq1" assert seq == "ACGT" - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq2" assert seq == "TGCA" - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq3" assert seq == "TTTT" # should be empty now with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) def test_multiline(): @@ -66,10 +66,10 @@ def test_multiline(): iterator = FastaIter(data) - header, seq = six.next(iterator) + header, seq = next(iterator) assert header == "seq1" assert seq == "ACGTTGCA" # should be empty now with pytest.raises(StopIteration): - six.next(iterator) + next(iterator) From 7c35332e15e8923992d77231fbcf2f19706bf625 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 21 Jun 2024 09:52:42 -0400 Subject: [PATCH 6/7] style: format with Ruff (#158) --- .github/workflows/python-package.yml | 4 ++-- .pre-commit-config.yaml | 10 +++++----- Makefile | 10 +++++----- pyproject.toml | 9 +++++---- src/biocommons/seqrepo/cli.py | 6 +++++- src/biocommons/seqrepo/fastadir/fabgz.py | 1 + src/biocommons/seqrepo/fastadir/fastadir.py | 8 ++------ src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py | 5 +++-- 8 files changed, 28 insertions(+), 25 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index a2ed63d..69a2464 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -33,9 +33,9 @@ jobs: run: | isort --check src - - name: Format check with black + - name: Format check with ruff run: | - black --check src + ruff format --check src - name: Security check with bandit run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 384b910..eb0eb87 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,8 +16,8 @@ repos: entry: isort language: system types: [python] - - id: black - name: black - entry: black - language: system - 
types: [python] +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.4 + hooks: + - id: ruff-format + args: [ --check ] diff --git a/Makefile b/Makefile index 8464cfa..1474805 100644 --- a/Makefile +++ b/Makefile @@ -68,7 +68,7 @@ build: %: cqa: flake8 src --count --select=E9,F63,F7,F82 --show-source --statistics isort --profile black --check src - black --check src + ruff format --check src tests bandit -ll -r src #=> test: execute tests @@ -97,7 +97,7 @@ cqa: flake8 src --show-source --statistics pyright isort --check src --profile black - black --check src + ruff format --check src bandit -ll -r src #=> reformat: reformat code @@ -108,13 +108,13 @@ reformat: ############################################################################ #= UTILITY TARGETS -#=> reformat: reformat code with yapf and commit +#=> reformat: reformat code and commit .PHONY: reformat reformat: @if ! git diff --cached --exit-code >/dev/null; then echo "Repository not clean" 1>&2; exit 1; fi - black src tests + ruff src tests isort src tests - git commit -a -m "reformatted with black and isort" + git commit -a -m "reformatted with ruff and isort" #=> rename: rename files and substitute content for new repo name .PHONY: rename diff --git a/pyproject.toml b/pyproject.toml index cc89bd0..e5a386b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ dependencies = [ [project.optional-dependencies] dev = [ "bandit ~= 1.7", - "black ~= 22.3", "build ~= 0.8", "flake8 ~= 4.0", "ipython ~= 8.4", @@ -42,6 +41,7 @@ dev = [ "pytest ~= 7.1", "pyright~=1.1", "requests_html ~= 0.10", + "ruff == 0.4.4", "tox ~= 3.25", "vcrpy", ] @@ -116,9 +116,6 @@ exclude_lines = [ "if __name__ == .__main__.:", ] -[tool.black] -line-length = 100 - [tool.isort] profile = "black" src_paths = ["src", "tests"] @@ -133,3 +130,7 @@ disable = "R0913" [tool.pylint.format] max-line-length = 100 + +[tool.ruff] +src = ["src", "tests"] +line-length = 100 diff --git a/src/biocommons/seqrepo/cli.py 
b/src/biocommons/seqrepo/cli.py index d2b76a6..a767d34 100644 --- a/src/biocommons/seqrepo/cli.py +++ b/src/biocommons/seqrepo/cli.py @@ -10,6 +10,7 @@ $ seqrepo --help """ + import argparse import datetime import gzip @@ -524,7 +525,10 @@ def load(opts: argparse.Namespace) -> None: fh = io.open(fn, mode="rt", encoding="ascii") _logger.info("Opened " + fn) seq_bar = tqdm.tqdm( - FastaIter(fh), unit=" seqs", disable=disable_bar, leave=False # type: ignore noqa: E501 + FastaIter(fh), # type: ignore + unit=" seqs", + disable=disable_bar, + leave=False, ) for defline, seq in seq_bar: # type: ignore n_seqs_seen += 1 diff --git a/src/biocommons/seqrepo/fastadir/fabgz.py b/src/biocommons/seqrepo/fastadir/fabgz.py index 987a131..10f9a40 100644 --- a/src/biocommons/seqrepo/fastadir/fabgz.py +++ b/src/biocommons/seqrepo/fastadir/fabgz.py @@ -5,6 +5,7 @@ Files must be named as .fa.bgz to be recognized as blocked gzip compressed """ + import io import logging import os diff --git a/src/biocommons/seqrepo/fastadir/fastadir.py b/src/biocommons/seqrepo/fastadir/fastadir.py index 83b3851..1336a3c 100644 --- a/src/biocommons/seqrepo/fastadir/fastadir.py +++ b/src/biocommons/seqrepo/fastadir/fastadir.py @@ -81,9 +81,7 @@ def __init__( if schema_version != expected_schema_version: raise RuntimeError( """Upgrade required: Database schema - version is {} and code expects {}""".format( - schema_version, expected_schema_version - ) + version is {} and code expects {}""".format(schema_version, expected_schema_version) ) if fd_cache_size == 0: @@ -142,9 +140,7 @@ def fetch(self, seq_id: str, start: Optional[int] = None, end: Optional[int] = N if self._writing and self._writing["relpath"] == rec["relpath"]: _logger.warning( """Fetching from file opened for writing; - closing first ({})""".format( - rec["relpath"] - ) + closing first ({})""".format(rec["relpath"]) ) self.commit() diff --git a/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py b/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py 
index 916106d..2a26ba8 100644 --- a/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py +++ b/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py @@ -53,8 +53,9 @@ def __init__( # if we're not at the expected schema version for this code, bail if schema_version != expected_schema_version: # pragma: no cover raise RuntimeError( - "Upgrade required: Database schema" - "version is {} and code expects {}".format(schema_version, expected_schema_version) + "Upgrade required: Database schema" "version is {} and code expects {}".format( + schema_version, expected_schema_version + ) ) # ############################################################################ From 5013e7f2325cebf1f05ed7414e5677b1a2d1ab6b Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Mon, 24 Jun 2024 07:54:25 -0400 Subject: [PATCH 7/7] chore: add git ignore revs file (#165) --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..9581b95 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,3 @@ +# .git-blame-ignore-revs +# Initial formatting with Ruff +7c35332e15e8923992d77231fbcf2f19706bf625