Skip to content

Commit

Permalink
Merge branch 'main' into python-312
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Jul 5, 2024
2 parents 80d6459 + 5013e7f commit dae5c7d
Show file tree
Hide file tree
Showing 16 changed files with 53 additions and 61 deletions.
3 changes: 3 additions & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# .git-blame-ignore-revs
# Initial formatting with Ruff
7c35332e15e8923992d77231fbcf2f19706bf625
4 changes: 2 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ jobs:
run: |
isort --check src
- name: Format check with black
- name: Format check with ruff
run: |
black --check src
ruff format --check src
- name: Security check with bandit
run: |
Expand Down
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ repos:
entry: isort
language: system
types: [python]
- id: black
name: black
entry: black
language: system
types: [python]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.4
hooks:
- id: ruff-format
args: [ --check ]
10 changes: 5 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ build: %:
cqa:
flake8 src --count --select=E9,F63,F7,F82 --show-source --statistics
isort --profile black --check src
black --check src
ruff format --check src tests
bandit -ll -r src

#=> test: execute tests
Expand Down Expand Up @@ -97,7 +97,7 @@ cqa:
flake8 src --show-source --statistics
pyright
isort --check src --profile black
black --check src
ruff format --check src
bandit -ll -r src

#=> reformat: reformat code
Expand All @@ -108,13 +108,13 @@ reformat:
############################################################################
#= UTILITY TARGETS

#=> reformat: reformat code with yapf and commit
#=> reformat: reformat code and commit
.PHONY: reformat
reformat:
@if ! git diff --cached --exit-code >/dev/null; then echo "Repository not clean" 1>&2; exit 1; fi
black src tests
ruff src tests
isort src tests
git commit -a -m "reformatted with black and isort"
git commit -a -m "reformatted with ruff and isort"

#=> rename: rename files and substitute content for new repo name
.PHONY: rename
Expand Down
3 changes: 3 additions & 0 deletions docs/store.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@ To add a new sequence and/or aliases in a Python environment, construct a ``SeqR
n_seqs_added, n_aliases_added = sr.store(sequence, nsaliases)
print(n_seqs_added, n_aliases_added)
# (1, 1)
sr.commit()
``store()`` returns a tuple containing the number of new sequences and aliases that were successfully added (the sha512t24u sequence hash is not counted as a new alias, because it is automatically added with a new sequence as the main sequence identifier).

Note that the ``commit()`` method MUST be called before the end of an interpreter session for data to be durably committed to the database. ``store()`` only stages pending database additions; for performance reasons, it calls ``commit()`` itself only when the number of staged changes exceeds a (relatively large) threshold.

.. and this should be a "tip" admonition or something of that nature
Load FASTA files
Expand Down
1 change: 0 additions & 1 deletion misc/docker/seqrepo.df
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ RUN pip3 install --upgrade \
pygments \
pysam \
simplegeneric \
six \
tqdm \
traitlets \
wcwidth \
Expand Down
12 changes: 6 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: MIT License",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
]
dynamic = ["version"]
Expand All @@ -22,7 +22,6 @@ dependencies = [
"ipython ~= 8.4",
"pysam ~= 0.22",
"requests ~= 2.31",
"six ~= 1.16",
"tqdm ~= 4.66",
"yoyo-migrations ~= 8.2",
"setuptools", # pin until yoyo-migrations doesn't use pkg_resources
Expand All @@ -31,7 +30,6 @@ dependencies = [
[project.optional-dependencies]
dev = [
"bandit ~= 1.7",
"black ~= 22.3",
"build ~= 0.8",
"flake8 ~= 4.0",
"ipython ~= 8.4",
Expand All @@ -44,6 +42,7 @@ dev = [
"pytest ~= 7.1",
"pyright~=1.1",
"requests_html ~= 0.10",
"ruff == 0.4.4",
"tox ~= 3.25",
"vcrpy",
]
Expand Down Expand Up @@ -118,9 +117,6 @@ exclude_lines = [
"if __name__ == .__main__.:",
]

[tool.black]
line-length = 100

[tool.isort]
profile = "black"
src_paths = ["src", "tests"]
Expand All @@ -135,3 +131,7 @@ disable = "R0913"

[tool.pylint.format]
max-line-length = 100

[tool.ruff]
src = ["src", "tests"]
line-length = 100
7 changes: 4 additions & 3 deletions src/biocommons/seqrepo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
"""

from __future__ import division, print_function

import argparse
import datetime
import gzip
Expand Down Expand Up @@ -527,7 +525,10 @@ def load(opts: argparse.Namespace) -> None:
fh = io.open(fn, mode="rt", encoding="ascii")
_logger.info("Opened " + fn)
seq_bar = tqdm.tqdm(
FastaIter(fh), unit=" seqs", disable=disable_bar, leave=False # type: ignore noqa: E501
FastaIter(fh), # type: ignore
unit=" seqs",
disable=disable_bar,
leave=False,
)
for defline, seq in seq_bar: # type: ignore
n_seqs_seen += 1
Expand Down
8 changes: 2 additions & 6 deletions src/biocommons/seqrepo/fastadir/bases.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import abc
from typing import Optional

import six


@six.add_metaclass(abc.ABCMeta)
class BaseReader:
class BaseReader(metaclass=abc.ABCMeta):
@abc.abstractmethod
def fetch(self, seq_id: str, start: Optional[int] = None, end: Optional[int] = None) -> str:
raise NotImplementedError
Expand All @@ -14,8 +11,7 @@ def __getitem__(self, ac: str) -> str:
return self.fetch(ac)


@six.add_metaclass(abc.ABCMeta)
class BaseWriter:
class BaseWriter(metaclass=abc.ABCMeta):
@abc.abstractmethod
def store(self, seq_id: str, seq: str) -> str:
pass # pragma: no cover
8 changes: 2 additions & 6 deletions src/biocommons/seqrepo/fastadir/fabgz.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
"""

from __future__ import unicode_literals

import io
import logging
import os
Expand All @@ -19,7 +17,6 @@
from types import TracebackType
from typing import Optional, Type

import six
from pysam import FastaFile
from typing_extensions import Self

Expand Down Expand Up @@ -48,15 +45,14 @@ def _get_bgzip_version(exe: str) -> str:

def _find_bgzip() -> str:
"""return path to bgzip if found and meets version requirements, else exception"""
missing_file_exception = OSError if six.PY2 else FileNotFoundError
min_bgzip_version = ".".join(map(str, min_bgzip_version_info))
exe = os.environ.get("SEQREPO_BGZIP_PATH", shutil.which("bgzip") or "/usr/bin/bgzip")

try:
bgzip_version = _get_bgzip_version(exe)
except AttributeError:
raise RuntimeError("Didn't find version string in bgzip executable ({exe})".format(exe=exe))
except missing_file_exception:
except FileNotFoundError:
raise RuntimeError(
"{exe} doesn't exist; you need to install htslib and tabix "
"(See https://github.com/biocommons/biocommons.seqrepo#requirements)".format(exe=exe)
Expand Down Expand Up @@ -119,7 +115,7 @@ def __init__(self, filename: str) -> None:
super(FabgzWriter, self).__init__()

self.filename = filename
self.fh = None
self._fh = None
self._basepath, suffix = os.path.splitext(self.filename)
if suffix != ".bgz":
raise RuntimeError("Path must end with .bgz")
Expand Down
8 changes: 2 additions & 6 deletions src/biocommons/seqrepo/fastadir/fastadir.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,7 @@ def __init__(
if schema_version != expected_schema_version:
raise RuntimeError(
"""Upgrade required: Database schema
version is {} and code expects {}""".format(
schema_version, expected_schema_version
)
version is {} and code expects {}""".format(schema_version, expected_schema_version)
)

if fd_cache_size == 0:
Expand Down Expand Up @@ -142,9 +140,7 @@ def fetch(self, seq_id: str, start: Optional[int] = None, end: Optional[int] = N
if self._writing and self._writing["relpath"] == rec["relpath"]:
_logger.warning(
"""Fetching from file opened for writing;
closing first ({})""".format(
rec["relpath"]
)
closing first ({})""".format(rec["relpath"])
)
self.commit()

Expand Down
5 changes: 3 additions & 2 deletions src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ def __init__(
# if we're not at the expected schema version for this code, bail
if schema_version != expected_schema_version: # pragma: no cover
raise RuntimeError(
"Upgrade required: Database schema"
"version is {} and code expects {}".format(schema_version, expected_schema_version)
"Upgrade required: Database schema" "version is {} and code expects {}".format(
schema_version, expected_schema_version
)
)

# ############################################################################
Expand Down
Binary file added tests/.DS_Store
Binary file not shown.
2 changes: 0 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import io
import os
import tempfile
Expand Down
9 changes: 4 additions & 5 deletions tests/test_fabgz.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import tempfile

import pytest
import six

from biocommons.seqrepo.fastadir.fabgz import FabgzReader, FabgzWriter

Expand All @@ -19,18 +18,18 @@ def test_write_reread():

# write sequences
faw = FabgzWriter(fabgz_fn)
for seq_id, seq in six.iteritems(sequences):
for seq_id, seq in sequences.items():
faw.store(seq_id, seq)
# add twice to demonstrate non-redundancy
for seq_id, seq in six.iteritems(sequences):
for seq_id, seq in sequences.items():
faw.store(seq_id, seq)
faw.close()

# now read them back
far = FabgzReader(fabgz_fn)
assert far.filename.startswith(tmpdir.encode())
assert far.filename.startswith(tmpdir.encode()) # type: ignore
assert set(far.keys()) == set(sequences.keys())
assert 5 == len(far), "expected 5 sequences"
assert 5 == len(far), "expected 5 sequences" # type: ignore
assert "l10" in far.keys()
assert far["l10"] == seed * 10
for seq_id in far.keys():
Expand Down
24 changes: 12 additions & 12 deletions tests/test_fastaiter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from io import StringIO

import pytest
import six
from six.moves import StringIO

from biocommons.seqrepo.fastaiter import FastaIter

Expand All @@ -12,7 +12,7 @@ def test_empty():

# should return an empty generator
with pytest.raises(StopIteration):
six.next(iterator)
next(iterator)


def test_noheader():
Expand All @@ -22,54 +22,54 @@ def test_noheader():

# should return an empty generator
with pytest.raises(StopIteration):
six.next(iterator)
next(iterator)


def test_single():
data = StringIO(">seq1\nACGT\n")

iterator = FastaIter(data)

header, seq = six.next(iterator)
header, seq = next(iterator)
assert header == "seq1"
assert seq == "ACGT"

# should be empty now
with pytest.raises(StopIteration):
six.next(iterator)
next(iterator)


def test_multiple():
data = StringIO(">seq1\nACGT\n>seq2\nTGCA\n\n>seq3\nTTTT")

iterator = FastaIter(data)

header, seq = six.next(iterator)
header, seq = next(iterator)
assert header == "seq1"
assert seq == "ACGT"

header, seq = six.next(iterator)
header, seq = next(iterator)
assert header == "seq2"
assert seq == "TGCA"

header, seq = six.next(iterator)
header, seq = next(iterator)
assert header == "seq3"
assert seq == "TTTT"

# should be empty now
with pytest.raises(StopIteration):
six.next(iterator)
next(iterator)


def test_multiline():
data = StringIO(">seq1\nACGT\nTGCA")

iterator = FastaIter(data)

header, seq = six.next(iterator)
header, seq = next(iterator)
assert header == "seq1"
assert seq == "ACGTTGCA"

# should be empty now
with pytest.raises(StopIteration):
six.next(iterator)
next(iterator)

0 comments on commit dae5c7d

Please sign in to comment.