Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dropped pyfaidx #744

Merged
merged 3 commits into from
Feb 8, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 3.2.0 January 31, 2021

Dropped the `FastaFaiReads` class. Use `SqliteIndex` instead. Fixed two
skipped Python 3 tests (for gzipped and bz2'd file reading).

## 3.1.86 January 31, 2021

Fixed two failing tests due to changed DIAMOND (version 2.0.6) bitscore
Expand Down
4 changes: 1 addition & 3 deletions bin/trim-primers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import sys
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from dark.sequence import findPrimerBidiLimits


Expand Down Expand Up @@ -53,5 +52,4 @@ def trimPrimers(primer, verbose):

args = parser.parse_args()

trimPrimers(Seq(args.primer.upper(), IUPAC.unambiguous_dna),
args.verbose)
trimPrimers(Seq(args.primer.upper()), args.verbose)
2 changes: 1 addition & 1 deletion dark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
# will not be found by the version() function in ../setup.py
#
# Remember to update ../CHANGELOG.md describing what's new in each version.
__version__ = '3.1.86'
__version__ = '3.1.87'
51 changes: 0 additions & 51 deletions dark/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os

from Bio import SeqIO, bgzf
from pyfaidx import Fasta

from dark.reads import Reads, DNARead
from dark.utils import asHandle
Expand Down Expand Up @@ -131,56 +130,6 @@ def iter(self):
count += 1


class FastaFaiReads(Reads):
"""
Subclass of L{dark.reads.Reads} that provides dictionary-like access to
FASTA reads using the pyfaidx module.

@param filename: The C{str} name of a file containing FASTA. This may be an
uncompressed file or one that has been compressed with bgzip (from
the samtools suite).
# The pyfaidx module will create an identically named FASTA index file
# with an additional ".fai" extension. An .fai file can also be made
# (if desired) with 'samtools faidx' or the 'faidx' command installed
# by pyfaidx. If you use the latter, you can preserve full sequence
# titles via '-e "lambda x: x"'.
@param readClass: The class of read that should be yielded by iter.
@param upperCase: If C{True}, read sequences will be converted to upper
case.
"""
def __init__(self, filename, readClass=DNARead, upperCase=False):
self._filename = filename
self._fasta = Fasta(filename)
self._readClass = readClass
# TODO: It would be better if upperCase were an argument that could
# be passed to Reads.__init__ and that could do the uppercasing in
# its add method (as opposed to using it below in our iter method).
# In that case, in the iter of this class we'd call self.add on
# each of the sequences coming from self._file. Or, if we'd already
# read the file we'd return Reads.iter(self) to re-iterate over the
# sequences already added from the file.
self._upperCase = upperCase
if PY3:
super().__init__()
else:
Reads.__init__(self)

def iter(self):
"""
Iterate over the sequences in the FASTA file, yielding each
as an instance of the desired read class.
"""
if self._upperCase:
for id_ in self._fasta:
yield self._readClass(id_, str(self._fasta[id_]).upper())
else:
for id_ in self._fasta:
yield self._readClass(id_, str(self._fasta[id_]))

def __getitem__(self, id_):
return self._readClass(str(id_), str(self._fasta[id_]))


def combineReads(filename, sequences, readClass=DNARead,
upperCase=False, idPrefix='command-line-read-'):
"""
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ def version():
'mysql-connector-python==8.0.11',
'numpy>=1.14.2',
'pysam>=0.15.2',
'pyfaidx>=0.4.8.4',
'pyzmq>=14.3.1',
'requests>=2.18.4',
'cachetools>=3.1.0',
Expand Down
12 changes: 8 additions & 4 deletions test/test_dna.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import six
from unittest import TestCase

from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA

from dark.dna import (
AMBIGUOUS, BASES_TO_AMBIGUOUS, compareDNAReads, matchToString,
findKozakConsensus, FloatBaseCounts, sequenceToRegex)
from dark.reads import Read, DNARead, DNAKozakRead

# The following letters used to be available via
#     from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA
#     IUPACAmbiguousDNA.letters
# but Bio.Alphabet is now deprecated and will be removed.
AMBIGUOUS_DNA_LETTERS = 'GATCRYWSMKHBVDN'


class TestAmbiguousLetters(TestCase):
"""
Expand All @@ -17,7 +21,7 @@ def testExpectedAmbiguousLetters(self):
"""
The ambiguous DNA letters must match those given by IUPAC.
"""
self.assertEqual(sorted(IUPACAmbiguousDNA.letters), sorted(AMBIGUOUS))
self.assertEqual(sorted(AMBIGUOUS_DNA_LETTERS), sorted(AMBIGUOUS))

def testExpectedLengthOne(self):
"""
Expand All @@ -31,7 +35,7 @@ def testExpectedLengthsGreaterThanOne(self):
The ambiguous DNA letters must be in sets of size greater than one
and less than 5.
"""
for base in set(IUPACAmbiguousDNA.letters) - set('ACGT'):
for base in set(AMBIGUOUS_DNA_LETTERS) - set('ACGT'):
self.assertTrue(5 > len(AMBIGUOUS[base]) > 1)

def testAmbiguousLettersAreAllACGT(self):
Expand Down
152 changes: 3 additions & 149 deletions test/test_fasta.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import sys
import os
from six.moves import builtins
from six import assertRaisesRegex
from io import BytesIO
import os
from unittest import TestCase

from unittest import TestCase, skipUnless
from Bio import SeqIO, bgzf
from contextlib import contextmanager

try:
from unittest.mock import patch
Expand All @@ -17,11 +15,9 @@

from dark.reads import Read, AARead, DNARead, RNARead, Reads
from dark.fasta import (dedupFasta, dePrefixAndSuffixFasta, fastaSubtract,
FastaReads, FastaFaiReads, combineReads, SqliteIndex)
FastaReads, combineReads, SqliteIndex)
from dark.utils import StringIO

canTestPyfaidx = sys.platform != 'linux'


class FastaDeDup(TestCase):
"""
Expand Down Expand Up @@ -453,148 +449,6 @@ def sideEffect(self, filename, **kwargs):
list(reads))


@skipUnless(canTestPyfaidx, 'pyfaidx tests are skipped on Linux')
class TestFastaFaiReads(TestCase):
"""
Tests for the L{dark.fasta.FastaFaiReads} class.
"""
def testMissingKey(self):
"""
If a non-existent sequence id is looked up, a KeyError must be raised.
"""

pyfaidxIndex = StringIO()

class Open(object):
def __init__(self, test, manager):
self.test = test
self.manager = manager
self.count = 0

def sideEffect(self, filename, *args, **kwargs):
if self.count == 0:
self.test.assertEqual('filename.fasta', filename)
self.count += 1
return BytesIO(b'>id1\nACTG\n')
elif self.count == 1:
self.test.assertEqual('filename.fasta', filename)
self.count += 1
return StringIO('>id1\nACTG\n')
elif self.count == 2:
self.count += 1
return self.manager
elif self.count == 3:
self.count += 1
return StringIO(pyfaidxIndex.getvalue())
else:
self.test.fail(
'Open called too many times. Filename: %r, Args: %r, '
'Keyword args: %r.' % (filename, args, kwargs))

@contextmanager
def manager():
yield pyfaidxIndex

sideEffect = Open(self, manager()).sideEffect
with patch.object(builtins, 'open') as mockMethod:
mockMethod.side_effect = sideEffect
reads = FastaFaiReads('filename.fasta')
error = "^'id2 not in filename\\.fasta\\.'"
assertRaisesRegex(self, KeyError, error, reads.__getitem__, 'id2')

def testOneRead(self):
"""
It must be possible to access a FASTA file with one read like a dict.
"""

pyfaidxIndex = StringIO()

class Open(object):
def __init__(self, test, manager):
self.test = test
self.manager = manager
self.count = 0

def sideEffect(self, filename, *args, **kwargs):
if self.count == 0:
self.test.assertEqual('filename.fasta', filename)
self.count += 1
return BytesIO(b'>id1\nACTG\n')
elif self.count == 1:
self.test.assertEqual('filename.fasta', filename)
self.count += 1
return StringIO('>id1\nACTG\n')
elif self.count == 2:
self.count += 1
return self.manager
elif self.count == 3:
self.count += 1
return StringIO(pyfaidxIndex.getvalue())
else:
self.test.fail(
'Open called too many times. Filename: %r, Args: %r, '
'Keyword args: %r.' % (filename, args, kwargs))

@contextmanager
def manager():
yield pyfaidxIndex

sideEffect = Open(self, manager()).sideEffect
with patch.object(builtins, 'open') as mockMethod:
mockMethod.side_effect = sideEffect
reads = FastaFaiReads('filename.fasta')
self.assertEqual(DNARead('id1', 'ACTG'), reads['id1'])
# Check that the fai index was built correctly.
self.assertEqual(pyfaidxIndex.getvalue(), 'id1\t4\t5\t4\t5\n')

def testTwoReads(self):
"""
It must be possible to access a FASTA file with two reads like a dict.
"""

pyfaidxIndex = StringIO()

class Open(object):
def __init__(self, test, manager):
self.test = test
self.manager = manager
self.count = 0

def sideEffect(self, filename, *args, **kwargs):
if self.count == 0:
self.test.assertEqual('filename.fasta', filename)
self.count += 1
return BytesIO(b'>id1\nACTG\n>id2\nAACCTTGG\n')
elif self.count == 1:
self.test.assertEqual('filename.fasta', filename)
self.count += 1
return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')
elif self.count == 2:
self.count += 1
return self.manager
elif self.count == 3:
self.count += 1
return StringIO(pyfaidxIndex.getvalue())
else:
self.test.fail(
'Open called too many times. Filename: %r, Args: %r, '
'Keyword args: %r.' % (filename, args, kwargs))

@contextmanager
def manager():
yield pyfaidxIndex

sideEffect = Open(self, manager()).sideEffect
with patch.object(builtins, 'open') as mockMethod:
mockMethod.side_effect = sideEffect
reads = FastaFaiReads('filename.fasta')
self.assertEqual(DNARead('id1', 'ACTG'), reads['id1'])
self.assertEqual(DNARead('id2', 'AACCTTGG'), reads['id2'])
# Check that the fai index was built correctly.
self.assertEqual(pyfaidxIndex.getvalue(),
'id1\t4\t5\t4\t5\nid2\t8\t15\t8\t9\n')


class TestCombineReads(TestCase):
"""
Tests for the L{dark.fasta.combineReads} function.
Expand Down
12 changes: 0 additions & 12 deletions test/test_local_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,6 @@ class TestLocalAlign(TestCase):
With match +1, mismatch -1, gap open -1, gap extend -1 and
gap extend decay 0.0.
"""

def testNonDNAString(self):
"""
If the sequences do not consist of nucleotides, an exception
must be raised.
"""
self.skipTest('Non-DNA raising of ValueError is disabled.')
seq1 = Read('seq1', 'xxx')
seq2 = Read('seq2', 'yyy')
six.assertRaisesRegex(self, ValueError, 'Invalid DNA nucleotide: "X"',
LocalAlignment, seq1, seq2)

def testPositiveMismatch(self):
"""
If the mismatch value passed is positive, an exception
Expand Down
Loading