Skip to content

Commit

Permalink
MRG: fix manysketch naming bug (#284)
Browse files Browse the repository at this point in the history
This bug caused the sketch name to be set only on the first sketch for a given file, and _not_ on any additional sketches. For example, when sketching at `k=21,k=31,k=51`, only the `k=21` sketches would have their name set properly.

- fixes #283
  • Loading branch information
bluegenes authored Mar 20, 2024
1 parent 8878f31 commit 34adf84
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/manysketch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ pub fn manysketch(
sig.set_name(name);
// sourmash sets filename to last filename if merging fastas
sig.set_filename(last_filename.as_str());
set_name = true;
};
if moltype == "protein" {
sig.add_protein(&record.seq())
Expand All @@ -237,6 +236,9 @@ pub fn manysketch(
// if not force, panics with 'N' in dna sequence
}
});
if !set_name {
set_name = true;
}
}
Err(err) => eprintln!("Error while processing record: {:?}", err),
}
Expand Down
14 changes: 14 additions & 0 deletions src/python/tests/test_sketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ def test_manysketch_mult_k(runtmp):

assert len(sigs) == 6

names = [sig.name for sig in sigs]
print(names)
assert names.count('short') == 2
assert names.count('short2') == 2
assert names.count('short3') == 2


def test_manysketch_mult_k_2(runtmp):
fa_csv = runtmp.output('db-fa.txt')
Expand All @@ -115,6 +121,12 @@ def test_manysketch_mult_k_2(runtmp):

assert len(sigs) == 6

names = [sig.name for sig in sigs]
print(names)
assert names.count('short') == 2
assert names.count('short2') == 2
assert names.count('short3') == 2


def test_manysketch_mult_moltype(runtmp):
fa_csv = runtmp.output('db-fa.csv')
Expand Down Expand Up @@ -148,10 +160,12 @@ def test_manysketch_mult_moltype(runtmp):
assert sig.minhash.scaled == 1
assert sig.md5sum() == "1474578c5c46dd09da4c2df29cf86621"
else:
assert sig.name == 'short'
assert sig.minhash.ksize == 10
assert sig.minhash.scaled == 1
assert sig.md5sum() == "eb4467d11e0ecd2dbde4193bfc255310"
else:
assert sig.name in ['short', 'short2', 'short3']
assert sig.minhash.ksize == 21
assert sig.minhash.scaled == 1
assert sig.minhash.is_dna
Expand Down

0 comments on commit 34adf84

Please sign in to comment.