Skip to content

Commit

Permalink
sort winnowmap output to avoid non-determinism
Browse files Browse the repository at this point in the history
  • Loading branch information
skoren committed Feb 7, 2024
1 parent 9e94227 commit af86243
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/Snakefiles/3-alignTips.sm
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ cat > ./combineONT.sh <<EOF
#!/bin/sh
set -e

cat {params.alignments} \\\\
> ../{output.alignments}
cat {params.alignments} | \\\\
sort -sk1,1 -T . > ../{output.alignments}

EOF

Expand Down
5 changes: 4 additions & 1 deletion src/scripts/select_best_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,16 @@ def mad(values, mean):

if currID != parts[0]:
if len(idys) > 0:
assert(currID not in read_alignment_idy)
read_alignment_idy[currID] = [statistics.median(idys), mad(idys, statistics.mean(idys))]
#if len(idys) > 1: sys.stderr.write("Adding info for read %s which has mean %s and list %s and mad is %s sd %s\n"%(currID, statistics.median(idys), idys, mad(idys, statistics.mean(idys)), statistics.stdev(idys)))
idys.clear()

currID = parts[0]
idys.append(idy)
if len(idys) > 0: read_alignment_idy[currID] = [statistics.median(idys), mad(idys, statistics.mean(idys))]
if len(idys) > 0:
assert(currID not in read_alignment_idy)
read_alignment_idy[currID] = [statistics.median(idys), mad(idys, statistics.mean(idys))]

# two passes: the first records the median alignment identity for each read, the second only keeps those within some distance from the median
reads=set()
Expand Down

0 comments on commit af86243

Please sign in to comment.