From 932e872af982c6358d153d81bf308821f8ebd8ff Mon Sep 17 00:00:00 2001 From: Sergey Koren Date: Mon, 13 May 2024 11:59:35 -0400 Subject: [PATCH] ensure we fail when conversion of hic pairs fails, don't track mis-sorted reads to save memory --- src/Snakefiles/8-hicPipeline.sm | 4 ++-- src/scripts/parse_sam_pairs.py | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/Snakefiles/8-hicPipeline.sm b/src/Snakefiles/8-hicPipeline.sm index 0471ae8b..0f5e2f98 100644 --- a/src/Snakefiles/8-hicPipeline.sm +++ b/src/Snakefiles/8-hicPipeline.sm @@ -386,8 +386,8 @@ rule transformBWA: cd 8-hicPipeline cat > ./transform_bwa.sh < ../{output.byread_mapping} EOF diff --git a/src/scripts/parse_sam_pairs.py b/src/scripts/parse_sam_pairs.py index 30e6ec24..fdb2f04a 100755 --- a/src/scripts/parse_sam_pairs.py +++ b/src/scripts/parse_sam_pairs.py @@ -33,25 +33,15 @@ def print_results(names): name = "" names = [ ] -seen = {} -out_of_order = 0 for line in input_stream: line=line.split() if name == "": name = line[0] if name != line[0]: - seen[name] = 1 print_results(names) name = line[0] names = [ ] - if name in seen: - print("Warning: read %s already seen but encountered it again, please confirm your bam file is sorted by read."%(name), file=sys.stderr) - out_of_order += 1 names.append("%s\t%s"%(line[0], line[2])) -if out_of_order > 1000: - print("Error: encountered too many unsorted reads (%d), exiting. Please confirm the input bam is sorted by read."%(out_of_order), file=sys.stderr) - sys.exit(1) - print_results(names)