From f998fe447b8de6eeb9e75e1938eba5533e6eb7d9 Mon Sep 17 00:00:00 2001 From: Botond Sipos Date: Thu, 14 Nov 2019 09:39:15 +0000 Subject: [PATCH] Updated per-read report. --- scripts/cdna_classifier.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/cdna_classifier.py b/scripts/cdna_classifier.py index 9878469..f94e28d 100755 --- a/scripts/cdna_classifier.py +++ b/scripts/cdna_classifier.py @@ -79,23 +79,21 @@ def _update_stats(st, d_fh, segments, hits, usable_len, read): st["Classification"]["Unclassified"] += 1 st["UnclassHitNr"][len(hits)] += 1 if d_fh is not None: - d_fh.write("{}\t{}\t{}\t{}\t{}\n".format(read.Id, ".", ".", ".", ".")) + d_fh.write("{}\t{}\t0\t{}\t{}\t{}\n".format(read.Id, len(read.Seq), -1, -1, ".")) elif len(segments) == 1: st["Classification"]["Classified"] += 1 st["Strand"][segments[0].Strand] += 1 st["Unusable"][int(segments[0].Len / len(read.Seq) * 100)] += 1 if d_fh is not None: rs = segments[0] - sr_id = "{}:{}|{}".format(rs.Start, rs.End, read.Id) - d_fh.write("{}\t{}\t{}\t{}\t{}\n".format(read.Id, sr_id, rs.Start, rs.End, rs.Strand)) + d_fh.write("{}\t{}\t1\t{}\t{}\t{}\n".format(read.Id, len(read.Seq), rs.Start, rs.End, rs.Strand)) else: for rs in segments: st["Classification"]["Rescue"] += 1 st["RescueStrand"][rs.Strand] += 1 st["RescueHitNr"][len(hits)] += 1 if d_fh is not None: - sr_id = "{}:{}|{}".format(rs.Start, rs.End, read.Id) - d_fh.write("{}\t{}\t{}\t{}\t{}\n".format(read.Id, sr_id, rs.Start, rs.End, rs.Strand)) + d_fh.write("{}\t{}\t{}\t{}\t{}\t{}\n".format(read.Id, len(read.Seq), len(segments), rs.Start, rs.End, rs.Strand)) st["Unusable"][len(read.Seq) - int(sum([s.Len for s in segments]))] += 1 st["RescueSegmentNr"][len(segments)] += 1 @@ -228,7 +226,7 @@ def _plot_stats(st, pdf): d_fh = None if args.D is not None: d_fh = open(args.D, "w") - d_fh.write("Read\tSegment\tStart\tEnd\tStrand\n") + d_fh.write("Read\tLength\tClassified\tStart\tEnd\tStrand\n") st = _new_stats() input_size = None