From 0c69e408ffe1b29eec4b6f91fcf075cdf5b5682f Mon Sep 17 00:00:00 2001 From: Eric Kofman Date: Thu, 25 Jul 2024 17:07:13 -0700 Subject: [PATCH] finished memory opt --- tests/integration_tests.ipynb | 2355 +++++++++++++++++++++++++++++++-- 1 file changed, 2260 insertions(+), 95 deletions(-) diff --git a/tests/integration_tests.ipynb b/tests/integration_tests.ipynb index 42d1249..32a57bd 100644 --- a/tests/integration_tests.ipynb +++ b/tests/integration_tests.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "id": "9f2684b1-dbad-45d5-bdb5-b83989bec7dc", "metadata": {}, "outputs": [], @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "id": "63b5feff-aa8a-417c-8c93-9b9d74e5dee9", "metadata": {}, "outputs": [], @@ -23,22 +23,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "id": "f6012705-9d34-4997-8681-c7bbcc4f008b", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'pybedtools' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m b \u001b[38;5;241m=\u001b[39m \u001b[43mpybedtools\u001b[49m\u001b[38;5;241m.\u001b[39mBedTool()\n", - "\u001b[0;31mNameError\u001b[0m: name 'pybedtools' is not defined" - ] - } - ], + "outputs": [], "source": [ "b = pybedtools.BedTool()" ] @@ -72,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "ba12c2b8-2ccb-4866-86c8-a2284fd1229f", "metadata": { "scrolled": true @@ -272,85 +260,2262 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:02<00:00, 5.37it/s]\n" + "100%|██████████| 16/16 [00:02<00:00, 5.37it/s]\n", + "100%|██████████| 16/16 [00:03<00:00, 5.19it/s]\n" ] - } - ], - "source": [ - "%%bash\n", - "\n", - "pwd\n", - "echo \"Running\"\n", - "\n", - "cd ..\n", - "pwd\n", - "\n", - "tests_folder=\"tests/strandedness_tests/\"\n", - "\n", - "export MARINE=/tscc/projects/ps-yeolab3/ekofman/sailor2/\n", - "export mypython=/tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/python\n", - "\n", - "for t in \"unstranded_pair_test\" \"F1R2_pair_test-single_end_mode\" \"F1R2_pair_test\" \"F2R1_end_second_in_pair_test\" \"same_pos_dif_reads_test\" \"tax1bp3_chr17_3665556_read_test\" \"pair_test\"\n", - "do\n", - " echo $t\n", - " echo \"Removing old files...\"\n", - " rm $tests_folder$t/* -r\n", - "\n", - " echo \"Running tests...\"\n", - " bash tests/strandedness_tests/scripts/$t.sh $mypython\n", - " \n", - "done\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "57192b28-b498-4f81-bdcd-0d4d14f644ff", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "%%bash\n", - "\n", - "pwd\n", - "echo \"Running\"\n", - "\n", - "cd ..\n", - "pwd\n", - "\n", - "export MARINE=/tscc/projects/ps-yeolab3/ekofman/sailor2/\n", - "export mypython=/tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/python\n", - "\n", - "tests_folder=\"tests/singlecell_tests/\"\n", - "for t in \"only_5_cells_test\" \"long_read_sc_test\"\n", - "\n", - "do\n", - " echo $t\n", - " echo \"Removing old files...\"\n", - " rm $tests_folder$t/* -r\n", - "\n", - " echo \"Running old tests...\"\n", - " bash tests/singlecell_tests/scripts/$t.sh $mypython\n", - " \n", - "done\n" - ] - }, - { - "cell_type": "markdown", - "id": "8877b85c-bb47-45e7-9ad1-05667d0fbf53", - "metadata": {}, - "source": [ - "# Integration test automatic checks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f5c3ed3e-13dd-4399-924e-3d1ac17ce387", - "metadata": {}, - "outputs": [], + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Concatenating results...\n", + "Done concatenating.\n", + "original 4\n", + "filtered 3\n", + "dtype: int64\n", + "Total time to calculate coverage: 0.054 minutes\n", + "Filtering..\n", + "\tNumber of edits after filtering:\n", + "\t3\n", + "\tNumber of unique edit sites:\n", + "\t3\n", + "Writing sites...\n", + "\n", + "Adding strand-specific conversion...\n", + "\n", + "3 sites being converted to SAILOR format...\n", + "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", + "1 final deduplicated SAILOR-formatted sites\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.563305MB; Peak: 22.675814MB\n", + "Time elapsed: 8.57s\n", + "-------------------------------\n", + "Deleting intermediate files...\n", + "-------------------------------\n", + "++++++\n", + "Done!\n", + "++++++\n", + "F1R2_pair_test\n", + "Removing old files...\n", + "Running tests...\n", + "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", + "Arguments:\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F1R2_pair.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test\n", + "\tBarcode whitelist:\tNone\n", + "\tStrandedness:\t2\n", + "\tBarcode Tag:\tNone\n", + "\tPaired End:\tTrue\n", + "\tCoverage only:\tFalse\n", + "\tFiltering only:\tFalse\n", + "\tAnnotation only:\tFalse\n", + "\tSailor outputs:\tTrue\n", + "\tBedgraphs:\t[]\n", + "\tMinimum base quality:\t0\n", + "\tMinimum read quality:\t0\n", + "\tMinimum distance from end:\t0\n", + "\tMaximum edits per read:\tNone\n", + "\tContigs:\tchr17\n", + "\tNumber of intervals:\t16\n", + "\tCores:\t16\n", + "\tVerbose:\tTrue\n", + "\tKeep intermediate files:\tFalse\n", + "\tSkip coverage?:\tFalse\n", + "\n", + ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", + "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", + "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", + "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", + "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", + "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", + "### ### ### ### ### ### ########### ### #### ########## \n", + "==================================================================\n", + "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", + "==================================================================\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test is not empty\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "~~~~~~~~~~~~~~~~~~\n", + "Identifying edits\n", + "~~~~~~~~~~~~~~~~~~\n", + "Each contig is being split into 16 subsets...\n", + "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", + "\tContig chr17\n", + "16 total jobs\n", + "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 26A20G52\n", + "CIGAR tag 1S100M\n", + "is_reverse False\n", + "is_read1 True\n", + "is_read2 False\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse True\n", + "read id VH01429:22:AACFJ5NHV:1:2211:40480:18323\n", + "VH01429:22:AACFJ5NHV:1:2211:40480:18323\t99\t#16\t43044305\t255\t1S100M\t#16\t43044312\t107\tGTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\tarray('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\t[('MD', '26A20G52'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 190)]\n", + "reverse_or_forward: -\n", + "----------------------------\n", + "MD tag: 26A20G52\n", + "CIGAR string 1S100M\n", + "Reference seq: TGCTACCAAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACGTATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", + "Aligned seq: GTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", + "Qualities: array('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 1), (0, 100)]\n", + "Aligned sequence before clipping (if needed):\n", + " GTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", + "Qualities before clipping:\n", + " array('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 100)]\n", + "Aligned sequence after clipping (if needed):\n", + " TGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['26', '20', '52']\n", + "[26, 47, 100]\n", + "Indicated reference seq:\n", + " tgctaccaagtttatttgcagtgttaAcagcacaacatttacaaaacGtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " tgctaccaagtttatttgcagtgttaAcagcacaacatttacaaaacGtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", + "Fixed aligned seq:\n", + " TGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", + "Finalized fixed aligned seq:\n", + " tgctaccaagtttatttgcagtgttaCcagcacaacatttacaaaacAtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", + "Indicated qualities:\n", + " 34343434343434343434263434343434343434342634343434261234343434342626343434343434343434341234341234343434343434123434343434343412343434343412343434343434343434342634342634343426343426343434342634343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['C', 'A']\n", + "ref bases ['A', 'G']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['C', 'A'], ref bases: ['A', 'G']\n", + "Getting info: C A 12 27\n", + "Getting info: A G 12 48\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 40G3T55\n", + "CIGAR tag 100M1S\n", + "is_reverse True\n", + "is_read1 False\n", + "is_read2 True\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse False\n", + "read id VH01429:22:AACFJ5NHV:1:2211:40480:18323\n", + "VH01429:22:AACFJ5NHV:1:2211:40480:18323\t147\t#16\t43044312\t255\t100M1S\t#16\t43044305\t-107\tAAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\tarray('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '40G3T55'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 190)]\n", + "reverse_or_forward: -\n", + "----------------------------\n", + "MD tag: 40G3T55\n", + "CIGAR string 100M1S\n", + "Reference seq: AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACGTATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", + "Aligned seq: AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(0, 100), (4, 1)]\n", + "Aligned sequence before clipping (if needed):\n", + " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 100)]\n", + "Aligned sequence after clipping (if needed):\n", + " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['40', '3', '55']\n", + "[40, 44, 100]\n", + "Indicated reference seq:\n", + " aagtttatttgcagtgttaacagcacaacatttacaaaacGtatTttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " aagtttatttgcagtgttaacagcacaacatttacaaaacGtatTttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", + "Fixed aligned seq:\n", + " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", + "Finalized fixed aligned seq:\n", + " aagtttatttgcagtgttaacagcacaacatttacaaaacAtatGttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", + "Indicated qualities:\n", + " 34343434343434263434342634343434341234343434263434343434343434343434343426343434343434341234343434343434343434343434263412343434343434343434342634343434343434343434343434343426343434263434343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['A', 'G']\n", + "ref bases ['G', 'T']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['A', 'G'], ref bases: ['G', 'T']\n", + "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "Getting info: A G 34 41\n", + "Getting info: G T 12 45\n", + "chr17:008, total reads: 2, counts_df: chr17\n", + "edited 2\n", + "total_edits 4\n", + "Reads processed:\t2\n", + "Time to process reads in min:\t0.05194\n", + "Read Summary:\n", + "edited 2\n", + "total_edits 4\n", + "dtype: int64\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Calculating coverage at edited sites, minimum read quality is 0...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\tsplit chr17_000_0_5203591, 0/16...\n", + "\tsplit chr17_010_52035910_57239501, 10/16...\n", + "Done grouping! Concatenating ...\n", + "Done concatenating!\n", + "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 16/16 [00:03<00:00, 5.33it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + " chr17:43044352~~~~~~~`\n", + "coverage_at_pos 1\n", + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + " chr17:43044331~~~~~~~`\n", + "coverage_at_pos 1\n", + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + " chr17:43044356~~~~~~~`\n", + "coverage_at_pos 1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 16/16 [00:03<00:00, 5.29it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Concatenating results...\n", + "Done concatenating.\n", + "original 4\n", + "filtered 3\n", + "dtype: int64\n", + "Total time to calculate coverage: 0.052 minutes\n", + "Filtering..\n", + "\tNumber of edits after filtering:\n", + "\t3\n", + "\tNumber of unique edit sites:\n", + "\t3\n", + "Writing sites...\n", + "\n", + "Adding strand-specific conversion...\n", + "\n", + "3 sites being converted to SAILOR format...\n", + "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", + "1 final deduplicated SAILOR-formatted sites\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.564878MB; Peak: 22.677319MB\n", + "Time elapsed: 8.48s\n", + "-------------------------------\n", + "Deleting intermediate files...\n", + "-------------------------------\n", + "++++++\n", + "Done!\n", + "++++++\n", + "F2R1_end_second_in_pair_test\n", + "Removing old files...\n", + "Running tests...\n", + "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", + "Arguments:\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F2R1_end_second_in_pair.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test\n", + "\tBarcode whitelist:\tNone\n", + "\tStrandedness:\t2\n", + "\tBarcode Tag:\tNone\n", + "\tPaired End:\tTrue\n", + "\tCoverage only:\tFalse\n", + "\tFiltering only:\tFalse\n", + "\tAnnotation only:\tFalse\n", + "\tSailor outputs:\tTrue\n", + "\tBedgraphs:\t[]\n", + "\tMinimum base quality:\t0\n", + "\tMinimum read quality:\t0\n", + "\tMinimum distance from end:\t0\n", + "\tMaximum edits per read:\tNone\n", + "\tContigs:\tchr17\n", + "\tNumber of intervals:\t16\n", + "\tCores:\t16\n", + "\tVerbose:\tTrue\n", + "\tKeep intermediate files:\tFalse\n", + "\tSkip coverage?:\tFalse\n", + "\n", + ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", + "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", + "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", + "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", + "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", + "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", + "### ### ### ### ### ### ########### ### #### ########## \n", + "==================================================================\n", + "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", + "==================================================================\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test is not empty\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "~~~~~~~~~~~~~~~~~~\n", + "Identifying edits\n", + "~~~~~~~~~~~~~~~~~~\n", + "Each contig is being split into 16 subsets...\n", + "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", + "\tContig chr17\n", + "16 total jobs\n", + "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 10A0G0A84\n", + "CIGAR tag 13M198556N84M1S\n", + "is_reverse False\n", + "is_read1 False\n", + "is_read2 True\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse True\n", + "read id VH01429:22:AACFJ5NHV:1:1112:41030:25422\n", + "VH01429:22:AACFJ5NHV:1:1112:41030:25422\t163\t#16\t43001705\t255\t13M198556N84M1S\t#16\t43200271\t198653\tGGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGCA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '10A0G0A84'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 3), ('AS', 172), ('XS', '+')]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 10A0G0A84\n", + "CIGAR string 13M198556N84M1S\n", + "Reference seq: GGCTGTCATGAGAGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGC\n", + "Aligned seq: GGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGCA\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(0, 13), (3, 198556), (0, 84), (4, 1)]\n", + "Aligned sequence before clipping (if needed):\n", + " GGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGCA\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 13), (3, 198556), (0, 84)]\n", + "Aligned sequence after clipping (if needed):\n", + " GGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGC\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['10', '0', '0', '84']\n", + "[10, 11, 12, 97]\n", + "Indicated reference seq:\n", + " ggctgtcatgAGAgaggaggaggatgagctcaaagatgaagttcaaagtcagtcctctgcttcctcagaggattacatcatcatcctgcctgagtgc\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " ggctgtcatgAGA198556*ngaggaggaggatgagctcaaagatgaagttcaaagtcagtcctctgcttcctcagaggattacatcatcatcctgcctgagtgc\n", + "Fixed aligned seq:\n", + " GGCTGTCATGGAG198556*nGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGC\n", + "Finalized fixed aligned seq:\n", + " ggctgtcatgGAG198556*ngaggaggaggatgagctcaaagatgaagttcaaagtcagtcctctgcttcctcagaggattacatcatcatcctgcctgagtgc\n", + "Indicated qualities:\n", + " 34343434343434343434263434343434343434342634342634343434343434343434343434343434343434343434123434343434343426343434342634343434343434342634343434342634343434123434123434343434343434343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G', 'A', 'G']\n", + "ref bases ['A', 'G', 'A']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G', 'A', 'G'], ref bases: ['A', 'G', 'A']\n", + "Getting info: G A 26 11\n", + "Getting info: A G 34 12\n", + "Getting info: G A 34 13\n", + "chr17:008, total reads: 1, counts_df: chr17\n", + "edited 1\n", + "total_edits 3\n", + "Reads processed:\t1\n", + "Time to process reads in min:\t0.05321\n", + "Read Summary:\n", + "edited 1\n", + "total_edits 3\n", + "dtype: int64\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Calculating coverage at edited sites, minimum read quality is 0...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\tsplit chr17_000_0_5203591, 0/16...\n", + "\tsplit chr17_010_52035910_57239501, 10/16...\n", + "Done grouping! Concatenating ...\n", + "Done concatenating!\n", + "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 16/16 [00:03<00:00, 5.20it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + " chr17:43001715~~~~~~~`\n", + "coverage_at_pos 1\n", + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + " chr17:43001716~~~~~~~`\n", + "coverage_at_pos 1\n", + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + " chr17:43001717~~~~~~~`\n", + "coverage_at_pos 1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 16/16 [00:03<00:00, 5.28it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Concatenating results...\n", + "Done concatenating.\n", + "original 3\n", + "filtered 3\n", + "dtype: int64\n", + "Total time to calculate coverage: 0.053 minutes\n", + "Filtering..\n", + "\tNumber of edits after filtering:\n", + "\t3\n", + "\tNumber of unique edit sites:\n", + "\t3\n", + "Writing sites...\n", + "\n", + "Adding strand-specific conversion...\n", + "\n", + "3 sites being converted to SAILOR format...\n", + "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", + "0 final deduplicated SAILOR-formatted sites\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.710648MB; Peak: 22.821244MB\n", + "Time elapsed: 8.63s\n", + "-------------------------------\n", + "Deleting intermediate files...\n", + "-------------------------------\n", + "++++++\n", + "Done!\n", + "++++++\n", + "same_pos_dif_reads_test\n", + "Removing old files...\n", + "Running tests...\n", + "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", + "Arguments:\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/same_pos_dif_reads.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test\n", + "\tBarcode whitelist:\tNone\n", + "\tStrandedness:\t2\n", + "\tBarcode Tag:\tNone\n", + "\tPaired End:\tTrue\n", + "\tCoverage only:\tFalse\n", + "\tFiltering only:\tFalse\n", + "\tAnnotation only:\tFalse\n", + "\tSailor outputs:\tTrue\n", + "\tBedgraphs:\t[]\n", + "\tMinimum base quality:\t0\n", + "\tMinimum read quality:\t0\n", + "\tMinimum distance from end:\t0\n", + "\tMaximum edits per read:\tNone\n", + "\tContigs:\tchr17\n", + "\tNumber of intervals:\t16\n", + "\tCores:\t16\n", + "\tVerbose:\tTrue\n", + "\tKeep intermediate files:\tFalse\n", + "\tSkip coverage?:\tFalse\n", + "\n", + ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", + "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", + "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", + "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", + "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", + "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", + "### ### ### ### ### ### ########### ### #### ########## \n", + "==================================================================\n", + "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", + "==================================================================\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test is not empty\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "~~~~~~~~~~~~~~~~~~\n", + "Identifying edits\n", + "~~~~~~~~~~~~~~~~~~\n", + "Each contig is being split into 16 subsets...\n", + "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", + "\tContig chr17\n", + "16 total jobs\n", + "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 55C43\n", + "CIGAR tag 1S97M17144N2M\n", + "is_reverse False\n", + "is_read1 False\n", + "is_read2 True\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse True\n", + "read id VH01429:22:AACFJ5NHV:2:2606:39477:40189\n", + "VH01429:22:AACFJ5NHV:2:2606:39477:40189\t163\t#16\t83195127\t255\t1S97M17144N2M\t#16\t83195141\t17258\tTCCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\t[('MD', '55C43'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 197), ('XS', '+')]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 55C43\n", + "CIGAR string 1S97M17144N2M\n", + "Reference seq: CCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTCCCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", + "Aligned seq: TCCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 1), (0, 97), (3, 17144), (0, 2)]\n", + "Aligned sequence before clipping (if needed):\n", + " TCCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 97), (3, 17144), (0, 2)]\n", + "Aligned sequence after clipping (if needed):\n", + " CCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['55', '43']\n", + "[55, 99]\n", + "Indicated reference seq:\n", + " cccaccttcgcttccgggaccacagagcccctggagcctgcacgccacggagtctCccactgaggagtgaggaggcctcttgtggggacagatctggga\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " cccaccttcgcttccgggaccacagagcccctggagcctgcacgccacggagtctCccactgaggagtgaggaggcctcttgtggggacagatctgg17144*nga\n", + "Fixed aligned seq:\n", + " CCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGG17144*nGA\n", + "Finalized fixed aligned seq:\n", + " cccaccttcgcttccgggaccacagagcccctggagcctgcacgccacggagtctAccactgaggagtgaggaggcctcttgtggggacagatctgg17144*nga\n", + "Indicated qualities:\n", + " 343434343434342626343434343434343434343434263434263434343434343434343434343434263434343434343434343434343434341234341234343426343434343434343434343434343434343434343434343434263434343434343412343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['A']\n", + "ref bases ['C']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['A'], ref bases: ['C']\n", + "Getting info: A C 12 56\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 86C13\n", + "CIGAR tag 1S17M4579N83M\n", + "is_reverse False\n", + "is_read1 False\n", + "is_read2 True\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse True\n", + "read id VH01429:22:AACFJ5NHV:2:1602:67558:13893\n", + "VH01429:22:AACFJ5NHV:2:1602:67558:13893\t163\t#16\t83195207\t255\t1S17M4579N83M\t#16\t83199866\t4757\tGTGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '86C13'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 191), ('XS', '+')]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 86C13\n", + "CIGAR string 1S17M4579N83M\n", + "Reference seq: TGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAAT\n", + "Aligned seq: GTGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 1), (0, 17), (3, 4579), (0, 83)]\n", + "Aligned sequence before clipping (if needed):\n", + " GTGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 17), (3, 4579), (0, 83)]\n", + "Aligned sequence after clipping (if needed):\n", + " TGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['86', '13']\n", + "[86, 100]\n", + "Indicated reference seq:\n", + " tgtggggacagatctggaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaat\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " tgtggggacagatctgg4579*naatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaat\n", + "Fixed aligned seq:\n", + " TGTGGGGACAGATCTGG4579*nAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", + "Finalized fixed aligned seq:\n", + " tgtggggacagatctgg4579*naatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaat\n", + "Indicated qualities:\n", + " 34343434343434343434342634342634343434343434343434343434343434343434343434343412343434343434343434343434343434343434343434343434343434343434341234343434343434343434343426343434343434343434343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G']\n", + "ref bases ['C']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G'], ref bases: ['C']\n", + "Getting info: G C 34 4666\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 95C4\n", + "CIGAR tag 1S100M\n", + "is_reverse False\n", + "is_read1 False\n", + "is_read2 True\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse True\n", + "read id VH01429:22:AACFJ5NHV:1:1109:44703:39053\n", + "VH01429:22:AACFJ5NHV:1:1109:44703:39053\t163\t#16\t83199777\t255\t1S100M\t#16\t83199839\t163\tGCTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\t[('MD', '95C4'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 191)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 95C4\n", + "CIGAR string 1S100M\n", + "Reference seq: CTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTG\n", + "Aligned seq: GCTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 1), (0, 100)]\n", + "Aligned sequence before clipping (if needed):\n", + " GCTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 100)]\n", + "Aligned sequence after clipping (if needed):\n", + " CTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['95', '4']\n", + "[95, 100]\n", + "Indicated reference seq:\n", + " ctattattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtg\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " ctattattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtg\n", + "Fixed aligned seq:\n", + " CTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", + "Finalized fixed aligned seq:\n", + " ctattattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtg\n", + "Indicated qualities:\n", + " 34343434343434343434343434343434123434343434343434343434343434343434343434343434343426343434343434343434343434343434343434342634343434343426343434343434343434343434343434343434343434343426343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G']\n", + "ref bases ['C']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G'], ref bases: ['C']\n", + "Getting info: G C 34 96\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 92C8\n", + "CIGAR tag 101M\n", + "is_reverse True\n", + "is_read1 True\n", + "is_read2 False\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse False\n", + "read id VH01429:22:AACFJ5NHV:2:1208:38151:26028\n", + "VH01429:22:AACFJ5NHV:2:1208:38151:26028\t83\t#16\t83199780\t255\t101M\t#16\t83199711\t-170\tTTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\t[('MD', '92C8'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 195)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 92C8\n", + "CIGAR string 101M\n", + "Reference seq: TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGA\n", + "Aligned seq: TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(0, 101)]\n", + "Aligned sequence before clipping (if needed):\n", + " TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 101)]\n", + "Aligned sequence after clipping (if needed):\n", + " TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['92', '8']\n", + "[92, 101]\n", + "Indicated reference seq:\n", + " ttattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcga\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " ttattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcga\n", + "Fixed aligned seq:\n", + " TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", + "Finalized fixed aligned seq:\n", + " ttattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcga\n", + "Indicated qualities:\n", + " 3434343434343434343426343426343434343434343426343434343434343434343434343434343434343434343434343434343434343434343434263434343434343426343434343434343434343434343434343434343434343434343434343426343426\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G']\n", + "ref bases ['C']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G'], ref bases: ['C']\n", + "Getting info: G C 34 93\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 60C37\n", + "CIGAR tag 1S98M2S\n", + "is_reverse False\n", + "is_read1 False\n", + "is_read2 True\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse True\n", + "read id VH01429:22:AACFJ5NHV:1:2112:47146:35399\n", + "VH01429:22:AACFJ5NHV:1:2112:47146:35399\t163\t#16\t83199812\t255\t1S98M2S\t#16\t83199946\t232\tTCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAAT\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '60C37'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 192)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 60C37\n", + "CIGAR string 1S98M2S\n", + "Reference seq: CATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCA\n", + "Aligned seq: TCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAAT\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 1), (0, 98), (4, 2)]\n", + "Aligned sequence before clipping (if needed):\n", + " TCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAAT\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 98)]\n", + "Aligned sequence after clipping (if needed):\n", + " CATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCA\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['60', '37']\n", + "[60, 98]\n", + "Indicated reference seq:\n", + " catccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgca\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " catccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgca\n", + "Fixed aligned seq:\n", + " CATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCA\n", + "Finalized fixed aligned seq:\n", + " catccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgca\n", + "Indicated qualities:\n", + " 3434343434343426343434343434343434343434343434342634343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G']\n", + "ref bases ['C']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G'], ref bases: ['C']\n", + "Getting info: G C 34 61\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 5C50C37G5\n", + "CIGAR tag 1S100M\n", + "is_reverse False\n", + "is_read1 False\n", + "is_read2 True\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse True\n", + "read id VH01429:22:AACFJ5NHV:2:2610:69319:26899\n", + "VH01429:22:AACFJ5NHV:2:2610:69319:26899\t163\t#16\t83199816\t255\t1S100M\t#16\t83199822\t105\tTCAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\tarray('B', [34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\t[('MD', '5C50C37G5'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 3), ('AS', 187)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 5C50C37G5\n", + "CIGAR string 1S100M\n", + "Reference seq: CAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTT\n", + "Aligned seq: TCAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", + "Qualities: array('B', [34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 1), (0, 100)]\n", + "Aligned sequence before clipping (if needed):\n", + " TCAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", + "Qualities before clipping:\n", + " array('B', [34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 100)]\n", + "Aligned sequence after clipping (if needed):\n", + " CAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", + "Qualities after clipping:\n", + " array('B', [26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['5', '50', '37', '5']\n", + "[5, 56, 94, 100]\n", + "Indicated reference seq:\n", + " caaacCtgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactt\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " caaacCtgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactt\n", + "Fixed aligned seq:\n", + " CAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", + "Finalized fixed aligned seq:\n", + " caaacTtgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactt\n", + "Indicated qualities:\n", + " 26343434343434343412342634343412343412341226263434343434341234343434343434343434123434343434343434343434343434343434343434342634343434343412343434343434263434343434343434343434343434343434343434342626\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['T', 'G', 'A']\n", + "ref bases ['C', 'C', 'G']\n", + "chr17:008, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['T', 'G', 'A'], ref bases: ['C', 'C', 'G']\n", + "Getting info: T C 34 6\n", + "Getting info: G C 34 57\n", + "Getting info: A G 34 95\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 50C37G10\n", + "CIGAR tag 1S99M1S\n", + "is_reverse True\n", + "is_read1 True\n", + "is_read2 False\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse False\n", + "read id VH01429:22:AACFJ5NHV:2:2610:69319:26899\n", + "VH01429:22:AACFJ5NHV:2:2610:69319:26899\t83\t#16\t83199822\t255\t1S99M1S\t#16\t83199816\t-105\tTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '50C37G10'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 187)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 50C37G10\n", + "CIGAR string 1S99M1S\n", + "Reference seq: TGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTG\n", + "Aligned seq: TTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGA\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 1), (0, 99), (4, 1)]\n", + "Aligned sequence before clipping (if needed):\n", + " TTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGA\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 99)]\n", + "Aligned sequence after clipping (if needed):\n", + " TGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTG\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['50', '37', '10']\n", + "[50, 88, 99]\n", + "Indicated reference seq:\n", + " tgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctg\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " tgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctg\n", + "Fixed aligned seq:\n", + " TGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTG\n", + "Finalized fixed aligned seq:\n", + " tgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctg\n", + "Indicated qualities:\n", + " 343434343434341234123434343434343434343434343434343434343412343426263434121234343434343434343434343434343412342634342634341234123434343434341234343434343434343434343434343434343434343434343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G', 'A']\n", + "ref bases ['C', 'G']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", + "Getting info: G C 34 51\n", + "Getting info: A G 34 89\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 48C37G14\n", + "CIGAR tag 101M\n", + "is_reverse True\n", + "is_read1 True\n", + "is_read2 False\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse False\n", + "read id VH01429:22:AACFJ5NHV:1:1403:49513:32843\n", + "VH01429:22:AACFJ5NHV:1:1403:49513:32843\t83\t#16\t83199824\t255\t101M\t#16\t83199748\t-177\tACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '48C37G14'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 194)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 48C37G14\n", + "CIGAR string 101M\n", + "Reference seq: ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAA\n", + "Aligned seq: ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(0, 101)]\n", + "Aligned sequence before clipping (if needed):\n", + " ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 101)]\n", + "Aligned sequence after clipping (if needed):\n", + " ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['48', '37', '14']\n", + "[48, 86, 101]\n", + "Indicated reference seq:\n", + " actcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " actcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", + "Fixed aligned seq:\n", + " ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", + "Finalized fixed aligned seq:\n", + " actcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaa\n", + "Indicated qualities:\n", + " 3434343434343434343434343434341234123434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G', 'A']\n", + "ref bases ['C', 'G']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", + "Getting info: G C 34 49\n", + "Getting info: A G 34 87\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 47C37G14\n", + "CIGAR tag 1S100M\n", + "is_reverse False\n", + "is_read1 False\n", + "is_read2 True\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse True\n", + "read id VH01429:22:AACFJ5NHV:1:2414:26942:20235\n", + "VH01429:22:AACFJ5NHV:1:2414:26942:20235\t163\t#16\t83199825\t255\t1S100M\t#16\t83199839\t115\tTCTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\tarray('B', [34, 12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '47C37G14'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 191)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 47C37G14\n", + "CIGAR string 1S100M\n", + "Reference seq: CTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAA\n", + "Aligned seq: TCTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", + "Qualities: array('B', [34, 12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 1), (0, 100)]\n", + "Aligned sequence before clipping (if needed):\n", + " TCTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", + "Qualities before clipping:\n", + " array('B', [34, 12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 100)]\n", + "Aligned sequence after clipping (if needed):\n", + " CTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", + "Qualities after clipping:\n", + " array('B', [12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['47', '37', '14']\n", + "[47, 85, 100]\n", + "Indicated reference seq:\n", + " ctcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " ctcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", + "Fixed aligned seq:\n", + " CTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", + "Finalized fixed aligned seq:\n", + " ctcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaa\n", + "Indicated qualities:\n", + " 12343434123434263434343412343434263434343434343434343434343434123434343426343426343434343434343434341226343434343434343434263434263434263412341226123434261234343434263434263426343434341234343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G', 'A']\n", + "ref bases ['C', 'G']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", + "Getting info: G C 34 48\n", + "Getting info: A G 26 86\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 2T9T0T26G4C37G1A11\n", + "CIGAR tag 3S97M\n", + "is_reverse False\n", + "is_read1 False\n", + "is_read2 True\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse True\n", + "read id VH01429:22:AACFJ5NHV:2:1608:28097:17679\n", + "VH01429:22:AACFJ5NHV:2:1608:28097:17679\t163\t#16\t83199827\t255\t3S97M\t#16\t83199827\t100\tCCACAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\tarray('B', [12, 12, 12, 12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '2T9T0T26G4C37G1A11'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 7), ('AS', 177)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 2T9T0T26G4C37G1A11\n", + "CIGAR string 3S97M\n", + "Reference seq: CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAA\n", + "Aligned seq: CCACAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", + "Qualities: array('B', [12, 12, 12, 12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 3), (0, 97)]\n", + "Aligned sequence before clipping (if needed):\n", + " CCACAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", + "Qualities before clipping:\n", + " array('B', [12, 12, 12, 12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 97)]\n", + "Aligned sequence after clipping (if needed):\n", + " CAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", + "Qualities after clipping:\n", + " array('B', [12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['2', '9', '0', '26', '4', '37', '1', '11']\n", + "[2, 12, 13, 40, 45, 83, 85, 97]\n", + "Indicated reference seq:\n", + " caTatttccaacTTtattaatttttgcaagacttcggaggGtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtActttcctgtaa\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " caTatttccaacTTtattaatttttgcaagacttcggaggGtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtActttcctgtaa\n", + "Fixed aligned seq:\n", + " CAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", + "Finalized fixed aligned seq:\n", + " caAatttccaacAAtattaatttttgcaagacttcggaggCtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtTctttcctgtaa\n", + "Indicated qualities:\n", + " 12121234123434341234341212123434341212341212341212343434342634341226341234263412123434343434121226263434343434342634341234122634343434343434343412343434343434343434343434123434342634343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['A', 'A', 'A', 'C', 'G', 'A', 'T']\n", + "ref bases ['T', 'T', 'T', 'G', 'C', 'G', 'A']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['A', 'A', 'A', 'C', 'G', 'A', 'T'], ref bases: ['T', 'T', 'T', 'G', 'C', 'G', 'A']\n", + "Getting info: A T 12 3\n", + "Getting info: A T 12 13\n", + "Getting info: A T 12 14\n", + "Getting info: C G 12 41\n", + "Getting info: G C 34 46\n", + "Getting info: A G 34 84\n", + "Getting info: T A 12 86\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 45C37G16\n", + "CIGAR tag 100M1S\n", + "is_reverse True\n", + "is_read1 True\n", + "is_read2 False\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse False\n", + "read id VH01429:22:AACFJ5NHV:2:1608:28097:17679\n", + "VH01429:22:AACFJ5NHV:2:1608:28097:17679\t83\t#16\t83199827\t255\t100M1S\t#16\t83199827\t-100\tCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACG\tarray('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 12])\t[('MD', '45C37G16'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 177)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 45C37G16\n", + "CIGAR string 100M1S\n", + "Reference seq: CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAAC\n", + "Aligned seq: CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACG\n", + "Qualities: array('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 12])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(0, 100), (4, 1)]\n", + "Aligned sequence before clipping (if needed):\n", + " CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACG\n", + "Qualities before clipping:\n", + " array('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 12])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 100)]\n", + "Aligned sequence after clipping (if needed):\n", + " CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAAC\n", + "Qualities after clipping:\n", + " array('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['45', '37', '16']\n", + "[45, 83, 100]\n", + "Indicated reference seq:\n", + " catatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaac\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " catatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaac\n", + "Fixed aligned seq:\n", + " CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAAC\n", + "Finalized fixed aligned seq:\n", + " catatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaac\n", + "Indicated qualities:\n", + " 34261234343434263434342634343434342634343412123434343426343434343434343434343434343434341226343434343434343434343434343426342612343434343434343434341234343434123434342612343434343434343434343412343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G', 'A']\n", + "ref bases ['C', 'G']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", + "Getting info: G C 26 46\n", + "Getting info: A G 26 84\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 40C37G20\n", + "CIGAR tag 99M2S\n", + "is_reverse True\n", + "is_read1 True\n", + "is_read2 False\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse False\n", + "read id VH01429:22:AACFJ5NHV:2:1104:58090:46985\n", + "VH01429:22:AACFJ5NHV:2:1104:58090:46985\t83\t#16\t83199832\t255\t99M2S\t#16\t83195148\t-4783\tTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACCT\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 12])\t[('MD', '40C37G20'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 192), ('XS', '+')]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 40C37G20\n", + "CIGAR string 99M2S\n", + "Reference seq: TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGAC\n", + "Aligned seq: TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACCT\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 12])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(0, 99), (4, 2)]\n", + "Aligned sequence before clipping (if needed):\n", + " TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACCT\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 12])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 99)]\n", + "Aligned sequence after clipping (if needed):\n", + " TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGAC\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['40', '37', '20']\n", + "[40, 78, 99]\n", + "Indicated reference seq:\n", + " ttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgac\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " ttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgac\n", + "Fixed aligned seq:\n", + " TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGAC\n", + "Finalized fixed aligned seq:\n", + " ttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgac\n", + "Indicated qualities:\n", + " 343434343434343434263412263434123434343434341234342634343434343434343434343434343434343434343434342634343434343434263434343434343434343434262634343434343434342626123434343434343434343434343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G', 'A']\n", + "ref bases ['C', 'G']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", + "Getting info: G C 34 41\n", + "Getting info: A G 34 79\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 33C37G19C9\n", + "CIGAR tag 101M\n", + "is_reverse True\n", + "is_read1 True\n", + "is_read2 False\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse False\n", + "read id VH01429:22:AACFJ5NHV:1:1109:44703:39053\n", + "VH01429:22:AACFJ5NHV:1:1109:44703:39053\t83\t#16\t83199839\t255\t101M\t#16\t83199777\t-163\tTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\tarray('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '33C37G19C9'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 3), ('AS', 191)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 33C37G19C9\n", + "CIGAR string 101M\n", + "Reference seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGACTAAAGGAGA\n", + "Aligned seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", + "Qualities: array('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(0, 101)]\n", + "Aligned sequence before clipping (if needed):\n", + " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 101)]\n", + "Aligned sequence after clipping (if needed):\n", + " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['33', '37', '19', '9']\n", + "[33, 71, 91, 101]\n", + "Indicated reference seq:\n", + " tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgaCtaaaggaga\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgaCtaaaggaga\n", + "Fixed aligned seq:\n", + " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", + "Finalized fixed aligned seq:\n", + " tttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgaTtaaaggaga\n", + "Indicated qualities:\n", + " 3434343426343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343412343434343434343434343412\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G', 'A', 'T']\n", + "ref bases ['C', 'G', 'C']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G', 'A', 'T'], ref bases: ['C', 'G', 'C']\n", + "Getting info: G C 34 34\n", + "Getting info: A G 34 72\n", + "Getting info: T C 34 92\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 33C37G29\n", + "CIGAR tag 101M\n", + "is_reverse True\n", + "is_read1 True\n", + "is_read2 False\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse False\n", + "read id VH01429:22:AACFJ5NHV:1:2414:26942:20235\n", + "VH01429:22:AACFJ5NHV:1:2414:26942:20235\t83\t#16\t83199839\t255\t101M\t#16\t83199825\t-115\tTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\tarray('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\t[('MD', '33C37G29'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 191)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 33C37G29\n", + "CIGAR string 101M\n", + "Reference seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGACTAAAGGAGA\n", + "Aligned seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", + "Qualities: array('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(0, 101)]\n", + "Aligned sequence before clipping (if needed):\n", + " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 101)]\n", + "Aligned sequence after clipping (if needed):\n", + " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['33', '37', '29']\n", + "[33, 71, 101]\n", + "Indicated reference seq:\n", + " tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggaga\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " chr17:011, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggaga\n", + "Fixed aligned seq:\n", + " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", + "Finalized fixed aligned seq:\n", + " tttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgactaaaggaga\n", + "Indicated qualities:\n", + " 3434343426343426263434343434343434343434343426343434343434343412343434343434263412343434343434343434343434343434342634341234343434343434343434342626343434342634343434342634343434343434343434341234343426\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G', 'A']\n", + "ref bases ['C', 'G']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", + "Getting info: G C 34 34\n", + "Getting info: A G 34 72\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 6C37G53\n", + "CIGAR tag 98M2S\n", + "is_reverse True\n", + "is_read1 True\n", + "is_read2 False\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse False\n", + "read id VH01429:22:AACFJ5NHV:2:1602:67558:13893\n", + "VH01429:22:AACFJ5NHV:2:1602:67558:13893\t83\t#16\t83199866\t255\t98M2S\t#16\t83195207\t-4757\tGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGAGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26])\t[('MD', '6C37G53'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 191), ('XS', '+')]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 6C37G53\n", + "CIGAR string 98M2S\n", + "Reference seq: GGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGA\n", + "Aligned seq: GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGAGA\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(0, 98), (4, 2)]\n", + "Aligned sequence before clipping (if needed):\n", + " GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGAGA\n", + "Qualities before clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 98)]\n", + "Aligned sequence after clipping (if needed):\n", + " GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGA\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['6', '37', '53']\n", + "[6, 44, 98]\n", + "Indicated reference seq:\n", + " ggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggagaagttggtgtcttcctcatacaaga\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " ggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggagaagttggtgtcttcctcatacaaga\n", + "Fixed aligned seq:\n", + " GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGA\n", + "Finalized fixed aligned seq:\n", + " ggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgactaaaggagaagttggtgtcttcctcatacaaga\n", + "Indicated qualities:\n", + " 3434343434343434343434343434343434343426343434343434343434343434343434343434343434343434343434343434263434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G', 'A']\n", + "ref bases ['C', 'G']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", + "Getting info: G C 34 7\n", + "Getting info: A G 34 45\n", + "chr17:015, total reads: 16, counts_df: chr17\n", + "edited 15\n", + "no_edits 1\n", + "total_edits 32\n", + "Reads processed:\t16\n", + "Time to process reads in min:\t0.05393\n", + "Read Summary:\n", + "edited 15\n", + "no_edits 1\n", + "total_edits 32\n", + "dtype: int64\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Calculating coverage at edited sites, minimum read quality is 0...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\tsplit chr17_000_0_5203591, 0/16...\n", + "\tsplit chr17_010_52035910_57239501, 10/16...\n", + "Done grouping! Concatenating ...\n", + "Done concatenating!\n", + "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 16/16 [00:03<00:00, 5.18it/s]\n", + "100%|██████████| 16/16 [00:03<00:00, 5.24it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Concatenating results...\n", + "Done concatenating.\n", + "original 32\n", + "filtered 24\n", + "dtype: int64\n", + "Total time to calculate coverage: 0.053 minutes\n", + "Filtering..\n", + "\tNumber of edits after filtering:\n", + "\t24\n", + "\tNumber of unique edit sites:\n", + "\t10\n", + "Writing sites...\n", + "\n", + "Adding strand-specific conversion...\n", + "\n", + "10 sites being converted to SAILOR format...\n", + "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", + "2 final deduplicated SAILOR-formatted sites\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.710238MB; Peak: 22.819935MB\n", + "Time elapsed: 8.73s\n", + "-------------------------------\n", + "Deleting intermediate files...\n", + "-------------------------------\n", + "++++++\n", + "Done!\n", + "++++++\n", + "tax1bp3_chr17_3665556_read_test\n", + "Removing old files...\n", + "Running tests...\n", + "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", + "Arguments:\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/tax1bp3_chr17_3665556_read.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test\n", + "\tBarcode whitelist:\tNone\n", + "\tStrandedness:\t2\n", + "\tBarcode Tag:\tNone\n", + "\tPaired End:\tTrue\n", + "\tCoverage only:\tFalse\n", + "\tFiltering only:\tFalse\n", + "\tAnnotation only:\tFalse\n", + "\tSailor outputs:\tTrue\n", + "\tBedgraphs:\t[]\n", + "\tMinimum base quality:\t0\n", + "\tMinimum read quality:\t0\n", + "\tMinimum distance from end:\t0\n", + "\tMaximum edits per read:\tNone\n", + "\tContigs:\tchr17\n", + "\tNumber of intervals:\t16\n", + "\tCores:\t16\n", + "\tVerbose:\tTrue\n", + "\tKeep intermediate files:\tFalse\n", + "\tSkip coverage?:\tFalse\n", + "\n", + ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", + "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", + "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", + "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", + "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", + "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", + "### ### ### ### ### ### ########### ### #### ########## \n", + "==================================================================\n", + "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", + "==================================================================\n", + "~~~~~~~~~~~~~~~~~~\n", + "Identifying edits\n", + "~~~~~~~~~~~~~~~~~~\n", + "Each contig is being split into 16 subsets...\n", + "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", + "\tContig chr17\n", + "16 total jobs\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 2C1C6G3A1G4G47\n", + "CIGAR tag 28S70M\n", + "is_reverse True\n", + "is_read1 True\n", + "is_read2 False\n", + "is_paired True\n", + "is_proper_pair True\n", + "mate_is_reverse False\n", + "read id VH01429:22:AACFJ5NHV:2:2414:21053:15521\n", + "VH01429:22:AACFJ5NHV:2:2414:21053:15521\t83\t#16\t3665539\t255\t28S70M\t#16\t3665452\t-157\tCACAATAAGAGCCTATTTTGCTTCCTTAAAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\tarray('B', [26, 34, 34, 12, 12, 12, 26, 34, 34, 12, 34, 34, 12, 12, 26, 26, 12, 34, 12, 34, 12, 34, 34, 34, 12, 34, 12, 34, 34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '2C1C6G3A1G4G47'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 6), ('AS', 151)]\n", + "reverse_or_forward: +\n", + "----------------------------\n", + "MD tag: 2C1C6G3A1G4G47\n", + "CIGAR string 28S70M\n", + "Reference seq: AACGCGTGAAGGAAAATGATCAGAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", + "Aligned seq: CACAATAAGAGCCTATTTTGCTTCCTTAAAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", + "Qualities: array('B', [26, 34, 34, 12, 12, 12, 26, 34, 34, 12, 34, 34, 12, 12, 26, 26, 12, 34, 12, 34, 12, 34, 34, 34, 12, 34, 12, 34, 34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 28), (0, 70)]\n", + "Aligned sequence before clipping (if needed):\n", + " CACAATAAGAGCCTATTTTGCTTCCTTAAAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", + "Qualities before clipping:\n", + " array('B', [26, 34, 34, 12, 12, 12, 26, 34, 34, 12, 34, 34, 12, 12, 26, 26, 12, 34, 12, 34, 12, 34, 34, 34, 12, 34, 12, 34, 34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 70)]\n", + "Aligned sequence after clipping (if needed):\n", + " AAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", + "Qualities after clipping:\n", + " array('B', [34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['2', '1', '6', '3', '1', '4', '47']\n", + "[2, 4, 11, 15, 17, 22, 70]\n", + "Indicated reference seq:\n", + " aaCgCgtgaagGaaaAtGatcaGaaaaagaaagaagccaaagagaaaggtacctgggttcaactaaagca\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " aaCgCgtgaagGaaaAtGatcaGaaaaagaaagaagccaaagagaaaggtacctgggttcaactaaagca\n", + "Fixed aligned seq:\n", + " AAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", + "Finalized fixed aligned seq:\n", + " aaGgTgtgaagTaaaTtAatcaTaaaaagaaagaagccaaagagaaaggtacctgggttcaactaaagca\n", + "Indicated qualities:\n", + " 34261226341234123412263426123412341212121212123434343412123434341234343434343434343434343434343434343434343434343434343426263434343434343412\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['G', 'T', 'T', 'T', 'A', 'T']\n", + "ref bases ['C', 'C', 'G', 'A', 'G', 'G']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['G', 'T', 'T', 'T', 'A', 'T'], ref bases: ['C', 'C', 'G', 'A', 'G', 'G']\n", + "Getting info: G C 12 3\n", + "Getting info: T C 34 5\n", + "Getting info: T G 34 12\n", + "Getting info: T A 12 16\n", + "Getting info: A G 12 18\n", + "Getting info: T G 12 23\n", + "chr17:000, total reads: 1, counts_df: chr17\n", + "edited 1\n", + "total_edits 6\n", + "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:008, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "Reads processed:\t1\n", + "Time to process reads in min:\t0.05173\n", + "Read Summary:\n", + "edited 1\n", + "total_edits 6\n", + "dtype: int64\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Calculating coverage at edited sites, minimum read quality is 0...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\tsplit chr17_000_0_5203591, 0/16...\n", + "\tsplit chr17_010_52035910_57239501, 10/16...\n", + "Done grouping! Concatenating ...\n", + "Done concatenating!\n", + "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 16/16 [00:02<00:00, 5.34it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + " chr17:3665541~~~~~~~`\n", + "coverage_at_pos 1\n", + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + " chr17:3665543~~~~~~~`\n", + "coverage_at_pos 1\n", + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + " chr17:3665550~~~~~~~`\n", + "coverage_at_pos 1\n", + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + " chr17:3665554~~~~~~~`\n", + "coverage_at_pos 1\n", + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + " chr17:3665556~~~~~~~`\n", + "coverage_at_pos 1\n", + "~~~~~~\n", + "!!!!PAIRED END!!!!!\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + " chr17:3665561~~~~~~~`\n", + "coverage_at_pos 1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 16/16 [00:03<00:00, 5.23it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Concatenating results...\n", + "Done concatenating.\n", + "original 6\n", + "filtered 6\n", + "dtype: int64\n", + "Total time to calculate coverage: 0.053 minutes\n", + "Filtering..\n", + "\tNumber of edits after filtering:\n", + "\t6\n", + "\tNumber of unique edit sites:\n", + "\t6\n", + "Writing sites...\n", + "\n", + "Adding strand-specific conversion...\n", + "\n", + "6 sites being converted to SAILOR format...\n", + "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", + "1 final deduplicated SAILOR-formatted sites\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.685883MB; Peak: 22.796221MB\n", + "Time elapsed: 8.58s\n", + "-------------------------------\n", + "Deleting intermediate files...\n", + "-------------------------------\n", + "++++++\n", + "Done!\n", + "++++++\n", + "pair_test\n", + "Removing old files...\n", + "Running tests...\n", + "Assuming 1 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", + "Arguments:\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/pair_example_18_49488551_49590000.sorted.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/pair_test\n", + "\tBarcode whitelist:\tNone\n", + "\tStrandedness:\t2\n", + "\tBarcode Tag:\tNone\n", + "\tPaired End:\tTrue\n", + "\tCoverage only:\tFalse\n", + "\tFiltering only:\tFalse\n", + "\tAnnotation only:\tFalse\n", + "\tSailor outputs:\tTrue\n", + "\tBedgraphs:\t[]\n", + "\tMinimum base quality:\t0\n", + "\tMinimum read quality:\t0\n", + "\tMinimum distance from end:\t0\n", + "\tMaximum edits per read:\tNone\n", + "\tContigs:\t18\n", + "\tNumber of intervals:\t1\n", + "\tCores:\t1\n", + "\tVerbose:\tFalse\n", + "\tKeep intermediate files:\tTrue\n", + "\tSkip coverage?:\tFalse\n", + "\n", + ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", + "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", + "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", + "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", + "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", + "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", + "### ### ### ### ### ### ########### ### #### ########## \n", + "==================================================================\n", + "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", + "==================================================================\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/pair_test is not empty\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "~~~~~~~~~~~~~~~~~~\n", + "Identifying edits\n", + "~~~~~~~~~~~~~~~~~~\n", + "Each contig is being split into 1 subsets...\n", + "\tContig 18\n", + "1 total jobs\n", + "Reads processed:\t10\n", + "Time to process reads in min:\t0.02888\n", + "Read Summary:\n", + "edited 6\n", + "no_edits 2\n", + "secondary 2\n", + "total_edits 6\n", + "dtype: int64\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Calculating coverage at edited sites, minimum read quality is 0...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\tsplit 18_000_0_80373285, 0/1...\n", + "Done grouping! Concatenating ...\n", + "Done concatenating!\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:01<00:00, 1.72s/it]\n", + "100%|██████████| 1/1 [00:01<00:00, 1.28s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Concatenating results...\n", + "Done concatenating.\n", + "original 6\n", + "filtered 4\n", + "dtype: int64\n", + "Total time to calculate coverage: 0.022 minutes\n", + "Filtering..\n", + "\tNumber of edits after filtering:\n", + "\t4\n", + "\tNumber of unique edit sites:\n", + "\t2\n", + "Writing sites...\n", + "\n", + "Adding strand-specific conversion...\n", + "\n", + "2 sites being converted to SAILOR format...\n", + "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", + "2 final deduplicated SAILOR-formatted sites\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed...\n", + "Current memory usage 21.601166MB; Peak: 22.711632MB\n", + "Time elapsed: 5.21s\n", + "++++++\n", + "Done!\n", + "++++++\n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "pwd\n", + "echo \"Running\"\n", + "\n", + "cd ..\n", + "pwd\n", + "\n", + "tests_folder=\"tests/strandedness_tests/\"\n", + "\n", + "export MARINE=/tscc/projects/ps-yeolab3/ekofman/sailor2/\n", + "export mypython=/tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/python\n", + "\n", + "for t in \"unstranded_pair_test\" \"F1R2_pair_test-single_end_mode\" \"F1R2_pair_test\" \"F2R1_end_second_in_pair_test\" \"same_pos_dif_reads_test\" \"tax1bp3_chr17_3665556_read_test\" \"pair_test\"\n", + "do\n", + " echo $t\n", + " echo \"Removing old files...\"\n", + " rm $tests_folder$t/* -r\n", + "\n", + " echo \"Running tests...\"\n", + " bash tests/strandedness_tests/scripts/$t.sh $mypython\n", + " \n", + "done\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "57192b28-b498-4f81-bdcd-0d4d14f644ff", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/tscc/projects/ps-yeolab3/ekofman/sailor2/tests\n", + "Running\n", + "/tscc/projects/ps-yeolab3/ekofman/sailor2\n", + "only_5_cells_test\n", + "Removing old files...\n", + "Running old tests...\n", + "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", + "Arguments:\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/bams/9_3000526_only_5_cells.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/only_5_cells_test\n", + "\tBarcode whitelist:\tNone\n", + "\tStrandedness:\t2\n", + "\tBarcode Tag:\tCB\n", + "\tPaired End:\tFalse\n", + "\tCoverage only:\tFalse\n", + "\tFiltering only:\tFalse\n", + "\tAnnotation only:\tFalse\n", + "\tSailor outputs:\tFalse\n", + "\tBedgraphs:\t[]\n", + "\tMinimum base quality:\t0\n", + "\tMinimum read quality:\t0\n", + "\tMinimum distance from end:\t0\n", + "\tMaximum edits per read:\tNone\n", + "\tContigs:\t9\n", + "\tNumber of intervals:\t16\n", + "\tCores:\t16\n", + "\tVerbose:\tFalse\n", + "\tKeep intermediate files:\tFalse\n", + "\tSkip coverage?:\tFalse\n", + "\n", + ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", + "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", + "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", + "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", + "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", + "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", + "### ### ### ### ### ### ########### ### #### ########## \n", + "==================================================================\n", + "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", + "==================================================================\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/only_5_cells_test is not empty\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "~~~~~~~~~~~~~~~~~~\n", + "Identifying edits\n", + "~~~~~~~~~~~~~~~~~~\n", + "Each contig is being split into 16 subsets...\n", + "\tContig 9\n", + "16 total jobs\n", + "Reconfiguring contig 9\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 16/16 [00:03<00:00, 5.33it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reads processed:\t31\n", + "Time to process reads in min:\t0.05208\n", + "Read Summary:\n", + "edited 31\n", + "total_edits 99\n", + "dtype: int64\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Calculating coverage at edited sites, minimum read quality is 0...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\tsplit 9_000_0_7787195, 0/16...\n", + "\tsplit 9_010_77871950_85659145, 10/16...\n", + "Done grouping! Concatenating ...\n", + "Done concatenating!\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 4/4 [00:03<00:00, 1.33it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Concatenating results...\n", + "Done concatenating.\n", + "original 99\n", + "filtered 99\n", + "dtype: int64\n", + "Total time to calculate coverage: 0.052 minutes\n", + "Filtering..\n", + "\tNumber of edits after filtering:\n", + "\t99\n", + "\tNumber of unique edit sites:\n", + "\t73\n", + "Writing sites...\n", + "\n", + "Adding strand-specific conversion...\n", + "\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed...\n", + "Current memory usage 21.918925MB; Peak: 23.026284MB\n", + "Time elapsed: 11.90s\n", + "-------------------------------\n", + "Deleting intermediate files...\n", + "-------------------------------\n", + "++++++\n", + "Done!\n", + "++++++\n", + "long_read_sc_test\n", + "Removing old files...\n", + "Running old tests...\n", + "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", + "Arguments:\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//examples/data/LR_single_cell.md.subset.filtered.sorted.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/long_read_sc_test\n", + "\tBarcode whitelist:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//examples/data/sc_lr_barcodes.tsv.gz\n", + "\tStrandedness:\t2\n", + "\tBarcode Tag:\tIB\n", + "\tPaired End:\tFalse\n", + "\tCoverage only:\tFalse\n", + "\tFiltering only:\tFalse\n", + "\tAnnotation only:\tFalse\n", + "\tSailor outputs:\tFalse\n", + "\tBedgraphs:\t[]\n", + "\tMinimum base quality:\t0\n", + "\tMinimum read quality:\t0\n", + "\tMinimum distance from end:\t0\n", + "\tMaximum edits per read:\tNone\n", + "\tContigs:\t6\n", + "\tNumber of intervals:\t4\n", + "\tCores:\t16\n", + "\tVerbose:\tFalse\n", + "\tKeep intermediate files:\tFalse\n", + "\tSkip coverage?:\tFalse\n", + "\n", + ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", + "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", + "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", + "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", + "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", + "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", + "### ### ### ### ### ### ########### ### #### ########## \n", + "==================================================================\n", + "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", + "==================================================================\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/long_read_sc_test is not empty\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "Barcodes in whitelist: 3\n", + "~~~~~~~~~~~~~~~~~~\n", + "Identifying edits\n", + "~~~~~~~~~~~~~~~~~~\n", + "Each contig is being split into 4 subsets...\n", + "\tContig 6\n", + "4 total jobs\n", + "Reconfiguring contig 6\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 4/4 [00:02<00:00, 1.35it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reads processed:\t13\n", + "Time to process reads in min:\t0.05166\n", + "Read Summary:\n", + "edited 3\n", + "no_edits 10\n", + "total_edits 3\n", + "dtype: int64\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Calculating coverage at edited sites, minimum read quality is 0...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\tsplit 6_000_0_37434137, 0/4...\n", + "Done grouping! Concatenating ...\n", + "Done concatenating!\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 400/400 [00:05<00:00, 77.44it/s] \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Concatenating results...\n", + "Done concatenating.\n", + "original 3\n", + "filtered 3\n", + "dtype: int64\n", + "Total time to calculate coverage: 0.089 minutes\n", + "Filtering..\n", + "\tNumber of edits after filtering:\n", + "\t3\n", + "\tNumber of unique edit sites:\n", + "\t3\n", + "Writing sites...\n", + "\n", + "Adding strand-specific conversion...\n", + "\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed...\n", + "Current memory usage 21.792386MB; Peak: 22.9056MB\n", + "Time elapsed: 24.85s\n", + "-------------------------------\n", + "Deleting intermediate files...\n", + "-------------------------------\n", + "++++++\n", + "Done!\n", + "++++++\n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "pwd\n", + "echo \"Running\"\n", + "\n", + "cd ..\n", + "pwd\n", + "\n", + "export MARINE=/tscc/projects/ps-yeolab3/ekofman/sailor2/\n", + "export mypython=/tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/python\n", + "\n", + "tests_folder=\"tests/singlecell_tests/\"\n", + "for t in \"only_5_cells_test\" \"long_read_sc_test\"\n", + "\n", + "do\n", + " echo $t\n", + " echo \"Removing old files...\"\n", + " rm $tests_folder$t/* -r\n", + "\n", + " echo \"Running old tests...\"\n", + " bash tests/singlecell_tests/scripts/$t.sh $mypython\n", + " \n", + "done\n" + ] + }, + { + "cell_type": "markdown", + "id": "8877b85c-bb47-45e7-9ad1-05667d0fbf53", + "metadata": {}, + "source": [ + "# Integration test automatic checks" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f5c3ed3e-13dd-4399-924e-3d1ac17ce387", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Checking results for unstranded_pair_test\n", + "\tExpecting: {'contig': 'Citrine.dna', 'position': 435, 'count': 22, 'coverage': 22, 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '+'}\n", + "\n", + "\t >>> unstranded_pair_test passed! <<<\n", + "\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Checking results for pair_test\n", + "\tExpecting: {'contig': '18', 'position': 49491556, 'count': 2, 'coverage': 2, 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '-', 'feature_name': 'RPL17-C18orf32,RPL17', 'feature_strand': '-,-'}\n", + "\n", + "\t >>> pair_test passed! <<<\n", + "\n", + "\tExpecting: {'contig': '18', 'position': 49567494, 'count': 2, 'coverage': 2, 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '+', 'feature_name': 'LIPG', 'feature_strand': '+'}\n", + "\n", + "\t >>> pair_test passed! <<<\n", + "\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Checking results for F1R2_pair_test\n", + "\tExpecting: {'contig': 'chr17', 'position': 43044352, 'count': 1, 'coverage': 1, 'conversion': 'G>A', 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '-', 'feature_name': 'BRCA1', 'feature_strand': '-'}\n", + "\n", + "\t >>> F1R2_pair_test passed! <<<\n", + "\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Checking results for F1R2_pair_test-single_end_mode\n", + "\tExpecting: {'contig': 'chr17', 'position': 43044352, 'count': 1, 'coverage': 2, 'conversion': 'G>A', 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '-', 'feature_name': 'BRCA1', 'feature_strand': '-'}\n", + "\n", + "\t >>> F1R2_pair_test-single_end_mode passed! <<<\n", + "\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Checking results for F2R1_end_second_in_pair_test\n", + "\tExpecting: {'contig': 'chr17', 'position': 43001716, 'count': 1, 'coverage': 1, 'conversion': 'G>A', 'strand_conversion': 'G>A', 'strand': '+', 'feature_name': 'RPL27'}\n", + "\n", + "\t >>> F2R1_end_second_in_pair_test passed! <<<\n", + "\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Checking results for same_pos_dif_reads_test\n", + "\tExpecting: {'contig': 'chr17', 'position': 83199872, 'count': 9, 'coverage': 9, 'conversion': 'C>G', 'strand_conversion': 'C>G', 'strand': '+', 'feature_name': 'AC139099.2'}\n", + "\n", + "\t >>> same_pos_dif_reads_test passed! <<<\n", + "\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Checking results for tax1bp3_chr17_3665556_read_test\n", + "\tExpecting: {'contig': 'chr17', 'position': 3665556, 'num_rows': 1, 'count': 1, 'coverage': 1, 'conversion': 'G>A', 'strand_conversion': 'G>A', 'strand': '+'}\n", + "\n", + "\t >>> tax1bp3_chr17_3665556_read_test passed! <<<\n", + "\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Checking results for only_5_cells_test\n", + "\tExpecting: {'contig': '9', 'barcode': 'GGGACCTTCGAGCCAC-1', 'position': 3000524, 'num_rows': 1, 'count': 1, 'coverage': 12, 'conversion': 'C>A', 'strand_conversion': 'G>T', 'strand': '-'}\n", + "\n", + "\t >>> only_5_cells_test passed! <<<\n", + "\n", + "\tExpecting: {'contig': '9', 'barcode': 'GGGACCTTCGAGCCAC-1', 'position': 3000525, 'num_rows': 1, 'count': 1, 'coverage': 12, 'conversion': 'C>T', 'strand_conversion': 'G>A', 'strand': '-'}\n", + "\n", + "\t >>> only_5_cells_test passed! <<<\n", + "\n", + "\tExpecting: {'contig': '9', 'barcode': 'GATCCCTCAGTAACGG-1', 'position': 3000525, 'num_rows': 1, 'count': 1, 'coverage': 4, 'conversion': 'C>G', 'strand_conversion': 'G>C', 'strand': '-'}\n", + "\n", + "\t >>> only_5_cells_test passed! <<<\n", + "\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Checking results for long_read_sc_test\n", + "\tExpecting: {'contig': '6', 'barcode': 'ENSMUST00000203816-AACGTGTTGGAGAGGG-16-G', 'position': 115807969, 'num_rows': 1, 'count': 1, 'coverage': 1, 'conversion': 'A>C', 'strand_conversion': 'T>G', 'strand': '-', 'feature_name': 'Rpl32'}\n", + "\n", + "\t >>> long_read_sc_test passed! <<<\n", + "\n", + "\tExpecting: {'contig': '6', 'barcode': 'ENSMUST00000081840-AAGTCGTACCAGGCTC-40-C', 'position': 115805653, 'num_rows': 1, 'count': 1, 'coverage': 1, 'conversion': 'G>A', 'strand_conversion': 'C>T', 'strand': '-', 'feature_name': 'Rpl32'}\n", + "\n", + "\t >>> long_read_sc_test passed! <<<\n", + "\n", + "\tExpecting: {'contig': '6', 'barcode': 'ENSMUST00000081840-AACGTGTTGGAGAGGG-40-G', 'position': 115807015, 'num_rows': 1, 'count': 1, 'coverage': 8, 'conversion': 'C>T', 'strand_conversion': 'G>A', 'strand': '-', 'feature_name': 'Rpl32'}\n", + "\n", + "\t >>> long_read_sc_test passed! <<<\n", + "\n", + "There were 0 failures\n" + ] + } + ], "source": [ "test_name_to_expectations = {\n", " \"unstranded_pair_test\": {\n",