diff --git a/marine.py b/marine.py index 02ed3da..3fc9952 100755 --- a/marine.py +++ b/marine.py @@ -330,11 +330,11 @@ def bam_reconfig_launcher(contig): #total_seconds_for_bams_df.to_csv("{}/bam_reconfiguration_timing.tsv".format(logging_folder), sep='\t') #pretty_print("Total time to concat and write bams: {} minutes".format(round(total_bam_generation_time/60, 3))) - - for c in contigs: - thread = threading.Thread(target=monitor_event, args=(events[c], c, bam_reconfig_launcher)) - thread.start() - threads.append(thread) + if barcode_tag: + for c in contigs: + thread = threading.Thread(target=monitor_event, args=(events[c], c, bam_reconfig_launcher)) + thread.start() + threads.append(thread) _, results, total_seconds_for_reads_df, total_reads_processed, counts_summary_df = edit_finder( bam_filepath, @@ -352,8 +352,9 @@ def bam_reconfig_launcher(contig): ) # Wait for all threads to complete - for thread in threads: - thread.join() + if barcode_tag: + for thread in threads: + thread.join() total_seconds_for_reads_df.to_csv("{}/edit_finder_timing.tsv".format(logging_folder), sep='\t') diff --git a/tests/integration_tests.ipynb b/tests/integration_tests.ipynb index 69f7e86..42d1249 100644 --- a/tests/integration_tests.ipynb +++ b/tests/integration_tests.ipynb @@ -23,10 +23,22 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "f6012705-9d34-4997-8681-c7bbcc4f008b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'pybedtools' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m b \u001b[38;5;241m=\u001b[39m \u001b[43mpybedtools\u001b[49m\u001b[38;5;241m.\u001b[39mBedTool()\n", + "\u001b[0;31mNameError\u001b[0m: name 'pybedtools' is not defined" + ] + } + ], "source": [ "b = pybedtools.BedTool()" ] @@ -60,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "ba12c2b8-2ccb-4866-86c8-a2284fd1229f", "metadata": { "scrolled": true @@ -121,7 +133,7 @@ "\tContig Citrine.dna\n", "1 total jobs\n", "Reads processed:\t44\n", - "Time to process reads in min:\t0.02512\n", + "Time to process reads in min:\t0.02141\n", "Read Summary:\n", "edited 25\n", "no_edits 19\n", @@ -140,25 +152,25 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1/1 [00:01<00:00, 1.47s/it]\n", - "100%|██████████| 1/1 [00:01<00:00, 1.01s/it]\n", - "/tscc/projects/ps-yeolab3/ekofman/sailor2//marine.py:448: SettingWithCopyWarning: \n", + "100%|██████████| 1/1 [00:01<00:00, 1.27s/it]\n", + "100%|██████████| 1/1 [00:01<00:00, 1.31s/it]\n", + "/tscc/projects/ps-yeolab3/ekofman/sailor2//marine.py:481: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " sites_for_conversion['edit_fraction'] = sites_for_conversion['count']/sites_for_conversion['coverage']\n", - "/tscc/projects/ps-yeolab3/ekofman/sailor2//marine.py:449: SettingWithCopyWarning: \n", + "/tscc/projects/ps-yeolab3/ekofman/sailor2//marine.py:482: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " sites_for_conversion['start'] = sites_for_conversion['position'] - 1\n", - "***** WARNING: File /tmp/pybedtools.9on6r3qb.tmp has inconsistent naming convention for record:\n", - "Citrine.dna\t431\t433\tno_barcode_Citrine.dna_432_C_T_+\tC>T\t+\n", + "***** WARNING: File /tmp/pybedtools.mt5ov_jc.tmp has inconsistent naming convention for record:\n", + "Citrine.dna\t437\t439\tno_barcode_Citrine.dna_438_C_A_+\tC>A\t+\n", "\n", - "***** WARNING: File /tmp/pybedtools.9on6r3qb.tmp has inconsistent naming convention for record:\n", - "Citrine.dna\t431\t433\tno_barcode_Citrine.dna_432_C_T_+\tC>T\t+\n", + "***** WARNING: File /tmp/pybedtools.mt5ov_jc.tmp has inconsistent naming convention for record:\n", + "Citrine.dna\t437\t439\tno_barcode_Citrine.dna_438_C_A_+\tC>A\t+\n", "\n" ] }, @@ -171,7 +183,7 @@ "original 37\n", "filtered 37\n", "dtype: int64\n", - "Total time to calculate coverage: 0.017 minutes\n", + "Total time to calculate coverage: 0.022 minutes\n", "Filtering..\n", "\tNumber of edits after filtering:\n", "\t37\n", @@ -187,8 +199,8 @@ "Making bedgraphs for ['CT'] conversions...\n", "\n", "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.567791MB; Peak: 22.676446MB\n", - "Time elapsed: 3.26s\n", + "Current memory usage 21.833447MB; Peak: 22.94241MB\n", + "Time elapsed: 4.85s\n", "-------------------------------\n", "Deleting intermediate files...\n", "-------------------------------\n", @@ -220,259 +232,7 @@ "\tContigs:\tchr17\n", "\tNumber of intervals:\t16\n", "\tCores:\t16\n", - "\tVerbose:\tTrue\n", - "\tKeep intermediate files:\tFalse\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "Each contig is being split into 16 subsets...\n", - "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", - "\tContig chr17\n", - "16 total jobs\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 26A20G52\n", - "CIGAR tag 1S100M\n", - "is_reverse False\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:2211:40480:18323\n", - "VH01429:22:AACFJ5NHV:1:2211:40480:18323\t99\t#16\t43044305\t255\t1S100M\t#16\t43044312\t107\tGTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\tarray('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\t[('MD', '26A20G52'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 190)]\n", - "reverse_or_forward: -\n", - "----------------------------\n", - "MD tag: 26A20G52\n", - "CIGAR string 1S100M\n", - "Reference seq: TGCTACCAAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACGTATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Aligned seq: GTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Qualities: array('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 100)]\n", - "Aligned sequence before clipping (if needed):\n", - " GTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Qualities before clipping:\n", - " array('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " TGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['26', '20', '52']\n", - "[26, 47, 100]\n", - "Indicated reference seq:\n", - " tgctaccaagtttatttgcagtgttaAcagcacaacatttacaaaacGtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " tgctaccaagtttatttgcagtgttaAcagcacaacatttacaaaacGtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", - "Fixed aligned seq:\n", - " TGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Finalized fixed aligned seq:\n", - " tgctaccaagtttatttgcagtgttaCcagcacaacatttacaaaacAtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", - "Indicated qualities:\n", - " 34343434343434343434263434343434343434342634343434261234343434342626343434343434343434341234341234343434343434123434343434343412343434343412343434343434343434342634342634343426343426343434342634343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['C', 'A']\n", - "ref bases ['A', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['C', 'A'], ref bases: ['A', 'G']\n", - "Getting info: C A 12 27\n", - "Getting info: A G 12 48\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 40G3T55\n", - "CIGAR tag 100M1S\n", - "is_reverse True\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:1:2211:40480:18323\n", - "VH01429:22:AACFJ5NHV:1:2211:40480:18323\t147\t#16\t43044312\t255\t100M1S\t#16\t43044305\t-107\tAAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\tarray('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '40G3T55'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 190)]\n", - "reverse_or_forward: -\n", - "----------------------------\n", - "MD tag: 40G3T55\n", - "CIGAR string 100M1S\n", - "Reference seq: AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACGTATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", - "Aligned seq: AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 100), (4, 1)]\n", - "Aligned sequence before clipping (if needed):\n", - " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['40', '3', '55']\n", - "[40, 44, 100]\n", - "Indicated reference seq:\n", - " aagtttatttgcagtgttaacagcacaacatttacaaaacGtatTttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " aagtttatttgcagtgttaacagcacaacatttacaaaacGtatTttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", - "Fixed aligned seq:\n", - " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", - "Finalized fixed aligned seq:\n", - " aagtttatttgcagtgttaacagcacaacatttacaaaacAtatGttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", - "Indicated qualities:\n", - " 34343434343434263434342634343434341234343434263434343434343434343434343426343434343434341234343434343434343434343434263412343434343434343434342634343434343434343434343434343426343434263434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['A', 'G']\n", - "ref bases ['G', 'T']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['A', 'G'], ref bases: ['G', 'T']\n", - "Getting info: A G 34 41\n", - "Getting info: G T 12 45\n", - "chr17:008, total reads: 2, counts_df: chr17\n", - "edited 2\n", - "total_edits 4\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "Reads processed:\t2\n", - "Time to process reads in min:\t0.15093\n", - "Read Summary:\n", - "edited 2\n", - "total_edits 4\n", - "dtype: int64\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit chr17_000_0_5203591, 0/16...\n", - "\tsplit chr17_010_52035910_57239501, 10/16...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n", - "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:08<00:00, 1.83it/s]\n", - "100%|██████████| 16/16 [00:08<00:00, 1.89it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 4\n", - "filtered 3\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.145 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t3\n", - "\tNumber of unique edit sites:\n", - "\t3\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "3 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "1 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.288258MB; Peak: 22.400914MB\n", - "Time elapsed: 18.62s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n", - "F1R2_pair_test\n", - "Removing old files...\n", - "Running tests...\n", - "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F1R2_pair.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\tchr17\n", - "\tNumber of intervals:\t16\n", - "\tCores:\t16\n", - "\tVerbose:\tTrue\n", + "\tVerbose:\tFalse\n", "\tKeep intermediate files:\tFalse\n", "\tSkip coverage?:\tFalse\n", "\n", @@ -486,172 +246,14 @@ "==================================================================\n", "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", "~~~~~~~~~~~~~~~~~~\n", "Identifying edits\n", "~~~~~~~~~~~~~~~~~~\n", "Each contig is being split into 16 subsets...\n", - "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", "\tContig chr17\n", "16 total jobs\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 26A20G52\n", - "CIGAR tag 1S100M\n", - "is_reverse False\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:2211:40480:18323\n", - "VH01429:22:AACFJ5NHV:1:2211:40480:18323\t99\t#16\t43044305\t255\t1S100M\t#16\t43044312\t107\tGTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\tarray('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\t[('MD', '26A20G52'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 190)]\n", - "reverse_or_forward: -\n", - "----------------------------\n", - "MD tag: 26A20G52\n", - "CIGAR string 1S100M\n", - "Reference seq: TGCTACCAAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACGTATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Aligned seq: GTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Qualities: array('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 100)]\n", - "Aligned sequence before clipping (if needed):\n", - " GTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Qualities before clipping:\n", - " array('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " TGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['26', '20', '52']\n", - "[26, 47, 100]\n", - "Indicated reference seq:\n", - " tgctaccaagtttatttgcagtgttaAcagcacaacatttacaaaacGtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " tgctaccaagtttatttgcagtgttaAcagcacaacatttacaaaacGtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", - "Fixed aligned seq:\n", - " TGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Finalized fixed aligned seq:\n", - " tgctaccaagtttatttgcagtgttaCcagcacaacatttacaaaacAtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", - "Indicated qualities:\n", - " 34343434343434343434263434343434343434342634343434261234343434342626343434343434343434341234341234343434343434123434343434343412343434343412343434343434343434342634342634343426343426343434342634343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['C', 'A']\n", - "ref bases ['A', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['C', 'A'], ref bases: ['A', 'G']\n", - "Getting info: C A 12 27\n", - "Getting info: A G 12 48\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 40G3T55\n", - "CIGAR tag 100M1S\n", - "is_reverse True\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:1:2211:40480:18323\n", - "VH01429:22:AACFJ5NHV:1:2211:40480:18323\t147\t#16\t43044312\t255\t100M1S\t#16\t43044305\t-107\tAAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\tarray('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '40G3T55'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 190)]\n", - "reverse_or_forward: -\n", - "----------------------------\n", - "MD tag: 40G3T55\n", - "CIGAR string 100M1S\n", - "Reference seq: AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACGTATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", - "Aligned seq: AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 100), (4, 1)]\n", - "Aligned sequence before clipping (if needed):\n", - " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['40', '3', '55']\n", - "[40, 44, 100]\n", - "Indicated reference seq:\n", - " aagtttatttgcagtgttaacagcacaacatttacaaaacGtatTttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " aagtttatttgcagtgttaacagcacaacatttacaaaacGtatTttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", - "Fixed aligned seq:\n", - " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", - "Finalized fixed aligned seq:\n", - " aagtttatttgcagtgttaacagcacaacatttacaaaacAtatGttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", - "Indicated qualities:\n", - " 34343434343434263434342634343434341234343434263434343434343434343434343426343434343434341234343434343434343434343434263412343434343434343434342634343434343434343434343434343426343434263434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['A', 'G']\n", - "ref bases ['G', 'T']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['A', 'G'], ref bases: ['G', 'T']\n", - "Getting info: A G 34 41\n", - "Getting info: G T 12 45\n", - "chr17:008, total reads: 2, counts_df: chr17\n", - "edited 2\n", - "total_edits 4\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", "Reads processed:\t2\n", - "Time to process reads in min:\t0.15098\n", + "Time to process reads in min:\t0.05137\n", "Read Summary:\n", "edited 2\n", "total_edits 4\n", @@ -663,1497 +265,14 @@ "\tsplit chr17_000_0_5203591, 0/16...\n", "\tsplit chr17_010_52035910_57239501, 10/16...\n", "Done grouping! Concatenating ...\n", - "Done concatenating!\n", - "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:08<00:00, 1.83it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43044352~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43044331~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43044356~~~~~~~`\n", - "coverage_at_pos 1\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:06<00:00, 2.39it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 4\n", - "filtered 3\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.116 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t3\n", - "\tNumber of unique edit sites:\n", - "\t3\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "3 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "1 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.289436MB; Peak: 22.402017MB\n", - "Time elapsed: 16.87s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n", - "F2R1_end_second_in_pair_test\n", - "Removing old files...\n", - "Running tests...\n", - "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F2R1_end_second_in_pair.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\tchr17\n", - "\tNumber of intervals:\t16\n", - "\tCores:\t16\n", - "\tVerbose:\tTrue\n", - "\tKeep intermediate files:\tFalse\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "Each contig is being split into 16 subsets...\n", - "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", - "\tContig chr17\n", - "16 total jobs\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "Reads processed:\t1\n", - "Time to process reads in min:\t0.15103\n", - "Read Summary:\n", - "edited 1\n", - "total_edits 3\n", - "dtype: int64\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit chr17_000_0_5203591, 0/16...\n", - "\tsplit chr17_010_52035910_57239501, 10/16...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n", - "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:08<00:00, 1.82it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43001715~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43001716~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43001717~~~~~~~`\n", - "coverage_at_pos 1\n" + "Done concatenating!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:08<00:00, 1.90it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 3\n", - "filtered 3\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.146 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t3\n", - "\tNumber of unique edit sites:\n", - "\t3\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "3 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "0 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.436974MB; Peak: 22.547439MB\n", - "Time elapsed: 18.59s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n", - "same_pos_dif_reads_test\n", - "Removing old files...\n", - "Running tests...\n", - "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/same_pos_dif_reads.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\tchr17\n", - "\tNumber of intervals:\t16\n", - "\tCores:\t16\n", - "\tVerbose:\tTrue\n", - "\tKeep intermediate files:\tFalse\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "Each contig is being split into 16 subsets...\n", - "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", - "\tContig chr17\n", - "16 total jobs\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:008, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 55C43\n", - "CIGAR tag 1S97M17144N2M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:2:2606:39477:40189\n", - "VH01429:22:AACFJ5NHV:2:2606:39477:40189\t163\t#16\t83195127\t255\t1S97M17144N2M\t#16\t83195141\t17258\tTCCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\t[('MD', '55C43'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 197), ('XS', '+')]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 55C43\n", - "CIGAR string 1S97M17144N2M\n", - "Reference seq: CCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTCCCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", - "Aligned seq: TCCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 97), (3, 17144), (0, 2)]\n", - "Aligned sequence before clipping (if needed):\n", - " TCCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 97), (3, 17144), (0, 2)]\n", - "Aligned sequence after clipping (if needed):\n", - " CCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['55', '43']\n", - "[55, 99]\n", - "Indicated reference seq:\n", - " cccaccttcgcttccgggaccacagagcccctggagcctgcacgccacggagtctCccactgaggagtgaggaggcctcttgtggggacagatctggga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " cccaccttcgcttccgggaccacagagcccctggagcctgcacgccacggagtctCccactgaggagtgaggaggcctcttgtggggacagatctgg17144*nga\n", - "Fixed aligned seq:\n", - " CCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGG17144*nGA\n", - "Finalized fixed aligned seq:\n", - " cccaccttcgcttccgggaccacagagcccctggagcctgcacgccacggagtctAccactgaggagtgaggaggcctcttgtggggacagatctgg17144*nga\n", - "Indicated qualities:\n", - " 343434343434342626343434343434343434343434263434263434343434343434343434343434263434343434343434343434343434341234341234343426343434343434343434343434343434343434343434343434263434343434343412343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['A']\n", - "ref bases ['C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['A'], ref bases: ['C']\n", - "Getting info: A C 12 56\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 86C13\n", - "CIGAR tag 1S17M4579N83M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:2:1602:67558:13893\n", - "VH01429:22:AACFJ5NHV:2:1602:67558:13893\t163\t#16\t83195207\t255\t1S17M4579N83M\t#16\t83199866\t4757\tGTGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '86C13'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 191), ('XS', '+')]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 86C13\n", - "CIGAR string 1S17M4579N83M\n", - "Reference seq: TGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAAT\n", - "Aligned seq: GTGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 17), (3, 4579), (0, 83)]\n", - "Aligned sequence before clipping (if needed):\n", - " GTGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 17), (3, 4579), (0, 83)]\n", - "Aligned sequence after clipping (if needed):\n", - " TGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['86', '13']\n", - "[86, 100]\n", - "Indicated reference seq:\n", - " tgtggggacagatctggaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaat\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " tgtggggacagatctgg4579*naatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaat\n", - "Fixed aligned seq:\n", - " TGTGGGGACAGATCTGG4579*nAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", - "Finalized fixed aligned seq:\n", - " tgtggggacagatctgg4579*naatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaat\n", - "Indicated qualities:\n", - " 34343434343434343434342634342634343434343434343434343434343434343434343434343412343434343434343434343434343434343434343434343434343434343434341234343434343434343434343426343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G']\n", - "ref bases ['C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G'], ref bases: ['C']\n", - "Getting info: G C 34 4666\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 95C4\n", - "CIGAR tag 1S100M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:1109:44703:39053\n", - "VH01429:22:AACFJ5NHV:1:1109:44703:39053\t163\t#16\t83199777\t255\t1S100M\t#16\t83199839\t163\tGCTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\t[('MD', '95C4'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 191)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 95C4\n", - "CIGAR string 1S100M\n", - "Reference seq: CTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTG\n", - "Aligned seq: GCTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 100)]\n", - "Aligned sequence before clipping (if needed):\n", - " GCTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " CTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['95', '4']\n", - "[95, 100]\n", - "Indicated reference seq:\n", - " ctattattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtg\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ctattattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtg\n", - "Fixed aligned seq:\n", - " CTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", - "Finalized fixed aligned seq:\n", - " ctattattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtg\n", - "Indicated qualities:\n", - " 34343434343434343434343434343434123434343434343434343434343434343434343434343434343426343434343434343434343434343434343434342634343434343426343434343434343434343434343434343434343434343426343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G']\n", - "ref bases ['C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G'], ref bases: ['C']\n", - "Getting info: G C 34 96\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 92C8\n", - "CIGAR tag 101M\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:1208:38151:26028\n", - "VH01429:22:AACFJ5NHV:2:1208:38151:26028\t83\t#16\t83199780\t255\t101M\t#16\t83199711\t-170\tTTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\t[('MD', '92C8'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 195)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 92C8\n", - "CIGAR string 101M\n", - "Reference seq: TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGA\n", - "Aligned seq: TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence before clipping (if needed):\n", - " TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence after clipping (if needed):\n", - " TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['92', '8']\n", - "[92, 101]\n", - "Indicated reference seq:\n", - " ttattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ttattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcga\n", - "Fixed aligned seq:\n", - " TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", - "Finalized fixed aligned seq:\n", - " ttattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcga\n", - "Indicated qualities:\n", - " 3434343434343434343426343426343434343434343426343434343434343434343434343434343434343434343434343434343434343434343434263434343434343426343434343434343434343434343434343434343434343434343434343426343426\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G']\n", - "ref bases ['C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G'], ref bases: ['C']\n", - "Getting info: G C 34 93\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 60C37\n", - "CIGAR tag 1S98M2S\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:2112:47146:35399\n", - "VH01429:22:AACFJ5NHV:1:2112:47146:35399\t163\t#16\t83199812\t255\t1S98M2S\t#16\t83199946\t232\tTCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAAT\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '60C37'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 192)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 60C37\n", - "CIGAR string 1S98M2S\n", - "Reference seq: CATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCA\n", - "Aligned seq: TCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAAT\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 98), (4, 2)]\n", - "Aligned sequence before clipping (if needed):\n", - " TCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAAT\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 98)]\n", - "Aligned sequence after clipping (if needed):\n", - " CATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['60', '37']\n", - "[60, 98]\n", - "Indicated reference seq:\n", - " catccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgca\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " catccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgca\n", - "Fixed aligned seq:\n", - " CATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCA\n", - "Finalized fixed aligned seq:\n", - " catccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgca\n", - "Indicated qualities:\n", - " 3434343434343426343434343434343434343434343434342634343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G']\n", - "ref bases ['C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G'], ref bases: ['C']\n", - "Getting info: G C 34 61\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 5C50C37G5\n", - "CIGAR tag 1S100M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:2:2610:69319:26899\n", - "VH01429:22:AACFJ5NHV:2:2610:69319:26899\t163\t#16\t83199816\t255\t1S100M\t#16\t83199822\t105\tTCAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\tarray('B', [34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\t[('MD', '5C50C37G5'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 3), ('AS', 187)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 5C50C37G5\n", - "CIGAR string 1S100M\n", - "Reference seq: CAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTT\n", - "Aligned seq: TCAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", - "Qualities: array('B', [34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 100)]\n", - "Aligned sequence before clipping (if needed):\n", - " TCAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", - "Qualities before clipping:\n", - " array('B', [34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " CAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", - "Qualities after clipping:\n", - " array('B', [26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['5', '50', '37', '5']\n", - "[5, 56, 94, 100]\n", - "Indicated reference seq:\n", - " caaacCtgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactt\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " caaacCtgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactt\n", - "Fixed aligned seq:\n", - " CAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", - "Finalized fixed aligned seq:\n", - " caaacTtgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactt\n", - "Indicated qualities:\n", - " 26343434343434343412342634343412343412341226263434343434341234343434343434343434123434343434343434343434343434343434343434342634343434343412343434343434263434343434343434343434343434343434343434342626\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['T', 'G', 'A']\n", - "ref bases ['C', 'C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['T', 'G', 'A'], ref bases: ['C', 'C', 'G']\n", - "Getting info: T C 34 6\n", - "Getting info: G C 34 57\n", - "Getting info: A G 34 95\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 50C37G10\n", - "CIGAR tag 1S99M1S\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:2610:69319:26899\n", - "VH01429:22:AACFJ5NHV:2:2610:69319:26899\t83\t#16\t83199822\t255\t1S99M1S\t#16\t83199816\t-105\tTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '50C37G10'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 187)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 50C37G10\n", - "CIGAR string 1S99M1S\n", - "Reference seq: TGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTG\n", - "Aligned seq: TTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 99), (4, 1)]\n", - "Aligned sequence before clipping (if needed):\n", - " TTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 99)]\n", - "Aligned sequence after clipping (if needed):\n", - " TGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTG\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['50', '37', '10']\n", - "[50, 88, 99]\n", - "Indicated reference seq:\n", - " tgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctg\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " tgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctg\n", - "Fixed aligned seq:\n", - " TGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTG\n", - "Finalized fixed aligned seq:\n", - " tgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctg\n", - "Indicated qualities:\n", - " 343434343434341234123434343434343434343434343434343434343412343426263434121234343434343434343434343434343412342634342634341234123434343434341234343434343434343434343434343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 51\n", - "Getting info: A G 34 89\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 48C37G14\n", - "CIGAR tag 101M\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:1:1403:49513:32843\n", - "VH01429:22:AACFJ5NHV:1:1403:49513:32843\t83\t#16\t83199824\t255\t101M\t#16\t83199748\t-177\tACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '48C37G14'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 194)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 48C37G14\n", - "CIGAR string 101M\n", - "Reference seq: ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAA\n", - "Aligned seq: ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence before clipping (if needed):\n", - " ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence after clipping (if needed):\n", - " ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['48', '37', '14']\n", - "[48, 86, 101]\n", - "Indicated reference seq:\n", - " actcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " actcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", - "Fixed aligned seq:\n", - " ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Finalized fixed aligned seq:\n", - " actcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaa\n", - "Indicated qualities:\n", - " 3434343434343434343434343434341234123434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 49\n", - "Getting info: A G 34 87\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 47C37G14\n", - "CIGAR tag 1S100M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:2414:26942:20235\n", - "VH01429:22:AACFJ5NHV:1:2414:26942:20235\t163\t#16\t83199825\t255\t1S100M\t#16\t83199839\t115\tTCTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\tarray('B', [34, 12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '47C37G14'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 191)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 47C37G14\n", - "CIGAR string 1S100M\n", - "Reference seq: CTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAA\n", - "Aligned seq: TCTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities: array('B', [34, 12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 100)]\n", - "Aligned sequence before clipping (if needed):\n", - " TCTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities before clipping:\n", - " array('B', [34, 12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " CTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities after clipping:\n", - " array('B', [12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['47', '37', '14']\n", - "[47, 85, 100]\n", - "Indicated reference seq:\n", - " ctcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ctcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", - "Fixed aligned seq:\n", - " CTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Finalized fixed aligned seq:\n", - " ctcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaa\n", - "Indicated qualities:\n", - " 12343434123434263434343412343434263434343434343434343434343434123434343426343426343434343434343434341226343434343434343434263434263434263412341226123434261234343434263434263426343434341234343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 48\n", - "Getting info: A G 26 86\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 2T9T0T26G4C37G1A11\n", - "CIGAR tag 3S97M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:2:1608:28097:17679\n", - "VH01429:22:AACFJ5NHV:2:1608:28097:17679\t163\t#16\t83199827\t255\t3S97M\t#16\t83199827\t100\tCCACAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\tarray('B', [12, 12, 12, 12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '2T9T0T26G4C37G1A11'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 7), ('AS', 177)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 2T9T0T26G4C37G1A11\n", - "CIGAR string 3S97M\n", - "Reference seq: CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAA\n", - "Aligned seq: CCACAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", - "Qualities: array('B', [12, 12, 12, 12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 3), (0, 97)]\n", - "Aligned sequence before clipping (if needed):\n", - " CCACAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", - "Qualities before clipping:\n", - " array('B', [12, 12, 12, 12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 97)]\n", - "Aligned sequence after clipping (if needed):\n", - " CAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", - "Qualities after clipping:\n", - " array('B', [12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['2', '9', '0', '26', '4', '37', '1', '11']\n", - "[2, 12, 13, 40, 45, 83, 85, 97]\n", - "Indicated reference seq:\n", - " caTatttccaacTTtattaatttttgcaagacttcggaggGtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtActttcctgtaa\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " caTatttccaacTTtattaatttttgcaagacttcggaggGtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtActttcctgtaa\n", - "Fixed aligned seq:\n", - " CAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", - "Finalized fixed aligned seq:\n", - " caAatttccaacAAtattaatttttgcaagacttcggaggCtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtTctttcctgtaa\n", - "Indicated qualities:\n", - " 12121234123434341234341212123434341212341212341212343434342634341226341234263412123434343434121226263434343434342634341234122634343434343434343412343434343434343434343434123434342634343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['A', 'A', 'A', 'C', 'G', 'A', 'T']\n", - "ref bases ['T', 'T', 'T', 'G', 'C', 'G', 'A']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['A', 'A', 'A', 'C', 'G', 'A', 'T'], ref bases: ['T', 'T', 'T', 'G', 'C', 'G', 'A']\n", - "Getting info: A T 12 3\n", - "Getting info: A T 12 13\n", - "Getting info: A T 12 14\n", - "Getting info: C G 12 41\n", - "Getting info: G C 34 46\n", - "Getting info: A G 34 84\n", - "Getting info: T A 12 86\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 45C37G16\n", - "CIGAR tag 100M1S\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:1608:28097:17679\n", - "VH01429:22:AACFJ5NHV:2:1608:28097:17679\t83\t#16\t83199827\t255\t100M1S\t#16\t83199827\t-100\tCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACG\tarray('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 12])\t[('MD', '45C37G16'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 177)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 45C37G16\n", - "CIGAR string 100M1S\n", - "Reference seq: CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAAC\n", - "Aligned seq: CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACG\n", - "Qualities: array('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 12])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 100), (4, 1)]\n", - "Aligned sequence before clipping (if needed):\n", - " CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACG\n", - "Qualities before clipping:\n", - " array('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 12])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAAC\n", - "Qualities after clipping:\n", - " array('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['45', '37', '16']\n", - "[45, 83, 100]\n", - "Indicated reference seq:\n", - " catatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaac\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " catatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaac\n", - "Fixed aligned seq:\n", - " CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAAC\n", - "Finalized fixed aligned seq:\n", - " catatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaac\n", - "Indicated qualities:\n", - " 34261234343434263434342634343434342634343412123434343426343434343434343434343434343434341226343434343434343434343434343426342612343434343434343434341234343434123434342612343434343434343434343412343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 26 46\n", - "Getting info: A G 26 84\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 40C37G20\n", - "CIGAR tag 99M2S\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:1104:58090:46985\n", - "VH01429:22:AACFJ5NHV:2:1104:58090:46985\t83\t#16\t83199832\t255\t99M2S\t#16\t83195148\t-4783\tTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACCT\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 12])\t[('MD', '40C37G20'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 192), ('XS', '+')]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 40C37G20\n", - "CIGAR string 99M2S\n", - "Reference seq: TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGAC\n", - "Aligned seq: TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACCT\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 12])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 99), (4, 2)]\n", - "Aligned sequence before clipping (if needed):\n", - " TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACCT\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 12])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 99)]\n", - "Aligned sequence after clipping (if needed):\n", - " TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGAC\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['40', '37', '20']\n", - "[40, 78, 99]\n", - "Indicated reference seq:\n", - " ttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgac\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgac\n", - "Fixed aligned seq:\n", - " TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGAC\n", - "Finalized fixed aligned seq:\n", - " ttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgac\n", - "Indicated qualities:\n", - " 343434343434343434263412263434123434343434341234342634343434343434343434343434343434343434343434342634343434343434263434343434343434343434262634343434343434342626123434343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 41\n", - "Getting info: A G 34 79\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 33C37G19C9\n", - "CIGAR tag 101M\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:1:1109:44703:39053\n", - "VH01429:22:AACFJ5NHV:1:1109:44703:39053\t83\t#16\t83199839\t255\t101M\t#16\t83199777\t-163\tTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\tarray('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '33C37G19C9'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 3), ('AS', 191)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 33C37G19C9\n", - "CIGAR string 101M\n", - "Reference seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Aligned seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", - "Qualities: array('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence before clipping (if needed):\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence after clipping (if needed):\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['33', '37', '19', '9']\n", - "[33, 71, 91, 101]\n", - "Indicated reference seq:\n", - " tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgaCtaaaggaga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgaCtaaaggaga\n", - "Fixed aligned seq:\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", - "Finalized fixed aligned seq:\n", - " tttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgaTtaaaggaga\n", - "Indicated qualities:\n", - " 3434343426343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343412343434343434343434343412\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A', 'T']\n", - "ref bases ['C', 'G', 'C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A', 'T'], ref bases: ['C', 'G', 'C']\n", - "Getting info: G C 34 34\n", - "Getting info: A G 34 72\n", - "Getting info: T C 34 92\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 33C37G29\n", - "CIGAR tag 101M\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:1:2414:26942:20235\n", - "VH01429:22:AACFJ5NHV:1:2414:26942:20235\t83\t#16\t83199839\t255\t101M\t#16\t83199825\t-115\tTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\tarray('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\t[('MD', '33C37G29'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 191)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 33C37G29\n", - "CIGAR string 101M\n", - "Reference seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Aligned seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Qualities: array('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence before clipping (if needed):\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence after clipping (if needed):\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['33', '37', '29']\n", - "[33, 71, 101]\n", - "Indicated reference seq:\n", - " tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggaga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggaga\n", - "Fixed aligned seq:\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Finalized fixed aligned seq:\n", - " tttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgactaaaggaga\n", - "Indicated qualities:\n", - " 3434343426343426263434343434343434343434343426343434343434343412343434343434263412343434343434343434343434343434342634341234343434343434343434342626343434342634343434342634343434343434343434341234343426\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 34\n", - "Getting info: A G 34 72\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 6C37G53\n", - "CIGAR tag 98M2S\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:1602:67558:13893\n", - "VH01429:22:AACFJ5NHV:2:1602:67558:13893\t83\t#16\t83199866\t255\t98M2S\t#16\t83195207\t-4757\tGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGAGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26])\t[('MD', '6C37G53'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 191), ('XS', '+')]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 6C37G53\n", - "CIGAR string 98M2S\n", - "Reference seq: GGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGA\n", - "Aligned seq: GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGAGA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 98), (4, 2)]\n", - "Aligned sequence before clipping (if needed):\n", - " GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGAGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 98)]\n", - "Aligned sequence after clipping (if needed):\n", - " GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['6', '37', '53']\n", - "[6, 44, 98]\n", - "Indicated reference seq:\n", - " ggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggagaagttggtgtcttcctcatacaaga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggagaagttggtgtcttcctcatacaaga\n", - "Fixed aligned seq:\n", - " GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGA\n", - "Finalized fixed aligned seq:\n", - " ggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgactaaaggagaagttggtgtcttcctcatacaaga\n", - "Indicated qualities:\n", - " 3434343434343434343434343434343434343426343434343434343434343434343434343434343434343434343434343434263434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 7\n", - "Getting info: A G 34 45\n", - "chr17:015, total reads: 16, counts_df: chr17\n", - "edited 15\n", - "no_edits 1\n", - "total_edits 32\n", - "Reads processed:\t16\n", - "Time to process reads in min:\t0.15431\n", - "Read Summary:\n", - "edited 15\n", - "no_edits 1\n", - "total_edits 32\n", - "dtype: int64\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit chr17_000_0_5203591, 0/16...\n", - "\tsplit chr17_010_52035910_57239501, 10/16...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n", - "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:08<00:00, 1.79it/s]\n", - "100%|██████████| 16/16 [00:08<00:00, 1.82it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 32\n", - "filtered 24\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.151 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t24\n", - "\tNumber of unique edit sites:\n", - "\t10\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "10 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "2 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.436933MB; Peak: 22.546578MB\n", - "Time elapsed: 19.22s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n", - "tax1bp3_chr17_3665556_read_test\n", - "Removing old files...\n", - "Running tests...\n", - "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/tax1bp3_chr17_3665556_read.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\tchr17\n", - "\tNumber of intervals:\t16\n", - "\tCores:\t16\n", - "\tVerbose:\tTrue\n", - "\tKeep intermediate files:\tFalse\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "Each contig is being split into 16 subsets...\n", - "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", - "\tContig chr17\n", - "16 total jobs\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 2C1C6G3A1G4G47\n", - "CIGAR tag 28S70M\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:2414:21053:15521\n", - "VH01429:22:AACFJ5NHV:2:2414:21053:15521\t83\t#16\t3665539\t255\t28S70M\t#16\t3665452\t-157\tCACAATAAGAGCCTATTTTGCTTCCTTAAAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\tarray('B', [26, 34, 34, 12, 12, 12, 26, 34, 34, 12, 34, 34, 12, 12, 26, 26, 12, 34, 12, 34, 12, 34, 34, 34, 12, 34, 12, 34, 34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '2C1C6G3A1G4G47'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 6), ('AS', 151)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 2C1C6G3A1G4G47\n", - "CIGAR string 28S70M\n", - "Reference seq: AACGCGTGAAGGAAAATGATCAGAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", - "Aligned seq: CACAATAAGAGCCTATTTTGCTTCCTTAAAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", - "Qualities: array('B', [26, 34, 34, 12, 12, 12, 26, 34, 34, 12, 34, 34, 12, 12, 26, 26, 12, 34, 12, 34, 12, 34, 34, 34, 12, 34, 12, 34, 34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 28), (0, 70)]\n", - "Aligned sequence before clipping (if needed):\n", - " CACAATAAGAGCCTATTTTGCTTCCTTAAAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", - "Qualities before clipping:\n", - " array('B', [26, 34, 34, 12, 12, 12, 26, 34, 34, 12, 34, 34, 12, 12, 26, 26, 12, 34, 12, 34, 12, 34, 34, 34, 12, 34, 12, 34, 34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 70)]\n", - "Aligned sequence after clipping (if needed):\n", - " AAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", - "Qualities after clipping:\n", - " array('B', [34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['2', '1', '6', '3', '1', '4', '47']\n", - "[2, 4, 11, 15, 17, 22, 70]\n", - "Indicated reference seq:\n", - " aaCgCgtgaagGaaaAtGatcaGaaaaagaaagaagccaaagagaaaggtacctgggttcaactaaagca\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " aaCgCgtgaagGaaaAtGatcaGaaaaagaaagaagccaaagagaaaggtacctgggttcaactaaagca\n", - "Fixed aligned seq:\n", - " AAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", - "Finalized fixed aligned seq:\n", - " aaGgTgtgaagTaaaTtAatcaTaaaaagaaagaagccaaagagaaaggtacctgggttcaactaaagca\n", - "Indicated qualities:\n", - " 34261226341234123412263426123412341212121212123434343412123434341234343434343434343434343434343434343434343434343434343426263434343434343412\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'T', 'T', 'T', 'A', 'T']\n", - "ref bases ['C', 'C', 'G', 'A', 'G', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'T', 'T', 'T', 'A', 'T'], ref bases: ['C', 'C', 'G', 'A', 'G', 'G']\n", - "Getting info: G C 12 3\n", - "Getting info: T C 34 5\n", - "Getting info: T G 34 12\n", - "Getting info: T A 12 16\n", - "Getting info: A G 12 18\n", - "Getting info: T G 12 23\n", - "chr17:000, total reads: 1, counts_df: chr17\n", - "edited 1\n", - "total_edits 6\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:008, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "Reads processed:\t1\n", - "Time to process reads in min:\t0.1499\n", - "Read Summary:\n", - "edited 1\n", - "total_edits 6\n", - "dtype: int64\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit chr17_000_0_5203591, 0/16...\n", - "\tsplit chr17_010_52035910_57239501, 10/16...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n", - "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:08<00:00, 1.83it/s]\n", - "100%|██████████| 16/16 [00:07<00:00, 2.09it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 6\n", - "filtered 6\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.132 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t6\n", - "\tNumber of unique edit sites:\n", - "\t6\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "6 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "1 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.405846MB; Peak: 22.51619MB\n", - "Time elapsed: 17.81s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n", - "pair_test\n", - "Removing old files...\n", - "Running tests...\n", - "Assuming 1 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/pair_example_18_49488551_49590000.sorted.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/pair_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\t18\n", - "\tNumber of intervals:\t1\n", - "\tCores:\t1\n", - "\tVerbose:\tFalse\n", - "\tKeep intermediate files:\tTrue\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/pair_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "Each contig is being split into 1 subsets...\n", - "\tContig 18\n", - "1 total jobs\n", - "Reads processed:\t10\n", - "Time to process reads in min:\t0.0216\n", - "Read Summary:\n", - "edited 6\n", - "no_edits 2\n", - "secondary 2\n", - "total_edits 6\n", - "dtype: int64\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit 18_000_0_80373285, 0/1...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:01<00:00, 1.26s/it]\n", - "100%|██████████| 1/1 [00:01<00:00, 1.19s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 6\n", - "filtered 4\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.02 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t4\n", - "\tNumber of unique edit sites:\n", - "\t2\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "2 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "2 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed...\n", - "Current memory usage 21.319782MB; Peak: 22.42987MB\n", - "Time elapsed: 3.36s\n", - "++++++\n", - "Done!\n", - "++++++\n" + "100%|██████████| 16/16 [00:02<00:00, 5.37it/s]\n" ] } ], @@ -2186,264 +305,12 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "57192b28-b498-4f81-bdcd-0d4d14f644ff", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/tscc/projects/ps-yeolab3/ekofman/sailor2/tests\n", - "Running\n", - "/tscc/projects/ps-yeolab3/ekofman/sailor2\n", - "only_5_cells_test\n", - "Removing old files...\n", - "Running old tests...\n", - "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/bams/9_3000526_only_5_cells.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/only_5_cells_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tCB\n", - "\tPaired End:\tFalse\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tFalse\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\t9\n", - "\tNumber of intervals:\t16\n", - "\tCores:\t16\n", - "\tVerbose:\tFalse\n", - "\tKeep intermediate files:\tFalse\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/only_5_cells_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "Each contig is being split into 16 subsets...\n", - "\tContig 9\n", - "16 total jobs\n", - "Reads processed:\t31\n", - "Time to process reads in min:\t0.15101\n", - "Read Summary:\n", - "edited 31\n", - "total_edits 99\n", - "dtype: int64\n", - "\n", - "Contigs processed\n", - "\n", - "\t['9']\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Splitting and reconfiguring BAMs to optimize coverage calculations\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Contig 9 has 0/4 bams generated\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:08<00:00, 1.82it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total time to concat and write bams: 0.141 minutes\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit 9_000_0_7787195, 0/16...\n", - "\tsplit 9_010_77871950_85659145, 10/16...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:08<00:00, 8.23s/it]\n", - "100%|██████████| 4/4 [00:07<00:00, 2.00s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 99\n", - "filtered 99\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.138 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t99\n", - "\tNumber of unique edit sites:\n", - "\t73\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed...\n", - "Current memory usage 21.632608MB; Peak: 22.739356MB\n", - "Time elapsed: 26.69s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n", - "long_read_sc_test\n", - "Removing old files...\n", - "Running old tests...\n", - "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//examples/data/LR_single_cell.md.subset.filtered.sorted.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/long_read_sc_test\n", - "\tBarcode whitelist:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//examples/data/sc_lr_barcodes.tsv.gz\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tIB\n", - "\tPaired End:\tFalse\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tFalse\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\t6\n", - "\tNumber of intervals:\t4\n", - "\tCores:\t16\n", - "\tVerbose:\tFalse\n", - "\tKeep intermediate files:\tFalse\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/long_read_sc_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "Barcodes in whitelist: 3\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "Each contig is being split into 4 subsets...\n", - "\tContig 6\n", - "4 total jobs\n", - "Reads processed:\t13\n", - "Time to process reads in min:\t0.14743\n", - "Read Summary:\n", - "edited 3\n", - "no_edits 10\n", - "total_edits 3\n", - "dtype: int64\n", - "\n", - "Contigs processed\n", - "\n", - "\t['6']\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Splitting and reconfiguring BAMs to optimize coverage calculations\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Contig 6 has 0/4 bams generated\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 4/4 [00:08<00:00, 2.14s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\t400 suffixes\n", - "Total time to concat and write bams: 0.337 minutes\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit 6_000_0_37434137, 0/4...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:19<00:00, 19.88s/it]\n", - "100%|██████████| 400/400 [00:10<00:00, 36.66it/s] \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 3\n", - "filtered 3\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.187 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t3\n", - "\tNumber of unique edit sites:\n", - "\t3\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed...\n", - "Current memory usage 21.511415MB; Peak: 22.623014MB\n", - "Time elapsed: 41.90s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n" - ] - } - ], + "outputs": [], "source": [ "%%bash\n", "\n", @@ -2480,101 +347,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "f5c3ed3e-13dd-4399-924e-3d1ac17ce387", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Checking results for unstranded_pair_test\n", - "\tExpecting: {'contig': 'Citrine.dna', 'position': 435, 'count': 22, 'coverage': 22, 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '+'}\n", - "\n", - "\t >>> unstranded_pair_test passed! <<<\n", - "\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Checking results for pair_test\n", - "\tExpecting: {'contig': '18', 'position': 49491556, 'count': 2, 'coverage': 2, 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '-', 'feature_name': 'RPL17-C18orf32,RPL17', 'feature_strand': '-,-'}\n", - "\n", - "\t >>> pair_test passed! <<<\n", - "\n", - "\tExpecting: {'contig': '18', 'position': 49567494, 'count': 2, 'coverage': 2, 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '+', 'feature_name': 'LIPG', 'feature_strand': '+'}\n", - "\n", - "\t >>> pair_test passed! <<<\n", - "\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Checking results for F1R2_pair_test\n", - "\tExpecting: {'contig': 'chr17', 'position': 43044352, 'count': 1, 'coverage': 1, 'conversion': 'G>A', 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '-', 'feature_name': 'BRCA1', 'feature_strand': '-'}\n", - "\n", - "\t >>> F1R2_pair_test passed! <<<\n", - "\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Checking results for F1R2_pair_test-single_end_mode\n", - "\tExpecting: {'contig': 'chr17', 'position': 43044352, 'count': 1, 'coverage': 2, 'conversion': 'G>A', 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '-', 'feature_name': 'BRCA1', 'feature_strand': '-'}\n", - "\n", - "\t >>> F1R2_pair_test-single_end_mode passed! <<<\n", - "\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Checking results for F2R1_end_second_in_pair_test\n", - "\tExpecting: {'contig': 'chr17', 'position': 43001716, 'count': 1, 'coverage': 1, 'conversion': 'G>A', 'strand_conversion': 'G>A', 'strand': '+', 'feature_name': 'RPL27'}\n", - "\n", - "\t >>> F2R1_end_second_in_pair_test passed! <<<\n", - "\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Checking results for same_pos_dif_reads_test\n", - "\tExpecting: {'contig': 'chr17', 'position': 83199872, 'count': 9, 'coverage': 9, 'conversion': 'C>G', 'strand_conversion': 'C>G', 'strand': '+', 'feature_name': 'AC139099.2'}\n", - "\n", - "\t >>> same_pos_dif_reads_test passed! <<<\n", - "\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Checking results for tax1bp3_chr17_3665556_read_test\n", - "\tExpecting: {'contig': 'chr17', 'position': 3665556, 'num_rows': 1, 'count': 1, 'coverage': 1, 'conversion': 'G>A', 'strand_conversion': 'G>A', 'strand': '+'}\n", - "\n", - "\t >>> tax1bp3_chr17_3665556_read_test passed! <<<\n", - "\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Checking results for only_5_cells_test\n", - "\tExpecting: {'contig': '9', 'barcode': 'GGGACCTTCGAGCCAC-1', 'position': 3000524, 'num_rows': 1, 'count': 1, 'coverage': 12, 'conversion': 'C>A', 'strand_conversion': 'G>T', 'strand': '-'}\n", - "\n", - "\t >>> only_5_cells_test passed! <<<\n", - "\n", - "\tExpecting: {'contig': '9', 'barcode': 'GGGACCTTCGAGCCAC-1', 'position': 3000525, 'num_rows': 1, 'count': 1, 'coverage': 12, 'conversion': 'C>T', 'strand_conversion': 'G>A', 'strand': '-'}\n", - "\n", - "\t >>> only_5_cells_test passed! <<<\n", - "\n", - "\tExpecting: {'contig': '9', 'barcode': 'GATCCCTCAGTAACGG-1', 'position': 3000525, 'num_rows': 1, 'count': 1, 'coverage': 4, 'conversion': 'C>G', 'strand_conversion': 'G>C', 'strand': '-'}\n", - "\n", - "\t >>> only_5_cells_test passed! <<<\n", - "\n", - "\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Checking results for long_read_sc_test\n", - "\tExpecting: {'contig': '6', 'barcode': 'ENSMUST00000203816-AACGTGTTGGAGAGGG-16-G', 'position': 115807969, 'num_rows': 1, 'count': 1, 'coverage': 1, 'conversion': 'A>C', 'strand_conversion': 'T>G', 'strand': '-', 'feature_name': 'Rpl32'}\n", - "\n", - "\t >>> long_read_sc_test passed! <<<\n", - "\n", - "\tExpecting: {'contig': '6', 'barcode': 'ENSMUST00000081840-AAGTCGTACCAGGCTC-40-C', 'position': 115805653, 'num_rows': 1, 'count': 1, 'coverage': 1, 'conversion': 'G>A', 'strand_conversion': 'C>T', 'strand': '-', 'feature_name': 'Rpl32'}\n", - "\n", - "\t >>> long_read_sc_test passed! <<<\n", - "\n", - "\tExpecting: {'contig': '6', 'barcode': 'ENSMUST00000081840-AACGTGTTGGAGAGGG-40-G', 'position': 115807015, 'num_rows': 1, 'count': 1, 'coverage': 8, 'conversion': 'C>T', 'strand_conversion': 'G>A', 'strand': '-', 'feature_name': 'Rpl32'}\n", - "\n", - "\t >>> long_read_sc_test passed! <<<\n", - "\n", - "There were 0 failures\n" - ] - } - ], + "outputs": [], "source": [ "test_name_to_expectations = {\n", " \"unstranded_pair_test\": {\n", diff --git a/tests/integration_tests_auto_check.py b/tests/integration_tests_auto_check.py deleted file mode 100755 index 8c49e6a..0000000 --- a/tests/integration_tests_auto_check.py +++ /dev/null @@ -1,249 +0,0 @@ -import pandas as pd -import sys - -test_name_to_expectations = { - "unstranded_pair_test": { - "folder": "strandedness_tests", - "expectations": [{ - "contig": "Citrine.dna", - "position": 435, - "count": 22, - "coverage": 22, - "num_rows": 1, - "strand_conversion": "C>T", - "strand": "+" - }] - }, - "pair_test": { - "folder": "strandedness_tests", - "expectations": [{ - "contig": "18", - "position": 49491556, - "count": 2, - "coverage": 2, - "num_rows": 1, - "strand_conversion": "C>T", - "strand": "-", - "feature_name": "RPL17-C18orf32,RPL17", - "feature_strand": "-,-" - }, - { - "contig": "18", - "position": 49567494, - "count": 2, - "coverage": 2, - "num_rows": 1, - "strand_conversion": "C>T", - "strand": "+", - "feature_name": "LIPG", - "feature_strand": "+" - }] - }, - - "F1R2_pair_test": { - "folder": "strandedness_tests", - "expectations": [{ - "contig": "chr17", - "position": 43044352, - "count": 1, - "coverage": 1, - "conversion": "G>A", - "num_rows": 1, - "conversion": "G>A", - "strand_conversion": "C>T", - "strand": "-", - "feature_name": "BRCA1", - "feature_strand": "-" - }] - }, - - "F1R2_pair_test-single_end_mode": { - "folder": "strandedness_tests", - "expectations": [{ - "contig": "chr17", - "position": 43044352, - "count": 1, - "coverage": 2, - "conversion": "G>A", - "num_rows": 1, - "conversion": "G>A", - "strand_conversion": "C>T", - "strand": "-", - "feature_name": "BRCA1", - "feature_strand": "-" - }] - }, - - "F2R1_end_second_in_pair_test": { - "folder": "strandedness_tests", - "expectations": [{ - "contig": "chr17", - "position": 43001716, - "count": 1, - "coverage": 1, - "conversion": "G>A", - "strand_conversion": "G>A", - "strand": "+", - "feature_name": "RPL27" - }] - }, - "same_pos_dif_reads_test": { - "folder": "strandedness_tests", - "expectations": [{ - "contig": "chr17", - "position": 83199872, - "count": 9, - "coverage": 9, - "conversion": "C>G", - "strand_conversion": "C>G", - "strand": "+", - "feature_name": "AC139099.2" - }] - }, - "tax1bp3_chr17_3665556_read_test": { - "folder": "strandedness_tests", - "expectations": [{ - "contig": "chr17", - "position": 3665556, - "num_rows": 1, - "count": 1, - "coverage": 1, - "conversion": "G>A", - "strand_conversion": "G>A", - "strand": "+", - #"feature_name": "AC139099.2" - }] - }, - - "only_5_cells_test": { - "folder": "singlecell_tests", - "expectations": [{ - "contig": "9", - "barcode": "GGGACCTTCGAGCCAC-1", - "position": 3000524, - "num_rows": 1, - "count": 1, - "coverage": 12, - "conversion": "C>A", - "strand_conversion": "G>T", - "strand": "-" - }, - { - "contig": "9", - "barcode": "GGGACCTTCGAGCCAC-1", - "position": 3000525, - "num_rows": 1, - "count": 1, - "coverage": 12, - "conversion": "C>T", - "strand_conversion": "G>A", - "strand": "-" - }, - { - "contig": "9", - "barcode": "GATCCCTCAGTAACGG-1", - "position": 3000525, - "num_rows": 1, - "count": 1, - "coverage": 4, - "conversion": "C>G", - "strand_conversion": "G>C", - "strand": "-" - }] - }, - "long_read_sc_test": { - "folder": "singlecell_tests", - "expectations": [{ - "contig": "6", - "barcode": "ENSMUST00000203816-AACGTGTTGGAGAGGG-16-G", - "position": 115807969, - "num_rows": 1, - "count": 1, - "coverage": 1, - "conversion": "A>C", - "strand_conversion": "T>G", - "strand": "-", - "feature_name": "Rpl32" - }, - { - "contig": "6", - "barcode": "ENSMUST00000081840-AAGTCGTACCAGGCTC-40-C", - "position": 115805653, - "num_rows": 1, - "count": 1, - "coverage": 1, - "conversion": "G>A", - "strand_conversion": "C>T", - "strand": "-", - "feature_name": "Rpl32" - }, - { - "contig": "6", - "barcode": "ENSMUST00000081840-AACGTGTTGGAGAGGG-40-G", - "position": 115807015, - "num_rows": 1, - "count": 1, - "coverage": 8, - "conversion": "C>T", - "strand_conversion": "G>A", - "strand": "-", - "feature_name": "Rpl32" - }] - } - -} - -failures = 0 -for test_name, info in test_name_to_expectations.items(): - print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nChecking results for {}".format(test_name)) - - expectations_list = info.get("expectations") - for expectations in expectations_list: - print("\tExpecting: {}".format(expectations)) - - - folder = info.get("folder") - - contig = expectations.get("contig") - barcode = expectations.get("barcode", None) - - position = expectations.get("position") - - final_filtered_site_info_annotated = "{}/{}/final_filtered_site_info_annotated.tsv".format(folder, test_name) - final_filtered_site_info_annotated_df = pd.read_csv(final_filtered_site_info_annotated, sep='\t', index_col=0) - - row_of_interest = final_filtered_site_info_annotated_df[ - (final_filtered_site_info_annotated_df['position'] == position) &\ - (final_filtered_site_info_annotated_df['contig'].astype(str) == contig) - ] - - - if barcode: - row_of_interest = row_of_interest[row_of_interest.barcode == barcode] - - failure = False - try: - assert(len(row_of_interest) == expectations.get("num_rows", 1)) - except Exception as e: - print("Num rows expected: {}, was {}".format(expectations.get("num_rows", 1), len(row_of_interest))) - failure = True - - for attribute in list(expectations.keys()): - if attribute in ['count', 'coverage', 'conversion', 'strand', 'feature_name']: - attribute_expectation = expectations.get(attribute) - try: - assert(row_of_interest[attribute].iloc[0] == attribute_expectation) - except Exception as e: - print("Exception: {} was {}".format(attribute, row_of_interest[attribute].iloc[0])) - failure = True - if not failure: - print("\n\t >>> {} passed! <<<\n".format(test_name)) - else: - print("\n\t ~~~ {} FAILED! ~~~\n".format(test_name)) - failures += 1 - -print("There were {} failures".format(failures)) -if failures > 0: - sys.exit(1) -else: - sys.exit(0) \ No newline at end of file diff --git a/tests/strandedness_tests/scripts/F1R2_pair_test-single_end_mode.sh b/tests/strandedness_tests/scripts/F1R2_pair_test-single_end_mode.sh index d743985..9f2a11f 100644 --- a/tests/strandedness_tests/scripts/F1R2_pair_test-single_end_mode.sh +++ b/tests/strandedness_tests/scripts/F1R2_pair_test-single_end_mode.sh @@ -17,5 +17,4 @@ $MARINE/tests/strandedness_tests/F1R2_pair_test-single_end_mode \ --strandedness 2 \ --contigs "chr17" \ --sailor \ ---verbose \ --num_intervals_per_contig 16 \ No newline at end of file diff --git a/tests/strandedness_tests/unstranded_pair_test/final_filtered_site_info.tsv b/tests/strandedness_tests/unstranded_pair_test/final_filtered_site_info.tsv index 6fc92f0..fec8513 100644 --- a/tests/strandedness_tests/unstranded_pair_test/final_filtered_site_info.tsv +++ b/tests/strandedness_tests/unstranded_pair_test/final_filtered_site_info.tsv @@ -1,15 +1,15 @@ site_id barcode contig position ref alt strand count coverage conversion strand_conversion -no_barcode_Citrine.dna_432_C_T_+ no_barcode Citrine.dna 432 C T + 1 22 C>T C>T -no_barcode_Citrine.dna_437_A_G_+ no_barcode Citrine.dna 437 A G + 1 22 A>G A>G no_barcode_Citrine.dna_438_C_A_+ no_barcode Citrine.dna 438 C A + 1 22 C>A C>A -no_barcode_Citrine.dna_439_A_G_+ no_barcode Citrine.dna 439 A G + 1 22 A>G A>G -no_barcode_Citrine.dna_428_A_G_+ no_barcode Citrine.dna 428 A G + 1 20 A>G A>G -no_barcode_Citrine.dna_454_T_G_+ no_barcode Citrine.dna 454 T G + 1 14 T>G T>G -no_barcode_Citrine.dna_411_C_A_+ no_barcode Citrine.dna 411 C A + 1 18 C>A C>A -no_barcode_Citrine.dna_441_C_A_+ no_barcode Citrine.dna 441 C A + 1 22 C>A C>A -no_barcode_Citrine.dna_414_G_C_+ no_barcode Citrine.dna 414 G C + 1 18 G>C G>C no_barcode_Citrine.dna_431_A_G_+ no_barcode Citrine.dna 431 A G + 1 22 A>G A>G +no_barcode_Citrine.dna_437_A_G_+ no_barcode Citrine.dna 437 A G + 1 22 A>G A>G +no_barcode_Citrine.dna_432_C_T_+ no_barcode Citrine.dna 432 C T + 1 22 C>T C>T +no_barcode_Citrine.dna_21_G_A_+ no_barcode Citrine.dna 21 G A + 2 2 G>A G>A no_barcode_Citrine.dna_430_T_G_+ no_barcode Citrine.dna 430 T G + 2 22 T>G T>G no_barcode_Citrine.dna_435_C_T_+ no_barcode Citrine.dna 435 C T + 22 22 C>T C>T -no_barcode_Citrine.dna_21_G_A_+ no_barcode Citrine.dna 21 G A + 2 2 G>A G>A +no_barcode_Citrine.dna_441_C_A_+ no_barcode Citrine.dna 441 C A + 1 22 C>A C>A +no_barcode_Citrine.dna_414_G_C_+ no_barcode Citrine.dna 414 G C + 1 18 G>C G>C +no_barcode_Citrine.dna_439_A_G_+ no_barcode Citrine.dna 439 A G + 1 22 A>G A>G +no_barcode_Citrine.dna_411_C_A_+ no_barcode Citrine.dna 411 C A + 1 18 C>A C>A no_barcode_Citrine.dna_149_C_G_+ no_barcode Citrine.dna 149 C G + 1 2 C>G C>G +no_barcode_Citrine.dna_428_A_G_+ no_barcode Citrine.dna 428 A G + 1 20 A>G A>G +no_barcode_Citrine.dna_454_T_G_+ no_barcode Citrine.dna 454 T G + 1 14 T>G T>G