From f3423c7dd31b42c85111ed4f24b95483d4ff9f1c Mon Sep 17 00:00:00 2001 From: Eric Kofman Date: Wed, 21 Aug 2024 13:16:01 -0700 Subject: [PATCH] default CT if empty sailor flag provided --- marine.py | 2 +- tests/integration_tests.ipynb | 2155 +++---------------------- tests/integration_tests_auto_check.py | 17 + tests/integration_tests_run.sh | 2 +- 4 files changed, 237 insertions(+), 1939 deletions(-) diff --git a/marine.py b/marine.py index 5b992e6..9ebfd3e 100755 --- a/marine.py +++ b/marine.py @@ -572,7 +572,7 @@ def run(bam_filepath, annotation_bedfile_path, output_folder, contigs=[], num_in parser.add_argument('--contigs', type=str, default='all') parser.add_argument('--min_read_quality', type=int, default=0, help='Minimum read quality, default is 0... every aligner assigns mapq scores differently, so double-check the range of qualities in your sample before setting this filter') - parser.add_argument('--sailor', type=str, default=None, dest='sailor') + parser.add_argument('--sailor', type=str, nargs='?', const='CT', default=None, dest='sailor') parser.add_argument('--bedgraphs', type=str, default=None, help='Conversions for which to output a bedgraph for non-single cell runs, e.g. CT, AI') parser.add_argument('--verbose', dest='verbose', action='store_true') diff --git a/tests/integration_tests.ipynb b/tests/integration_tests.ipynb index 52cd28e..2b78545 100644 --- a/tests/integration_tests.ipynb +++ b/tests/integration_tests.ipynb @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 8, "id": "ba12c2b8-2ccb-4866-86c8-a2284fd1229f", "metadata": { "scrolled": true @@ -73,131 +73,7 @@ "/tscc/projects/ps-yeolab3/ekofman/sailor2/tests\n", "Running\n", "/tscc/projects/ps-yeolab3/ekofman/sailor2\n", - "unstranded_pair_test\n", - "Removing old files...\n", - "Running tests...\n", - "Assuming 1 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/citrine435.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/unstranded_pair_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t0\n", - "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", - "\tBedgraphs:\t['CT']\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\tall\n", - "\tNumber of intervals:\t1\n", - "\tCores:\t1\n", - "\tVerbose:\tFalse\n", - "\tKeep intermediate files:\tFalse\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/unstranded_pair_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "broken_up_contigs [[]]\n", - "Each contig is being split into 1 subsets...\n", - "\tContig Citrine.dna\n", - "1 total jobs\n", - "Reads processed:\t44\n", - "Time to process reads in min:\t0.02895\n", - "Read Summary:\n", - "edited 25\n", - "no_edits 19\n", - "total_edits 37\n", - "dtype: int64\n", - "\n", - "Deleting overall_label_to_list_of_contents...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit Citrine.dna_000_0_741, 0/1...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:01<00:00, 1.70s/it]\n", - "100%|██████████| 1/1 [00:01<00:00, 1.74s/it]\n", - "/tscc/projects/ps-yeolab3/ekofman/sailor2//marine.py:483: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " sites_for_conversion['edit_fraction'] = sites_for_conversion['count']/sites_for_conversion['coverage']\n", - "/tscc/projects/ps-yeolab3/ekofman/sailor2//marine.py:484: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " sites_for_conversion['start'] = sites_for_conversion['position'] - 1\n", - "***** WARNING: File /tmp/pybedtools.96t907wz.tmp has inconsistent naming convention for record:\n", - "Citrine.dna\t436\t438\tno_barcode_Citrine.dna_437_A_G_+\tA>G\t+\n", - "\n", - "***** WARNING: File /tmp/pybedtools.96t907wz.tmp has inconsistent naming convention for record:\n", - "Citrine.dna\t436\t438\tno_barcode_Citrine.dna_437_A_G_+\tA>G\t+\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 37\n", - "filtered 37\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.029 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t37\n", - "\tNumber of unique edit sites:\n", - "\t14\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "14 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "2 final deduplicated SAILOR-formatted sites\n", - "Making bedgraphs for ['CT'] conversions...\n", - "\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.566707MB; Peak: 22.675407MB\n", - "Time elapsed: 4.40s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n", - "F1R2_pair_test-single_end_mode\n", + "F1R2_pair_test-single_end_mode_sailor\n", "Removing old files...\n", "Running tests...\n", "Python is /tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/python\n", @@ -205,7 +81,7 @@ "Arguments:\n", "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F1R2_pair.bam\n", "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test-single_end_mode\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test-single_end_mode_sailor\n", "\tBarcode whitelist:\tNone\n", "\tStrandedness:\t2\n", "\tBarcode Tag:\tNone\n", @@ -213,7 +89,7 @@ "\tCoverage only:\tFalse\n", "\tFiltering only:\tFalse\n", "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", + "\tSailor outputs:\t['CT', 'AG']\n", "\tBedgraphs:\t[]\n", "\tMinimum base quality:\t0\n", "\tMinimum read quality:\t0\n", @@ -225,6 +101,8 @@ "\tVerbose:\tTrue\n", "\tKeep intermediate files:\tFalse\n", "\tSkip coverage?:\tFalse\n", + "\tFor single-cell: \t6 contigs at at time\n", + "\n", "\n", ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", @@ -239,10 +117,7 @@ "~~~~~~~~~~~~~~~~~~\n", "Identifying edits\n", "~~~~~~~~~~~~~~~~~~\n", - "i_options range(0, 2)\n", - "j_options range(0, 3)\n", - "j_options range(3, 6)\n", - "broken_up_contigs [['chr17']]\n", + "Contig groups to be processed: [['chr17']]\n", "Each contig is being split into 16 subsets...\n", "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", "\tContig chr17\n", @@ -256,7 +131,7 @@ "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", @@ -268,9 +143,6 @@ "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "MD tag 26A20G52\n", "CIGAR tag 1S100M\n", @@ -369,40 +241,43 @@ "Finalized fixed aligned seq:\n", " aagtttatttgcagtgttaacagcacaacatttacaaaacAtatGttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", "Indicated qualities:\n", - " 34343434343434263434342634343434341234343434263434343434343434343434343426343434343434341234343434343434343434343434263412343434343434343434342634343434343434343434343434343426343434263434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['A', 'G']\n", - "ref bases ['G', 'T']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['A', 'G'], ref bases: ['G', 'T']\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", + " chr17:006, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", + "34343434343434263434342634343434341234343434263434343434343434343434343426343434343434341234343434343434343434343434263412343434343434343434342634343434343434343434343434343426343434263434343434343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['A', 'G']\n", + "ref bases ['G', 'T']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['A', 'G'], ref bases: ['G', 'T']\n", + "Getting info: A G 34 41\n", + "Getting info: G T 12 45\n", + "chr17:008, total reads: 2, counts_df: chr17\n", + "edited 2\n", + "total_edits 4\n", + "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", + "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "Getting info: A G 34 41\n", - "Getting info: G T 12 45\n", - "chr17:008, total reads: 2, counts_df: chr17\n", - "edited 2\n", - "total_edits 4\n", "Reads processed:\t2\n", - "Time to process reads in min:\t0.03892\n", + "Time to process reads in min:\t0.09623\n", "Read Summary:\n", "edited 2\n", "total_edits 4\n", @@ -412,1579 +287,40 @@ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Calculating coverage at edited sites, minimum read quality is 0...\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit chr17_000_0_5203591, 0/16...\n", - "\tsplit chr17_010_52035910_57239501, 10/16...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n", - "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:02<00:00, 7.17it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "~~~~~~\n", - "!!!!SINGLE END!!!!!\n", - "~~~~~~~`\n", - "~~~~~~\n", - "!!!!SINGLE END!!!!!\n", - "~~~~~~~`\n", - "~~~~~~\n", - "!!!!SINGLE END!!!!!\n", - "~~~~~~~`\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:02<00:00, 7.04it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 4\n", - "filtered 3\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.039 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t3\n", - "\tNumber of unique edit sites:\n", - "\t3\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "3 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "1 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.284636MB; Peak: 22.397353MB\n", - "Time elapsed: 5.56s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n", - "F1R2_pair_test\n", - "Removing old files...\n", - "Running tests...\n", - "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F1R2_pair.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\tchr17\n", - "\tNumber of intervals:\t16\n", - "\tCores:\t16\n", - "\tVerbose:\tTrue\n", - "\tKeep intermediate files:\tFalse\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "i_options range(0, 2)\n", - "j_options range(0, 3)\n", - "j_options range(3, 6)\n", - "broken_up_contigs [['chr17']]\n", - "Each contig is being split into 16 subsets...\n", - "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", - "\tContig chr17\n", - "16 total jobs\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 26A20G52\n", - "CIGAR tag 1S100M\n", - "is_reverse False\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:2211:40480:18323\n", - "VH01429:22:AACFJ5NHV:1:2211:40480:18323\t99\t#16\t43044305\t255\t1S100M\t#16\t43044312\t107\tGTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\tarray('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\t[('MD', '26A20G52'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 190)]\n", - "reverse_or_forward: -\n", - "----------------------------\n", - "MD tag: 26A20G52\n", - "CIGAR string 1S100M\n", - "Reference seq: TGCTACCAAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACGTATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Aligned seq: GTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Qualities: array('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 100)]\n", - "Aligned sequence before clipping (if needed):\n", - " GTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Qualities before clipping:\n", - " array('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " TGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['26', '20', '52']\n", - "[26, 47, 100]\n", - "Indicated reference seq:\n", - " tgctaccaagtttatttgcagtgttaAcagcacaacatttacaaaacGtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " tgctaccaagtttatttgcagtgttaAcagcacaacatttacaaaacGtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", - "Fixed aligned seq:\n", - " TGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", - "Finalized fixed aligned seq:\n", - " tgctaccaagtttatttgcagtgttaCcagcacaacatttacaaaacAtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", - "Indicated qualities:\n", - " 34343434343434343434263434343434343434342634343434261234343434342626343434343434343434341234341234343434343434123434343434343412343434343412343434343434343434342634342634343426343426343434342634343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['C', 'A']\n", - "ref bases ['A', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['C', 'A'], ref bases: ['A', 'G']\n", - "Getting info: C A 12 27\n", - "Getting info: A G 12 48\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 40G3T55\n", - "CIGAR tag 100M1S\n", - "is_reverse True\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:1:2211:40480:18323\n", - "VH01429:22:AACFJ5NHV:1:2211:40480:18323\t147\t#16\t43044312\t255\t100M1S\t#16\t43044305\t-107\tAAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\tarray('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '40G3T55'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 190)]\n", - "reverse_or_forward: -\n", - "----------------------------\n", - "MD tag: 40G3T55\n", - "CIGAR string 100M1S\n", - "Reference seq: AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACGTATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", - "Aligned seq: AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 100), (4, 1)]\n", - "Aligned sequence before clipping (if needed):\n", - " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['40', '3', '55']\n", - "[40, 44, 100]\n", - "Indicated reference seq:\n", - " aagtttatttgcagtgttaacagcacaacatttacaaaacGtatTttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " aagtttatttgcagtgttaacagcacaacatttacaaaacGtatTttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", - "Fixed aligned seq:\n", - " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", - "Finalized fixed aligned seq:\n", - " aagtttatttgcagtgttaacagcacaacatttacaaaacAtatGttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", - "Indicated qualities:\n", - " 34343434343434263434342634343434341234343434263434343434343434343434343426343434343434341234343434343434343434343434263412343434343434343434342634343434343434343434343434343426343434263434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['A', 'G']\n", - "ref bases ['G', 'T']\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "Reads processed:\t2\n", - "Time to process reads in min:\t0.04157\n", - "Read Summary:\n", - "edited 2\n", - "total_edits 4\n", - "dtype: int64\n", - "\n", - "Deleting overall_label_to_list_of_contents...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit chr17_000_0_5203591, 0/16...\n", - "\tsplit chr17_010_52035910_57239501, 10/16...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n", - "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:02<00:00, 6.72it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43044352~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43044331~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43044356~~~~~~~`\n", - "coverage_at_pos 1\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:02<00:00, 6.73it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 4\n", - "filtered 3\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.042 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t3\n", - "\tNumber of unique edit sites:\n", - "\t3\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "3 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "1 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.285267MB; Peak: 22.397912MB\n", - "Time elapsed: 5.93s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n", - "F2R1_end_second_in_pair_test\n", - "Removing old files...\n", - "Running tests...\n", - "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F2R1_end_second_in_pair.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\tchr17\n", - "\tNumber of intervals:\t16\n", - "\tCores:\t16\n", - "\tVerbose:\tTrue\n", - "\tKeep intermediate files:\tFalse\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "i_options range(0, 2)\n", - "j_options range(0, 3)\n", - "j_options range(3, 6)\n", - "broken_up_contigs [['chr17']]\n", - "Each contig is being split into 16 subsets...\n", - "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", - "\tContig chr17\n", - "16 total jobs\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 10A0G0A84\n", - "CIGAR tag 13M198556N84M1S\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:1112:41030:25422\n", - "VH01429:22:AACFJ5NHV:1:1112:41030:25422\t163\t#16\t43001705\t255\t13M198556N84M1S\t#16\t43200271\t198653\tGGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGCA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '10A0G0A84'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 3), ('AS', 172), ('XS', '+')]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 10A0G0A84\n", - "CIGAR string 13M198556N84M1S\n", - "Reference seq: GGCTGTCATGAGAGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGC\n", - "Aligned seq: GGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGCA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 13), (3, 198556), (0, 84), (4, 1)]\n", - "Aligned sequence before clipping (if needed):\n", - " GGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGCA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 13), (3, 198556), (0, 84)]\n", - "Aligned sequence after clipping (if needed):\n", - " GGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGC\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['10', '0', '0', '84']\n", - "[10, 11, 12, 97]\n", - "Indicated reference seq:\n", - " ggctgtcatgAGAgaggaggaggatgagctcaaagatgaagttcaaagtcagtcctctgcttcctcagaggattacatcatcatcctgcctgagtgc\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ggctgtcatgAGA198556*ngaggaggaggatgagctcaaagatgaagttcaaagtcagtcctctgcttcctcagaggattacatcatcatcctgcctgagtgc\n", - "Fixed aligned seq:\n", - " GGCTGTCATGGAG198556*nGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGC\n", - "Finalized fixed aligned seq:\n", - " ggctgtcatgGAG198556*ngaggaggaggatgagctcaaagatgaagttcaaagtcagtcctctgcttcctcagaggattacatcatcatcctgcctgagtgc\n", - "Indicated qualities:\n", - " 34343434343434343434263434343434343434342634342634343434343434343434343434343434343434343434123434343434343426343434342634343434343434342634343434342634343434123434123434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A', 'G']\n", - "ref bases ['A', 'G', 'A']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A', 'G'], ref bases: ['A', 'G', 'A']\n", - "Getting info: G A 26 11\n", - "Getting info: A G 34 12\n", - "Getting info: G A 34 13\n", - "chr17:008, total reads: 1, counts_df: chr17\n", - "edited 1\n", - "total_edits 3\n", - "Reads processed:\t1\n", - "Time to process reads in min:\t0.04317\n", - "Read Summary:\n", - "edited 1\n", - "total_edits 3\n", - "dtype: int64\n", - "\n", - "Deleting overall_label_to_list_of_contents...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit chr17_000_0_5203591, 0/16...\n", - "\tsplit chr17_010_52035910_57239501, 10/16...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n", - "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:02<00:00, 6.50it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43001715~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43001716~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", - " chr17:43001717~~~~~~~`\n", - "coverage_at_pos 1\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:02<00:00, 6.11it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 3\n", - "filtered 3\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.046 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t3\n", - "\tNumber of unique edit sites:\n", - "\t3\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "3 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "0 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.433455MB; Peak: 22.543922MB\n", - "Time elapsed: 6.32s\n", - "-------------------------------\n", - "Deleting intermediate files...\n", - "-------------------------------\n", - "++++++\n", - "Done!\n", - "++++++\n", - "same_pos_dif_reads_test\n", - "Removing old files...\n", - "Running tests...\n", - "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/same_pos_dif_reads.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\tchr17\n", - "\tNumber of intervals:\t16\n", - "\tCores:\t16\n", - "\tVerbose:\tTrue\n", - "\tKeep intermediate files:\tFalse\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "i_options range(0, 2)\n", - "j_options range(0, 3)\n", - "j_options range(3, 6)\n", - "broken_up_contigs [['chr17']]\n", - "Each contig is being split into 16 subsets...\n", - "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", - "\tContig chr17\n", - "16 total jobs\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 55C43\n", - "CIGAR tag 1S97M17144N2M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:2:2606:39477:40189\n", - "VH01429:22:AACFJ5NHV:2:2606:39477:40189\t163\t#16\t83195127\t255\t1S97M17144N2M\t#16\t83195141\t17258\tTCCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\t[('MD', '55C43'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 197), ('XS', '+')]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 55C43\n", - "CIGAR string 1S97M17144N2M\n", - "Reference seq: CCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTCCCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", - "Aligned seq: TCCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 97), (3, 17144), (0, 2)]\n", - "Aligned sequence before clipping (if needed):\n", - " TCCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 97), (3, 17144), (0, 2)]\n", - "Aligned sequence after clipping (if needed):\n", - " CCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGGGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['55', '43']\n", - "[55, 99]\n", - "Indicated reference seq:\n", - " cccaccttcgcttccgggaccacagagcccctggagcctgcacgccacggagtctCccactgaggagtgaggaggcctcttgtggggacagatctggga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " cccaccttcgcttccgggaccacagagcccctggagcctgcacgccacggagtctCccactgaggagtgaggaggcctcttgtggggacagatctgg17144*nga\n", - "Fixed aligned seq:\n", - " CCCACCTTCGCTTCCGGGACCACAGAGCCCCTGGAGCCTGCACGCCACGGAGTCTACCACTGAGGAGTGAGGAGGCCTCTTGTGGGGACAGATCTGG17144*nGA\n", - "Finalized fixed aligned seq:\n", - " cccaccttcgcttccgggaccacagagcccctggagcctgcacgccacggagtctAccactgaggagtgaggaggcctcttgtggggacagatctgg17144*nga\n", - "Indicated qualities:\n", - " 343434343434342626343434343434343434343434263434263434343434343434343434343434263434343434343434343434343434341234341234343426343434343434343434343434343434343434343434343434263434343434343412343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['A']\n", - "ref bases ['C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['A'], ref bases: ['C']\n", - "Getting info: A C 12 56\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 86C13\n", - "CIGAR tag 1S17M4579N83M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:2:1602:67558:13893\n", - "VH01429:22:AACFJ5NHV:2:1602:67558:13893\t163\t#16\t83195207\t255\t1S17M4579N83M\t#16\t83199866\t4757\tGTGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '86C13'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 191), ('XS', '+')]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 86C13\n", - "CIGAR string 1S17M4579N83M\n", - "Reference seq: TGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAAT\n", - "Aligned seq: GTGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 17), (3, 4579), (0, 83)]\n", - "Aligned sequence before clipping (if needed):\n", - " GTGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 17), (3, 4579), (0, 83)]\n", - "Aligned sequence after clipping (if needed):\n", - " TGTGGGGACAGATCTGGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['86', '13']\n", - "[86, 100]\n", - "Indicated reference seq:\n", - " tgtggggacagatctggaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaat\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " tgtggggacagatctgg4579*naatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaat\n", - "Fixed aligned seq:\n", - " TGTGGGGACAGATCTGG4579*nAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAAT\n", - "Finalized fixed aligned seq:\n", - " tgtggggacagatctgg4579*naatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaat\n", - "Indicated qualities:\n", - " 34343434343434343434342634342634343434343434343434343434343434343434343434343412343434343434343434343434343434343434343434343434343434343434341234343434343434343434343426343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G']\n", - "ref bases ['C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G'], ref bases: ['C']\n", - "Getting info: G C 34 4666\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 95C4\n", - "CIGAR tag 1S100M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:1109:44703:39053\n", - "VH01429:22:AACFJ5NHV:1:1109:44703:39053\t163\t#16\t83199777\t255\t1S100M\t#16\t83199839\t163\tGCTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\t[('MD', '95C4'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 191)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 95C4\n", - "CIGAR string 1S100M\n", - "Reference seq: CTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTG\n", - "Aligned seq: GCTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 100)]\n", - "Aligned sequence before clipping (if needed):\n", - " GCTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " CTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['95', '4']\n", - "[95, 100]\n", - "Indicated reference seq:\n", - " ctattattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtg\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ctattattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtg\n", - "Fixed aligned seq:\n", - " CTATTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTG\n", - "Finalized fixed aligned seq:\n", - " ctattattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtg\n", - "Indicated qualities:\n", - " 34343434343434343434343434343434123434343434343434343434343434343434343434343434343426343434343434343434343434343434343434342634343434343426343434343434343434343434343434343434343434343426343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G']\n", - "ref bases ['C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G'], ref bases: ['C']\n", - "Getting info: G C 34 96\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 92C8\n", - "CIGAR tag 101M\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:1208:38151:26028\n", - "VH01429:22:AACFJ5NHV:2:1208:38151:26028\t83\t#16\t83199780\t255\t101M\t#16\t83199711\t-170\tTTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\t[('MD', '92C8'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 195)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 92C8\n", - "CIGAR string 101M\n", - "Reference seq: TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGA\n", - "Aligned seq: TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence before clipping (if needed):\n", - " TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence after clipping (if needed):\n", - " TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['92', '8']\n", - "[92, 101]\n", - "Indicated reference seq:\n", - " ttattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ttattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcga\n", - "Fixed aligned seq:\n", - " TTATTGAGTTTCTTCTTTTTCAGAATGAGTGGCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGA\n", - "Finalized fixed aligned seq:\n", - " ttattgagtttcttctttttcagaatgagtggcatccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcga\n", - "Indicated qualities:\n", - " 3434343434343434343426343426343434343434343426343434343434343434343434343434343434343434343434343434343434343434343434263434343434343426343434343434343434343434343434343434343434343434343434343426343426\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G']\n", - "ref bases ['C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G'], ref bases: ['C']\n", - "Getting info: G C 34 93\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 60C37\n", - "CIGAR tag 1S98M2S\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:2112:47146:35399\n", - "VH01429:22:AACFJ5NHV:1:2112:47146:35399\t163\t#16\t83199812\t255\t1S98M2S\t#16\t83199946\t232\tTCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAAT\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '60C37'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 1), ('AS', 192)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 60C37\n", - "CIGAR string 1S98M2S\n", - "Reference seq: CATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCA\n", - "Aligned seq: TCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAAT\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 98), (4, 2)]\n", - "Aligned sequence before clipping (if needed):\n", - " TCATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAAT\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 98)]\n", - "Aligned sequence after clipping (if needed):\n", - " CATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['60', '37']\n", - "[60, 98]\n", - "Indicated reference seq:\n", - " catccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgca\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " catccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgca\n", - "Fixed aligned seq:\n", - " CATCCAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCA\n", - "Finalized fixed aligned seq:\n", - " catccaaacctgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgca\n", - "Indicated qualities:\n", - " 3434343434343426343434343434343434343434343434342634343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G']\n", - "ref bases ['C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G'], ref bases: ['C']\n", - "Getting info: G C 34 61\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 5C50C37G5\n", - "CIGAR tag 1S100M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:2:2610:69319:26899\n", - "VH01429:22:AACFJ5NHV:2:2610:69319:26899\t163\t#16\t83199816\t255\t1S100M\t#16\t83199822\t105\tTCAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\tarray('B', [34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\t[('MD', '5C50C37G5'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 3), ('AS', 187)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 5C50C37G5\n", - "CIGAR string 1S100M\n", - "Reference seq: CAAACCTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTT\n", - "Aligned seq: TCAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", - "Qualities: array('B', [34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 100)]\n", - "Aligned sequence before clipping (if needed):\n", - " TCAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", - "Qualities before clipping:\n", - " array('B', [34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " CAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", - "Qualities after clipping:\n", - " array('B', [26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 34, 12, 34, 34, 12, 34, 12, 26, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['5', '50', '37', '5']\n", - "[5, 56, 94, 100]\n", - "Indicated reference seq:\n", - " caaacCtgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactt\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " caaacCtgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactt\n", - "Fixed aligned seq:\n", - " CAAACTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTT\n", - "Finalized fixed aligned seq:\n", - " caaacTtgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactt\n", - "Indicated qualities:\n", - " 26343434343434343412342634343412343412341226263434343434341234343434343434343434123434343434343434343434343434343434343434342634343434343412343434343434263434343434343434343434343434343434343434342626\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['T', 'G', 'A']\n", - "ref bases ['C', 'C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['T', 'G', 'A'], ref bases: ['C', 'C', 'G']\n", - "Getting info: T C 34 6\n", - "Getting info: G C 34 57\n", - "Getting info: A G 34 95\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 50C37G10\n", - "CIGAR tag 1S99M1S\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:2610:69319:26899\n", - "VH01429:22:AACFJ5NHV:2:2610:69319:26899\t83\t#16\t83199822\t255\t1S99M1S\t#16\t83199816\t-105\tTTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '50C37G10'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 187)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 50C37G10\n", - "CIGAR string 1S99M1S\n", - "Reference seq: TGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTG\n", - "Aligned seq: TTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 99), (4, 1)]\n", - "Aligned sequence before clipping (if needed):\n", - " TTGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 99)]\n", - "Aligned sequence after clipping (if needed):\n", - " TGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTG\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 26, 34, 34, 12, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 26, 34, 34, 26, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['50', '37', '10']\n", - "[50, 88, 99]\n", - "Indicated reference seq:\n", - " tgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctg\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " tgactcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctg\n", - "Fixed aligned seq:\n", - " TGACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTG\n", - "Finalized fixed aligned seq:\n", - " tgactcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctg\n", - "Indicated qualities:\n", - " 343434343434341234123434343434343434343434343434343434343412343426263434121234343434343434343434343434343412342634342634341234123434343434341234343434343434343434343434343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 51\n", - "Getting info: A G 34 89\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 48C37G14\n", - "CIGAR tag 101M\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:1:1403:49513:32843\n", - "VH01429:22:AACFJ5NHV:1:1403:49513:32843\t83\t#16\t83199824\t255\t101M\t#16\t83199748\t-177\tACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '48C37G14'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 194)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 48C37G14\n", - "CIGAR string 101M\n", - "Reference seq: ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAA\n", - "Aligned seq: ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence before clipping (if needed):\n", - " ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence after clipping (if needed):\n", - " ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['48', '37', '14']\n", - "[48, 86, 101]\n", - "Indicated reference seq:\n", - " actcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " actcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", - "Fixed aligned seq:\n", - " ACTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Finalized fixed aligned seq:\n", - " actcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaa\n", - "Indicated qualities:\n", - " 3434343434343434343434343434341234123434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 49\n", - "Getting info: A G 34 87\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 47C37G14\n", - "CIGAR tag 1S100M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:2414:26942:20235\n", - "VH01429:22:AACFJ5NHV:1:2414:26942:20235\t163\t#16\t83199825\t255\t1S100M\t#16\t83199839\t115\tTCTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\tarray('B', [34, 12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '47C37G14'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 191)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 47C37G14\n", - "CIGAR string 1S100M\n", - "Reference seq: CTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAA\n", - "Aligned seq: TCTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities: array('B', [34, 12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 1), (0, 100)]\n", - "Aligned sequence before clipping (if needed):\n", - " TCTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities before clipping:\n", - " array('B', [34, 12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " CTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Qualities after clipping:\n", - " array('B', [12, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 26, 34, 12, 34, 12, 26, 12, 34, 34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 26, 34, 26, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['47', '37', '14']\n", - "[47, 85, 100]\n", - "Indicated reference seq:\n", - " ctcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ctcatatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaa\n", - "Fixed aligned seq:\n", - " CTCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAA\n", - "Finalized fixed aligned seq:\n", - " ctcatatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaa\n", - "Indicated qualities:\n", - " 12343434123434263434343412343434263434343434343434343434343434123434343426343426343434343434343434341226343434343434343434263434263434263412341226123434261234343434263434263426343434341234343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 48\n", - "Getting info: A G 26 86\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 2T9T0T26G4C37G1A11\n", - "CIGAR tag 3S97M\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:2:1608:28097:17679\n", - "VH01429:22:AACFJ5NHV:2:1608:28097:17679\t163\t#16\t83199827\t255\t3S97M\t#16\t83199827\t100\tCCACAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\tarray('B', [12, 12, 12, 12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '2T9T0T26G4C37G1A11'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 7), ('AS', 177)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 2T9T0T26G4C37G1A11\n", - "CIGAR string 3S97M\n", - "Reference seq: CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAA\n", - "Aligned seq: CCACAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", - "Qualities: array('B', [12, 12, 12, 12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(4, 3), (0, 97)]\n", - "Aligned sequence before clipping (if needed):\n", - " CCACAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", - "Qualities before clipping:\n", - " array('B', [12, 12, 12, 12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 97)]\n", - "Aligned sequence after clipping (if needed):\n", - " CAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", - "Qualities after clipping:\n", - " array('B', [12, 12, 12, 34, 12, 34, 34, 34, 12, 34, 34, 12, 12, 12, 34, 34, 34, 12, 12, 34, 12, 12, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 12, 26, 34, 12, 34, 26, 34, 12, 12, 34, 34, 34, 34, 34, 12, 12, 26, 26, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['2', '9', '0', '26', '4', '37', '1', '11']\n", - "[2, 12, 13, 40, 45, 83, 85, 97]\n", - "Indicated reference seq:\n", - " caTatttccaacTTtattaatttttgcaagacttcggaggGtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtActttcctgtaa\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " caTatttccaacTTtattaatttttgcaagacttcggaggGtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtActttcctgtaa\n", - "Fixed aligned seq:\n", - " CAAATTTCCAACAATATTAATTTTTGCAAGACTTCGGAGGCTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATTCTTTCCTGTAA\n", - "Finalized fixed aligned seq:\n", - " caAatttccaacAAtattaatttttgcaagacttcggaggCtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtTctttcctgtaa\n", - "Indicated qualities:\n", - " 12121234123434341234341212123434341212341212341212343434342634341226341234263412123434343434121226263434343434342634341234122634343434343434343412343434343434343434343434123434342634343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['A', 'A', 'A', 'C', 'G', 'A', 'T']\n", - "ref bases ['T', 'T', 'T', 'G', 'C', 'G', 'A']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['A', 'A', 'A', 'C', 'G', 'A', 'T'], ref bases: ['T', 'T', 'T', 'G', 'C', 'G', 'A']\n", - "Getting info: A T 12 3\n", - "Getting info: A T 12 13\n", - "Getting info: A T 12 14\n", - "Getting info: C G 12 41\n", - "Getting info: G C 34 46\n", - "Getting info: A G 34 84\n", - "Getting info: T A 12 86\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 45C37G16\n", - "CIGAR tag 100M1S\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:1608:28097:17679\n", - "VH01429:22:AACFJ5NHV:2:1608:28097:17679\t83\t#16\t83199827\t255\t100M1S\t#16\t83199827\t-100\tCATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACG\tarray('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 12])\t[('MD', '45C37G16'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 177)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 45C37G16\n", - "CIGAR string 100M1S\n", - "Reference seq: CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAAC\n", - "Aligned seq: CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACG\n", - "Qualities: array('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 12])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 100), (4, 1)]\n", - "Aligned sequence before clipping (if needed):\n", - " CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACG\n", - "Qualities before clipping:\n", - " array('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 12])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 100)]\n", - "Aligned sequence after clipping (if needed):\n", - " CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAAC\n", - "Qualities after clipping:\n", - " array('B', [34, 26, 12, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 12, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 12, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['45', '37', '16']\n", - "[45, 83, 100]\n", - "Indicated reference seq:\n", - " catatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaac\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " catatttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaac\n", - "Fixed aligned seq:\n", - " CATATTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAAC\n", - "Finalized fixed aligned seq:\n", - " catatttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaac\n", - "Indicated qualities:\n", - " 34261234343434263434342634343434342634343412123434343426343434343434343434343434343434341226343434343434343434343434343426342612343434343434343434341234343434123434342612343434343434343434343412343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 26 46\n", - "Getting info: A G 26 84\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 40C37G20\n", - "CIGAR tag 99M2S\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:1104:58090:46985\n", - "VH01429:22:AACFJ5NHV:2:1104:58090:46985\t83\t#16\t83199832\t255\t99M2S\t#16\t83195148\t-4783\tTTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACCT\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 12])\t[('MD', '40C37G20'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 192), ('XS', '+')]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 40C37G20\n", - "CIGAR string 99M2S\n", - "Reference seq: TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGAC\n", - "Aligned seq: TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACCT\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 12])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 99), (4, 2)]\n", - "Aligned sequence before clipping (if needed):\n", - " TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACCT\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 12])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 99)]\n", - "Aligned sequence after clipping (if needed):\n", - " TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGAC\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 12, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['40', '37', '20']\n", - "[40, 78, 99]\n", - "Indicated reference seq:\n", - " ttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgac\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ttccaactttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgac\n", - "Fixed aligned seq:\n", - " TTCCAACTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGAC\n", - "Finalized fixed aligned seq:\n", - " ttccaactttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgac\n", - "Indicated qualities:\n", - " 343434343434343434263412263434123434343434341234342634343434343434343434343434343434343434343434342634343434343434263434343434343434343434262634343434343434342626123434343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 41\n", - "Getting info: A G 34 79\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 33C37G19C9\n", - "CIGAR tag 101M\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:1:1109:44703:39053\n", - "VH01429:22:AACFJ5NHV:1:1109:44703:39053\t83\t#16\t83199839\t255\t101M\t#16\t83199777\t-163\tTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\tarray('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '33C37G19C9'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 3), ('AS', 191)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 33C37G19C9\n", - "CIGAR string 101M\n", - "Reference seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Aligned seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", - "Qualities: array('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence before clipping (if needed):\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence after clipping (if needed):\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['33', '37', '19', '9']\n", - "[33, 71, 91, 101]\n", - "Indicated reference seq:\n", - " tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgaCtaaaggaga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgaCtaaaggaga\n", - "Fixed aligned seq:\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGATTAAAGGAGA\n", - "Finalized fixed aligned seq:\n", - " tttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgaTtaaaggaga\n", - "Indicated qualities:\n", - " 3434343426343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343412343434343434343434343412\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A', 'T']\n", - "ref bases ['C', 'G', 'C']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A', 'T'], ref bases: ['C', 'G', 'C']\n", - "Getting info: G C 34 34\n", - "Getting info: A G 34 72\n", - "Getting info: T C 34 92\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 33C37G29\n", - "CIGAR tag 101M\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:1:2414:26942:20235\n", - "VH01429:22:AACFJ5NHV:1:2414:26942:20235\t83\t#16\t83199839\t255\t101M\t#16\t83199825\t-115\tTTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\tarray('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\t[('MD', '33C37G29'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 191)]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 33C37G29\n", - "CIGAR string 101M\n", - "Reference seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Aligned seq: TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Qualities: array('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence before clipping (if needed):\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 101)]\n", - "Aligned sequence after clipping (if needed):\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['33', '37', '29']\n", - "[33, 71, 101]\n", - "Indicated reference seq:\n", - " tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggaga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:02<00:00, 6.78it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " chr17:008, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "tttattaatttttgcaagacttcggagggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggaga\n", - "Fixed aligned seq:\n", - " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", - "Finalized fixed aligned seq:\n", - " tttattaatttttgcaagacttcggagggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgactaaaggaga\n", - "Indicated qualities:\n", - " 3434343426343426263434343434343434343434343426343434343434343412343434343434263412343434343434343434343434343434342634341234343434343434343434342626343434342634343434342634343434343434343434341234343426\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 34\n", - "Getting info: A G 34 72\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 6C37G53\n", - "CIGAR tag 98M2S\n", - "is_reverse True\n", - "is_read1 True\n", - "is_read2 False\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:1602:67558:13893\n", - "VH01429:22:AACFJ5NHV:2:1602:67558:13893\t83\t#16\t83199866\t255\t98M2S\t#16\t83195207\t-4757\tGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGAGA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26])\t[('MD', '6C37G53'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 191), ('XS', '+')]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 6C37G53\n", - "CIGAR string 98M2S\n", - "Reference seq: GGTGCTCTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAGTACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGA\n", - "Aligned seq: GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGAGA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 98), (4, 2)]\n", - "Aligned sequence before clipping (if needed):\n", - " GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGAGA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 98)]\n", - "Aligned sequence after clipping (if needed):\n", - " GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGA\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['6', '37', '53']\n", - "[6, 44, 98]\n", - "Indicated reference seq:\n", - " ggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggagaagttggtgtcttcctcatacaaga\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ggtgctCtgtggcgattaatattgctgagctgagcacagctgcaGtactttcctgtaaaactgactaaaggagaagttggtgtcttcctcatacaaga\n", - "Fixed aligned seq:\n", - " GGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGAAGTTGGTGTCTTCCTCATACAAGA\n", - "Finalized fixed aligned seq:\n", - " ggtgctGtgtggcgattaatattgctgagctgagcacagctgcaAtactttcctgtaaaactgactaaaggagaagttggtgtcttcctcatacaaga\n", - "Indicated qualities:\n", - " 3434343434343434343434343434343434343426343434343434343434343434343434343434343434343434343434343434263434343434343434343434343434343434343434343434343434343434343434343434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A']\n", - "ref bases ['C', 'G']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A'], ref bases: ['C', 'G']\n", - "Getting info: G C 34 7\n", - "Getting info: A G 34 45\n", - "chr17:015, total reads: 16, counts_df: chr17\n", - "edited 15\n", - "no_edits 1\n", - "total_edits 32\n", - "Reads processed:\t16\n", - "Time to process reads in min:\t0.04113\n", - "Read Summary:\n", - "edited 15\n", - "no_edits 1\n", - "total_edits 32\n", - "dtype: int64\n", - "\n", - "Deleting overall_label_to_list_of_contents...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit chr17_000_0_5203591, 0/16...\n", - "\tsplit chr17_010_52035910_57239501, 10/16...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n", - "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199840~~~~~~~`\n", - "coverage_at_pos 9\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199872~~~~~~~`\n", - "coverage_at_pos 9\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199910~~~~~~~`\n", - "coverage_at_pos 7\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199912~~~~~~~`\n", - "coverage_at_pos 7\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199821~~~~~~~`\n", - "coverage_at_pos 5\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83195182~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199829~~~~~~~`\n", - "coverage_at_pos 8\n", + "\tsplit chr17_000_0_5203591, 0/16...\n", + "\tsplit chr17_010_52035910_57239501, 10/16...\n", + "Done grouping! Concatenating ...\n", + "Done concatenating!\n", + "edit_info_grouped_per_contig_combined dict_keys(['chr17_000_0_5203591', 'chr17_001_5203591_10407182', 'chr17_002_10407182_15610773', 'chr17_003_15610773_20814364', 'chr17_004_20814364_26017955', 'chr17_005_26017955_31221546', 'chr17_006_31221546_36425137', 'chr17_007_36425137_41628728', 'chr17_008_41628728_46832319', 'chr17_009_46832319_52035910', 'chr17_010_52035910_57239501', 'chr17_011_57239501_62443092', 'chr17_012_62443092_67646683', 'chr17_013_67646683_72850274', 'chr17_014_72850274_78053865', 'chr17_015_78053865_83257456'])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 16/16 [00:05<00:00, 2.88it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199930~~~~~~~`\n", - "coverage_at_pos 4\n", + "!!!!SINGLE END!!!!!\n", + "~~~~~~~`\n", "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199867~~~~~~~`\n", - "coverage_at_pos 9\n", + "!!!!SINGLE END!!!!!\n", + "~~~~~~~`\n", "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199839~~~~~~~`\n", - "coverage_at_pos 9\n" + "!!!!SINGLE END!!!!!\n", + "~~~~~~~`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:02<00:00, 6.39it/s]\n" + "100%|██████████| 16/16 [00:05<00:00, 3.07it/s]\n" ] }, { @@ -1993,47 +329,52 @@ "text": [ "Concatenating results...\n", "Done concatenating.\n", - "original 32\n", - "filtered 24\n", + "original 4\n", + "filtered 3\n", "dtype: int64\n", - "Total time to calculate coverage: 0.044 minutes\n", + "Total time to calculate coverage: 0.089 minutes\n", "Filtering..\n", "\tNumber of edits after filtering:\n", - "\t24\n", + "\t3\n", "\tNumber of unique edit sites:\n", - "\t10\n", + "\t3\n", "Writing sites...\n", "\n", "Adding strand-specific conversion...\n", "\n", - "10 sites being converted to SAILOR format...\n", + "3 sites being converted to SAILOR format...\n", + "Generating SAILOR-style bed outputs for conversion CT...\n", "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "2 final deduplicated SAILOR-formatted sites\n", + "1 final deduplicated C>T SAILOR-formatted sites\n", + "Generating SAILOR-style bed outputs for conversion AG...\n", + "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", + "0 final deduplicated A>G SAILOR-formatted sites\n", "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.432284MB; Peak: 22.541949MB\n", - "Time elapsed: 6.11s\n", + "Current memory usage 21.287284MB; Peak: 22.399873MB\n", + "Time elapsed: 12.00s\n", "-------------------------------\n", "Deleting intermediate files...\n", "-------------------------------\n", "++++++\n", "Done!\n", "++++++\n", - "tax1bp3_chr17_3665556_read_test\n", + "F1R2_pair_test-single_end_mode\n", "Removing old files...\n", "Running tests...\n", + "Python is /tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/python\n", "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/tax1bp3_chr17_3665556_read.bam\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F1R2_pair.bam\n", "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test-single_end_mode\n", "\tBarcode whitelist:\tNone\n", "\tStrandedness:\t2\n", "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", + "\tPaired End:\tFalse\n", "\tCoverage only:\tFalse\n", "\tFiltering only:\tFalse\n", "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", + "\tSailor outputs:\t['CT']\n", "\tBedgraphs:\t[]\n", "\tMinimum base quality:\t0\n", "\tMinimum read quality:\t0\n", @@ -2045,6 +386,8 @@ "\tVerbose:\tTrue\n", "\tKeep intermediate files:\tFalse\n", "\tSkip coverage?:\tFalse\n", + "\tFor single-cell: \t6 contigs at at time\n", + "\n", "\n", ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", @@ -2059,124 +402,162 @@ "~~~~~~~~~~~~~~~~~~\n", "Identifying edits\n", "~~~~~~~~~~~~~~~~~~\n", - "i_options range(0, 2)\n", - "j_options range(0, 3)\n", - "j_options range(3, 6)\n", - "broken_up_contigs [['chr17']]\n", + "Contig groups to be processed: [['chr17']]\n", "Each contig is being split into 16 subsets...\n", "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", "\tContig chr17\n", "16 total jobs\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:008, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 2C1C6G3A1G4G47\n", - "CIGAR tag 28S70M\n", - "is_reverse True\n", + "MD tag 26A20G52\n", + "CIGAR tag 1S100M\n", + "is_reverse False\n", "is_read1 True\n", "is_read2 False\n", "is_paired True\n", "is_proper_pair True\n", + "mate_is_reverse True\n", + "read id VH01429:22:AACFJ5NHV:1:2211:40480:18323\n", + "VH01429:22:AACFJ5NHV:1:2211:40480:18323\t99\t#16\t43044305\t255\t1S100M\t#16\t43044312\t107\tGTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\tarray('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\t[('MD', '26A20G52'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 190)]\n", + "reverse_or_forward: -\n", + "----------------------------\n", + "MD tag: 26A20G52\n", + "CIGAR string 1S100M\n", + "Reference seq: TGCTACCAAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACGTATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", + "Aligned seq: GTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", + "Qualities: array('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", + "CIGAR tuples before clipping (if needed):\n", + " [(4, 1), (0, 100)]\n", + "Aligned sequence before clipping (if needed):\n", + " GTGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", + "Qualities before clipping:\n", + " array('B', [12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", + "Soft clipping quality scores ...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "CIGAR tuples after clipping (if needed):\n", + " [(0, 100)]\n", + "Aligned sequence after clipping (if needed):\n", + " TGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", + "Qualities after clipping:\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 12, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34])\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "['26', '20', '52']\n", + "[26, 47, 100]\n", + "Indicated reference seq:\n", + " tgctaccaagtttatttgcagtgttaAcagcacaacatttacaaaacGtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Fixed reference seq:\n", + " tgctaccaagtttatttgcagtgttaAcagcacaacatttacaaaacGtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", + "Fixed aligned seq:\n", + " TGCTACCAAGTTTATTTGCAGTGTTACCAGCACAACATTTACAAAACATATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGA\n", + "Finalized fixed aligned seq:\n", + " tgctaccaagtttatttgcagtgttaCcagcacaacatttacaaaacAtattttgtacaatcaagtcttcactgcccttgcacactgggggggctaggga\n", + "Indicated qualities:\n", + " 34343434343434343434263434343434343434342634343434261234343434342626343434343434343434341234341234343434343434123434343434343412343434343412343434343434343434342634342634343426343426343434342634343434\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['C', 'A']\n", + "ref bases ['A', 'G']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['C', 'A'], ref bases: ['A', 'G']\n", + "Getting info: C A 12 27\n", + "Getting info: A G 12 48\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "MD tag 40G3T55\n", + "CIGAR tag 100M1S\n", + "is_reverse True\n", + "is_read1 False\n", + "is_read2 True\n", + "is_paired True\n", + "is_proper_pair True\n", "mate_is_reverse False\n", - "read id VH01429:22:AACFJ5NHV:2:2414:21053:15521\n", - "VH01429:22:AACFJ5NHV:2:2414:21053:15521\t83\t#16\t3665539\t255\t28S70M\t#16\t3665452\t-157\tCACAATAAGAGCCTATTTTGCTTCCTTAAAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\tarray('B', [26, 34, 34, 12, 12, 12, 26, 34, 34, 12, 34, 34, 12, 12, 26, 26, 12, 34, 12, 34, 12, 34, 34, 34, 12, 34, 12, 34, 34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '2C1C6G3A1G4G47'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 6), ('AS', 151)]\n", - "reverse_or_forward: +\n", + "read id VH01429:22:AACFJ5NHV:1:2211:40480:18323\n", + "VH01429:22:AACFJ5NHV:1:2211:40480:18323\t147\t#16\t43044312\t255\t100M1S\t#16\t43044305\t-107\tAAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\tarray('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\t[('MD', '40G3T55'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 2), ('AS', 190)]\n", + "reverse_or_forward: -\n", "----------------------------\n", - "MD tag: 2C1C6G3A1G4G47\n", - "CIGAR string 28S70M\n", - "Reference seq: AACGCGTGAAGGAAAATGATCAGAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", - "Aligned seq: CACAATAAGAGCCTATTTTGCTTCCTTAAAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", - "Qualities: array('B', [26, 34, 34, 12, 12, 12, 26, 34, 34, 12, 34, 34, 12, 12, 26, 26, 12, 34, 12, 34, 12, 34, 34, 34, 12, 34, 12, 34, 34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\n", + "MD tag: 40G3T55\n", + "CIGAR string 100M1S\n", + "Reference seq: AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACGTATTTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", + "Aligned seq: AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\n", + "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", "CIGAR tuples before clipping (if needed):\n", - " [(4, 28), (0, 70)]\n", + " [(0, 100), (4, 1)]\n", "Aligned sequence before clipping (if needed):\n", - " CACAATAAGAGCCTATTTTGCTTCCTTAAAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", + " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTAC\n", "Qualities before clipping:\n", - " array('B', [26, 34, 34, 12, 12, 12, 26, 34, 34, 12, 34, 34, 12, 12, 26, 26, 12, 34, 12, 34, 12, 34, 34, 34, 12, 34, 12, 34, 34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12])\n", "Soft clipping quality scores ...\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "CIGAR tuples after clipping (if needed):\n", - " [(0, 70)]\n", + " [(0, 100)]\n", "Aligned sequence after clipping (if needed):\n", - " AAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", + " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", "Qualities after clipping:\n", - " array('B', [34, 26, 12, 26, 34, 12, 34, 12, 34, 12, 26, 34, 26, 12, 34, 12, 34, 12, 12, 12, 12, 12, 12, 34, 34, 34, 34, 12, 12, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 12])\n", + " array('B', [34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34])\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['2', '1', '6', '3', '1', '4', '47']\n", - "[2, 4, 11, 15, 17, 22, 70]\n", + "['40', '3', '55']\n", + "[40, 44, 100]\n", "Indicated reference seq:\n", - " aaCgCgtgaagGaaaAtGatcaGaaaaagaaagaagccaaagagaaaggtacctgggttcaactaaagca\n", + " aagtttatttgcagtgttaacagcacaacatttacaaaacGtatTttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Fixed reference seq:\n", - " aaCgCgtgaagGaaaAtGatcaGaaaaagaaagaagccaaagagaaaggtacctgggttcaactaaagca\n", + " aagtttatttgcagtgttaacagcacaacatttacaaaacGtatTttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", "Fixed aligned seq:\n", - " AAGGTGTGAAGTAAATTAATCATAAAAAGAAAGAAGCCAAAGAGAAAGGTACCTGGGTTCAACTAAAGCA\n", + " AAGTTTATTTGCAGTGTTAACAGCACAACATTTACAAAACATATGTTGTACAATCAAGTCTTCACTGCCCTTGCACACTGGGGGGGCTAGGGAAGACCTA\n", "Finalized fixed aligned seq:\n", - " aaGgTgtgaagTaaaTtAatcaTaaaaagaaagaagccaaagagaaaggtacctgggttcaactaaagca\n", + " aagtttatttgcagtgttaacagcacaacatttacaaaacAtatGttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", "Indicated qualities:\n", - " 34261226341234123412263426123412341212121212123434343412123434341234343434343434343434343434343434343434343434343434343426263434343434343412\n", + " 34343434343434263434342634343434341234343434263434343434343434343434343426343434343434341234343434343434343434343434263412343434343434343434342634343434343434343434343434343426343434263434343434343434\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'T', 'T', 'T', 'A', 'T']\n", - "ref bases ['C', 'C', 'G', 'A', 'G', 'G']\n", + "alt bases ['A', 'G']\n", + "ref bases ['G', 'T']\n", "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'T', 'T', 'T', 'A', 'T'], ref bases: ['C', 'C', 'G', 'A', 'G', 'G']\n", - "Getting info: G C 12 3\n", - "Getting info: T C 34 5\n", - "Getting info: T G 34 12\n", - "Getting info: T A 12 16\n", - "Getting info: A G 12 18\n", - "Getting info: T G 12 23\n", - "chr17:000, total reads: 1, counts_df: chr17\n", - "edited 1\n", - "total_edits 6\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "alt bases: ['A', 'G'], ref bases: ['G', 'T']\n", + "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "Reads processed:\t1\n", - "Time to process reads in min:\t0.04167\n", + "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "Reads processed:\t2\n", + "Time to process reads in min:\t0.10064\n", "Read Summary:\n", - "edited 1\n", - "total_edits 6\n", + "edited 2\n", + "total_edits 4\n", "dtype: int64\n", "\n", "Deleting overall_label_to_list_of_contents...\n", @@ -2194,7 +575,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:02<00:00, 6.73it/s]\n" + "100%|██████████| 16/16 [00:05<00:00, 2.73it/s]\n" ] }, { @@ -2202,42 +583,21 @@ "output_type": "stream", "text": [ "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", - " chr17:3665541~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", - " chr17:3665543~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", - " chr17:3665550~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", - " chr17:3665554~~~~~~~`\n", - "coverage_at_pos 1\n", + "!!!!SINGLE END!!!!!\n", + "~~~~~~~`\n", "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", - " chr17:3665556~~~~~~~`\n", - "coverage_at_pos 1\n", + "!!!!SINGLE END!!!!!\n", + "~~~~~~~`\n", "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", - " chr17:3665561~~~~~~~`\n", - "coverage_at_pos 1\n" + "!!!!SINGLE END!!!!!\n", + "~~~~~~~`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:02<00:00, 6.51it/s]\n" + "100%|██████████| 16/16 [00:05<00:00, 2.78it/s]\n" ] }, { @@ -2246,135 +606,31 @@ "text": [ "Concatenating results...\n", "Done concatenating.\n", - "original 6\n", - "filtered 6\n", + "original 4\n", + "filtered 3\n", "dtype: int64\n", - "Total time to calculate coverage: 0.043 minutes\n", + "Total time to calculate coverage: 0.099 minutes\n", "Filtering..\n", "\tNumber of edits after filtering:\n", - "\t6\n", + "\t3\n", "\tNumber of unique edit sites:\n", - "\t6\n", + "\t3\n", "Writing sites...\n", "\n", "Adding strand-specific conversion...\n", "\n", - "6 sites being converted to SAILOR format...\n", + "3 sites being converted to SAILOR format...\n", + "Generating SAILOR-style bed outputs for conversion CT...\n", "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "1 final deduplicated SAILOR-formatted sites\n", + "1 final deduplicated C>T SAILOR-formatted sites\n", "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 21.403118MB; Peak: 22.513332MB\n", - "Time elapsed: 5.99s\n", + "Current memory usage 21.285468MB; Peak: 22.398141MB\n", + "Time elapsed: 12.79s\n", "-------------------------------\n", "Deleting intermediate files...\n", "-------------------------------\n", "++++++\n", "Done!\n", - "++++++\n", - "pair_test\n", - "Removing old files...\n", - "Running tests...\n", - "Assuming 1 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", - "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/pair_example_18_49488551_49590000.sorted.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/pair_test\n", - "\tBarcode whitelist:\tNone\n", - "\tStrandedness:\t2\n", - "\tBarcode Tag:\tNone\n", - "\tPaired End:\tTrue\n", - "\tCoverage only:\tFalse\n", - "\tFiltering only:\tFalse\n", - "\tAnnotation only:\tFalse\n", - "\tSailor outputs:\tTrue\n", - "\tBedgraphs:\t[]\n", - "\tMinimum base quality:\t0\n", - "\tMinimum read quality:\t0\n", - "\tMinimum distance from end:\t0\n", - "\tMaximum edits per read:\tNone\n", - "\tContigs:\t18\n", - "\tNumber of intervals:\t1\n", - "\tCores:\t1\n", - "\tVerbose:\tFalse\n", - "\tKeep intermediate files:\tTrue\n", - "\tSkip coverage?:\tFalse\n", - "\n", - ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", - "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", - "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", - "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", - "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", - "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", - "### ### ### ### ### ### ########### ### #### ########## \n", - "==================================================================\n", - "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", - "==================================================================\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "WARNING /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/pair_test is not empty\n", - "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "~~~~~~~~~~~~~~~~~~\n", - "Identifying edits\n", - "~~~~~~~~~~~~~~~~~~\n", - "i_options range(0, 2)\n", - "j_options range(0, 3)\n", - "j_options range(3, 6)\n", - "broken_up_contigs [['18']]\n", - "Each contig is being split into 1 subsets...\n", - "\tContig 18\n", - "1 total jobs\n", - "Reads processed:\t10\n", - "Time to process reads in min:\t0.02795\n", - "Read Summary:\n", - "edited 6\n", - "no_edits 2\n", - "secondary 2\n", - "total_edits 6\n", - "dtype: int64\n", - "\n", - "Deleting overall_label_to_list_of_contents...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Calculating coverage at edited sites, minimum read quality is 0...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "\tsplit 18_000_0_80373285, 0/1...\n", - "Done grouping! Concatenating ...\n", - "Done concatenating!\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:01<00:00, 1.64s/it]\n", - "100%|██████████| 1/1 [00:01<00:00, 1.68s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Concatenating results...\n", - "Done concatenating.\n", - "original 6\n", - "filtered 4\n", - "dtype: int64\n", - "Total time to calculate coverage: 0.028 minutes\n", - "Filtering..\n", - "\tNumber of edits after filtering:\n", - "\t4\n", - "\tNumber of unique edit sites:\n", - "\t2\n", - "Writing sites...\n", - "\n", - "Adding strand-specific conversion...\n", - "\n", - "2 sites being converted to SAILOR format...\n", - "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", - "2 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed...\n", - "Current memory usage 21.317943MB; Peak: 22.4279MB\n", - "Time elapsed: 4.15s\n", - "++++++\n", - "Done!\n", "++++++\n" ] } @@ -2393,7 +649,7 @@ "export MARINE=/tscc/projects/ps-yeolab3/ekofman/sailor2/\n", "export mypython=/tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/python\n", "\n", - "for t in \"sailor_test\" #\"unstranded_pair_test\" \"F1R2_pair_test-single_end_mode\" \"F1R2_pair_test\" \"F2R1_end_second_in_pair_test\" \"same_pos_dif_reads_test\" \"tax1bp3_chr17_3665556_read_test\" \"pair_test\"\n", + "for t in \"F1R2_pair_test-single_end_mode_sailor\" \"F1R2_pair_test-single_end_mode\" #\"unstranded_pair_test\" \"F1R2_pair_test\" \"F2R1_end_second_in_pair_test\" \"same_pos_dif_reads_test\" \"tax1bp3_chr17_3665556_read_test\" \"pair_test\"\n", "do\n", " echo $t\n", " echo \"Removing old files...\"\n", @@ -2712,7 +968,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 9, "id": "f5c3ed3e-13dd-4399-924e-3d1ac17ce387", "metadata": {}, "outputs": [ @@ -2747,6 +1003,13 @@ "\n", "\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Checking results for F1R2_pair_test-single_end_mode_sailor\n", + "\tExpecting: {'contig': 'chr17', 'position': 43044352, 'count': 1, 'coverage': 2, 'conversion': 'G>A', 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '-', 'feature_name': 'BRCA1', 'feature_strand': '-'}\n", + "\n", + "\t >>> F1R2_pair_test-single_end_mode_sailor passed! <<<\n", + "\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Checking results for F1R2_pair_test-single_end_mode\n", "\tExpecting: {'contig': 'chr17', 'position': 43044352, 'count': 1, 'coverage': 2, 'conversion': 'G>A', 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '-', 'feature_name': 'BRCA1', 'feature_strand': '-'}\n", "\n", @@ -2864,6 +1127,24 @@ " }]\n", " },\n", "\n", + " \"F1R2_pair_test-single_end_mode_sailor\": {\n", + " \"folder\": \"strandedness_tests\",\n", + " \"expectations\": [{\n", + " \"contig\": \"chr17\",\n", + " \"position\": 43044352,\n", + " \"count\": 1,\n", + " \"coverage\": 2,\n", + " \"conversion\": \"G>A\",\n", + " \"num_rows\": 1,\n", + " \"conversion\": \"G>A\",\n", + " \"strand_conversion\": \"C>T\",\n", + " \"strand\": \"-\",\n", + " \"feature_name\": \"BRCA1\",\n", + " \"feature_strand\": \"-\"\n", + " }]\n", + " },\n", + "\n", + " \n", " \"F1R2_pair_test-single_end_mode\": {\n", " \"folder\": \"strandedness_tests\",\n", " \"expectations\": [{\n", diff --git a/tests/integration_tests_auto_check.py b/tests/integration_tests_auto_check.py index 8c49e6a..045c736 100755 --- a/tests/integration_tests_auto_check.py +++ b/tests/integration_tests_auto_check.py @@ -56,6 +56,23 @@ "feature_strand": "-" }] }, + + "F1R2_pair_test-single_end_mode_sailor": { + "folder": "strandedness_tests", + "expectations": [{ + "contig": "chr17", + "position": 43044352, + "count": 1, + "coverage": 2, + "conversion": "G>A", + "num_rows": 1, + "conversion": "G>A", + "strand_conversion": "C>T", + "strand": "-", + "feature_name": "BRCA1", + "feature_strand": "-" + }] + }, "F1R2_pair_test-single_end_mode": { "folder": "strandedness_tests", diff --git a/tests/integration_tests_run.sh b/tests/integration_tests_run.sh index 93bad69..33e77f1 100755 --- a/tests/integration_tests_run.sh +++ b/tests/integration_tests_run.sh @@ -13,7 +13,7 @@ tests_folder="strandedness_tests/" echo "Bulk tests scripts" ls -lh $MARINE/tests/$tests_folder/scripts/ -for t in "F1R2_pair_test-single_end_mode" "F1R2_pair_test" "F2R1_end_second_in_pair_test" "same_pos_dif_reads_test" "tax1bp3_chr17_3665556_read_test" "pair_test" "unstranded_pair_test" +for t in "F1R2_pair_test-single_end_mode_sailor" "F1R2_pair_test-single_end_mode" "F1R2_pair_test" "F2R1_end_second_in_pair_test" "same_pos_dif_reads_test" "tax1bp3_chr17_3665556_read_test" "pair_test" "unstranded_pair_test" do echo $t echo "Removing old files..."