From 60d2249cda87b687d0522a4a1a6802d1300bd414 Mon Sep 17 00:00:00 2001 From: Eric Kofman Date: Thu, 11 Jul 2024 15:13:12 -0700 Subject: [PATCH 1/3] updates and readme --- README.md | 12 +- marine.py | 1 + marine_environment2.yaml | 2 +- marine_environment_3_10.yml | 2 +- src/annotate.py | 19 +- tests/integration_tests.ipynb | 896 ++++++++++-------- tests/integration_tests_auto_check.py | 12 + tests/integration_tests_run.sh | 26 +- tests/strandedness_tests/README.txt | 11 +- tests/strandedness_tests/bams/citrine435.bam | Bin 0 -> 2562 bytes .../bams/citrine435.bam.bai | Bin 0 -> 96 bytes .../scripts/F1R2_pair_test-single_end_mode.sh | 1 + .../scripts/unstranded_pair_test.sh | 19 + .../final_filtered_site_info.tsv | 15 + 14 files changed, 611 insertions(+), 405 deletions(-) create mode 100644 tests/strandedness_tests/bams/citrine435.bam create mode 100644 tests/strandedness_tests/bams/citrine435.bam.bai create mode 100644 tests/strandedness_tests/scripts/unstranded_pair_test.sh create mode 100644 tests/strandedness_tests/unstranded_pair_test/final_filtered_site_info.tsv diff --git a/README.md b/README.md index 39f9a64..48cdef4 100644 --- a/README.md +++ b/README.md @@ -12,14 +12,22 @@ MARINE was developed and tested using Python 3.8.18 and is not guaranteed to wor Use the provided .yml file to create a new conda environment that contains all required dependencies, including the proper Python version, for MARINE: ``` -conda env create --file=marine_environment.yml +conda env create --file=marine_environment2.yaml +conda activate marine_environment +``` + +or if you encounter problems with this approach, try using mamba instead, which can be faster: + +``` +mamba create -n marine_environment python=3.8.18 -y +mamba env update -n marine_environment --file marine_environment2.yaml conda activate marine_environment ``` Notes: * The more cores used, the faster MARINE will run * Ensure that your annotation bedfile has the same chromosome nomenclature (e.g., "9" vs "chr9") as your bam -* The annotation file should be tab-separated and should have a standard bed column ordering, as follows: +* The annotation bedfile should be tab-separated and should have a standard bed6 column ordering, as follows: ``` 1 29554 31109 MIR1302-2HG lincRNA + 1 34554 36081 FAM138A lincRNA - diff --git a/marine.py b/marine.py index 07b5532..35b70b0 100755 --- a/marine.py +++ b/marine.py @@ -16,6 +16,7 @@ import tracemalloc from matplotlib import pyplot as plt + sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'src/')) from read_process import incorporate_replaced_pos_info,incorporate_insertions_and_deletions,\ diff --git a/marine_environment2.yaml b/marine_environment2.yaml index 5eacb1a..0e4315f 100644 --- a/marine_environment2.yaml +++ b/marine_environment2.yaml @@ -215,7 +215,7 @@ dependencies: - python-json-logger=2.0.7=py38h06a4308_0 - python-tzdata=2023.3=pyhd3eb1b0_0 - python_abi=3.8=4_cp38 - - pytz=2023.3.post1=py38h06a4308_0 + - pytz - pyyaml=6.0.1=py38h5eee18b_0 - pyzmq=25.1.0=py38h6a678d5_0 - qt-main=5.15.2=h53bd1ea_10 diff --git a/marine_environment_3_10.yml b/marine_environment_3_10.yml index 7193e69..1b212fa 100644 --- a/marine_environment_3_10.yml +++ b/marine_environment_3_10.yml @@ -106,7 +106,7 @@ dependencies: - pyopenssl - pysam - pysocks - - python + - python - python-dateutil - python-tzdata - python_abi diff --git a/src/annotate.py b/src/annotate.py index 71364af..429b798 100644 --- a/src/annotate.py +++ b/src/annotate.py @@ -2,6 +2,12 @@ import pandas as pd import pybedtools +import os +import sys + +bin_dir = os.path.dirname(sys.executable) +pybedtools.set_bedtools_path(bin_dir) + import pandas as pd @@ -10,7 +16,7 @@ def make_bedtool_from_final_sites(df): return pybedtools.BedTool.from_dataframe(df_bed_cols) -def get_strand_specific_conversion(r, reverse_stranded): +def get_strand_specific_conversion(r, strandedness): ref_alt_dict = { 'A': 'T', 'T': 'A', @@ -22,14 +28,9 @@ def get_strand_specific_conversion(r, reverse_stranded): alt = r.alt mapped_strand = r.strand - """ - if reverse_stranded: - if mapped_strand == '+': - mapped_strand = '-' - elif mapped_strand == '-': - mapped_strand = '+' - """ - + if strandedness == 0: + return '{}>{}'.format(ref, alt) + if mapped_strand == '+': return '{}>{}'.format( ref, diff --git a/tests/integration_tests.ipynb b/tests/integration_tests.ipynb index 2e09d14..2f9458e 100644 --- a/tests/integration_tests.ipynb +++ b/tests/integration_tests.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 12, + "execution_count": 3, "id": "9f2684b1-dbad-45d5-bdb5-b83989bec7dc", "metadata": {}, "outputs": [], @@ -10,6 +10,27 @@ "import pandas as pd" ] }, + { + "cell_type": "code", + "execution_count": 4, + "id": "63b5feff-aa8a-417c-8c93-9b9d74e5dee9", + "metadata": {}, + "outputs": [], + "source": [ + "import pybedtools\n", + "pybedtools.set_bedtools_path('/tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f6012705-9d34-4997-8681-c7bbcc4f008b", + "metadata": {}, + "outputs": [], + "source": [ + "b = pybedtools.BedTool()" + ] + }, { "cell_type": "markdown", "id": "f1b96ffb-37e9-4408-9655-669a63004186", @@ -39,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 26, "id": "ba12c2b8-2ccb-4866-86c8-a2284fd1229f", "metadata": { "scrolled": true @@ -52,14 +73,136 @@ "/tscc/projects/ps-yeolab3/ekofman/sailor2/tests\n", "Running\n", "/tscc/projects/ps-yeolab3/ekofman/sailor2\n", - "F1R2_pair_test-single_end_mode\n", + "unstranded_pair_test\n", "Removing old files...\n", "Running tests...\n", + "Assuming 1 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", + "Arguments:\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/citrine435.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/unstranded_pair_test\n", + "\tBarcode whitelist:\tNone\n", + "\tStrandedness:\t0\n", + "\tBarcode Tag:\tNone\n", + "\tPaired End:\tTrue\n", + "\tCoverage only:\tFalse\n", + "\tFiltering only:\tFalse\n", + "\tAnnotation only:\tFalse\n", + "\tSailor outputs:\tTrue\n", + "\tMinimum base quality:\t0\n", + "\tMinimum read quality:\t0\n", + "\tMinimum distance from end:\t0\n", + "\tmax_edits_per_read:\tNone\n", + "\tContigs:\tall\n", + "\tNumber of intervals:\t1\n", + "\tCores:\t1\n", + "\tVerbose:\tFalse\n", + "\tSkip coverage?:\tFalse\n", + "\n", + ":::: :::: ::: ::::::::: ::::::::::: :::: ::: :::::::::: \n", + "+:+:+: :+:+:+ :+: :+: :+: :+: :+: :+:+: :+: :+: \n", + "+:+ +:+:+ +:+ +:+ +:+ +:+ +:+ +:+ :+:+:+ +:+ +:+ \n", + "+#+ +:+ +#+ +#++:++#++: +#++:++#: +#+ +#+ +:+ +#+ +#++:++# \n", + "+#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+ +#+#+# +#+ \n", + "#+# #+# #+# #+# #+# #+# #+# #+# #+#+# #+# \n", + "### ### ### ### ### ### ########### ### #### ########## \n", + "==================================================================\n", + "Multi-core Algorithm for Rapid Identification of Nucleotide Edits\n", + "==================================================================\n", + "~~~~~~~~~~~~~~~~~~\n", + "Identifying edits\n", + "~~~~~~~~~~~~~~~~~~\n", + "Each contig is being split into 1 subsets...\n", + "\tContig Citrine.dna\n", + "1 total jobs\n", + "Reads processed:\t44\n", + "Time to process reads in min:\t0.03751\n", + "Summary:\t Citrine.dna\n", + "edited 25\n", + "no_edits 19\n", + "\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Calculating coverage at edited sites, minimum read quality is 0...\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\tsplit Citrine.dna_000_0_741, 0/1...\n", + "Done grouping! Concatenating ...\n", + "Done concatenating!\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:02<00:00, 2.20s/it]\n", + "100%|██████████| 1/1 [00:02<00:00, 2.22s/it]\n", + "***** WARNING: File /tmp/pybedtools.ab0mhyn2.tmp has inconsistent naming convention for record:\n", + "Citrine.dna\t436\t438\tno_barcode_Citrine.dna_437_A_G_+\tA>G\t+\n", + "\n", + "***** WARNING: File /tmp/pybedtools.ab0mhyn2.tmp has inconsistent naming convention for record:\n", + "Citrine.dna\t436\t438\tno_barcode_Citrine.dna_437_A_G_+\tA>G\t+\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Concatenating results...\n", + "Done concatenating.\n", + "Total time to calculate coverage: 0.037 minutes\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Filtering and calculating site-level statistics\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "Loading edit information...\n", + "Filtering..\n", + "~~~~~~~~~~~~~~~~~~~~~~~\n", + "Filtering edited sites\n", + "~~~~~~~~~~~~~~~~~~~~~~~\n", + "Minimum distance from end = 0, Minimum base-calling quality = 0\n", + "Deduplicating....\n", + "\tNumber of edits before filtering:\n", + "\t37\n", + "\tNumber of edits after filtering:\n", + "\t37\n", + "\tNumber of edits after deduplicating:\n", + "\t37\n", + "\tNumber of unique edit sites:\n", + "\t14\n", + "Writing sites...\n", + "\n", + "Adding strand-specific conversion...\n", + "\n", + "14 sites being converted to SAILOR format...\n", + "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", + "2 final deduplicated SAILOR-formatted sites\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.57409MB; Peak: 22.682477MB\n", + "Time elapsed: 5.77s\n", + "++++++\n", + "Done!\n", + "++++++\n", + "F1R2_pair_test-single_end_mode\n", + "Removing old files...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "rm: cannot remove 'tests/strandedness_tests/F1R2_pair_test-single_end_mode/*': No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running tests...\n", + "Python is /tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/python\n", "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/bams/F1R2_pair.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/F1R2_pair_test-single_end_mode\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F1R2_pair.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test-single_end_mode\n", "\tBarcode whitelist:\tNone\n", "\tStrandedness:\t2\n", "\tBarcode Tag:\tNone\n", @@ -199,57 +342,57 @@ "ref bases ['G', 'T']\n", "Successfully ran get_edit_information_wrapper\n", "alt bases: ['A', 'G'], ref bases: ['G', 'T']\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "Getting info: A G 34 41\n", "Getting info: G T 12 45\n", "chr17:008, total reads: 2, counts_df: chr17\n", "edited 2\n", - "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", "Reads processed:\t2\n", - "Time to process reads in min:\t0.03172\n", + "Time to process reads in min:\t0.02926\n", "Summary:\t chr17\n", "edited 2\n", "\n", @@ -267,7 +410,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:01<00:00, 9.14it/s]\n" + "100%|██████████| 16/16 [00:01<00:00, 9.80it/s]\n" ] }, { @@ -289,7 +432,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:01<00:00, 13.16it/s]\n" + "100%|██████████| 16/16 [00:01<00:00, 11.70it/s]\n" ] }, { @@ -298,7 +441,7 @@ "text": [ "Concatenating results...\n", "Done concatenating.\n", - "Total time to calculate coverage: 0.022 minutes\n", + "Total time to calculate coverage: 0.024 minutes\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Filtering and calculating site-level statistics\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", @@ -324,9 +467,9 @@ "3 sites being converted to SAILOR format...\n", "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", "1 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 22.673795MB; Peak: 23.786047MB\n", - "Time elapsed: 4.97s\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.384422MB; Peak: 22.496814MB\n", + "Time elapsed: 4.34s\n", "++++++\n", "Done!\n", "++++++\n", @@ -335,9 +478,9 @@ "Running tests...\n", "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/bams/F1R2_pair.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/F1R2_pair_test\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F1R2_pair.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test\n", "\tBarcode whitelist:\tNone\n", "\tStrandedness:\t2\n", "\tBarcode Tag:\tNone\n", @@ -373,6 +516,12 @@ "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", "\tContig chr17\n", "16 total jobs\n", + "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "MD tag 26A20G52\n", "CIGAR tag 1S100M\n", @@ -472,62 +621,56 @@ " aagtttatttgcagtgttaacagcacaacatttacaaaacAtatGttgtacaatcaagtcttcactgcccttgcacactgggggggctagggaagaccta\n", "Indicated qualities:\n", " 34343434343434263434342634343434341234343434263434343434343434343434343426343434343434341234343434343434343434343434263412343434343434343434342634343434343434343434343434343426343434263434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['A', 'G']\n", - "ref bases ['G', 'T']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['A', 'G'], ref bases: ['G', 'T']\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "alt bases ['A', 'G']\n", + "ref bases ['G', 'T']\n", + "Successfully ran get_edit_information_wrapper\n", + "alt bases: ['A', 'G'], ref bases: ['G', 'T']\n", "Getting info: A G 34 41\n", "Getting info: G T 12 45\n", "chr17:008, total reads: 2, counts_df: chr17\n", "edited 2\n", "Reads processed:\t2\n", - "Time to process reads in min:\t0.02362\n", + "Time to process reads in min:\t0.02389\n", "Summary:\t chr17\n", "edited 2\n", "\n", @@ -545,7 +688,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:01<00:00, 12.29it/s]\n" + "100%|██████████| 16/16 [00:01<00:00, 12.10it/s]\n" ] }, { @@ -554,17 +697,17 @@ "text": [ "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", " chr17:43044352~~~~~~~`\n", "coverage_at_pos 1\n", "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", " chr17:43044331~~~~~~~`\n", "coverage_at_pos 1\n", "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F1R2_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", " chr17:43044356~~~~~~~`\n", "coverage_at_pos 1\n" ] @@ -573,7 +716,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:01<00:00, 12.56it/s]\n" + "100%|██████████| 16/16 [00:01<00:00, 12.33it/s]\n" ] }, { @@ -608,20 +751,33 @@ "3 sites being converted to SAILOR format...\n", "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", "1 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 22.673267MB; Peak: 23.78545MB\n", - "Time elapsed: 4.35s\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.381979MB; Peak: 22.494346MB\n", + "Time elapsed: 3.76s\n", "++++++\n", "Done!\n", "++++++\n", "F2R1_end_second_in_pair_test\n", - "Removing old files...\n", + "Removing old files...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "rm: cannot remove 'tests/strandedness_tests/F2R1_end_second_in_pair_test/*': No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Running tests...\n", "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/bams/F2R1_end_second_in_pair.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/F2R1_end_second_in_pair_test\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/F2R1_end_second_in_pair.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test\n", "\tBarcode whitelist:\tNone\n", "\tStrandedness:\t2\n", "\tBarcode Tag:\tNone\n", @@ -657,109 +813,47 @@ "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", "\tContig chr17\n", "16 total jobs\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "MD tag 10A0G0A84\n", - "CIGAR tag 13M198556N84M1S\n", - "is_reverse False\n", - "is_read1 False\n", - "is_read2 True\n", - "is_paired True\n", - "is_proper_pair True\n", - "mate_is_reverse True\n", - "read id VH01429:22:AACFJ5NHV:1:1112:41030:25422\n", - "VH01429:22:AACFJ5NHV:1:1112:41030:25422\t163\t#16\t43001705\t255\t13M198556N84M1S\t#16\t43200271\t198653\tGGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGCA\tarray('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\t[('MD', '10A0G0A84'), ('PG', 'MarkDuplicates'), ('RG', 'ET_STAMPExpt_1-CTRL_S2'), ('NH', 1), ('HI', 1), ('NM', 3), ('AS', 172), ('XS', '+')]\n", - "reverse_or_forward: +\n", - "----------------------------\n", - "MD tag: 10A0G0A84\n", - "CIGAR string 13M198556N84M1S\n", - "Reference seq: GGCTGTCATGAGAGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGC\n", - "Aligned seq: GGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGCA\n", - "Qualities: array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "CIGAR tuples before clipping (if needed):\n", - " [(0, 13), (3, 198556), (0, 84), (4, 1)]\n", - "Aligned sequence before clipping (if needed):\n", - " GGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGCA\n", - "Qualities before clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "Soft clipping quality scores ...\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "CIGAR tuples after clipping (if needed):\n", - " [(0, 13), (3, 198556), (0, 84)]\n", - "Aligned sequence after clipping (if needed):\n", - " GGCTGTCATGGAGGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGC\n", - "Qualities after clipping:\n", - " array('B', [34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 12, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34])\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "['10', '0', '0', '84']\n", - "[10, 11, 12, 97]\n", - "Indicated reference seq:\n", - " ggctgtcatgAGAgaggaggaggatgagctcaaagatgaagttcaaagtcagtcctctgcttcctcagaggattacatcatcatcctgcctgagtgc\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "Fixed reference seq:\n", - " ggctgtcatgAGA198556*ngaggaggaggatgagctcaaagatgaagttcaaagtcagtcctctgcttcctcagaggattacatcatcatcctgcctgagtgc\n", - "Fixed aligned seq:\n", - " GGCTGTCATGGAG198556*nGAGGAGGAGGATGAGCTCAAAGATGAAGTTCAAAGTCAGTCCTCTGCTTCCTCAGAGGATTACATCATCATCCTGCCTGAGTGC\n", - "Finalized fixed aligned seq:\n", - " ggctgtcatgGAG198556*ngaggaggaggatgagctcaaagatgaagttcaaagtcagtcctctgcttcctcagaggattacatcatcatcctgcctgagtgc\n", - "Indicated qualities:\n", - " 34343434343434343434263434343434343434342634342634343434343434343434343434343434343434343434123434343434343426343434342634343434343434342634343434342634343434123434123434343434343434343434343434\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "alt bases ['G', 'A', 'G']\n", - "ref bases ['A', 'G', 'A']\n", - "Successfully ran get_edit_information_wrapper\n", - "alt bases: ['G', 'A', 'G'], ref bases: ['A', 'G', 'A']\n", - "Getting info: G A 26 11\n", - "Getting info: A G 34 12\n", - "Getting info: G A 34 13\n", - "chr17:008, total reads: 1, counts_df: chr17\n", - "edited 1\n", "Reads processed:\t1\n", - "Time to process reads in min:\t0.02365\n", + "Time to process reads in min:\t0.02508\n", "Summary:\t chr17\n", "edited 1\n", "\n", @@ -777,7 +871,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:01<00:00, 12.55it/s]\n" + "100%|██████████| 16/16 [00:01<00:00, 11.56it/s]\n" ] }, { @@ -786,17 +880,17 @@ "text": [ "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", " chr17:43001715~~~~~~~`\n", "coverage_at_pos 1\n", "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", " chr17:43001716~~~~~~~`\n", "coverage_at_pos 1\n", "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/F2R1_end_second_in_pair_test/split_bams/chr17/chr17_008_41628728_46832319.bam.sorted.bam\n", " chr17:43001717~~~~~~~`\n", "coverage_at_pos 1\n" ] @@ -805,7 +899,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:01<00:00, 12.65it/s]\n" + "100%|██████████| 16/16 [00:01<00:00, 11.61it/s]\n" ] }, { @@ -814,7 +908,7 @@ "text": [ "Concatenating results...\n", "Done concatenating.\n", - "Total time to calculate coverage: 0.023 minutes\n", + "Total time to calculate coverage: 0.024 minutes\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Filtering and calculating site-level statistics\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", @@ -840,20 +934,33 @@ "3 sites being converted to SAILOR format...\n", "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", "0 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 22.804306MB; Peak: 23.915032MB\n", - "Time elapsed: 4.41s\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.514482MB; Peak: 22.624894MB\n", + "Time elapsed: 4.16s\n", "++++++\n", "Done!\n", "++++++\n", "same_pos_dif_reads_test\n", - "Removing old files...\n", + "Removing old files...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "rm: cannot remove 'tests/strandedness_tests/same_pos_dif_reads_test/*': No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Running tests...\n", "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/bams/same_pos_dif_reads.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/same_pos_dif_reads.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/same_pos_dif_reads_test\n", "\tBarcode whitelist:\tNone\n", "\tStrandedness:\t2\n", "\tBarcode Tag:\tNone\n", @@ -887,50 +994,17 @@ "~~~~~~~~~~~~~~~~~~\n", "Each contig is being split into 16 subsets...\n", "contig_lengths_dict:{'chr1': 248956422, 'chr2': 242193529, 'chr3': 198295559, 'chr4': 190214555, 'chr5': 181538259, 'chr6': 170805979, 'chr7': 159345973, 'chr8': 145138636, 'chr9': 138394717, 'chr10': 133797422, 'chr11': 135086622, 'chr12': 133275309, 'chr13': 114364328, 'chr14': 107043718, 'chr15': 101991189, 'chr16': 90338345, 'chr17': 83257441, 'chr18': 80373285, 'chr19': 58617616, 'chr20': 64444167, 'chr21': 46709983, 'chr22': 50818468, 'chrX': 156040895, 'chrY': 57227415, 'chrM': 16569, 'chr1_KI270706v1_random': 175055, 'chr1_KI270707v1_random': 32032, 'chr1_KI270708v1_random': 127682, 'chr1_KI270709v1_random': 66860, 'chr1_KI270710v1_random': 40176, 'chr1_KI270711v1_random': 42210, 'chr1_KI270712v1_random': 176043, 'chr1_KI270713v1_random': 40745, 'chr1_KI270714v1_random': 41717, 'chr2_KI270715v1_random': 161471, 'chr2_KI270716v1_random': 153799, 'chr3_GL000221v1_random': 155397, 'chr4_GL000008v2_random': 209709, 'chr5_GL000208v1_random': 92689, 'chr9_KI270717v1_random': 40062, 'chr9_KI270718v1_random': 38054, 'chr9_KI270719v1_random': 176845, 'chr9_KI270720v1_random': 39050, 'chr11_KI270721v1_random': 100316, 'chr14_GL000009v2_random': 201709, 'chr14_GL000225v1_random': 211173, 'chr14_KI270722v1_random': 194050, 'chr14_GL000194v1_random': 191469, 'chr14_KI270723v1_random': 38115, 'chr14_KI270724v1_random': 39555, 'chr14_KI270725v1_random': 172810, 'chr14_KI270726v1_random': 43739, 'chr15_KI270727v1_random': 448248, 'chr16_KI270728v1_random': 1872759, 'chr17_GL000205v2_random': 185591, 'chr17_KI270729v1_random': 280839, 'chr17_KI270730v1_random': 112551, 'chr22_KI270731v1_random': 150754, 'chr22_KI270732v1_random': 41543, 'chr22_KI270733v1_random': 179772, 'chr22_KI270734v1_random': 165050, 'chr22_KI270735v1_random': 42811, 'chr22_KI270736v1_random': 181920, 'chr22_KI270737v1_random': 103838, 'chr22_KI270738v1_random': 99375, 'chr22_KI270739v1_random': 73985, 'chrY_KI270740v1_random': 37240, 'chrUn_KI270302v1': 2274, 'chrUn_KI270304v1': 2165, 'chrUn_KI270303v1': 1942, 'chrUn_KI270305v1': 1472, 'chrUn_KI270322v1': 21476, 'chrUn_KI270320v1': 4416, 'chrUn_KI270310v1': 1201, 'chrUn_KI270316v1': 1444, 'chrUn_KI270315v1': 2276, 'chrUn_KI270312v1': 998, 'chrUn_KI270311v1': 12399, 'chrUn_KI270317v1': 37690, 'chrUn_KI270412v1': 1179, 'chrUn_KI270411v1': 2646, 'chrUn_KI270414v1': 2489, 'chrUn_KI270419v1': 1029, 'chrUn_KI270418v1': 2145, 'chrUn_KI270420v1': 2321, 'chrUn_KI270424v1': 2140, 'chrUn_KI270417v1': 2043, 'chrUn_KI270422v1': 1445, 'chrUn_KI270423v1': 981, 'chrUn_KI270425v1': 1884, 'chrUn_KI270429v1': 1361, 'chrUn_KI270442v1': 392061, 'chrUn_KI270466v1': 1233, 'chrUn_KI270465v1': 1774, 'chrUn_KI270467v1': 3920, 'chrUn_KI270435v1': 92983, 'chrUn_KI270438v1': 112505, 'chrUn_KI270468v1': 4055, 'chrUn_KI270510v1': 2415, 'chrUn_KI270509v1': 2318, 'chrUn_KI270518v1': 2186, 'chrUn_KI270508v1': 1951, 'chrUn_KI270516v1': 1300, 'chrUn_KI270512v1': 22689, 'chrUn_KI270519v1': 138126, 'chrUn_KI270522v1': 5674, 'chrUn_KI270511v1': 8127, 'chrUn_KI270515v1': 6361, 'chrUn_KI270507v1': 5353, 'chrUn_KI270517v1': 3253, 'chrUn_KI270529v1': 1899, 'chrUn_KI270528v1': 2983, 'chrUn_KI270530v1': 2168, 'chrUn_KI270539v1': 993, 'chrUn_KI270538v1': 91309, 'chrUn_KI270544v1': 1202, 'chrUn_KI270548v1': 1599, 'chrUn_KI270583v1': 1400, 'chrUn_KI270587v1': 2969, 'chrUn_KI270580v1': 1553, 'chrUn_KI270581v1': 7046, 'chrUn_KI270579v1': 31033, 'chrUn_KI270589v1': 44474, 'chrUn_KI270590v1': 4685, 'chrUn_KI270584v1': 4513, 'chrUn_KI270582v1': 6504, 'chrUn_KI270588v1': 6158, 'chrUn_KI270593v1': 3041, 'chrUn_KI270591v1': 5796, 'chrUn_KI270330v1': 1652, 'chrUn_KI270329v1': 1040, 'chrUn_KI270334v1': 1368, 'chrUn_KI270333v1': 2699, 'chrUn_KI270335v1': 1048, 'chrUn_KI270338v1': 1428, 'chrUn_KI270340v1': 1428, 'chrUn_KI270336v1': 1026, 'chrUn_KI270337v1': 1121, 'chrUn_KI270363v1': 1803, 'chrUn_KI270364v1': 2855, 'chrUn_KI270362v1': 3530, 'chrUn_KI270366v1': 8320, 'chrUn_KI270378v1': 1048, 'chrUn_KI270379v1': 1045, 'chrUn_KI270389v1': 1298, 'chrUn_KI270390v1': 2387, 'chrUn_KI270387v1': 1537, 'chrUn_KI270395v1': 1143, 'chrUn_KI270396v1': 1880, 'chrUn_KI270388v1': 1216, 'chrUn_KI270394v1': 970, 'chrUn_KI270386v1': 1788, 'chrUn_KI270391v1': 1484, 'chrUn_KI270383v1': 1750, 'chrUn_KI270393v1': 1308, 'chrUn_KI270384v1': 1658, 'chrUn_KI270392v1': 971, 'chrUn_KI270381v1': 1930, 'chrUn_KI270385v1': 990, 'chrUn_KI270382v1': 4215, 'chrUn_KI270376v1': 1136, 'chrUn_KI270374v1': 2656, 'chrUn_KI270372v1': 1650, 'chrUn_KI270373v1': 1451, 'chrUn_KI270375v1': 2378, 'chrUn_KI270371v1': 2805, 'chrUn_KI270448v1': 7992, 'chrUn_KI270521v1': 7642, 'chrUn_GL000195v1': 182896, 'chrUn_GL000219v1': 179198, 'chrUn_GL000220v1': 161802, 'chrUn_GL000224v1': 179693, 'chrUn_KI270741v1': 157432, 'chrUn_GL000226v1': 15008, 'chrUn_GL000213v1': 164239, 'chrUn_KI270743v1': 210658, 'chrUn_KI270744v1': 168472, 'chrUn_KI270745v1': 41891, 'chrUn_KI270746v1': 66486, 'chrUn_KI270747v1': 198735, 'chrUn_KI270748v1': 93321, 'chrUn_KI270749v1': 158759, 'chrUn_KI270750v1': 148850, 'chrUn_KI270751v1': 150742, 'chrUn_KI270752v1': 27745, 'chrUn_KI270753v1': 62944, 'chrUn_KI270754v1': 40191, 'chrUn_KI270755v1': 36723, 'chrUn_KI270756v1': 79590, 'chrUn_KI270757v1': 71251, 'chrUn_GL000214v1': 137718, 'chrUn_KI270742v1': 186739, 'chrUn_GL000216v2': 176608, 'chrUn_GL000218v1': 161147, 'chrEBV': 171823}\n", - "\tContig chr17\n", - "16 total jobs\n", - "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", + "\tContig chr17\n", + "16 total jobs\n", + "chr17:000, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:008, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "MD tag 55C43\n", "CIGAR tag 1S97M17144N2M\n", @@ -1654,9 +1728,6 @@ " TTTATTAATTTTTGCAAGACTTCGGAGGGTGCTGTGTGGCGATTAATATTGCTGAGCTGAGCACAGCTGCAATACTTTCCTGTAAAACTGACTAAAGGAGA\n", "Qualities after clipping:\n", " array('B', [34, 34, 34, 34, 26, 34, 34, 26, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 34, 34, 34, 26, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 34, 34, 12, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 26, 26, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 12, 34, 34, 34, 26])\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "['33', '37', '29']\n", "[33, 71, 101]\n", @@ -1734,8 +1805,44 @@ "chr17:015, total reads: 16, counts_df: chr17\n", "edited 15\n", "no_edits 1\n", + "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:008, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", "Reads processed:\t16\n", - "Time to process reads in min:\t0.0236\n", + "Time to process reads in min:\t0.02411\n", "Summary:\t chr17\n", "edited 15\n", "no_edits 1\n", @@ -1754,70 +1861,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:01<00:00, 12.45it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199840~~~~~~~`\n", - "coverage_at_pos 9\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199872~~~~~~~`\n", - "coverage_at_pos 9\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199910~~~~~~~`\n", - "coverage_at_pos 7\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199912~~~~~~~`\n", - "coverage_at_pos 7\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199821~~~~~~~`\n", - "coverage_at_pos 5\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83195182~~~~~~~`\n", - "coverage_at_pos 1\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199829~~~~~~~`\n", - "coverage_at_pos 8\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199930~~~~~~~`\n", - "coverage_at_pos 4\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199867~~~~~~~`\n", - "coverage_at_pos 9\n", - "~~~~~~\n", - "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/same_pos_dif_reads_test/split_bams/chr17/chr17_015_78053865_83257456.bam.sorted.bam\n", - " chr17:83199839~~~~~~~`\n", - "coverage_at_pos 9\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 16/16 [00:01<00:00, 12.35it/s]\n" + "100%|██████████| 16/16 [00:01<00:00, 11.96it/s]\n", + "100%|██████████| 16/16 [00:01<00:00, 11.99it/s]\n" ] }, { @@ -1826,7 +1871,7 @@ "text": [ "Concatenating results...\n", "Done concatenating.\n", - "Total time to calculate coverage: 0.023 minutes\n", + "Total time to calculate coverage: 0.024 minutes\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Filtering and calculating site-level statistics\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", @@ -1852,20 +1897,33 @@ "10 sites being converted to SAILOR format...\n", "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", "2 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 22.836565MB; Peak: 23.946003MB\n", - "Time elapsed: 3.88s\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.544627MB; Peak: 22.65451MB\n", + "Time elapsed: 3.80s\n", "++++++\n", "Done!\n", "++++++\n", "tax1bp3_chr17_3665556_read_test\n", - "Removing old files...\n", + "Removing old files...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "rm: cannot remove 'tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/*': No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Running tests...\n", "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/bams/tax1bp3_chr17_3665556_read.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/hg38_gencode.v35.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/tax1bp3_chr17_3665556_read_test\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/tax1bp3_chr17_3665556_read.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test\n", "\tBarcode whitelist:\tNone\n", "\tStrandedness:\t2\n", "\tBarcode Tag:\tNone\n", @@ -1904,15 +1962,24 @@ "chr17:001, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "chr17:002, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:005, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", + "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "MD tag 2C1C6G3A1G4G47\n", "CIGAR tag 28S70M\n", @@ -1972,41 +2039,32 @@ "Getting info: T G 12 23\n", "chr17:000, total reads: 1, counts_df: chr17\n", "edited 1\n", - "chr17:004, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "chr17:010, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:007, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:003, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:006, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:013, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", "chr17:014, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:008, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:012, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "chr17:011, total reads: 0, counts_df: Empty DataFrame\n", + "chr17:008, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "chr17:015, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "Reads processed:\t1\n", - "Time to process reads in min:\t0.02219\n", + "Time to process reads in min:\t0.02257\n", "Summary:\tEmpty DataFrame\n", "Columns: []\n", "Index: []\n", @@ -2025,7 +2083,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:01<00:00, 13.31it/s]\n" + "100%|██████████| 16/16 [00:01<00:00, 12.81it/s]\n" ] }, { @@ -2034,32 +2092,32 @@ "text": [ "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", " chr17:3665541~~~~~~~`\n", "coverage_at_pos 1\n", "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", " chr17:3665543~~~~~~~`\n", "coverage_at_pos 1\n", "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", " chr17:3665550~~~~~~~`\n", "coverage_at_pos 1\n", "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", " chr17:3665554~~~~~~~`\n", "coverage_at_pos 1\n", "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", " chr17:3665556~~~~~~~`\n", "coverage_at_pos 1\n", "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/tax1bp3_chr17_3665556_read_test/split_bams/chr17/chr17_000_0_5203591.bam.sorted.bam\n", " chr17:3665561~~~~~~~`\n", "coverage_at_pos 1\n" ] @@ -2068,7 +2126,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:01<00:00, 8.68it/s]\n" + "100%|██████████| 16/16 [00:01<00:00, 12.41it/s]\n" ] }, { @@ -2077,7 +2135,7 @@ "text": [ "Concatenating results...\n", "Done concatenating.\n", - "Total time to calculate coverage: 0.032 minutes\n", + "Total time to calculate coverage: 0.023 minutes\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Filtering and calculating site-level statistics\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", @@ -2103,20 +2161,33 @@ "6 sites being converted to SAILOR format...\n", "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", "1 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/hg38_gencode.v35.annotation.genes.bed...\n", - "Current memory usage 22.731296MB; Peak: 23.841517MB\n", - "Time elapsed: 4.71s\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/hg38_gencode.v35.annotation.genes.bed...\n", + "Current memory usage 21.445789MB; Peak: 22.556501MB\n", + "Time elapsed: 3.69s\n", "++++++\n", "Done!\n", "++++++\n", "pair_test\n", - "Removing old files...\n", + "Removing old files...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "rm: cannot remove 'tests/strandedness_tests/pair_test/*': No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Running tests...\n", "Assuming 1 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/bams/pair_example_18_49488551_49590000.sorted.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/pair_test\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/bams/pair_example_18_49488551_49590000.sorted.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/pair_test\n", "\tBarcode whitelist:\tNone\n", "\tStrandedness:\t2\n", "\tBarcode Tag:\tNone\n", @@ -2469,7 +2540,7 @@ "no_edits 2\n", "secondary 2\n", "Reads processed:\t10\n", - "Time to process reads in min:\t0.02057\n", + "Time to process reads in min:\t0.03715\n", "Summary:\t 18\n", "edited 6\n", "no_edits 2\n", @@ -2488,7 +2559,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1/1 [00:01<00:00, 1.19s/it]\n" + "100%|██████████| 1/1 [00:02<00:00, 2.19s/it]\n" ] }, { @@ -2497,12 +2568,12 @@ "text": [ "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/pair_test/split_bams/18/18_000_0_80373285.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/pair_test/split_bams/18/18_000_0_80373285.bam.sorted.bam\n", " 18:49491556~~~~~~~`\n", "coverage_at_pos 2\n", "~~~~~~\n", "!!!!PAIRED END!!!!!\n", - "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2/tests/strandedness_tests/pair_test/split_bams/18/18_000_0_80373285.bam.sorted.bam\n", + "pos: /tscc/projects/ps-yeolab3/ekofman/sailor2//tests/strandedness_tests/pair_test/split_bams/18/18_000_0_80373285.bam.sorted.bam\n", " 18:49567494~~~~~~~`\n", "coverage_at_pos 2\n" ] @@ -2511,7 +2582,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1/1 [00:01<00:00, 1.80s/it]\n" + "100%|██████████| 1/1 [00:02<00:00, 2.01s/it]\n" ] }, { @@ -2520,7 +2591,7 @@ "text": [ "Concatenating results...\n", "Done concatenating.\n", - "Total time to calculate coverage: 0.03 minutes\n", + "Total time to calculate coverage: 0.034 minutes\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Filtering and calculating site-level statistics\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", @@ -2546,9 +2617,9 @@ "2 sites being converted to SAILOR format...\n", "0 rows had coverage of 0 or more edits than coverage... filtering these out, but look into them...\n", "2 final deduplicated SAILOR-formatted sites\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed...\n", - "Current memory usage 22.73438MB; Peak: 23.845373MB\n", - "Time elapsed: 4.42s\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed...\n", + "Current memory usage 21.445743MB; Peak: 22.555779MB\n", + "Time elapsed: 5.35s\n", "++++++\n", "Done!\n", "++++++\n" @@ -2565,7 +2636,11 @@ "pwd\n", "\n", "tests_folder=\"tests/strandedness_tests/\"\n", - "for t in \"F1R2_pair_test-single_end_mode\" \"F1R2_pair_test\" \"F2R1_end_second_in_pair_test\" \"same_pos_dif_reads_test\" \"tax1bp3_chr17_3665556_read_test\" \"pair_test\"\n", + "\n", + "export MARINE=/tscc/projects/ps-yeolab3/ekofman/sailor2/\n", + "export mypython=/tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/python\n", + "\n", + "for t in \"unstranded_pair_test\" \"F1R2_pair_test-single_end_mode\" \"F1R2_pair_test\" \"F2R1_end_second_in_pair_test\" \"same_pos_dif_reads_test\" \"tax1bp3_chr17_3665556_read_test\" \"pair_test\"\n", "\n", "do\n", " echo $t\n", @@ -2573,7 +2648,7 @@ " rm $tests_folder$t/* -r\n", "\n", " echo \"Running tests...\"\n", - " bash tests/strandedness_tests/scripts/$t.sh \n", + " bash tests/strandedness_tests/scripts/$t.sh $mypython\n", " \n", "done\n", "\n" @@ -2581,7 +2656,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 28, "id": "57192b28-b498-4f81-bdcd-0d4d14f644ff", "metadata": { "scrolled": true @@ -2595,13 +2670,26 @@ "Running\n", "/tscc/projects/ps-yeolab3/ekofman/sailor2\n", "only_5_cells_test\n", - "Removing old files...\n", + "Removing old files...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "rm: cannot remove 'tests/singlecell_tests/only_5_cells_test/*': No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Running old tests...\n", "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/singlecell_tests/bams/9_3000526_only_5_cells.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/cellranger-mm10-3.0.0.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/singlecell_tests/only_5_cells_test\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/bams/9_3000526_only_5_cells.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/only_5_cells_test\n", "\tBarcode whitelist:\tNone\n", "\tStrandedness:\t2\n", "\tBarcode Tag:\tCB\n", @@ -4285,53 +4373,47 @@ " CCATATTCTACGTCCTACAGTGGACATTTTTAAATTTTCCACCTTTTTCAGTTTTCTTCGCCACATTTCACGTCCTAAAGTGTGTATTTCTCATTTTCCGT\n", "Qualities before clipping:\n", " array('B', [37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 11, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 25, 37, 11, 37, 37, 37, 37, 37, 25, 37, 37, 37, 25, 37, 37, 37, 25, 37, 37, 37, 37, 37, 37])\n", - "9:014, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", "9:006, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "9:009, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "9:003, total reads: 0, counts_df: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "9:013, total reads: 0, counts_df: Empty DataFrame\n", + "9:001, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "9:010, total reads: 0, counts_df: Empty DataFrame\n", + "9:004, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "9:012, total reads: 0, counts_df: Empty DataFrame\n", + "9:005, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "9:001, total reads: 0, counts_df: Empty DataFrame\n", + "9:002, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "9:007, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "9:015, total reads: 0, counts_df: Empty DataFrame\n", + "9:010, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "9:011, total reads: 0, counts_df: Empty DataFrame\n", + "9:009, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "9:008, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "9:005, total reads: 0, counts_df: Empty DataFrame\n", + "9:015, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "9:002, total reads: 0, counts_df: Empty DataFrame\n", + "9:013, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", - "9:004, total reads: 0, counts_df: Empty DataFrame\n", + "9:014, total reads: 0, counts_df: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "9:011, total reads: 0, counts_df: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "Reads processed:\t31\n", - "Time to process reads in min:\t0.03322\n", + "Time to process reads in min:\t0.02746\n", "Summary:\t 9\n", "edited 31\n", "\n", @@ -4349,20 +4431,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 16/16 [00:01<00:00, 8.77it/s]\n" + "100%|██████████| 16/16 [00:01<00:00, 10.43it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\t9: num subcontigs to concat: 1\n", - "\t9: concatting\n", - "\t4 suffixes\n", - "\tcontig: 9 suffix: C-1, all_contents_df length: 5, all_contents_for_suffix length: 3\n", - "\tcontig: 9 suffix: G-1, all_contents_df length: 5, all_contents_for_suffix length: 1\n", - "\tcontig: 9 suffix: T-1, all_contents_df length: 5, all_contents_for_suffix length: 1\n", - "Total time to concat and write bams: 0.029 minutes\n", + "Total time to concat and write bams: 0.026 minutes\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Calculating coverage at edited sites, minimum read quality is 0...\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", @@ -4377,13 +4453,22 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1/1 [00:01<00:00, 1.60s/it]\n" + "100%|██████████| 1/1 [00:01<00:00, 1.47s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", + "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", + "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", + "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", + "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", + "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", + "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", + "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", + "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", "barcode_specific_contig_without_subdivision: 9_GGGACCTTCGAGCCAC-1\n", "barcode_specific_contig_without_subdivision: 9_GGGACCTTCGAGCCAC-1\n", "barcode_specific_contig_without_subdivision: 9_GGGACCTTCGAGCCAC-1\n", @@ -4430,15 +4515,6 @@ "barcode_specific_contig_without_subdivision: 9_TACCTGCAGACTTGTC-1\n", "barcode_specific_contig_without_subdivision: 9_TACCTGCAGACTTGTC-1\n", "barcode_specific_contig_without_subdivision: 9_TACCTGCAGACTTGTC-1\n", - "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", - "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", - "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", - "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", - "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", - "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", - "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", - "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", - "barcode_specific_contig_without_subdivision: 9_GATCCCTCAGTAACGG-1\n", "barcode_specific_contig_without_subdivision: 9_GCTCAAACAAGGTCTT-1\n", "barcode_specific_contig_without_subdivision: 9_GCTCAAACAAGGTCTT-1\n", "barcode_specific_contig_without_subdivision: 9_GCTCAAACAAGGTCTT-1\n", @@ -4456,7 +4532,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 4/4 [00:01<00:00, 3.21it/s]\n" + "100%|██████████| 4/4 [00:01<00:00, 3.03it/s]\n" ] }, { @@ -4488,21 +4564,34 @@ "\n", "Adding strand-specific conversion...\n", "\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/cellranger-mm10-3.0.0.annotation.genes.bed...\n", - "Current memory usage 23.084938MB; Peak: 24.192066MB\n", - "Time elapsed: 6.83s\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed...\n", + "Current memory usage 21.795658MB; Peak: 22.904126MB\n", + "Time elapsed: 5.62s\n", "++++++\n", "Done!\n", "++++++\n", "long_read_sc_test\n", - "Removing old files...\n", + "Removing old files...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "rm: cannot remove 'tests/singlecell_tests/long_read_sc_test/*': No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Running old tests...\n", "Assuming 16 cores available for multiprocessing. Set this to the number of available cores for optimal execution.\n", "Arguments:\n", - "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/examples/data/LR_single_cell.md.subset.filtered.sorted.bam\n", - "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/cellranger-mm10-3.0.0.annotation.genes.bed\n", - "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/tests/singlecell_tests/long_read_sc_test\n", - "\tBarcode whitelist:\t/tscc/projects/ps-yeolab3/ekofman/sailor2/examples/data/sc_lr_barcodes.tsv.gz\n", + "\tBAM filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//examples/data/LR_single_cell.md.subset.filtered.sorted.bam\n", + "\tAnnotation bedfile filepath:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed\n", + "\tOutput folder:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//tests/singlecell_tests/long_read_sc_test\n", + "\tBarcode whitelist:\t/tscc/projects/ps-yeolab3/ekofman/sailor2//examples/data/sc_lr_barcodes.tsv.gz\n", "\tStrandedness:\t2\n", "\tBarcode Tag:\tIB\n", "\tPaired End:\tFalse\n", @@ -4538,7 +4627,7 @@ "\tContig 6\n", "4 total jobs\n", "Reads processed:\t13\n", - "Time to process reads in min:\t0.0234\n", + "Time to process reads in min:\t0.0232\n", "Summary:\t 6\n", "edited 3\n", "no_edits 10\n", @@ -4556,14 +4645,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 4/4 [00:01<00:00, 3.11it/s]\n" + "100%|██████████| 4/4 [00:01<00:00, 3.07it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Total time to concat and write bams: 0.307 minutes\n", + "Total time to concat and write bams: 0.276 minutes\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Calculating coverage at edited sites, minimum read quality is 0...\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", @@ -4576,8 +4665,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1/1 [00:18<00:00, 18.29s/it]\n", - "100%|██████████| 400/400 [00:03<00:00, 126.80it/s]\n" + "100%|██████████| 1/1 [00:16<00:00, 16.48s/it]\n", + "100%|██████████| 400/400 [00:02<00:00, 138.86it/s]\n" ] }, { @@ -4586,7 +4675,7 @@ "text": [ "Concatenating results...\n", "Done concatenating.\n", - "Total time to calculate coverage: 0.055 minutes\n", + "Total time to calculate coverage: 0.05 minutes\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "Filtering and calculating site-level statistics\n", "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", @@ -4609,9 +4698,9 @@ "\n", "Adding strand-specific conversion...\n", "\n", - "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2/annotations/cellranger-mm10-3.0.0.annotation.genes.bed...\n", - "Current memory usage 22.73849MB; Peak: 23.850397MB\n", - "Time elapsed: 26.68s\n", + "Annotating sites with GTF information from /tscc/projects/ps-yeolab3/ekofman/sailor2//annotations/cellranger-mm10-3.0.0.annotation.genes.bed...\n", + "Current memory usage 21.444407MB; Peak: 22.556205MB\n", + "Time elapsed: 24.32s\n", "++++++\n", "Done!\n", "++++++\n" @@ -4627,6 +4716,9 @@ "cd ..\n", "pwd\n", "\n", + "export MARINE=/tscc/projects/ps-yeolab3/ekofman/sailor2/\n", + "export mypython=/tscc/nfs/home/ekofman/miniconda3/envs/marine_environment/bin/python\n", + "\n", "tests_folder=\"tests/singlecell_tests/\"\n", "for t in \"only_5_cells_test\" \"long_read_sc_test\"\n", "\n", @@ -4636,7 +4728,7 @@ " rm $tests_folder$t/* -r\n", "\n", " echo \"Running old tests...\"\n", - " bash tests/singlecell_tests/scripts/$t.sh \n", + " bash tests/singlecell_tests/scripts/$t.sh $mypython\n", " \n", "done\n" ] @@ -4651,7 +4743,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 31, "id": "f5c3ed3e-13dd-4399-924e-3d1ac17ce387", "metadata": {}, "outputs": [ @@ -4737,10 +4829,60 @@ "\n", "There were 0 failures\n" ] + }, + { + "ename": "NameError", + "evalue": "name 'sys' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[31], line 234\u001b[0m\n\u001b[1;32m 232\u001b[0m sys\u001b[38;5;241m.\u001b[39mexit(\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 234\u001b[0m \u001b[43msys\u001b[49m\u001b[38;5;241m.\u001b[39mexit(\u001b[38;5;241m0\u001b[39m)\n", + "\u001b[0;31mNameError\u001b[0m: name 'sys' is not defined" + ] } ], "source": [ "test_name_to_expectations = {\n", + " \"unstranded_pair_test\": {\n", + " \"folder\": \"strandedness_tests\",\n", + " \"expectations\": [{\n", + " \"contig\": \"Citrine.dna\",\n", + " \"position\": 435,\n", + " \"count\": 22,\n", + " \"coverage\": 22,\n", + " \"num_rows\": 1,\n", + " \"strand_conversion\": \"C>T\",\n", + " \"strand\": \"+\"\n", + " }]\n", + " },\n", + "\n", + " \"pair_test\": {\n", + " \"folder\": \"strandedness_tests\",\n", + " \"expectations\": [{\n", + " \"contig\": \"18\",\n", + " \"position\": 49491556,\n", + " \"count\": 2,\n", + " \"coverage\": 2,\n", + " \"num_rows\": 1,\n", + " \"strand_conversion\": \"C>T\",\n", + " \"strand\": \"-\",\n", + " \"feature_name\": \"RPL17-C18orf32,RPL17\",\n", + " \"feature_strand\": \"-,-\"\n", + " },\n", + " {\n", + " \"contig\": \"18\",\n", + " \"position\": 49567494,\n", + " \"count\": 2,\n", + " \"coverage\": 2,\n", + " \"num_rows\": 1,\n", + " \"strand_conversion\": \"C>T\",\n", + " \"strand\": \"+\",\n", + " \"feature_name\": \"LIPG\",\n", + " \"feature_strand\": \"+\"\n", + " }]\n", + " },\n", + " \n", " \"pair_test\": {\n", " \"folder\": \"strandedness_tests\",\n", " \"expectations\": [{\n", @@ -4999,9 +5141,9 @@ ], "metadata": { "kernelspec": { - "display_name": "workhorse", + "display_name": "marine_environment", "language": "python", - "name": "workhorse" + "name": "marine_environment" }, "language_info": { "codemirror_mode": { diff --git a/tests/integration_tests_auto_check.py b/tests/integration_tests_auto_check.py index f193844..8c49e6a 100755 --- a/tests/integration_tests_auto_check.py +++ b/tests/integration_tests_auto_check.py @@ -2,6 +2,18 @@ import sys test_name_to_expectations = { + "unstranded_pair_test": { + "folder": "strandedness_tests", + "expectations": [{ + "contig": "Citrine.dna", + "position": 435, + "count": 22, + "coverage": 22, + "num_rows": 1, + "strand_conversion": "C>T", + "strand": "+" + }] + }, "pair_test": { "folder": "strandedness_tests", "expectations": [{ diff --git a/tests/integration_tests_run.sh b/tests/integration_tests_run.sh index 07547cf..187b020 100755 --- a/tests/integration_tests_run.sh +++ b/tests/integration_tests_run.sh @@ -13,8 +13,8 @@ tests_folder="strandedness_tests/" echo "Bulk tests scripts" ls -lh $MARINE/tests/$tests_folder/scripts/ -for t in "F1R2_pair_test-single_end_mode" "F1R2_pair_test" "F2R1_end_second_in_pair_test" "same_pos_dif_reads_test" "tax1bp3_chr17_3665556_read_test" "pair_test" - +#for t in "F1R2_pair_test-single_end_mode" "F1R2_pair_test" "F2R1_end_second_in_pair_test" "same_pos_dif_reads_test" "tax1bp3_chr17_3665556_read_test" "pair_test" +for t in "F1R2_pair_test-single_end_mode" do echo $t echo "Removing old files..." @@ -35,17 +35,17 @@ echo "SC tests scripts" ls -lh $MARINE/tests/$tests_folder/scripts/ -for t in "only_5_cells_test" "long_read_sc_test" - -do - echo $t - echo "Removing old files..." - rm $MARINE/tests/$tests_folder$t/* -r - - echo "Running tests..." - bash $MARINE/tests/$tests_folder/scripts/$t.sh $mypython - -done +#for t in "only_5_cells_test" "long_read_sc_test" +# +#do +# echo $t +# echo "Removing old files..." +# rm $MARINE/tests/$tests_folder$t/* -r +# +# echo "Running tests..." +# bash $MARINE/tests/$tests_folder/scripts/$t.sh $mypython +# +#done echo "Checking results..." diff --git a/tests/strandedness_tests/README.txt b/tests/strandedness_tests/README.txt index d23ca61..a543aed 100644 --- a/tests/strandedness_tests/README.txt +++ b/tests/strandedness_tests/README.txt @@ -5,6 +5,9 @@ and the read should only be counted once if MARINE is run in --paired_end mode. So at chr17 43044352 we should see 1 edit and 1 read depth coverage, as a G>A. If --paired_end mode is not used, the number of edits will not be double-counted but the coverage will be, so coverage at this position will be 2. +First reads in pairs align to the forward strand. +Should be on the BRCA1 gene. + F2R1_end.bam (edits at chr 17 43001715, 43001716, 43001717) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Paired ends, but F2R1 so that the second read in the pair is actually mapped to the forward strand. This second read @@ -18,8 +21,7 @@ unique C>G edits with 9 total coverage, because many of these are pairs. tax1bp3_chr17_3665556.bam and tax1bp3_chr17_3665556_read.bam (edits at chr17 3665556) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Should be one G>A edit on one read at this position. But since it is a negative stranded gene, TAX1BP3, ultimate -conversion would be C>T. +Should be one G>A edit on one read at this position. pair_test (pair_example_18_49488551_49590000.sorted.bam) (edits in RPL17 (-) at 49491556, edits in LIPG (+) at 49567494) @@ -33,3 +35,8 @@ In LIPG we have 3 reads but just two different pairs. One pair has both ends ove 2 reads with the edit that are both actually just different ends of 1 read. The other pair has just one end at this location, so it contributes 1 read. So there are 2 edited uniqe pairs and a coverage of 2 for LIPG at position 18:49567494. + +citrine435 (position 435) +~~~~~~~~~~~~~~~~~~~~~~~~~ +We have a C>T edit at position 435 across all reads in this paired-end example. However, the forward mapping reads are sometimes +read1 and sometimes read2, and likewise for the reverse mapping reads (unstranded). \ No newline at end of file diff --git a/tests/strandedness_tests/bams/citrine435.bam b/tests/strandedness_tests/bams/citrine435.bam new file mode 100644 index 0000000000000000000000000000000000000000..d80a72477caa446a11668dd8061e5e1b409801af GIT binary patch literal 2562 zcmV+d3jOsTiwFb&00000{{{d;LjnM10@an>a+*LC$4PHGWcm>21!O^qBwPittxg54 z*razmD|?LF!a{b}H0FK#9vxy#6QjM%q*t8b>~j9+>qjq2jz;e->wM6+FVfI+d^;P5 zc~vPcBr|w?o_)2mG>nB&LZZVZvqxzd_}=mP%06IzR+G65aTD3iQVNW4vlk<&RV z^*-@i9a)r^b&b%A5&+05WsvvQ@#Anmk(@;dGe(K|%AkgH004s_07Mx8$mV*|e|~%= zOIFv&;Zm5TkN~jV2+c}URfTT0;;%h<+z`+4PMiJM`rt<3FKD%v$T+IkFITkjJNjvX zoN8s9IV*RQa=hKN9__=a&Ea7a{mzM_n~%n0d!kQl-~V!R?AcF^=+u;Q)A z+9AauNW2X`yr9D#HXPdX{kRwm8&t>ylK%0LWmz9A%X;$9w0^&{tUp+E?h0%N001A0 z2m}BC000301^_}s0swvot(i-x+*BCHGb4hFfK7;amXkxE2SrFu-Vx>I0ZpmZMN1bh zN?W($(uH<$C&V&{h|tBgg$mkTp*~t)8{HHaige>+?NXNox8Cb_@=oR?bLaM+-kHhF z?QnnR&;NUzFJB7flHqak;IupwZVP7h+1^YLzK6$!R5vW@2d)`}VSLtpiL@O7VlD1u3X&a&%N|y;1tLM(H*ZZvgVuNrO0n?SbF{wH&8(^%|;*ms%u6 z`H2EotSln7*z6gh>Y~3DzS1@FRgO8X0q#o^$hHm|Q_J&!-qy_$nS_{x0G{P4mJ3*{ z<(MQ3)GUJfWQcSegE}vqKm_D(D!dOHbKLGBKR=FaSb%I8)G@)@zH14;!*?#t0Xxa1 zWR;RgfnZb>5@B$89$|SwM6r}wEcIq@ihqr1s{c^F6=8L-PXYE*fbGO)tM#WGt(iTZ zn*cXhAiBD%2R7hah1Lxu&e9OeBufZ{Vvf=s@v)K~#~$}Nj%I=Q>_qM`bmnE#(3#eL zX^$akw9>FfWUO`4op=P~E9=NVj|AZpQ$c7LENf6FFkNbt>8{Tyt7pkww0s~bZrvu5MkxG$)sTdd8nz|fZMmA)wXG#o;LPO~tH zLKTWhF5xoAk*(IWFOGt2gS?LK_}~lbx$aiThoa{r*ZtAUv-J|lkl)*Y?7}nak9_Jj z{SlhRW{Fu`?<)EWaj-f4(VH`V1YR7sGo?@f9n*85OLELQ0yQ-YBdFXFg5;(uAvC0_ z49)3I*%$S&T5NuS;WryUb{dNk`CNm{8jwHs4-O97Y%t`OQqGb{mZCgNQfVcJ3$a*5 zmOxE=!9HO*gRsq%zpQ@P_Bf3O)nj_fu+QruT*PKb8?UrGxZdo5b$t)UcfjrNx;oc8 zkYNYXqJS)PE?-eWEEaEyEwJsejTP^U?8p(_^v_$dBi!p0lOZ-MI!_;r3(qYu1Lm|~ zRwH$>F$TLKTN;ZKm8KUPRQR?Ngk@m1)a?9qWlU_&S&IcF3(gU4Ro)#?qCPI_EU|v( z1g2@laP+@7AnzKTWyW{L_2wq-QR*FXnAMg{W)z;@K&}nrp%WWL2B(eedz?hb<|A+Kk8fZ&3t_U4h`wCzPRNUz zSJYb^i&HnICWpGUIIBqiOe8|y!EqDyv{}lT1CRPjkNWEgty$CZ*|=G_4ax|0*l3OF z)}*M4ZSk)r4t44l2h`12(v1!8yw!|T_nLD;t2P_4rW0y&M!-k1NMCQR*d6SSMLy=_ zlbaA})@=Hy0H9 zQlB8(FJ3YMYI{;|M!mm*x;xc5CcXRGkLq2g!8=tt>3W=vB3_)RnS-%I4L*!XHIFoi zRYls}4w{Tq9WRDI_Fad2cr4z7jIx-RR+`KwU~Py{&o;g0Sga_^M?Ixcf;t7RLMY#bkHI0-X3%MvvY;jUgP zA^kigYU|?VZVoxCk1`wNpAsxg8f@ho{(rXe#9$j1qB#8u2^+F^7LZ<~$ literal 0 HcmV?d00001 diff --git a/tests/strandedness_tests/scripts/F1R2_pair_test-single_end_mode.sh b/tests/strandedness_tests/scripts/F1R2_pair_test-single_end_mode.sh index 1aae077..d743985 100644 --- a/tests/strandedness_tests/scripts/F1R2_pair_test-single_end_mode.sh +++ b/tests/strandedness_tests/scripts/F1R2_pair_test-single_end_mode.sh @@ -1,4 +1,5 @@ mypython=$1 +echo "Python is $mypython" $mypython $MARINE/marine.py \ --bam_filepath \ diff --git a/tests/strandedness_tests/scripts/unstranded_pair_test.sh b/tests/strandedness_tests/scripts/unstranded_pair_test.sh new file mode 100644 index 0000000..0bb2d2b --- /dev/null +++ b/tests/strandedness_tests/scripts/unstranded_pair_test.sh @@ -0,0 +1,19 @@ +mypython=$1 + +$mypython $MARINE/marine.py \ +--bam_filepath \ +$MARINE/tests/strandedness_tests/bams/citrine435.bam \ +--annotation_bedfile_path \ +$MARINE/annotations/hg38_gencode.v35.annotation.genes.bed \ +--output_folder \ +$MARINE/tests/strandedness_tests/unstranded_pair_test \ +--min_dist_from_end \ +0 \ +--min_base_quality \ +0 \ +--cores \ +1 \ +--paired_end \ +--strandedness 0 \ +--sailor \ +--num_intervals_per_contig 1 \ No newline at end of file diff --git a/tests/strandedness_tests/unstranded_pair_test/final_filtered_site_info.tsv b/tests/strandedness_tests/unstranded_pair_test/final_filtered_site_info.tsv new file mode 100644 index 0000000..bcfac7e --- /dev/null +++ b/tests/strandedness_tests/unstranded_pair_test/final_filtered_site_info.tsv @@ -0,0 +1,15 @@ +site_id barcode contig position ref alt strand count coverage conversion strand_conversion +no_barcode_Citrine.dna_437_A_G_+ no_barcode Citrine.dna 437 A G + 1 22 A>G A>G +no_barcode_Citrine.dna_431_A_G_+ no_barcode Citrine.dna 431 A G + 1 22 A>G A>G +no_barcode_Citrine.dna_454_T_G_+ no_barcode Citrine.dna 454 T G + 1 14 T>G T>G +no_barcode_Citrine.dna_439_A_G_+ no_barcode Citrine.dna 439 A G + 1 22 A>G A>G +no_barcode_Citrine.dna_435_C_T_+ no_barcode Citrine.dna 435 C T + 22 22 C>T C>T +no_barcode_Citrine.dna_428_A_G_+ no_barcode Citrine.dna 428 A G + 1 20 A>G A>G +no_barcode_Citrine.dna_414_G_C_+ no_barcode Citrine.dna 414 G C + 1 18 G>C G>C +no_barcode_Citrine.dna_21_G_A_+ no_barcode Citrine.dna 21 G A + 2 2 G>A G>A +no_barcode_Citrine.dna_441_C_A_+ no_barcode Citrine.dna 441 C A + 1 22 C>A C>A +no_barcode_Citrine.dna_438_C_A_+ no_barcode Citrine.dna 438 C A + 1 22 C>A C>A +no_barcode_Citrine.dna_149_C_G_+ no_barcode Citrine.dna 149 C G + 1 2 C>G C>G +no_barcode_Citrine.dna_411_C_A_+ no_barcode Citrine.dna 411 C A + 1 18 C>A C>A +no_barcode_Citrine.dna_432_C_T_+ no_barcode Citrine.dna 432 C T + 1 22 C>T C>T +no_barcode_Citrine.dna_430_T_G_+ no_barcode Citrine.dna 430 T G + 2 22 T>G T>G From 18804241a15401d19ae14cb79a99c7e2790e6600 Mon Sep 17 00:00:00 2001 From: Eric Kofman Date: Thu, 11 Jul 2024 15:52:06 -0700 Subject: [PATCH 2/3] test runner update --- tests/integration_tests_run.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration_tests_run.sh b/tests/integration_tests_run.sh index 187b020..e0fe017 100755 --- a/tests/integration_tests_run.sh +++ b/tests/integration_tests_run.sh @@ -13,8 +13,7 @@ tests_folder="strandedness_tests/" echo "Bulk tests scripts" ls -lh $MARINE/tests/$tests_folder/scripts/ -#for t in "F1R2_pair_test-single_end_mode" "F1R2_pair_test" "F2R1_end_second_in_pair_test" "same_pos_dif_reads_test" "tax1bp3_chr17_3665556_read_test" "pair_test" -for t in "F1R2_pair_test-single_end_mode" +for t in "F1R2_pair_test-single_end_mode" "F1R2_pair_test" "F2R1_end_second_in_pair_test" "same_pos_dif_reads_test" "tax1bp3_chr17_3665556_read_test" "pair_test" "unstranded_pair_test" do echo $t echo "Removing old files..." From cfed91ee4f0c9d38375efac9fd7bd47b1e4ca724 Mon Sep 17 00:00:00 2001 From: Eric Kofman Date: Thu, 11 Jul 2024 16:17:53 -0700 Subject: [PATCH 3/3] fix tests --- tests/integration_tests_run.sh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/integration_tests_run.sh b/tests/integration_tests_run.sh index e0fe017..93bad69 100755 --- a/tests/integration_tests_run.sh +++ b/tests/integration_tests_run.sh @@ -34,17 +34,17 @@ echo "SC tests scripts" ls -lh $MARINE/tests/$tests_folder/scripts/ -#for t in "only_5_cells_test" "long_read_sc_test" -# -#do -# echo $t -# echo "Removing old files..." -# rm $MARINE/tests/$tests_folder$t/* -r -# -# echo "Running tests..." -# bash $MARINE/tests/$tests_folder/scripts/$t.sh $mypython -# -#done +for t in "only_5_cells_test" "long_read_sc_test" + +do + echo $t + echo "Removing old files..." + rm $MARINE/tests/$tests_folder$t/* -r + + echo "Running tests..." + bash $MARINE/tests/$tests_folder/scripts/$t.sh $mypython + +done echo "Checking results..."