Skip to content

Commit

Permalink
dist filter test
Browse files Browse the repository at this point in the history
  • Loading branch information
Eric Kofman committed Nov 21, 2024
1 parent a99dfe4 commit 32e4688
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 29 deletions.
4 changes: 2 additions & 2 deletions src/read_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,10 @@ def get_read_information(read, contig, barcode_tag='CB', verbose=False, stranded
distance_from_read_end = np.min([updated_position - reference_start, reference_end - updated_position])

if distance_from_read_end < dist_from_end:
return 'read_end_proximal'.format(dist_from_end), [], {}
continue

if int(qual) < min_base_quality:
return 'baseq_low'.format(min_base_quality), [], {}
continue

# If we have been provided with a barcode CB (single-cell), we need to preset our contigs to match
# the contigs that will be present in the reconfigured bams, ie. 9_GATCCCTCAGTAACGG-1 instead of 9.
Expand Down
2 changes: 1 addition & 1 deletion src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from collections import OrderedDict, defaultdict
from itertools import product

cb_n = 3
cb_n = 2

def generate_permutations_list_for_CB(n):
"""
Expand Down
63 changes: 51 additions & 12 deletions tests/integration_tests.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2889,7 +2889,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 16,
"id": "f5c3ed3e-13dd-4399-924e-3d1ac17ce387",
"metadata": {},
"outputs": [
Expand All @@ -2900,6 +2900,9 @@
"\n",
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
"Checking results for edge_case_test\n",
"\n",
" >>> Found 2 edits total, as expected <<<\n",
"\n",
"\tExpecting: {'contig': '10', 'barcode': 'AAACGAATCATTCATC-1', 'position': 132330481, 'num_rows': 1, 'count': 1, 'coverage': 1, 'conversion': 'T>G', 'strand_conversion': 'A>C', 'strand': '-', 'feature_name': 'STK32C', 'feature_strand': '-'}\n",
"\n",
"\t >>> edge_case_test passed! <<<\n",
Expand All @@ -2910,6 +2913,16 @@
"\n",
"\n",
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
"Checking results for edge_case_filter_dist_test\n",
"\n",
" >>> Found 1 edits total, as expected <<<\n",
"\n",
"\tExpecting: {'contig': '10', 'barcode': 'AAACGAATCATTCATC-1', 'position': 132330438, 'num_rows': 1, 'count': 1, 'coverage': 1, 'conversion': 'T>C', 'strand_conversion': 'A>G', 'strand': '-', 'feature_name': 'STK32C', 'feature_strand': '-'}\n",
"\n",
"\t >>> edge_case_filter_dist_test passed! <<<\n",
"\n",
"\n",
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
"Checking results for unstranded_pair_test\n",
"\tExpecting: {'contig': 'Citrine.dna', 'position': 435, 'count': 22, 'coverage': 22, 'num_rows': 1, 'strand_conversion': 'C>T', 'strand': '+'}\n",
"\n",
Expand Down Expand Up @@ -3012,6 +3025,7 @@
"test_name_to_expectations = {\n",
" \"edge_case_test\": {\n",
" \"folder\": \"singlecell_tests\",\n",
" \"total_edit_sites\": 2,\n",
" \"expectations\": [{\n",
" \"contig\": \"10\",\n",
" \"barcode\": \t\"AAACGAATCATTCATC-1\",\n",
Expand All @@ -3037,8 +3051,24 @@
" \"strand\": \"-\",\n",
" \"feature_name\": \"STK32C\",\n",
" \"feature_strand\": \"-\"\n",
" }\n",
" ]\n",
" }]\n",
" },\n",
" \"edge_case_filter_dist_test\": {\n",
" \"folder\": \"singlecell_tests\",\n",
" \"total_edit_sites\": 1,\n",
" \"expectations\": [{\n",
" \"contig\": \"10\",\n",
" \"barcode\": \t\"AAACGAATCATTCATC-1\",\n",
" \"position\": 132330438,\n",
" \"num_rows\": 1,\n",
" \"count\": 1,\n",
" \"coverage\": 1,\n",
" \"conversion\": \"T>C\",\n",
" \"strand_conversion\": \"A>G\",\n",
" \"strand\": \"-\",\n",
" \"feature_name\": \"STK32C\",\n",
" \"feature_strand\": \"-\"\n",
" }]\n",
" },\n",
" \"unstranded_pair_test\": {\n",
" \"folder\": \"strandedness_tests\",\n",
Expand Down Expand Up @@ -3251,32 +3281,41 @@
"failures = 0\n",
"for test_name, info in test_name_to_expectations.items():\n",
" print(\"\\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\\nChecking results for {}\".format(test_name))\n",
" \n",
"\n",
" failure = False\n",
"\n",
" folder = info.get(\"folder\")\n",
" final_filtered_site_info_annotated = \"{}/{}/final_filtered_site_info_annotated.tsv\".format(folder, test_name)\n",
" final_filtered_site_info_annotated_df = pd.read_csv(final_filtered_site_info_annotated, sep='\\t', index_col=0)\n",
"\n",
" if \"total_edit_sites\" in info:\n",
" try:\n",
" assert(len(final_filtered_site_info_annotated_df) == info.get(\"total_edit_sites\"))\n",
" print(\"\\n >>> Found {} edits total, as expected <<<\\n\".format(info.get(\"total_edit_sites\")))\n",
" except Exception as e:\n",
" print(\"Exception:\\n\\tNum total rows in final_filtered_site_info_annotated.tsv expected: {}, was {}\".format(info.get(\"total_edit_sites\"), len(final_filtered_site_info_annotated_df)))\n",
" failure = True \n",
" failures += 1\n",
" continue \n",
" \n",
" expectations_list = info.get(\"expectations\")\n",
" for expectations in expectations_list:\n",
" failure = False\n",
" print(\"\\tExpecting: {}\".format(expectations))\n",
" \n",
" \n",
" folder = info.get(\"folder\")\n",
" \n",
" contig = expectations.get(\"contig\")\n",
" barcode = expectations.get(\"barcode\", None)\n",
" \n",
" position = expectations.get(\"position\")\n",
" \n",
" final_filtered_site_info_annotated = \"{}/{}/final_filtered_site_info_annotated.tsv\".format(folder, test_name)\n",
" final_filtered_site_info_annotated_df = pd.read_csv(final_filtered_site_info_annotated, sep='\\t', index_col=0)\n",
" \n",
" row_of_interest = final_filtered_site_info_annotated_df[\n",
" (final_filtered_site_info_annotated_df['position'] == position) &\\\n",
" (final_filtered_site_info_annotated_df['contig'].astype(str) == contig)\n",
" ]\n",
" \n",
" \n",
" if barcode:\n",
" row_of_interest = row_of_interest[row_of_interest.barcode == barcode]\n",
" \n",
" failure = False\n",
" try:\n",
" assert(len(row_of_interest) == expectations.get(\"num_rows\", 1))\n",
" except Exception as e:\n",
Expand Down
54 changes: 41 additions & 13 deletions tests/integration_tests_auto_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@


test_name_to_expectations = {
"edge_case_test": {
"edge_case_test": {
"folder": "singlecell_tests",
"total_edit_sites": 2,
"expectations": [{
"contig": "10",
"barcode": "AAACGAATCATTCATC-1",
Expand All @@ -32,8 +33,24 @@
"strand": "-",
"feature_name": "STK32C",
"feature_strand": "-"
}
]
}]
},
"edge_case_filter_dist_test": {
"folder": "singlecell_tests",
"total_edit_sites": 1,
"expectations": [{
"contig": "10",
"barcode": "AAACGAATCATTCATC-1",
"position": 132330438,
"num_rows": 1,
"count": 1,
"coverage": 1,
"conversion": "T>C",
"strand_conversion": "A>G",
"strand": "-",
"feature_name": "STK32C",
"feature_strand": "-"
}]
},
"unstranded_pair_test": {
"folder": "strandedness_tests",
Expand Down Expand Up @@ -245,35 +262,46 @@

print("Current directory: {}".format(os.getcwd()))

# Check results of each test
# Check out results of each test
failures = 0
for test_name, info in test_name_to_expectations.items():
for test_name, info in test_name_to_expectations.items():
print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nChecking results for {}".format(test_name))


failure = False

folder = info.get("folder")
final_filtered_site_info_annotated = "{}/{}/final_filtered_site_info_annotated.tsv".format(folder, test_name)
final_filtered_site_info_annotated_df = pd.read_csv(final_filtered_site_info_annotated, sep='\t', index_col=0)

if "total_edit_sites" in info:
try:
assert(len(final_filtered_site_info_annotated_df) == info.get("total_edit_sites"))
print("\n >>> Found {} edits total, as expected <<<\n".format(info.get("total_edit_sites")))
except Exception as e:
print("Exception:\n\tNum total rows in final_filtered_site_info_annotated.tsv expected: {}, was {}".format(info.get("total_edit_sites"), len(final_filtered_site_info_annotated_df)))
failure = True
failures += 1
continue

expectations_list = info.get("expectations")
for expectations in expectations_list:
failure = False
print("\tExpecting: {}".format(expectations))


folder = info.get("folder")

contig = expectations.get("contig")
barcode = expectations.get("barcode", None)

position = expectations.get("position")

final_filtered_site_info_annotated = "{}/{}/final_filtered_site_info_annotated.tsv".format(folder, test_name)
final_filtered_site_info_annotated_df = pd.read_csv(final_filtered_site_info_annotated, sep='\t', index_col=0)

row_of_interest = final_filtered_site_info_annotated_df[
(final_filtered_site_info_annotated_df['position'] == position) &\
(final_filtered_site_info_annotated_df['contig'].astype(str) == contig)
]


if barcode:
row_of_interest = row_of_interest[row_of_interest.barcode == barcode]


failure = False
try:
assert(len(row_of_interest) == expectations.get("num_rows", 1))
Expand Down
2 changes: 1 addition & 1 deletion tests/integration_tests_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ echo "SC tests scripts"
ls -lh $MARINE/tests/$tests_folder/scripts/


for t in "only_5_cells_test" "only_5_cells_bulk_mode_test" "long_read_sc_test" "edge_case_test"
for t in "only_5_cells_test" "only_5_cells_bulk_mode_test" "long_read_sc_test" "edge_case_test" "edge_case_dist_filter_test"

do
echo $t
Expand Down
20 changes: 20 additions & 0 deletions tests/singlecell_tests/scripts/edge_case_dist_filter_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# This sample has an edit at the very end of the read. But we want to make sure it is not included if dist from end filter
# is active.

mypython=$1

$mypython $MARINE/marine.py \
--bam_filepath \
$MARINE/tests/singlecell_tests/bams/10_C-1_orig.bam \
--annotation_bedfile_path \
$MARINE/annotations/cellranger-GRCh38-3.0.0.annotation.genes.bed \
--output_folder \
$MARINE/tests/singlecell_tests/edge_case_filter_dist_test \
--min_base_quality \
15 \
--cores \
4 \
--min_dist_from_end 2 \
--barcode_tag "CB" \
--strandedness 2 --num_per_sublist 1 --verbose \
--contigs 10 --interval_length 40000000 --keep_intermediate_files

0 comments on commit 32e4688

Please sign in to comment.