Skip to content

Commit

Permalink
Merge pull request #50 from YeoLab/11222024_pairedend_and_readspecific
Browse files Browse the repository at this point in the history
11222024 pairedend and readspecific
  • Loading branch information
ekofman authored Dec 3, 2024
2 parents 0c31f54 + 9c66360 commit cf1eee9
Show file tree
Hide file tree
Showing 9 changed files with 823 additions and 385 deletions.
682 changes: 318 additions & 364 deletions marine.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def find_edits(bampath, contig, split_index, start, end, output_folder, barcode_

output_file = '{}/{}_{}_{}_{}_edit_info.tsv'.format(edit_info_subfolder, contig, split_index, start, end)
output_bedfile = '{}/{}_{}_{}_{}_edit_positions.bed'.format(edit_info_subfolder, contig, split_index, start, end)

remove_file_if_exists(output_file)

with open(output_file, 'w') as f:
Expand Down
509 changes: 495 additions & 14 deletions src/utils.py

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion tests/integration_tests.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2889,7 +2889,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 7,
"id": "f5c3ed3e-13dd-4399-924e-3d1ac17ce387",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -3012,6 +3012,7 @@
"\t >>> long_read_sc_test passed! <<<\n",
"\n",
"Checking that analyzing a single-cell dataset in 'bulk' mode (i.e. not specificying the 'CB' barcode) yields the exact same positions and base changes, but with counts and coverages aggregated rather than at a single-cell resolution\n",
"grouped_sc_rows: 62, bulk_rows: 62\n",
"\n",
"\t >>> single-cell and bulk on same dataset comparison passed! <<<\n",
"\n",
Expand Down Expand Up @@ -3341,6 +3342,7 @@
"bulk_folder = 'only_5_cells_bulk_mode_test'\n",
"sc_5_cells = pd.read_csv('singlecell_tests/{}/final_filtered_site_info.tsv'.format(sc_folder), sep='\\t').sort_values(['position', 'strand_conversion'])\n",
"bulk_5_cells = pd.read_csv('singlecell_tests/{}/final_filtered_site_info.tsv'.format(bulk_folder), sep='\\t').sort_values(['position', 'strand_conversion'])\n",
"\n",
"print(\"Checking that analyzing a single-cell dataset in 'bulk' mode (i.e. not specificying the 'CB' barcode) yields the exact same positions and base changes, but with counts and coverages aggregated rather than at a single-cell resolution\")\n",
"grouped_sc = pd.DataFrame(sc_5_cells.groupby(['contig', 'position', 'strand_conversion']).agg({'count': sum, 'strand_conversion': 'unique'}))\n",
"grouped_sc.index.names = ['contig', 'position', 'c']\n",
Expand All @@ -3357,6 +3359,7 @@
" bulk_rows.append((r['contig'], r['position'], r['strand_conversion']))\n",
"\n",
"try:\n",
" print(\"grouped_sc_rows: {}, bulk_rows: {}\".format(len(grouped_sc_rows), len(bulk_rows)))\n",
" assert(grouped_sc_rows == bulk_rows)\n",
" for bulk_item, grouped_sc_item in zip(bulk_rows, grouped_sc_rows):\n",
" assert(bulk_item == grouped_sc_item)\n",
Expand Down
2 changes: 1 addition & 1 deletion tests/integration_tests_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ echo "SC tests scripts"
ls -lh $MARINE/tests/$tests_folder/scripts/


for t in "only_5_cells_test" "only_5_cells_bulk_mode_test" "long_read_sc_test" "edge_case_test" "edge_case_dist_filter_test"
for t in "only_5_cells_test" "only_5_cells_bulk_mode_test" "only_5_cells_all_cells_coverage_test" "long_read_sc_test" "edge_case_test" "edge_case_dist_filter_test"

do
echo $t
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ $MARINE/tests/singlecell_tests/only_5_cells_bulk_mode_test \
--cores \
4 \
--strandedness 2 --verbose \
--contigs "9" --interval_length 32000000
--contigs "9" --interval_length 32000000 --keep_intermediate_files
2 changes: 1 addition & 1 deletion tests/singlecell_tests/scripts/only_5_cells_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ $MARINE/tests/singlecell_tests/only_5_cells_test \
4 \
--barcode_tag "CB" \
--strandedness 2 \
--contigs "9" --interval_length 32000000
--keep_intermediate_files --contigs "9" --interval_length 32000000
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ $MARINE/tests/strandedness_tests/F1R2_pair_test \
--strandedness 2 \
--contigs "chr17" \
--sailor \
--num_intervals_per_contig 16
--keep_intermediate_files
2 changes: 1 addition & 1 deletion tests/strandedness_tests/scripts/F1R2_pair_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ $MARINE/tests/strandedness_tests/F1R2_pair_test \
--strandedness 2 \
--contigs "chr17" \
--sailor \
--num_intervals_per_contig 16
--keep_intermediate_files

0 comments on commit cf1eee9

Please sign in to comment.