From 97e242d00dbaf5024f2e97c82e188a5583a8e193 Mon Sep 17 00:00:00 2001 From: Eric Kofman Date: Fri, 29 Nov 2024 09:00:18 -0800 Subject: [PATCH] adapt bulk mode --- marine.py | 44 ++++++++++++------- src/utils.py | 2 +- .../scripts/only_5_cells_bulk_mode_test.sh | 2 +- .../scripts/only_5_cells_test.sh | 2 +- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/marine.py b/marine.py index deb86e5..9804f24 100755 --- a/marine.py +++ b/marine.py @@ -33,7 +33,7 @@ from utils import get_intervals, index_bam, write_rows_to_info_file, write_header_to_edit_info, \ write_read_to_bam_file, remove_file_if_exists, make_folder, concat_and_write_bams_wrapper, \ pretty_print, read_barcode_whitelist_file, get_contigs_that_need_bams_written, \ -make_depth_command_script, generate_and_run_bash_merge, get_sailor_sites, \ +make_depth_command_script_single_cell, generate_and_run_bash_merge, get_sailor_sites, \ concatenate_files, run_command, get_edits_with_coverage_df, zero_edit_found, delete_intermediate_files from core import run_edit_identifier, run_bam_reconfiguration, \ @@ -197,11 +197,16 @@ def generate_depths(output_folder, bam_filepaths, paired_end=False, barcode_tag= ).format(output_folder, output_folder) run_command(combine_edit_sites_command) + all_depth_commands.append(combine_edit_sites_command) output_suffix = 'source_cells' if barcode_tag: + coverage_subfolder = '{}/coverage'.format(output_folder) + make_folder(coverage_subfolder) + + # Single cell mode split_bed_file( f"{output_folder}/combined_{output_suffix}.bed", f"{output_folder}/combined_{output_suffix}_split_by_suffix", @@ -209,9 +214,14 @@ def generate_depths(output_folder, bam_filepaths, paired_end=False, barcode_tag= output_suffix=output_suffix ) - make_depth_command_script(paired_end, bam_filepaths, output_folder, - all_depth_commands=all_depth_commands, output_suffix='source_cells', run=True, processes=cores, barcode_tag=barcode_tag) - + make_depth_command_script_single_cell(paired_end, bam_filepaths, output_folder, + all_depth_commands=all_depth_commands, output_suffix='source_cells', run=True, processes=cores, barcode_tag=barcode_tag) + + else: + # Bulk mode, we will not split the bed and simply use samtools depth on the combined.bed + samtools_depth_command = f"samtools depth -b {output_folder}/combined_source_cells.bed {bam_filepath} > {output_folder}/depths_source_cells.txt" + run_command(samtools_depth_command) + print("Concatenating edit info files...") concatenate_files(output_folder, "edit_info/*edit_info.tsv", @@ -223,10 +233,12 @@ def generate_depths(output_folder, bam_filepaths, paired_end=False, barcode_tag= header_columns = ['barcode', 'contig', 'position', 'ref', 'alt', 'read_id', 'strand', 'coverage'] + generate_and_run_bash_merge(output_folder, '{}/final_edit_info_no_coverage.tsv'.format(output_folder), '{}/depths_source_cells.txt'.format(output_folder), - '{}/final_edit_info.tsv'.format(output_folder), header_columns) + '{}/final_edit_info.tsv'.format(output_folder), + header_columns) coverage_total_time = time.perf_counter() - coverage_start_time @@ -425,9 +437,6 @@ def run(bam_filepath, annotation_bedfile_path, output_folder, contigs=[], strand # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pretty_print("Calculating coverage at edited sites, minimum read quality is {}...".format(min_read_quality), style='~') - coverage_subfolder = '{}/coverage'.format(output_folder) - make_folder(coverage_subfolder) - # We want to run the samtools depth command for each of the reconfigured bam files reconfigured_bam_filepaths = glob('{}/split_bams/*/*.bam'.format(output_folder)) @@ -553,15 +562,16 @@ def run(bam_filepath, annotation_bedfile_path, output_folder, contigs=[], strand output_suffix=output_suffix ) - make_depth_command_script(paired_end, - reconfigured_bam_filepaths, - output_folder, - output_suffix=output_suffix, - run=True, - pivot=True, - processes=cores, - barcode_tag=barcode_tag - ) + make_depth_command_script_single_cell( + paired_end, + reconfigured_bam_filepaths, + output_folder, + output_suffix=output_suffix, + run=True, + pivot=True, + processes=cores, + barcode_tag=barcode_tag + ) if not keep_intermediate_files: pretty_print("Deleting intermediate files...", style="-") diff --git a/src/utils.py b/src/utils.py index d451e67..dadda75 100644 --- a/src/utils.py +++ b/src/utils.py @@ -898,7 +898,7 @@ def prepare_pysam_coverage_args(bam_filepaths, output_folder, output_suffix='', -def make_depth_command_script(paired_end, bam_filepaths, output_folder, all_depth_commands=[], +def make_depth_command_script_single_cell(paired_end, bam_filepaths, output_folder, all_depth_commands=[], output_suffix='', run=False, pivot=False, processes=4, barcode_tag=None): """ Main function to generate and execute samtools depth commands, and optionally pivot and merge matrices. diff --git a/tests/singlecell_tests/scripts/only_5_cells_bulk_mode_test.sh b/tests/singlecell_tests/scripts/only_5_cells_bulk_mode_test.sh index 2a60470..88796ec 100644 --- a/tests/singlecell_tests/scripts/only_5_cells_bulk_mode_test.sh +++ b/tests/singlecell_tests/scripts/only_5_cells_bulk_mode_test.sh @@ -14,4 +14,4 @@ $MARINE/tests/singlecell_tests/only_5_cells_bulk_mode_test \ --cores \ 4 \ --strandedness 2 --verbose \ ---contigs "9" --interval_length 32000000 \ No newline at end of file +--contigs "9" --interval_length 32000000 --keep_intermediate_files diff --git a/tests/singlecell_tests/scripts/only_5_cells_test.sh b/tests/singlecell_tests/scripts/only_5_cells_test.sh index cfd6ccc..5413f0c 100644 --- a/tests/singlecell_tests/scripts/only_5_cells_test.sh +++ b/tests/singlecell_tests/scripts/only_5_cells_test.sh @@ -15,4 +15,4 @@ $MARINE/tests/singlecell_tests/only_5_cells_test \ 4 \ --barcode_tag "CB" \ --strandedness 2 \ ---contigs "9" --interval_length 32000000 \ No newline at end of file +--keep_intermediate_files --contigs "9" --interval_length 32000000