Skip to content

Commit

Permalink
adapt bulk mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Eric Kofman committed Nov 29, 2024
1 parent 39f04bc commit 97e242d
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 20 deletions.
44 changes: 27 additions & 17 deletions marine.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from utils import get_intervals, index_bam, write_rows_to_info_file, write_header_to_edit_info, \
write_read_to_bam_file, remove_file_if_exists, make_folder, concat_and_write_bams_wrapper, \
pretty_print, read_barcode_whitelist_file, get_contigs_that_need_bams_written, \
make_depth_command_script, generate_and_run_bash_merge, get_sailor_sites, \
make_depth_command_script_single_cell, generate_and_run_bash_merge, get_sailor_sites, \
concatenate_files, run_command, get_edits_with_coverage_df, zero_edit_found, delete_intermediate_files

from core import run_edit_identifier, run_bam_reconfiguration, \
Expand Down Expand Up @@ -197,21 +197,31 @@ def generate_depths(output_folder, bam_filepaths, paired_end=False, barcode_tag=
).format(output_folder, output_folder)
run_command(combine_edit_sites_command)


all_depth_commands.append(combine_edit_sites_command)

output_suffix = 'source_cells'

if barcode_tag:
coverage_subfolder = '{}/coverage'.format(output_folder)
make_folder(coverage_subfolder)

# Single cell mode
split_bed_file(
f"{output_folder}/combined_{output_suffix}.bed",
f"{output_folder}/combined_{output_suffix}_split_by_suffix",
bam_filepaths,
output_suffix=output_suffix
)

make_depth_command_script(paired_end, bam_filepaths, output_folder,
all_depth_commands=all_depth_commands, output_suffix='source_cells', run=True, processes=cores, barcode_tag=barcode_tag)

make_depth_command_script_single_cell(paired_end, bam_filepaths, output_folder,
all_depth_commands=all_depth_commands, output_suffix='source_cells', run=True, processes=cores, barcode_tag=barcode_tag)

else:
# Bulk mode, we will not split the bed and simply use samtools depth on the combined.bed
samtools_depth_command = f"samtools depth -b {output_folder}/combined_source_cells.bed {bam_filepath} > {output_folder}/depths_source_cells.txt"
run_command(samtools_depth_command)


print("Concatenating edit info files...")
concatenate_files(output_folder, "edit_info/*edit_info.tsv",
Expand All @@ -223,10 +233,12 @@ def generate_depths(output_folder, bam_filepaths, paired_end=False, barcode_tag=
header_columns = ['barcode', 'contig', 'position', 'ref', 'alt',
'read_id', 'strand', 'coverage']


generate_and_run_bash_merge(output_folder,
'{}/final_edit_info_no_coverage.tsv'.format(output_folder),
'{}/depths_source_cells.txt'.format(output_folder),
'{}/final_edit_info.tsv'.format(output_folder), header_columns)
'{}/final_edit_info.tsv'.format(output_folder),
header_columns)

coverage_total_time = time.perf_counter() - coverage_start_time

Expand Down Expand Up @@ -425,9 +437,6 @@ def run(bam_filepath, annotation_bedfile_path, output_folder, contigs=[], strand
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
pretty_print("Calculating coverage at edited sites, minimum read quality is {}...".format(min_read_quality), style='~')

coverage_subfolder = '{}/coverage'.format(output_folder)
make_folder(coverage_subfolder)

# We want to run the samtools depth command for each of the reconfigured bam files
reconfigured_bam_filepaths = glob('{}/split_bams/*/*.bam'.format(output_folder))

Expand Down Expand Up @@ -553,15 +562,16 @@ def run(bam_filepath, annotation_bedfile_path, output_folder, contigs=[], strand
output_suffix=output_suffix
)

make_depth_command_script(paired_end,
reconfigured_bam_filepaths,
output_folder,
output_suffix=output_suffix,
run=True,
pivot=True,
processes=cores,
barcode_tag=barcode_tag
)
make_depth_command_script_single_cell(
paired_end,
reconfigured_bam_filepaths,
output_folder,
output_suffix=output_suffix,
run=True,
pivot=True,
processes=cores,
barcode_tag=barcode_tag
)

if not keep_intermediate_files:
pretty_print("Deleting intermediate files...", style="-")
Expand Down
2 changes: 1 addition & 1 deletion src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ def prepare_pysam_coverage_args(bam_filepaths, output_folder, output_suffix='',



def make_depth_command_script(paired_end, bam_filepaths, output_folder, all_depth_commands=[],
def make_depth_command_script_single_cell(paired_end, bam_filepaths, output_folder, all_depth_commands=[],
output_suffix='', run=False, pivot=False, processes=4, barcode_tag=None):
"""
Main function to generate and execute samtools depth commands, and optionally pivot and merge matrices.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ $MARINE/tests/singlecell_tests/only_5_cells_bulk_mode_test \
--cores \
4 \
--strandedness 2 --verbose \
--contigs "9" --interval_length 32000000
--contigs "9" --interval_length 32000000 --keep_intermediate_files
2 changes: 1 addition & 1 deletion tests/singlecell_tests/scripts/only_5_cells_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ $MARINE/tests/singlecell_tests/only_5_cells_test \
4 \
--barcode_tag "CB" \
--strandedness 2 \
--contigs "9" --interval_length 32000000
--keep_intermediate_files --contigs "9" --interval_length 32000000

0 comments on commit 97e242d

Please sign in to comment.