Skip to content

Commit

Permalink
Fixes and imports
Browse files: browse the repository at this point in the history
  • Loading branch information
Eric Kofman committed Dec 20, 2024
1 parent ad0ad71 commit d156d7c
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from utils import get_contig_lengths_dict, get_intervals, index_bam, write_rows_to_info_file, write_header_to_edit_info, \
write_read_to_bam_file, remove_file_if_exists, make_folder, concat_and_write_bams_wrapper, make_edit_finding_jobs, pretty_print, get_contigs_that_need_bams_written, split_bed_file, \
get_coverage_wrapper, write_reads_to_file, sort_bam, rm_bam, suffixes, get_broken_up_contigs, run_command, \
make_depth_command_script_single_cell, concatenate_files, generate_and_run_bash_merge
make_depth_command_script_single_cell, concatenate_files, generate_and_run_bash_merge, read_barcode_whitelist_file

import os, psutil

Expand Down
20 changes: 19 additions & 1 deletion src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from multiprocessing import Pool
import multiprocessing
import time
import scipy.sparse as sp
import anndata as ad
from scipy.sparse import csr_matrix

# Number of barcode characters to use as suffix during splitting
CB_N = 1
Expand Down Expand Up @@ -976,6 +976,7 @@ def merge_files_by_chromosome(args):
first_file = files[0]
other_files = files[1:]
merged_file = os.path.join(output_folder, f"{chromosome}_comprehensive_coverage_matrix.tsv")
h5ad_file = os.path.join(output_folder, f"{chromosome}_comprehensive_coverage_matrix.h5ad")

# Prepare the paste command
strip_headers_command = " ".join(
Expand All @@ -987,6 +988,23 @@ def merge_files_by_chromosome(args):
run_command(f"bash -c '{paste_command}'")
print(f"\tColumnar merge complete for {chromosome}. Output saved to {merged_file}.")

# Convert the merged file to an h5ad format with a sparse matrix.
# NOTE(review): the merged TSV is read in full into a DataFrame and `df.values`
# is a dense array, so the whole table is held densely in memory before the
# CSR conversion below; only the on-disk .h5ad output is sparse.
print(f"\tConverting {merged_file} to {h5ad_file} as a sparse matrix.")
df = pd.read_csv(merged_file, sep='\t', index_col=0) # Assuming first column is positions

# Convert DataFrame to sparse matrix
# `sp` is expected to be `scipy.sparse` (see the module-level imports).
sparse_matrix = sp.csr_matrix(df.values) # Use csr_matrix here

# Create AnnData object with sparse matrix
# NOTE(review): `sc` is not among the imports visible in this diff (only
# `anndata as ad` and the scipy.sparse imports are shown) - confirm that `sc`
# is imported elsewhere in this module, otherwise this line raises NameError.
adata = sc.AnnData(sparse_matrix)
# Rows of the merged table become observations and its columns become variables.
adata.obs_names = df.index # Set row (position) names
adata.var_names = df.columns # Set column (barcode) names

# Write to .h5ad file
adata.write_h5ad(h5ad_file)
print(f"\th5ad conversion complete. Output saved to {h5ad_file}.")



def prepare_matrix_files_multiprocess(output_matrix_folder,
output_folder,
Expand Down

0 comments on commit d156d7c

Please sign in to comment.