Skip to content

Commit

Permalink
make SangerDB optional
Browse files Browse the repository at this point in the history
  • Loading branch information
camiel-m committed Jul 26, 2023
1 parent 80394de commit 0904d98
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 38 deletions.
4 changes: 1 addition & 3 deletions chromograph.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
Metadata-Version: 1.0
Metadata-Version: 2.1
Name: chromograph
Version: 0.0.1
Summary: Pipeline for single-cell ATAC-seq analysis
Home-page: https://github.com/linnarsson-lab/chromograph
Author: Linnarsson Lab
Author-email: [email protected]
License: MIT
Description: UNKNOWN
Platform: UNKNOWN
2 changes: 1 addition & 1 deletion chromograph.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
loompy
numpy
scikit-learn
scipy==1.4.1
scipy
matplotlib
networkx
python-louvain
Expand Down
46 changes: 12 additions & 34 deletions chromograph/preprocessing/Chromgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np
import os
import sys
import yaml
import pybedtools
from pybedtools import BedTool
import collections
Expand Down Expand Up @@ -43,37 +43,8 @@ def __init__(self) -> None:
self.RNA_file = ''
pybedtools.helpers.set_bedtools_path(self.config.paths.bedtools)
logging.info("Chromgen initialised")

# def fragments_to_count(self, ff, outdir, meta, bsize, chromosomes):
# '''
# '''
# ## Read Fragments and generate size bins
# logging.info("Read fragments into dict")
# frag_dict = read_fragments(ff)

# ## Split fragments to separate files for fast indexing
# logging.info(f"Saving fragments to separate folder for fast indexing")
# fdir = os.path.join(outdir, 'fragments')
# if not os.path.isdir(fdir):
# os.mkdir(fdir)
# if len(os.listdir(fdir)) < len(meta['barcode']):
# i = 0
# for x in meta['barcode']:
# f = os.path.join(fdir, f'{x}.tsv.gz')
# if not os.path.exists(f):
# frags = BedTool(frag_dict[x]).filter(lambda x: x[0] in chromosomes.keys()).saveas(f)
# i += 1
# if i%1000 == 0:
# logging.info(f'Finished separating fragments for {i} cells')

# ## Count fragments inside bins
# logging.info("Count fragments overlapping with bins")
# Count_dict = count_bins(frag_dict, meta['barcode'], bsize)
# logging.info("Finished counting fragments")

# return Count_dict

def fit(self, indir: str, bsize: int = 5000, outdir: str = None, genome_size: str = None, blacklist: str = None, min_fragments: bool = False) -> None:

def fit(self, indir: str, bsize: int = 5000, outdir: str = None, genome_size: str = None, blacklist: str = None, min_fragments: bool = False, path_meta: str = None) -> None:
''''
Create a .loom file from 10X Genomics cellranger output with reads binned
Args:
Expand All @@ -82,6 +53,8 @@ def fit(self, indir: str, bsize: int = 5000, outdir: str = None, genome_size: st
outdir (str): output folder where the new loom file should be saved (defaults to indir)
genome_size (str): path to file containing chromosome sizes, usually derived from encode (i.e. 'hg19.chrom.sizes.tsv')
blacklist (str): path to bedfile containing blacklisted region (i.e. 'blacklist_hg19.bed')
path_meta (str): path to a metadata file (.yaml format) to use instead of the default database (None to use SangerDB);
the file must at minimum define the sample name
Returns:
path (str): Full path to the created loom file.
Remarks:
Expand Down Expand Up @@ -169,8 +142,13 @@ def fit(self, indir: str, bsize: int = 5000, outdir: str = None, genome_size: st
meta['CellID'] = np.array([x.split('-')[0] for x in meta['CellID']])

## Retrieve sample metadata from SangerDB, or from the YAML file at path_meta when one is given
logging.info(f'Retrieve metadata from {[self.config.paths.metadata, sample]}')
m = load_sample_metadata(self.config.paths.metadata, sample)
if not path_meta:
logging.info(f'Retrieve metadata from {[self.config.paths.metadata, sample]}')
m = load_sample_metadata(self.config.paths.metadata, sample)
else:
logging.info(f'Retrieve metadata from {path_meta}')
with open(path_meta, 'r') as file:
m = yaml.load(file, Loader=yaml.FullLoader)
for k,v in m.items():
meta[k] = np.array([v] * len(meta['barcode']))

Expand Down

0 comments on commit 0904d98

Please sign in to comment.