Skip to content

Commit

Permalink
Merge pull request #25 from bigict/dev
Browse files: browse the repository at this point in the history
refactor: use a separate directory for kClust results
  • Loading branch information
chungongyu authored Mar 10, 2024
2 parents 43a8ce7 + 6067034 commit a3cc41a
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
16 changes: 9 additions & 7 deletions kclust2db.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@
kclust_threshold_n2s_list = [99, 90, 80, 70, 60, 50, 40, 30]

kclust_template = Template(
"$kClust -i $infile -s $threshold -d $tmpdir/kClust -M 5000MB")
"$kClust -i $infile -s $threshold -d $tmpdir -M 5000MB")
kClust_mkAln_template = Template(
"$kClust_mkAln -c '$clustalo --threads=$ncpu -i $$infile -o $$outfile' -d $tmpdir/kClust --no-pseudo-headers|grep -P '^Filename:'|cut -d' ' -f2" # pylint: disable=line-too-long
"$kClust_mkAln -c '$clustalo --threads=$ncpu -i $$infile -o $$outfile' -d $tmpdir --no-pseudo-headers|grep -P '^Filename:'|cut -d' ' -f2" # pylint: disable=line-too-long
)
reformat_template = Template(
"$reformat fas a3m $filename $tmpdir/a3m/$basename.a3m")
"$reformat fas a3m $filename $tmpdir/$basename.a3m")
# hhblitsdb_template = Template(
# "$hhblitsdb --cpu $ncpu -o $outdb --input_a3m $tmpdir/a3m")
cdhit_template = Template(
Expand Down Expand Up @@ -119,11 +119,13 @@ def kclust2db(infile, tmpdir=".", s=1.12, ncpu=1): # pylint: disable=redefined-
"""Cluster sequences in FASTA file \"infile\", and generate hhblits
style database at outdb"""
logger.info("#### cluster input fasta ####")
kclustdir = os.path.join(tmpdir, "kClust")
mkdir_if_not_exist(kclustdir)
cmd = kclust_template.substitute(
kClust=bin_dict["kClust"],
infile=infile,
threshold=s,
tmpdir=tmpdir,
tmpdir=kclustdir,
)
logger.info(cmd)
os.system(cmd)
Expand All @@ -133,7 +135,7 @@ def kclust2db(infile, tmpdir=".", s=1.12, ncpu=1): # pylint: disable=redefined-
kClust_mkAln=bin_dict["kClust_mkAln"],
clustalo=bin_dict["clustalo"],
ncpu=ncpu,
tmpdir=tmpdir,
tmpdir=kclustdir,
)
logger.info(cmd)
with subprocess.Popen(cmd, shell=True, text=True,
Expand All @@ -147,10 +149,10 @@ def kclust2db(infile, tmpdir=".", s=1.12, ncpu=1): # pylint: disable=redefined-
cmd = reformat_template.substitute(
reformat=bin_dict["reformat"],
filename=filename,
tmpdir=tmpdir,
tmpdir=a3mdir,
basename=os.path.basename(os.path.splitext(filename)[0]),
)
logger.info(cmd)
logger.debug(cmd)
os.system(cmd)

# logger.info("#### build hhblitsdb ####")
Expand Down
6 changes: 3 additions & 3 deletions msa_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def run_jackblits(query_fasta, db_list, ncpu, hhblits_prefix, jackblits_prefix):
a3mdir = kclust2db(a3mdir,
s=id2s_dict[30],
ncpu=ncpu,
tmpdir=os.path.dirname(jackblits_prefix))
tmpdir=jackblits_prefix)
# else:
# ### split jackhmmer hits into a3m ###
# # a3mdir = jackblits_prefix + "-mya3m"
Expand Down Expand Up @@ -632,7 +632,7 @@ def run_bfd(query_fasta, db_list, ncpu, hhblits_prefix, jackblits_prefix,
a3mdir = kclust2db(a3mdir,
s=id2s_dict[30],
ncpu=ncpu,
tmpdir=os.path.dirname(bfd_prefix))
tmpdir=bfd_prefix)
# else:
# ### split jackhmmer hits into a3m ###
# # a3mdir = jackblits_prefix + "-mya3m"
Expand Down Expand Up @@ -804,7 +804,7 @@ def run_hmsblits(query_fasta, sequence, hhblits_prefix, db_list, ncpu, # pylint
a3mdir = kclust2db(a3mdir,
s=id2s_dict[30],
ncpu=ncpu,
tmpdir=os.path.dirname(hmmsearch_prefix))
tmpdir=hmmsearch_prefix)
# else:
# ### split hmmsearch hits into a3m ###
# # a3mdir = hmmsearch_prefix + "-mya3m"
Expand Down

0 comments on commit a3cc41a

Please sign in to comment.