Skip to content

Commit

Permalink
moved from entrez to datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurVM committed Jan 22, 2024
1 parent 2d065be commit c49a110
Show file tree
Hide file tree
Showing 5 changed files with 222 additions and 35 deletions.
12 changes: 6 additions & 6 deletions src/Afanc/autodatabase/runFuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def runAutoDB(args):
""" Run autodatabase pipeline
"""
from ..utilities.makeWD import initAutoDBDirStructure
from ..utilities.getVersions import get_versions_autodatabase
from ..utilities.getVersions import getVersionsAutodatabase

subprocessID = "MAIN"
vprint(
Expand All @@ -30,7 +30,7 @@ def runAutoDB(args):
fasta_db_path = args.fastaDir

## capture python package and software versions for the autodatabase module in a JSON file
get_versions_autodatabase(args)
getVersionsAutodatabase(args)

## download ncbi taxonomy and preprocess fastas
fasta_dict, mapping_dict = preprocessing(args, fasta_db_path)
Expand All @@ -42,13 +42,13 @@ def runAutoDB(args):
makeK2db(args)

## make the variant index from quality controlled assemblies
make_variant_index(args)
makeVariantIndex(args)

## make a Krona chart for pleasing visualisation
makeKronaChart(args)

## clean the output directory
clean_outdir(args)
cleanOutdir(args)

vprint(
"FINISHED",
Expand Down Expand Up @@ -205,7 +205,7 @@ def makeK2db(args):
chdir(args.autoDB_WDir)


def make_variant_index(args):
def makeVariantIndex(args):
""" Generates a variant index of parent child distances
"""
from .makeVariantIndex import make_variant_index
Expand Down Expand Up @@ -251,7 +251,7 @@ def makeKronaChart(args):
chdir(args.autoDB_WDir)


def clean_outdir(args):
def cleanOutdir(args):
""" Cleans the output directory according to provided arguments.
clean : remove the mash working directory.
Expand Down
54 changes: 37 additions & 17 deletions src/Afanc/screen/runFuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def runScreen(args):

## if no_map is True then exit Afanc screen
if args.no_map:
vprint("FINISHED", f"no_map mode finished. Metagenomic report can be found in {out_json}\n", "prGreen")
final_report = makeFinalReport(args, None, None)
vprint("FINISHED", f"no_map mode finished. Metagenomic report can be found in {final_report}\n", "prGreen")
return 0

## parse kraken2 report to a json
Expand All @@ -44,7 +45,7 @@ def runScreen(args):
final_report = makeFinalReport(args, variant_profile, reports)

if args.clean or args.superclean:
final_report = clean_outdir(args, final_report)
final_report = cleanOutdir(args, final_report)

vprint("FINISHED", f"Final report can be found at {final_report}\n", "prGreen")

Expand Down Expand Up @@ -193,7 +194,7 @@ def makeFinalReport(args, variant_profile, reports):
import json
from os import listdir

from Afanc.utilities.getVersions import get_versions_screen
from Afanc.utilities.getVersions import getVersionsScreen

subprocessID = "REPORT"
vprint(
Expand Down Expand Up @@ -228,7 +229,7 @@ def makeFinalReport(args, variant_profile, reports):

## collect versions
jsondict["versions"] = {
"screen" : get_versions_screen()
"screen" : getVersionsScreen()
}

## collect k2 json reports
Expand All @@ -243,25 +244,44 @@ def makeFinalReport(args, variant_profile, reports):
## initialise warnings
event["warnings"] = []

## block to deal with most likely variants
if "closest_variant" in event:
variant_flag = True
taxon_id = str(event["closest_variant"]["taxon_id"])
## handle instances where an assembly cannot be found or no_map mode was used
if "assembly" in event:

## in instances where this hit was subjected to variant profiling, the assembly used for mapping will
## belong to the species rather than the closest variant
if "assembly" in event["closest_variant"]:
assembly = event["closest_variant"]["assembly"]
## block to deal with most likely variants
if "closest_variant" in event:
variant_flag = True
taxon_id = str(event["closest_variant"]["taxon_id"])

## in instances where this hit was subjected to variant profiling, the assembly used for mapping will
## belong to the species rather than the closest variant
if "assembly" in event["closest_variant"]:
assembly = event["closest_variant"]["assembly"]
else:
assembly = event["assembly"]

## no variants
else:
variant_flag = False
assembly = event["assembly"]
taxon_id = str(event["taxon_id"])

## block to deal with instances where there is no assembly
## this is to ensure that a final json is constructed when no_map mode is used
else:
variant_flag = False
assembly = event["assembly"]
taxon_id = str(event["taxon_id"])
## block to deal with most likely variants
if "closest_variant" in event:
variant_flag = True
assembly = None
taxon_id = str(event["closest_variant"]["taxon_id"])

## no variants
else:
variant_flag = False
assembly = None
taxon_id = str(event["taxon_id"])

## block dealing with hits which have an accompanying assembly which reads were mapped to
if not assembly == None:

assembly_prefix = path.basename(path.splitext(assembly)[0])
if assembly_prefix.endswith("_genomic"):
assembly_prefix = assembly_prefix.strip("_genomic")
Expand Down Expand Up @@ -310,7 +330,7 @@ def makeFinalReport(args, variant_profile, reports):
return final_report


def clean_outdir(args, final_report):
def cleanOutdir(args, final_report):
""" Cleans the output directory according to provided arguments.
clean : remove the bt2 working directory.
Expand Down
4 changes: 2 additions & 2 deletions src/Afanc/utilities/getVersions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .runCommands import command


def get_versions_autodatabase(args):
def getVersionsAutodatabase(args):
""" get python package and software versions for afanc-autodatabase
"""

Expand Down Expand Up @@ -34,7 +34,7 @@ def get_versions_autodatabase(args):
json.dump({ "afanc-autodatabase_versions" : version_dict }, fout, indent = 4)


def get_versions_screen():
def getVersionsScreen():
""" Gets python package and software versions for afanc-screen
"""

Expand Down
26 changes: 16 additions & 10 deletions test/afanc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@
import argparse
from argparse import RawTextHelpFormatter

def run_test(args):

def runTest(args):

nomap = True

## tsv for results
tsv_handle = open("./Afanc_test.tsv", 'w')
Expand Down Expand Up @@ -61,16 +64,19 @@ def run_test(args):
print(f"Error at {fq1} {fq2} {truth_val}. FASTQ files must end with either .fq.gz or .fastq.gz. Exiting.")
sys.exit(1)

runline = f"afanc screen -o {prefix} -v {args.variants} -n 10 {args.db} {fq1} {fq2} -c > {prefix}.log"
# print(runline)
if nomap:
runline = f"afanc screen -o {prefix} -v {args.variants} -n 10 {args.db} {fq1} {fq2} -a > {prefix}.log"

else:
runline = f"afanc screen -o {prefix} -v {args.variants} -n 10 {args.db} {fq1} {fq2} -c > {prefix}.log"

if not os.path.exists(prefix):
subprocess.call(runline, shell=True)

results_json = f"./{prefix}/{prefix}.json"

if os.path.exists(results_json):
results = check_hits(results_json, truth_val)
results = checkHits(results_json, truth_val)

else:
result = "RUNFAIL"
Expand All @@ -94,24 +100,24 @@ def run_test(args):

tsv_handle.close()

def checkHits(results_json, truth_val):
    """ Checks the detection events of an Afanc screen JSON report against a known truth value.

    results_json : path to the JSON report to check.
    truth_val : the truth value the reported hits are compared to.

    Returns a dictionary with two keys, "cluster" and "variant", each holding a
    2 element list of [result, hits] as returned by getClusterHits and
    getVariantHits respectively. When the report contains no "Variant_profile"
    block the "variant" entry keeps its placeholder value.
    """

    with open(results_json, "r") as fin:
        jdata = json.load(fin)

    hits = jdata["results"]["Detection_events"]

    cluster_results, cluster_hits = getClusterHits(hits["Clustering_results"], truth_val)

    ## the variant entry defaults to a placeholder so downstream code can always index it
    results = { "cluster" : [cluster_results, cluster_hits], "variant" : [None, [["None", "None"]]] }

    ## variant profiles must be checked with getVariantHits, not getClusterHits
    if "Variant_profile" in hits:
        variant_results, variant_hits = getVariantHits(hits["Variant_profile"], truth_val)
        results["variant"] = [variant_results, variant_hits]

    return results

def get_variant_hits(hits, truth_val):
def getVariantHits(hits, truth_val):

variant_box = []

Expand All @@ -130,7 +136,7 @@ def get_variant_hits(hits, truth_val):

return result, variant_box

def get_cluster_hits(hits, truth_val):
def getClusterHits(hits, truth_val):
""" checks hits against a known truth value
"""

Expand Down Expand Up @@ -184,6 +190,6 @@ def parse_args(argv):
parser.add_argument("run_list", action='store', help="A tab separated txt file containing the paired end .fq paths and a truth value to compare the output to.")
parser.add_argument("variants", action='store', help="A tab separated txt file containing variant info.")

run_test(parser.parse_args(argv))
runTest(parser.parse_args(argv))

parse_args(sys.argv)
Loading

0 comments on commit c49a110

Please sign in to comment.