From 9fed4f895e6462e5afcd2fa3fdf898f2d7d43a10 Mon Sep 17 00:00:00 2001 From: nickp60 Date: Sat, 27 Jan 2018 18:03:18 +0000 Subject: [PATCH] update plotting scripts and manifest --- .../entropy_manifest.tab | 6 ++--- riboSeed/riboStructure.py | 2 +- scripts/recreating_tables_and_figures.md | 24 +++++++++++++++++++ scripts/runEntropyComparisons.sh | 19 ++++++++++++++- 4 files changed, 46 insertions(+), 5 deletions(-) diff --git a/2018-01-17-within_vs_across/entropy_manifest.tab b/2018-01-17-within_vs_across/entropy_manifest.tab index a64a1a9..69835b4 100644 --- a/2018-01-17-within_vs_across/entropy_manifest.tab +++ b/2018-01-17-within_vs_across/entropy_manifest.tab @@ -11,9 +11,9 @@ NZ_CP012981.1 NZ_CP012982.1,NZ_CP012983.1 Burkholderia cepacia ATCC25416 APZ15_R NC_008095.1 NA Myxococcus xanthus DK_1622 MXAN_RS01560 WP_011550462.1 Proteobacteria Deltaproteobacteria Myxococcales Myxococcaceae SRR4236978 DSM_16526 NC_020555.1 NA Helicobacter cinaedi 18818=ATCC_BAA-847 HCBAA847_RS00680 WP_002956213.1 Proteobacteria Epsilonproteobacteria Campylobacterales Helicobacteraceae DRR090193 MRY12-0051 NC_008570.1 NA Aeromonas hydrophila ATCC7966 AHA_0076 YP_854603.1 Proteobacteria Gammaproteobacteria Aeromonadales Aeromonadaceae GAGE-B SSU -NC_000913.3 NA Escherichia coli MG1655 b0200 NP_414742.1 Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae Simulated -BA000007.2 NA Escherichia coli Sakai ECs0202 BAB33625.1 Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae Simulated -CP003200.1 NA Klebsiella pneumoniae HS11286 KPHS_00140 AEW58712.1 Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae Simulated +NC_000913.3 NA Escherichia coli MG1655 b0200 NP_414742.1 Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae Simulated NA +BA000007.2 NA Escherichia coli Sakai ECs0202 BAB33625.1 Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae Simulated NA +CP003200.1 NA Klebsiella pneumoniae HS11286 
KPHS_00140 AEW58712.1 Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae Simulated NA CP015377.1 NA Pseudomonas aeruginosa BAMCPA07-48 A6R75_01940 ANA68941.1 Proteobacteria Gammaproteobacteria Pseudomonadaceae Pseudomonadaceae SRR3500543 BAMCPA07-48-HiSeq NZ_CP017149.1 NA Pseudomonas aeruginosa ATCC15692 BGV84_RS03410 WP_003113167.1 Proteobacteria Gammaproteobacteria Pseudomonadaceae Pseudomonadaceae SRR3500543 BAMCPA07-48-HiSeq NC_002505.1 NC_002506.1 Vibrio cholerae El_tor_st_N16961 VC0058 NP_229717.1 Proteobacteria Gammaproteobacteria Vibrionales Vibrionaceae GAGE-B CO_1032(5) diff --git a/riboSeed/riboStructure.py b/riboSeed/riboStructure.py index 4a293df..9a8a96f 100644 --- a/riboSeed/riboStructure.py +++ b/riboSeed/riboStructure.py @@ -322,7 +322,7 @@ def plot_rDNAs( ax2.xaxis.set_ticks_position('none') # ax.tick_params(axis='y', colors='dimgrey') ax.tick_params(axis='x', colors='dimgrey') - + ax.xaxis.get_major_formatter().set_scientific(False) # set axis labels ax.xaxis.label.set_color('black') diff --git a/scripts/recreating_tables_and_figures.md b/scripts/recreating_tables_and_figures.md index 4c2ce5e..a22d031 100644 --- a/scripts/recreating_tables_and_figures.md +++ b/scripts/recreating_tables_and_figures.md @@ -293,3 +293,27 @@ Then, we ran riboSeed: /home/nw42839/miniconda3/bin/ribo select ./scan/scannedScaffolds.gb -o select /home/nw42839/miniconda3/bin/ribo seed ./select/riboSelect_grouped_loci.txt -r ./scan/scannedScaffolds.gb -F ./SRR2064286_1_sub.fastq -R ./SRR2064286_2_sub.fastq --ref_as_contig trusted --cores 4 --memory 24 -z -o seed_trusted ``` + + +## More entropy: the supplementary figures +For generating the entropy figures within and across genomes, we updated things to use a script: runEntropyComparisons.sh. It's a clunky one, but it works. It reads the entropy_manifest.tab file, and requires a combined archaeal/bacterial assemblies_summary.txt file from NCBI. 
The beginning of the script has comments about how to regenerate that file. In short, I manually inspected each genome and picked a gene close to the first rDNA operon. The script picks up to 25 random genomes of the same genus and species, and runs the analysis outlined above (extract those regions + 10kb up and down, find the rDNAs, extract the rDNAs, align with mafft, plot). It also plots the within-genome rDNA entropy for that genome. We got the sense that the supplementary material was too short, and figured this would lengthen things substantially. (Just kidding, a reviewer asked). + +## The ribo structure analysis +We found several genomes with strange rDNA structures, and downloaded them manually from NCBI. Then, after putting them in a folder called "./2018-01-26-odd_rDNAs/", we ran riboStructure: + +``` +ribo structure ./2018-01-26-odd_rDNAs/ -o oddballs +``` + +As a comparison, because we had the genomes already for the entropy experiment, we copied those all to a new dir: + +``` +cp 2018-01-17-within_vs_across/*/ref/*.fasta ./2018-01-27-normal/ +``` + +and ran riboStructure: + + +``` +rm normal/ -r; ribo structure ./2018-01-27-normal/ -o normal +``` diff --git a/scripts/runEntropyComparisons.sh b/scripts/runEntropyComparisons.sh index 29ec17d..2b0b752 100755 --- a/scripts/runEntropyComparisons.sh +++ b/scripts/runEntropyComparisons.sh @@ -97,7 +97,24 @@ do title="$ ${genus} $ $ ${species} $ ${strain}" echo "$title" ribo snag ./scan_${refacc}/scannedScaffolds.gb ./select_${refacc}/riboSelect_grouped_loci.txt -o ./snag_${refacc}/ --msa_tool mafft -v 3 --title "rDNAs within ${title}" --skip_kmers --skip_blast + cd ../ +done < ./entropy_manifest.tab - cd ../ +# copy and rename results: +mkdir results_figs +counter=1 +while read refacc refother genus species strain gene geneacc phylum order class family sra compstrain +do + echo "Processing $genus $species $strain $geneacc ..." 
+ thisdir="${genus}_${species}_${strain}" + if [ -d "${thisdir}/snag_${gene}" ] + then + + cp ${thisdir}/snag_${refacc}/entropy_plot.png results_figs/${counter}_genome.png + cp ${thisdir}/snag_${gene}/entropy_plot.png results_figs/${counter}_gene.png + counter=$((counter + 1)) + else + echo "no results found" + fi done < ./entropy_manifest.tab