Skip to content

Commit

Permalink
merge branch dev
Browse files Browse the repository at this point in the history
  • Loading branch information
mariabernard committed Aug 8, 2017
2 parents 204f187 + 9c6d2df commit fa5b2e0
Show file tree
Hide file tree
Showing 80 changed files with 1,093 additions and 422 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
FROGS.Rproj
24 changes: 21 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,19 +168,26 @@
RScript
Version : >= 3.3.0
Named as : RScript
Tools : all FROGS_Phyloseq and FROGS_DESeq
Tools : all FROGS_Phyloseq
Download : https://cran.r-project.org/

Phangorn R package
Version : depending on your R version
Tools : FROGS_Tree
Installation in R session : # https://cran.r-project.org/web/packages/phangorn/index.html
install.packages("phangorn")
Test in R session : library(phangorn)

Rmarkdown R package
Version : depending on your R version
Tools : all FROGS_Phyloseq and FROGS_DESeq
Tools : all FROGS_Phyloseq
Install in R session : # https://cran.r-project.org/web/packages/rmarkdown/index.html
install.packages("rmarkdown")

Pandoc
Version : >= 1.12.3
Named as : pandoc
Tools : all FROGS_Phyloseq and FROGS_DESeq
Tools : all FROGS_Phyloseq
Download/Installation : # http://pandoc.org/installing.html#linux or simply soft-link the pandoc binary from the RStudio path (if you have RStudio installed)

Phyloseq R package
Expand All @@ -204,6 +211,7 @@
install.packages("gridExtra")
Test in R session : library(gridExtra)


### 4. Check installation
To check your installation you can type:
cd <FROGS_PATH>/test
Expand Down Expand Up @@ -259,6 +267,7 @@
Example:
...
<section id="FROGS_wrappers" name="FROGS">
<label text="OTUs reconstruction" id="FROGS_OTU" />
<tool file="FROGS/app/upload_tar.xml" />
<tool file="FROGS/app/demultiplex.xml" />
<tool file="FROGS/app/preprocess.xml" />
Expand All @@ -272,6 +281,15 @@
<tool file="FROGS/app/biom_to_tsv.xml" />
<tool file="FROGS/app/tsv_to_biom.xml" />
<tool file="FROGS/app/normalisation.xml" />
<tool file="FROGS/app/tree.xml" />
<label text="OTUs structure and composition analysis" id="FROGS_Phyloseq" />
<tool file="FROGS/app/r_import_data.xml" />
<tool file="FROGS/app/r_composition.xml" />
<tool file="FROGS/app/r_alpha_diversity.xml" />
<tool file="FROGS/app/r_beta_diversity.xml" />
<tool file="FROGS/app/r_structure.xml" />
<tool file="FROGS/app/r_clustering.xml" />
<tool file="FROGS/app/r_manova.xml" />
</section>
...
Note:
Expand Down
4 changes: 4 additions & 0 deletions RELEASES_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
* FROGS Phyloseq Clustering
* FROGS Phyloseq Manova

### libexec program added:
* rooted_tree.R : R script to root the FastTree tree (used by Tree)

### Bug fixes:
* Preprocess : min overlap at least equal to 1
* biom2tsv : not working with stdBiom containing RDP affiliation

### Functions added:
* Preprocess: add Flash mismatch rate option
Expand Down
8 changes: 4 additions & 4 deletions assessment/bin/assessRealMock.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
__license__ = 'GNU General Public License'
__version__ = '1.0.0'
__email__ = '[email protected]'
__status__ = 'dev'
__status__ = 'prod'


import re
Expand Down Expand Up @@ -138,15 +138,15 @@ def get_expected( abund_file ):
expected_by_depth = dict()
FH_expected = open(abund_file)
for line in FH_expected:
taxonomy, count = line.strip().split()
taxonomy, count = line.strip().split("\t")
clean_taxonomy = getCleanedTaxonomy(taxonomy)
for rank_depth in range(len(clean_taxonomy)):
rank_taxonomy = ";".join(clean_taxonomy[:rank_depth + 1])
if rank_depth not in expected_by_depth:
expected_by_depth[rank_depth] = dict()
if rank_taxonomy not in expected_by_depth[rank_depth]:
expected_by_depth[rank_depth][rank_taxonomy] = 0
expected_by_depth[rank_depth][rank_taxonomy] += int(count)
expected_by_depth[rank_depth][rank_taxonomy] += float(count)
FH_expected.close()
return expected_by_depth

Expand All @@ -155,7 +155,7 @@ def get_checked( abund_file, checked_sample, taxonomy_key, expected_by_depth ):
checked_by_depth = dict()
biom = BiomIO.from_json(abund_file)
for current_obs in biom.get_observations():
clean_taxonomy = getCleanedTaxonomy(current_obs["metadata"][taxonomy_key])
clean_taxonomy = getCleanedTaxonomy(current_obs["metadata"][taxonomy_key]) if current_obs["metadata"][taxonomy_key] is not None else ["unknown_taxa"]*len(expected_by_depth)
count = biom.get_count(current_obs["id"], checked_sample)
if count > 0:
if clean_taxonomy[len(clean_taxonomy)-1] == "Multi-affiliation":
Expand Down
21 changes: 18 additions & 3 deletions assessment/bin/assessment_4real.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
__author__ = 'Plateforme bioinformatique Toulouse - Sigenae Jouy en Josas'
__copyright__ = 'Copyright (C) 2016 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.1.1'
__version__ = '1.2.1'
__email__ = '[email protected]'
__status__ = 'prod'

Expand Down Expand Up @@ -163,7 +163,7 @@ def uparse(udb_databank, reads_directory, out_biom, out_fasta, min_length, max_l
)########################################################## Problem threads > 1


def mothur(affiliation_databank, affiliation_taxonomy, mothur_databank, mothur_taxonomy, reads_directory, out_biom, out_fasta, min_length, max_length, pcr_start, pcr_end, kept_start, kept_end, nb_cpus):
def mothur(affiliation_databank, affiliation_taxonomy, mothur_databank, mothur_taxonomy, reads_directory, out_biom, out_fasta, min_length, max_length, pcr_start, pcr_end, kept_start, kept_end, diffs, nb_cpus):
"""
@summary: Launch mothur pipeline.
@param affiliation_databank: [str] Path to the databank used in affiliation. If affiliation_databank is None the affiliation step is skipped.
Expand All @@ -179,6 +179,7 @@ def mothur(affiliation_databank, affiliation_taxonomy, mothur_databank, mothur_t
@param pcr_end: [int] End position for amplicon region. This value speeds up the pipeline by databank restriction.
@param kept_start: [int] In PCR region the start position kept. All sequences must have same size.
@param kept_end: [int] In PCR region the end position kept. All sequences must have same size.
@param diffs: [int] Number of mismatches allowed when pre-clustering sequences with pre.cluster (1 difference for every 100 bp of sequence).
@param nb_cpus: [int] Number of used CPUs.
"""
exec_cmd(
Expand All @@ -190,6 +191,7 @@ def mothur(affiliation_databank, affiliation_taxonomy, mothur_databank, mothur_t
+ " --pcr-end " + str(pcr_end) \
+ " --kept-start " + str(kept_start) \
+ " --kept-end " + str(kept_end) \
+ " --preclusters-difference " + str(diffs) \
+ (" --affiliation-databank-fasta " + affiliation_databank if affiliation_databank is not None else "") \
+ (" --affiliation-databank-tax " + affiliation_taxonomy if affiliation_taxonomy is not None else "") \
+ " --restriction-databank-fasta " + mothur_databank \
Expand Down Expand Up @@ -293,6 +295,15 @@ def frogs_affiliation(fasta_databank, in_biom, in_fasta, output_biom, nb_cpus):
"pcr_end": 26000,
"kept_start": 1862,
"kept_end": 10588
},
"V4V4_forward100": {
"min_length": 50,
"max_length": 150,
"pcr_start": 12000,
"pcr_end": 26000,
"kept_start": 1862,
"kept_end": 4307,
"diffs" : 1
}
}

Expand All @@ -309,6 +320,10 @@ def frogs_affiliation(fasta_databank, in_biom, in_fasta, output_biom, nb_cpus):
pcr_end = primers_param[current_primers]["pcr_end"]
kept_start = primers_param[current_primers]["kept_start"]
kept_end = primers_param[current_primers]["kept_end"]
if "diffs" in primers_param[current_primers]:
diffs = primers_param[current_primers]["diffs"]
else:
diffs = 2
for current_nb_sp in args.nb_sp:
for dataset_idx in args.datasets:
for current_distribution in args.distribution_laws:
Expand Down Expand Up @@ -369,7 +384,7 @@ def frogs_affiliation(fasta_databank, in_biom, in_fasta, output_biom, nb_cpus):
mothur_assess_affi = os.path.join(mothur_out_dir, "mothur_affiResults.txt")
mothur_assess_clst = os.path.join(mothur_out_dir, "mothur_OTUResults.txt")
# Execution
mothur(args.affiliation_databank_fasta, args.affiliation_databank_tax, args.mothur_databank, args.mothur_taxonomy, reads_directory, mothur_biom, mothur_fasta, min_length, max_length, pcr_start, pcr_end, kept_start, kept_end, args.nb_cpus)
mothur(args.affiliation_databank_fasta, args.affiliation_databank_tax, args.mothur_databank, args.mothur_taxonomy, reads_directory, mothur_biom, mothur_fasta, min_length, max_length, pcr_start, pcr_end, kept_start, kept_end, diffs, args.nb_cpus)

# QIIME
if "qiime" in args.pipelines:
Expand Down
13 changes: 6 additions & 7 deletions assessment/bin/qiime_4real.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,22 +72,21 @@ def exec_cmd( cmd, output=None ):
file_list=",".join([os.path.join(args.input_folder,f) for f in os.listdir(args.input_folder) ] )
sample_name_list=",".join([f.split("-")[0] for f in os.listdir(args.input_folder)])

exec_cmd("qiime; split_libraries_fastq.py -i " + file_list \
exec_cmd("split_libraries_fastq.py -i " + file_list \
+ " --sample_ids " + sample_name_list \
+" -o " + os.path.join(working_path_prefix, "qiime_preprocess") \
+" --barcode_type 'not-barcoded' " \
+" --phred_offset 33")
+" --barcode_type 'not-barcoded' " )

merge_fasta=os.path.join(working_path_prefix,"qiime_preprocess","seqs.fna")

# Launch chimera identification (in Qiime)
exec_cmd("qiime; identify_chimeric_seqs.py -i "+ merge_fasta \
exec_cmd("identify_chimeric_seqs.py -i "+ merge_fasta \
+ " -m usearch61 --suppress_usearch61_ref " \
+ " -o " + os.path.join(working_path_prefix,"usearch61_chimeras") )

# Remove chimera
qiime_input_fasta=os.path.join(working_path_prefix,"usearch61_chimeras","seqs_chimeras_filtered.fna")
exec_cmd("qiime; filter_fasta.py -f " + merge_fasta \
exec_cmd("filter_fasta.py -f " + merge_fasta \
+ " -o " + qiime_input_fasta \
+ " -s " + os.path.join(working_path_prefix,"usearch61_chimeras","chimeras.txt") \
+ " -n")
Expand All @@ -97,7 +96,7 @@ def exec_cmd( cmd, output=None ):
if args.nb_cpus > 1 :
cpus_opt = " -aO "+str(args.nb_cpus)

qiime_command ="qiime; pick_open_reference_otus.py -i " + qiime_input_fasta\
qiime_command ="pick_open_reference_otus.py -i " + qiime_input_fasta\
+ cpus_opt \
+ " -o "+ os.path.join(working_path_prefix, "pick_open_reference_otus") \
+ " -r "+ args.ref_fasta \
Expand All @@ -107,7 +106,7 @@ def exec_cmd( cmd, output=None ):

qiime_fasta=os.path.join(working_path_prefix, "pick_open_reference_otus","rep_set.fna")
if args.ref_tax is not None:
exec_cmd("qiime; assign_taxonomy.py -o " + os.path.join(working_path_prefix,"uclust_assigned_taxonomy") \
exec_cmd("assign_taxonomy.py -o " + os.path.join(working_path_prefix,"uclust_assigned_taxonomy") \
+ " -i " + qiime_fasta \
+ " -t " + args.ref_tax \
+ " -r " + args.ref_fasta )
Expand Down
2 changes: 1 addition & 1 deletion lib/frogsBiom.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '0.13.0'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import re
Expand Down
2 changes: 1 addition & 1 deletion lib/frogsNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '0.2.1'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'dev'


Expand Down
2 changes: 1 addition & 1 deletion lib/frogsSequenceIO.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.0.2'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import gzip
Expand Down
6 changes: 3 additions & 3 deletions lib/frogsUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '0.2.0'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import os
Expand Down Expand Up @@ -133,15 +133,15 @@ def submit(self, log_file=None):
if log_file is not None:
FH_log = Logger( log_file )
FH_log.write( '# ' + self.description + ' (' + os.path.basename(self.program) + ' version : ' + self.get_version() + ')\n' )
FH_log.write( 'Command:\n\t' + self.get_cmd() + '\n' )
FH_log.write( 'Command:\n\t' + self.get_cmd() + '\n\n' )
FH_log.write( 'Execution:\n\tstart: ' + time.strftime("%d %b %Y %H:%M:%S", time.localtime()) + '\n' )
FH_log.close()
# Process
subprocess.check_output( self.get_cmd(), shell=True )
# Log
if log_file is not None:
FH_log = Logger( log_file )
FH_log.write( '\tend: ' + time.strftime("%d %b %Y %H:%M:%S", time.localtime()) + '\n' )
FH_log.write( '\tend: ' + time.strftime("%d %b %Y %H:%M:%S", time.localtime()) + '\n\n' )
FH_log.close()
# Post-process results
self.parser(log_file)
Expand Down
2 changes: 1 addition & 1 deletion libexec/addAffiliation2biom.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '2.3.0'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import os
Expand Down
14 changes: 10 additions & 4 deletions libexec/biom2tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
__author__ = 'Maria Bernard - Sigenae AND Frederic Escudie - Plateforme bioinformatique Toulouse'
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.4.0'
__email__ = 'frogs@toulouse.inra.fr'
__version__ = '1.4.1'
__email__ = '[email protected]'
__status__ = 'prod'

import os
Expand Down Expand Up @@ -62,12 +62,18 @@ def observation_line_parts( observation, count_by_sample, fields, list_separator
elif current_field == '@observation_sum':
line.append( str(sum(count_by_sample)) )
elif current_field == "@rdp_tax_and_bootstrap":
rdp_and_bootstrap = ""
rdp_and_bootstrap = ""
if issubclass(observation['metadata']["rdp_taxonomy"].__class__, list) :
rdp_taxonomy = observation['metadata']["rdp_taxonomy"]
rdp_bootstrap = observation['metadata']["rdp_bootstrap"]
for i, tax in enumerate(rdp_taxonomy):
rdp_and_bootstrap += tax + ";(" + str(rdp_bootstrap[i]) + ");" # tax1;(boots1);tax2;(boots2);
line.append(str(rdp_and_bootstrap))
else:
rdp_taxonomy = observation['metadata']["rdp_taxonomy"].split(";")
rdp_bootstrap = observation['metadata']["rdp_bootstrap"].split(";")
for i, tax in enumerate(rdp_taxonomy):
rdp_and_bootstrap += tax + ";(" + str(rdp_bootstrap[i]) + ");" # tax1;(boots1);tax2;(boots2);
line.append(str(rdp_and_bootstrap))
elif current_field == "@blast_perc_identity":
if len(observation['metadata']["blast_affiliations"]) > 0:
line.append( str(uniq(observation['metadata']["blast_affiliations"], "perc_identity", "multi-identity")) )
Expand Down
2 changes: 1 addition & 1 deletion libexec/biomFastaUpdate.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.0.1'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import os
Expand Down
2 changes: 1 addition & 1 deletion libexec/biomTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '0.10.1'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'beta'

import os
Expand Down
2 changes: 1 addition & 1 deletion libexec/derepSamples.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.5.0'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import os
Expand Down
2 changes: 1 addition & 1 deletion libexec/extractSwarmsFasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.3.1'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import os
Expand Down
2 changes: 1 addition & 1 deletion libexec/filterSeq.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.4.0'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import os
Expand Down
2 changes: 1 addition & 1 deletion libexec/multiAffiFromBiom.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.3.0'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import os
Expand Down
2 changes: 1 addition & 1 deletion libexec/parallelChimera.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '0.7.1'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import os
Expand Down
2 changes: 1 addition & 1 deletion libexec/remove454Adapt.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
__copyright__ = 'Copyright (C) 2015 INRA'
__license__ = 'GNU General Public License'
__version__ = '0.5.0'
__email__ = 'frogs@toulouse.inra.fr'
__email__ = '[email protected]'
__status__ = 'prod'

import os
Expand Down
Loading

0 comments on commit fa5b2e0

Please sign in to comment.