-
Notifications
You must be signed in to change notification settings - Fork 5
/
settings.conf
72 lines (57 loc) · 3.83 KB
/
settings.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/bin/bash
## AutoCoEv CONFIGURATION FILE
## INPUT FILES
# FOLDER (not a single file) that holds the protein list(s).
PROTEIN="proteins/"
# FILE with the list of species.
SPECIES="placental.tsv"
# External species tree file (e.g. from TimeTree). Specify it if PhyML/PRANK need a guide.
EXTTREE="placental.nwk"
# List of defined protein pairs (e.g. "negatome_mouse_pairs.tsv").
# Needed only if: PAIRINGMANNER="defined"
PAIRLST=""
## REFERENCE ORGANISM AND ORTHOLOGUES
# Taxid of the reference organism, e.g. "10090" for M. musculus or "9606" for H. sapiens.
ORGANISM="10090"
# Level at which to search for orthologues:
#   2759 (Eukaryota); 33208 (Metazoa); 7742 (Vertebrata); 32523 (Tetrapoda); 40674 (Mammalia)
LEVEL="40674"
## WORKING AND DATABASE DIRS
# Working folder.
TMP="/var/tmp/CORUM/3047.mmus"
# Folder where the databases are unpacked.
DTB="/var/tmp/DB10v1"
## THREADS UTILIZATION
# Number of (logical) cores to use. Detected automatically, but you can replace
# the whole expression with a fixed number.
# Detection order: nproc, then getconf _NPROCESSORS_ONLN, then a literal 1.
# The fallbacks keep THREADS from ending up empty when nproc is missing or fails.
THREADS="$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)"
## BLAST OPTIONS
# Detailed BLAST results ("yes", "no").
DETBLAST="yes"
# Minimum allowed identity (%) to the reference organism.
PIDENT="35.000"
# Maximum allowed gaps (%) to the reference organism.
PGAPS="25"
# Number of threads to use for BLAST runs. This may or may not work. Leave blank.
# NOTE(review): the value is "4" although the note above says to leave it blank -- confirm which is intended.
BLASTCORES="4"
## ORTHOLOGUES ASSESSMENT BY GUIDANCE
# MSA method to use (see below for options; presumably the methods listed for MSAMETHOD -- confirm).
GUIDANCEMSA="muscle"
# Sequence cutoff value, used to exclude too divergent sequences (0: no cut, 0.95, 0.98).
GUIDANCECUT="0.95"
# Keep or clean the additional results ("keep", "clean")? Not implemented yet.
GUIDANCEKEEP="keep"
## MSA OPTIONS
# MSA method to use: "mafft", "muscle" or "prank".
# Supported MAFFT aliases: "mafft-linsi", "mafft-ginsi", "mafft-einsi", "mafft-fftns", "mafft-fftnsi".
MSAMETHOD="prank"
# Any additional options to pass to MUSCLE.
MUSCLEOPTIONS=""
# Any additional options to pass to MAFFT.
MAFFTOPTIONS=""
# Any additional options to pass to PRANK.
PRANKOPTIONS=""
# Use an external guide tree for PRANK ("exguide", "noguide")?
PRANKGUIDE="noguide"
# Gblocks options, e.g. allowed gaps: "-b5=h" (half), "-b5=n" (none), "-b5=a" (all).
GBLOCKSOPT="-b5=h"
## PhyML OPTIONS
# Any additional options to pass to PhyML (e.g. "-m Blosum62").
PHYMLOPTIONS=""
# Use an external guide tree for PhyML ("exguide", "noguide")?
PHYMLGUIDE="noguide"
# Root the generated trees by TreeBeST? ("rooted" or "noroot")
TREESROOT="rooted"
## PAIRING
# Pairing manner ("all" or "defined").
PAIRINGMANNER="all"
# Minimum number of common species per protein pair.
MINCOMMONSPCS="20"
# Tree to use with CAPS ("auto", "phyml", "external").
TREESCAPS="auto"
# Divide the folders of protein pairs into groups of e.g. 1000.
INCR="1000"
## CAPS RUN-TIME OPTIONS
# Alpha value for the threshold cut-off. Do NOT leave blank.
ALPHA="0.01"
# Bootstrap threshold. Do NOT leave blank.
BOOT="0.6"
# Any additional options to pass to CAPS, such as number of alignments to simulate ("-r 100"),
# convergence ("-c"), or gap ("-g 0.8") threshold (seems buggy).
CAPSOPTIONS="-c"
# Reference organism sequence for the CAPS run; set automatically from ORGANISM.
REFER="-H $ORGANISM"
# Post-run P-value cutoff; equal to ALPHA by default.
PVALUE="${ALPHA}"
### DATABASES SECTION BELOW ###
## DATABASES version
# Databases download version.
ORTHODBVER="v101"
## Databases. Names only, no ".tab" or ".gz" file extensions!
# UniProt, ENSEMBL, NCBI, GO and InterPro ids associated with Ortho DB gene.
GENEXREFALL="odb10v1_gene_xrefs"
# OGs to genes correspondence.
OG2GENESALL="odb10v1_OG2genes"
# AA sequence of the longest isoform for all genes, fasta formatted.
ALLFASTA="odb10v1_all_fasta"
## MD5SUMs of databases (gzipped). Change accordingly if version is different!
# MD5 of odb10v1_gene_xrefs.tab.gz
GENEXREFALLM5="3ab6d2efdc43ed051591514a3cc9044e"
# MD5 of odb10v1_OG2genes.tab.gz
OG2GENESALLM5="33e63fa97ee420707cd3cddcb5e282a6"
# MD5 of odb10v1_all_fasta.tab.gz
ALLFASTAM5="831ef830fff549857a4c8d1639a760cb"