-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathconfig.yaml
55 lines (55 loc) · 2.78 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# ------ Resources ------
# Number of threads given to a single task.
threads: 8
# GB of RAM that bedtools and rpsblast are expected to take.
task_memory: 200
# Maximum quantity of memory that snakemake is allowed to take.
Percent_memory: 0.5
# ------ Output folder ------
# Path to the output folder.
execution_directory: '~/path/to/output/folder/'

# ------ Path to data folder ------
# Requirements for the data folder:
#   - all samples must be stored in independent folders; the folder name
#     later defines the sample name in profiles
#   - only paired reads
#   - there must be a unique R1 file and a unique R2 file
#   - "R1" and "R2" must be in the filenames
#   - only the following extensions: .fq, .fq.gz, .fastq, .fastq.gz,
#     .fa, .fa.gz, .fasta, .fasta.gz
data: '~/path/to/data/folder/'

# ------ Samples preprocessing ------
# Database used for read filtering prior to the pipeline.
filtering: '/path/to/fasta.file'
# ------ Assembly parameters ------
assembly:
  # Additional parameters to be passed to megahit.
  parameters: '--k-min 29'
  # A subfolder name where the analysis is done | a regular expression to
  # select which samples to run.
  # NOTE(review): YAML reads this value as a one-element list holding the
  # single string "folder_name|sample*" - confirm the consumer expects that.
  per_sample: ['folder_name|sample*']
  # Each key under `groups` names a directory; the entries below create
  # 3 directories: PerAD/AD7/AD7_1_11, PerAD/AD1 and AD2.
  groups:
    # Any regular expression that `ls` can support may be used here. This one
    # matches AD7_W1 to AD7_W11, ending or not with "repeat".
    PerAD/AD7/AD7_1_11: ['AD7_W@(?([0-9])|1[0-1])?(_Repeat?)']
    PerAD/AD1: ['*']
    AD2: ['sample1']
# ------ Binning parameters ------
binning:
  concoct:
    # Minimum contig size considered.
    contig_size: 1000
  metabat2:
    # Minimum contig size considered.
    contig_size: 1500
# ------ Annotation parameters ------
# Only diamond is supported for now.
annotation:
  diamond:
    CARD:
      db: [/mnt/gpfs/seb/Database/CARD/CARD_DB_0.9.24.dmnd]
      # Annotation is a tsv file: 1st column is the gene name, 2nd is the new
      # name (e.g. KO, COG); all other columns are optional/additional info
      # (function, definition, pathway...).
      annotation: [/mnt/gpfs/seb/Database/Kihyun_Lee_Databases/CARD_Annotation_V2.tsv]
      # Thresholds, in order: min_Bitscore, max_Evalue, min_Pid,
      # min_subject_pid, min_coverage, min_Query_coverage.
      # max_Evalue is written with a decimal point (1.0e-5, not 1e-5) so that
      # YAML 1.1 loaders such as PyYAML resolve it as a float, not a string.
      filter: [0, 1.0e-5, 0.8, 0, 0.2, 0.8]
    IntI:
      db: [/mnt/gpfs/seb/Database/Kihyun_Lee_Databases/IntI.db.dmnd]
      # Annotation can be none; in this case only the gene name appears.
      # (`none` is a plain string here, not a YAML null.)
      annotation: [none]
      # Thresholds, in order: min_Bitscore, max_Evalue, min_Pid,
      # min_subject_pid, min_coverage, min_Query_coverage.
      # max_Evalue keeps its decimal point for the same reason as above.
      filter: [0, 1.0e-5, 0.8, 0, 0.2, 0.8]
# NOTE(review): the nesting level of the keys below is not visible in this
# copy of the file - confirm which parent mapping each one belongs to before
# re-indenting.
# Colour given as a quoted hex string; the quotes keep `#` from starting a
# YAML comment.
color: "#ff0000" # orf graph
# Database locations. The /mnt/gpfs/seb/... values look site-specific -
# presumably to be replaced with local paths (confirm).
cat_db: /mnt/gpfs/seb/Database/NR_Cat
checkm: /mnt/gpfs/seb/Database/CheckM
ip_db: /mnt/gpfs/seb/Database/ip
kraken_db: /mnt/gpfs/seb/Database/Kraken_Standard
# kofamscan inputs: judging by the path names, an HMM profile file and a KO
# list file.
kofamscan:
profiles: /mnt/gpfs/seb/Database/KEGG/kofamscan/ko_hmm.hmm
ko_list: /mnt/gpfs/seb/Database/KEGG/kofamscan/ko_list