-
Notifications
You must be signed in to change notification settings - Fork 25
/
conf_all_defaults.txt
55 lines (41 loc) · 3.97 KB
/
conf_all_defaults.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# 本配置文件用于给GETA程序运行中调用的各项命令提供其详细参数。位于GETA软件目录下的conf_all_defaults.txt文件是GETA程序运行所必须的,为GETA提供了各项命令默认的参数信息,缺少则程序无法运行。
[para_RepeatMasker]
--min_coverge_ratio 0.25
# GETA调用para_RepeatMasker命令进行并行化RepeatMasker分析,能加快对基因组的重复序列分析速度。--min_coverge_ratio参数表示当鉴定的基因组序列和目标转座子序列重叠超过指定阈值,才认可其为转座子序列并进行屏蔽。
[trimmomatic]
TruSeq3-PE-2.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:50 TOPHRED33
# In the module NGSReads_prediction, trimmomatic was invoked to process the NGS reads.
# GETA调用NGSReads_prediction程序进行分析时,使用了trimmomatic进行转录组数据的质量控制。
[hisat2]
--min-intronlen 20 --max-intronlen 20000 --dta --score-min L,0.0,-0.4 -k 1
# In the module NGSReads_prediction, hisat2 was invoked to align NGS reads to genome sequences. During the step of hisat2: -x was set to "genome"; -p was passed by --cpu; --rna-strandness RF was passed by --strand_specific.
[Sam2Transfrag]
--fraction 0.05 --min_expressed_base_depth 2 --max_expressed_base_depth 50 --min_junction_depth 2 --max_junction_depth 50 --min_fragment_count_per_transfrag 10
# In the module NGSReads_prediction, NGSReads_prediction.02Sam2Transfrag was invoked to get transcripts from SAM file.
# During the step of sam2transfrag: --no_strand_specific was passed reversely by --strand_specific;
# The default values mean: the dynamic coverage threshold of each mapped region (derived from the SAM file) is determined as the maximum base depth of that region * 0.05, and this threshold is constrained to lie between 2 and 50.
[NGSReads_prediction]
--min_protein_length 100 --genetic_code 1
[homolog_prediction]
--identity 0.2 --evalue 1e-9 --homolog_coverage 0.3 --max_hits_num_per_match_region 10 --max_hit_num_per_single_species 2 --method all --threshod_ratio_of_intron_Supported_times 0.0
[GFF3_merging_and_removing_redundancy]
--intron_score 0.3 --complete5p_score 0.5 --complete3p_score 0.5 --first_file_score 0.5 --overlap_coverage 0.3
[geneModels2AugusutsTrainingInput]
--min_evalue 1e-9 --min_identity 0.8 --min_coverage_ratio 0.8 --min_cds_num 2 --min_cds_length 450 --min_cds_exon_ratio 0.60
# During the step of geneModels2AugusutsTrainingInput: --cpu was passed by --CPU; --out_prefix was set to ati.
# If the number of gene models in ati.filter2.gff3 is too small, it is recommended to decrease --min_cds_exon_ratio and increase --min_coverage_ratio.
[BGM2AT]
--min_gene_number_for_augustus_training 500 --gene_number_for_accuracy_detection 200 --gene_models_minimum_num_for_test 100 --gene_models_maximum_num_for_test 600 --gene_models_ratio_for_test 0.2 --gene_models_num_per_test 200 --pstep 6 --method1_rounds 6 --method2_rounds 3 --optimize_augustus_method 3 --min_intron_len 20
# During the step of BGM2AT: --flanking_length was automatically calculated; --stopAfterFirstEtraining was set or not set at different steps; --onlytrain_GFF3 was set to an intermediate file.
[prepareAugusutusHints]
--margin 20 --CDS_ratio 0.333
# Note: to obtain the exonpart or CDSpart hints, a margin of 20 bp is removed.
[paraAugusutusWithHints]
--gene_prefix augustus --min_intron_len 20
# During the step of paraAugusutusWithHints: --species was passed by --augustus_species; --cpu was passed by --CPU; --segmentSize and --overlapSize were automatically calculated.
[GFF3_remove_genes_in_repeats]
--ratio 0.6 --min_coverge_ratio 0.6 --ignore_Simple_repeat --ignore_Unknown
[pickout_reliable_geneModels]
--reliable_keyword excellent --min_exon_hint_ratio_for_augustus 0.4 --threshold_CDS_ratio 0.5
[GFF3_database_validation]
--hmm_evalue1 1e-5 --hmm_evalue2 1e-3 --hmm_length 80 --hmm_coverage 0.25 --blast_evalue 1e-9 --blast_CIP 0.4 --blast_coverage 0.4 --hmm_evalue_onlyOne 1e-7 --hmm_coverage_onlyOne 0.5 --blast_evalue_onlyOne 1e-12 --blast_CIP_onlyOne 0.6 --blast_coverage_onlyOne 0.6