conf_all_defaults.txt

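# This configuration file supplies the detailed parameters for each command invoked while GETA runs. The conf_all_defaults.txt file located in the GETA software directory is required: it provides the default parameters for every command, and GETA cannot run without it.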

[para_RepeatMasker]
--min_coverge_ratio 0.25
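# GETA invokes the para_RepeatMasker command to run RepeatMasker in parallel, which speeds up repeat analysis of the genome. --min_coverge_ratio: an identified genomic sequence is accepted as a transposon sequence and masked only when its overlap with the target transposon sequence exceeds the specified threshold.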

[trimmomatic]
TruSeq3-PE-2.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:50 TOPHRED33
# In the module NGSReads_prediction, trimmomatic was invoked to process the NGS reads, i.e. to perform quality control on the transcriptome sequencing data.

[hisat2]
--min-intronlen 20 --max-intronlen 20000 --dta --score-min L,0.0,-0.4 -k 1
# In the module NGSReads_prediction, hisat2 was invoked to align NGS reads to genome sequences. During the step of hisat2: -x was set to "genome"; -p was passed by --cpu; --rna-strandness RF was passed by --strand_specific.

[Sam2Transfrag]
--fraction 0.05 --min_expressed_base_depth 2 --max_expressed_base_depth 50 --min_junction_depth 2 --max_junction_depth 50 --min_fragment_count_per_transfrag 10
# In the module NGSReads_prediction, NGSReads_prediction.02Sam2Transfrag was invoked to get transcripts from SAM file.
# During the step of sam2transfrag: --no_strand_specific was set as the inverse of --strand_specific (i.e. it was passed only when --strand_specific was not given).
# The default values mean: the dynamic coverage threshold of each mapped region (derived from the SAM file) was determined as the maximum base depth of that region * 0.05, and this threshold must lie between 2 and 50.

[NGSReads_prediction]
--min_protein_length 100 --genetic_code 1

[homolog_prediction]
--identity 0.2 --evalue 1e-9 --homolog_coverage 0.3 --max_hits_num_per_match_region 10 --max_hit_num_per_single_species 2 --method all --threshod_ratio_of_intron_Supported_times 0.0

[GFF3_merging_and_removing_redundancy]
--intron_score 0.3 --complete5p_score 0.5 --complete3p_score 0.5 --first_file_score 0.5 --overlap_coverage 0.3

[geneModels2AugusutsTrainingInput]
--min_evalue 1e-9 --min_identity 0.8 --min_coverage_ratio 0.8 --min_cds_num 2 --min_cds_length 450 --min_cds_exon_ratio 0.60
# During the step of geneModels2AugusutsTrainingInput: --cpu was passed by --CPU; --out_prefix was set to ati.
# If ati.filter2.gff3 contains too few gene models, it is recommended to decrease --min_cds_exon_ratio and increase --min_coverage_ratio.

[BGM2AT]
--min_gene_number_for_augustus_training 500 --gene_number_for_accuracy_detection 200 --gene_models_minimum_num_for_test 100 --gene_models_maximum_num_for_test 600 --gene_models_ratio_for_test 0.2 --gene_models_num_per_test 200 --pstep 6 --method1_rounds 6 --method2_rounds 3 --optimize_augustus_method 3 --min_intron_len 20
# During the step of BGM2AT: --flanking_length was automatically calculated; --stopAfterFirstEtraining was set or not set at different steps; --onlytrain_GFF3 was set to an intermediate file.

[prepareAugusutusHints]
--margin 20 --CDS_ratio 0.333
# Note: to obtain the exonpart or CDSpart hints, the margin of 20 bp (--margin) will be removed.

[paraAugusutusWithHints]
--gene_prefix augustus --min_intron_len 20
# During the step of paraAugusutusWithHints: --species was passed by --augustus_species; --cpu was passed by --CPU; --segmentSize and --overlapSize were automatically calculated.

[GFF3_remove_genes_in_repeats]
--ratio 0.6 --min_coverge_ratio 0.6 --ignore_Simple_repeat --ignore_Unknown

[pickout_reliable_geneModels]
--reliable_keyword excellent --min_exon_hint_ratio_for_augustus 0.4 --threshold_CDS_ratio 0.5

[GFF3_database_validation]
--hmm_evalue1 1e-5 --hmm_evalue2 1e-3 --hmm_length 80 --hmm_coverage 0.25 --blast_evalue 1e-9 --blast_CIP 0.4 --blast_coverage 0.4 --hmm_evalue_onlyOne 1e-7 --hmm_coverage_onlyOne 0.5 --blast_evalue_onlyOne 1e-12 --blast_CIP_onlyOne 0.6 --blast_coverage_onlyOne 0.6