# Blast options and task
# updated to blast+ version: 2.2.30+
#
# Options currently implemented for engine are:
# - tblastn
# - blastn
# the default in user.yml is tblastn
engine: tblastn
#
#
# separate each transcriptome into an individual folder;
# this will create many folders in the output directory,
# each ending with the database name
separate_db: true
#
#
# number of threads to use
# only valid with the separate_db option
use_threads: 2
#
# path to the debugging file (relative to user.yml); change it as needed
debug:
  file: output/log.txt
#
#
# Opts are engine specific; the defaults in user.yml
# are for tblastn
#
# Example of valid options (as of the writing of this file):
# - for tblastn (default in user.yml):
# opts: "-max_target_seqs 500 -evalue 1E-20"
# - for blastn:
# opts: "-dust no -max_target_seqs 500 -evalue 1E-20"
opts: "-max_target_seqs 500 -evalue 1E-20"
#
#
#
# Thresholds that filter results by identity
# values can range between 0 and 1 (i.e. 0% and 100%)
identity:
  # lower bound of the identity range
  min: .40
  # upper bound of the identity range
  max: 1
#
#
# Filtering options
#
prune_identical:
  # use_worst: will pick the result with the highest identity if false
  #   and the lowest identity if true
  use_worst: false
  # first: the initial filtering step will remove all results with identical
  #   sseqid and pick the best/worst (depending on the use_worst option)
  first: sseqid
  # list: list of subsequent filtering columns, works the same as first
  #   (see the commented example below)
  list:
    - nt_aligned_seq
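  # example with an additional filtering column (illustrative only; the column
  # name must exist in the trimmed.csv results, 'qseqid' is just a placeholder):
  #   list:
  #     - nt_aligned_seq
  #     - qseqid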
#
#
# directory where CSV files are kept to add annotation to the results
# each CSV should have at least 3 columns, with headers
# the CSVs must be tab-separated (\t)
#
# columns 1 & 2: headers should match columns in the trimmed.csv results;
# each line that matches will have the following columns appended
#
# columns 3 and on: the header will be the header of the new column, and the
# information in these columns will be added to the matched elements
#
# example:
#   db          sseqid     expression_level
#   blackberry  comp20_u1  23
#   blackberry  comp33_u1  33
#   bilberry    comp33_u1  42
#   ....
annotation_dir: "db_and_queries/annotation"
#
# Directory that holds all database files in blastdb format
# if you need to convert a fasta file to this format, you
# can use the tool from blast+:
# $ makeblastdb
# or put the files in db_and_queries/import_db and run the
# script that is in that folder
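#
# Illustrative makeblastdb invocation for a nucleotide database
# ('transcriptome.fasta' and 'my_db' are placeholder names):
#   $ makeblastdb -in transcriptome.fasta -dbtype nucl -out db_and_queries/db/my_db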
db:
  parent: db_and_queries/db
  # if the list is empty, all blast dbs in parent that have
  # a .nhr extension will be used (see the commented example below)
  list:
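  # example with explicit databases (illustrative; 'blackberry_db' and
  # 'bilberry_db' are placeholder database names under 'parent'):
  #   list:
  #     - blackberry_db
  #     - bilberry_db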
#
# Directories that hold all queries in the form of fasta files
# All query files must have one of these extensions:
# .fasta .query .fas .fna
query:
  parent: db_and_queries
  # multiple folders holding queries are supported; add new lines
  # under the 'folders' option (see the commented example below)
  # note: all new lines must follow the indentation and start
  #   with a dash '-' (without quotes)
  folders:
    - queries
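  # example with two query folders (illustrative; 'more_queries' is a
  # placeholder name for a second folder under 'parent'):
  #   folders:
  #     - queries
  #     - more_queries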
#
# Configuration for finding the longest ORF
# TODO:
# - max: maximum size of sequence, Inf
orf:
  # stop_codon: sequences that indicate where the ORF ends
  stop_codon:
    - tag
    - taa
    - tga
  # start_codon: sequences that indicate where the ORF begins
  start_codon:
    - atg
  # reverse: search the reverse sequence
  reverse: true
  # direct: search the given sequence (at least one of direct or reverse must be enabled)
  direct: true
  # min: minimum size of sequence
  min: 120