// nextflow.config

/*
========================================================================================
    nf-ducken Nextflow config file
========================================================================================
    Default config options for all compute environments
----------------------------------------------------------------------------------------
*/

// Global default params, used in configs
params {
    // Layout: one nested scope per tool (cutadapt, dada2, vsearch, classifier,
    // uchime_ref, fastq_split), followed by flat workflow switches, reference
    // URLs and container image names.
    //
    // NOTE(review): many on/off options below are the *strings* "True"/"False",
    // not Groovy booleans — presumably they are forwarded verbatim to the
    // underlying command-line tools; confirm against the consuming processes
    // before changing their type.

    // Process parameters
    cutadapt {
        // Primer removal
        num_cores               = 1
        error_rate              = 0.1
        indels                  = "True"
        times                   = 1
        overlap                 = 3
        match_read_wildcards    = "False"
        match_adapter_wildcards = "True"
        minimum_length          = 1
        discard_untrimmed       = "False"
        // null here presumably means "do not pass this option" — TODO confirm
        max_expected_errors     = null
        max_n                   = null
        quality_cutoff_5end     = 0
        quality_cutoff_3end     = 0
        quality_base            = 33
    }

    dada2 {
        // Single-end reads
        trunc_len = 0
        trim_left = 0
        max_ee    = 2.0

        // Paired-end reads (the _f / _r suffixes presumably denote forward and
        // reverse reads — verify against the process that consumes them)
        trunc_len_f = 0
        trunc_len_r = 0
        trim_left_f = 0
        trim_left_r = 0
        max_ee_f    = 2.0
        max_ee_r    = 2.0
        min_overlap = 12

        // General parameters
        trunc_q                        = 2
        pooling_method                 = "independent"
        chimera_method                 = "consensus"
        min_fold_parent_over_abundance = 1.0
        num_threads                    = 1   // set to 0 to use all available cores
        num_reads_learn                = 1000000
        hashed_feature_ids             = "True"
    }

    vsearch {
        perc_identity = 0.8
        strand        = "plus"
        num_threads   = 1   // set to 0 to launch one thread per CPU
    }

    classifier {
        // Selects which group of options below applies; the default is the
        // sklearn-based classifier.
        method = "sklearn"

        // sklearn-based classifier
        reads_per_batch  = "auto"
        num_jobs         = 1    // set to -1 to launch max worker processes for all CPUs
        pre_dispatch     = "2*n_jobs"
        confidence       = 0.7
        read_orientation = "auto"

        // BLAST+ and VSEARCH consensus classifiers
        max_accepts        = 10
        perc_identity      = 0.8
        query_cov          = 0.8
        strand             = "both"
        min_consensus      = 0.51
        unassignable_label = "Unassigned"

        // Additional params for BLAST+ classifier
        evalue             = 0.001

        // Additional params for VSEARCH classifier
        search_exact   = "False"
        top_hits_only  = "False"
        max_hits       = "all"
        max_rejects    = "all"
        output_no_hits = "True"
        weak_id        = 0.0
        num_threads    = 1   // set to 0 to launch one thread per CPU
    }

    uchime_ref {
        dn          = 1.4
        min_diffs   = 3
        min_div     = 0.8
        min_h       = 0.28
        xn          = 8.0
        num_threads = 1     // set to 0 to launch one thread per CPU
    }

    fastq_split {
        enabled = true
        suffix  = "_split.tsv"
        method  = "sample"
    }

    // Workflow-level switches.
    // NOTE(review): phred_offset duplicates the value of cutadapt.quality_base
    // (both 33); whether the two are meant to stay in sync is not visible here.
    closed_ref_cluster = true
    taxa_level         = 6    // collapse to genus
    phred_offset       = 33
    vsearch_chimera    = false

    // Use of import/download input processes
    generate_input = true    

    // Validation of parameters
    validate_parameters = true

    // Run 16S by default, but toggle for ITS
    run_its = false

    // Reference files
    // NOTE: the three URLs below spell out "2024.2" literally; they do not
    // follow qiime_release and must be edited by hand when the release changes.
    otu_ref_url            = "https://data.qiime2.org/2024.2/common/silva-138-99-seqs.qza"
    trained_classifier_url = "https://data.qiime2.org/2024.2/common/silva-138-99-nb-classifier.qza"
    taxonomy_ref_url       = "https://data.qiime2.org/2024.2/common/silva-138-99-tax.qza"

    // Container images: each *_container tag is interpolated from the
    // *_release value assigned on the line directly above it, so the release
    // key must stay defined before its container key.
    qiime_release          = "2024.2"
    qiime_container        = "quay.io/qiime2/amplicon:${params.qiime_release}"
    fastqc_release         = "v0.11.9_cv8"
    fastqc_container       = "biocontainers/fastqc:${params.fastqc_release}"
    multiqc_release        = "v1.18"
    multiqc_container      = "ewels/multiqc:${params.multiqc_release}"
    fondue_release         = "2023.2-ps"
    fondue_container       = "linathekim/q2-fondue:${params.fondue_release}"
    pandas_release         = "1.4.2"
    pandas_container       = "amancevice/pandas:${params.pandas_release}"
}

// Choose the platform-specific Conda environment files bundled under assets/.
// Any OS whose lower-cased name contains "mac" maps to "osx"; everything else
// maps to "linux" (Windows users need QIIME 2 on WSL).
sys_properties   = System.getProperties()
executor_sys     = sys_properties['os.name'].toLowerCase()
sys_abbreviation = executor_sys.contains("mac") ? "osx" : "linux"

// Environments resolved from Conda channels / package names
params.fastqc_conda_env  = "bioconda::fastqc"
params.multiqc_conda_env = "bioconda::multiqc typing-extensions"
params.pandas_conda_env  = "pandas"

// Environments resolved from YAML files shipped with the pipeline
params.qiime_conda_env   = "${baseDir}/assets/qiime2-amplicon-2024.2-py38-${sys_abbreviation}-conda.yml"
params.fondue_conda_env  = "${baseDir}/assets/q2-fondue-2023.2-${sys_abbreviation}.yml"

// conf/base.config is always loaded, regardless of the selected profile
includeConfig 'conf/base.config'

// Optional profiles; each one pulls in the config file of the same name
profiles {
    parallel    { includeConfig 'conf/parallel.config'    }
    conda       { includeConfig 'conf/conda.config'       }
    docker      { includeConfig 'conf/docker.config'      }
    singularity { includeConfig 'conf/singularity.config' }
}

// One timestamp, taken when the config is parsed, is shared by every report
// file name below.
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss' )

// Only the execution trace is switched on by default
timeline.enabled = false
timeline.file    = "execution_timeline_${trace_timestamp}.html"

report.enabled   = false
report.file      = "execution_report_${trace_timestamp}.html"

trace.enabled    = true
trace.file       = "execution_trace_${trace_timestamp}.txt"

dag.enabled      = false
dag.file         = "pipeline_dag_${trace_timestamp}.mmd"

// Pipeline metadata
manifest {
    // Identity
    name            = "nf-ducken"
    version         = "0.1"
    description     = "Process amplicon meta-analysis data, from NCBI accession IDs to taxonomic diversity metrics."
    author          = "Food Systems Biotechnology Laboratory of ETH Zurich"

    // Source location and entry point
    homePage        = "https://github.com/bokulich-lab/nf-ducken"
    defaultBranch   = "main"
    mainScript      = "main.nf"

    // Nextflow version constraint
    nextflowVersion = "!>=21.0.0"
}

// Cap a requested resource at the configured maximum; adapted from
// nf-core/ampliseq.
//
//   obj  - the requested value. For 'memory' and 'time' it is compared with
//          compareTo() against params.max_memory / params.max_time coerced to
//          nextflow.util.MemoryUnit / nextflow.util.Duration; for 'cpus' it is
//          compared with Math.min() against params.max_cpus coerced to int.
//   type - one of 'memory', 'time' or 'cpus'.
//
// Returns the configured maximum when obj exceeds it, otherwise obj. If the
// comparison throws (e.g. the params.max_* value cannot be coerced), an error
// line is printed and obj is returned unchanged. An unrecognised type also
// returns obj unchanged rather than falling through to an implicit null.
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            // compareTo() is only guaranteed to return a positive number when
            // obj is larger, not exactly 1, so test the sign.
            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) > 0)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            // Same sign-based comparison as for memory.
            if (obj.compareTo(params.max_time as nextflow.util.Duration) > 0)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
    // No limit is defined for any other type: hand the request back untouched.
    return obj
}

// Function to establish appropriate environments respecting hierarchy