diff --git a/CHANGELOG.md b/CHANGELOG.md index b938b44..c0b209a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# v0.7 changelog +* New feature to limit reads to N reads (`--downsample`) in case of samples with extremely high depth. +* Updated Qiime2 to 2023.2 version (This should not affect downstream results) to get rid of +an annoying installation issue with the old Qiime2 version. + # v0.5 changelog * Allow splitting samples into group for dada2 noise (pool column in metadata TSV, see GitHub documentation [here](https://github.com/PacificBiosciences/pb-16S-nf#pooling)). diff --git a/README.md b/README.md index bef9595..c6ea2a3 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,7 @@ nextflow run main.nf --help --front_p Forward primer sequence. Default to F27. (default: AGRGTTYGATYMTGGCTCAG) --adapter_p Reverse primer sequence. Default to R1492. (default: AAGTCGTAACAAGGTARCY) --filterQ Filter input reads above this Q value (default: 20). + --downsample Limit reads to a maximum of N reads if there are more than N reads (default: off) --max_ee DADA2 max_EE parameter. Reads with number of expected errors higher than this value will be discarded (default: 2) --minQ DADA2 minQ parameter. 
Reads with any base lower than this score diff --git a/env/qiime2-2023.2-py38-linux-conda.yml b/env/qiime2-2023.2-py38-linux-conda.yml new file mode 100644 index 0000000..e47d2a4 --- /dev/null +++ b/env/qiime2-2023.2-py38-linux-conda.yml @@ -0,0 +1,684 @@ +channels: + - qiime2/label/r2023.2 + - conda-forge + - bioconda + - defaults +dependencies: + - _libgcc_mutex=0.1 + - _openmp_mutex=4.5 + - _r-mutex=1.0.1 + - alsa-lib=1.2.8 + - altair=4.2.2 + - anyio=3.6.2 + - argcomplete=2.0.0 + - argon2-cffi=21.3.0 + - argon2-cffi-bindings=21.2.0 + - astor=0.8.1 + - asttokens=2.2.1 + - atpublic=3.0.1 + - attr=2.5.1 + - attrs=22.2.0 + - backcall=0.2.0 + - backports=1.0 + - backports.functools_lru_cache=1.6.4 + - beautifulsoup4=4.11.2 + - bibtexparser=1.4.0 + - binutils_impl_linux-64=2.40 + - bioconductor-ancombc=2.0.1 + - bioconductor-beachmat=2.14.0 + - bioconductor-biobase=2.58.0 + - bioconductor-biocbaseutils=1.0.0 + - bioconductor-biocgenerics=0.44.0 + - bioconductor-biocneighbors=1.16.0 + - bioconductor-biocparallel=1.32.5 + - bioconductor-biocsingular=1.14.0 + - bioconductor-biomformat=1.26.0 + - bioconductor-biostrings=2.66.0 + - bioconductor-dada2=1.26.0 + - bioconductor-data-packages=20230202 + - bioconductor-decipher=2.26.0 + - bioconductor-decontam=1.18.0 + - bioconductor-delayedarray=0.24.0 + - bioconductor-delayedmatrixstats=1.20.0 + - bioconductor-dirichletmultinomial=1.40.0 + - bioconductor-genomeinfodb=1.34.8 + - bioconductor-genomeinfodbdata=1.2.9 + - bioconductor-genomicalignments=1.34.0 + - bioconductor-genomicranges=1.50.0 + - bioconductor-iranges=2.32.0 + - bioconductor-matrixgenerics=1.10.0 + - bioconductor-mia=1.6.0 + - bioconductor-multiassayexperiment=1.24.0 + - bioconductor-multtest=2.54.0 + - bioconductor-phyloseq=1.42.0 + - bioconductor-rhdf5=2.42.0 + - bioconductor-rhdf5filters=1.10.0 + - bioconductor-rhdf5lib=1.20.0 + - bioconductor-rhtslib=2.0.0 + - bioconductor-rsamtools=2.14.0 + - bioconductor-s4vectors=0.36.0 + - bioconductor-scaledmatrix=1.6.0 
+ - bioconductor-scater=1.26.0 + - bioconductor-scuttle=1.8.0 + - bioconductor-shortread=1.56.0 + - bioconductor-singlecellexperiment=1.20.0 + - bioconductor-sparsematrixstats=1.10.0 + - bioconductor-summarizedexperiment=1.28.0 + - bioconductor-treeio=1.22.0 + - bioconductor-treesummarizedexperiment=2.6.0 + - bioconductor-xvector=0.38.0 + - bioconductor-zlibbioc=1.44.0 + - biom-format=2.1.12 + - blast=2.13.0 + - bleach=6.0.0 + - bokeh=3.0.3 + - bowtie2=2.5.1 + - brotli=1.0.9 + - brotli-bin=1.0.9 + - brotlipy=0.7.0 + - bwidget=1.9.14 + - bzip2=1.0.8 + - c-ares=1.18.1 + - ca-certificates=2022.12.7 + - cachecontrol=0.12.11 + - cached_property=1.5.2 + - cairo=1.16.0 + - certifi=2022.12.7 + - cffi=1.15.1 + - charset-normalizer=2.1.1 + - click=8.1.3 + - colorama=0.4.6 + - comm=0.1.2 + - contourpy=1.0.7 + - cryptography=39.0.0 + - curl=7.87.0 + - cutadapt=4.2 + - cycler=0.11.0 + - cython=0.29.33 + - dbus=1.13.6 + - deblur=1.1.1 + - debugpy=1.6.6 + - decorator=4.4.2 + - defusedxml=0.7.1 + - dendropy=4.5.2 + - dill=0.3.6 + - dnaio=0.10.0 + - emperor=1.0.3 + - entrez-direct=16.2 + - entrypoints=0.4 + - exceptiongroup=1.1.0 + - executing=1.2.0 + - expat=2.5.0 + - fastcluster=1.2.6 + - fasttree=2.1.11 + - fftw=3.3.10 + - flit-core=3.8.0 + - flufl.lock=7.1 + - font-ttf-dejavu-sans-mono=2.37 + - font-ttf-inconsolata=3.000 + - font-ttf-source-code-pro=2.038 + - font-ttf-ubuntu=0.83 + - fontconfig=2.14.2 + - fonts-conda-ecosystem=1 + - fonts-conda-forge=1 + - fonttools=4.38.0 + - formulaic=0.5.2 + - freetype=2.12.1 + - fribidi=1.0.10 + - future=0.18.3 + - gcc_impl_linux-64=12.2.0 + - gettext=0.21.1 + - gfortran_impl_linux-64=12.2.0 + - giflib=5.2.1 + - glib=2.74.1 + - glib-tools=2.74.1 + - glpk=5.0 + - gmp=6.2.1 + - gneiss=0.4.6 + - graphite2=1.3.13 + - graphlib-backport=1.0.3 + - gsl=2.7 + - gst-plugins-base=1.21.3 + - gstreamer=1.21.3 + - gstreamer-orc=0.4.33 + - gxx_impl_linux-64=12.2.0 + - h5py=2.10.0 + - harfbuzz=6.0.0 + - hdf5=1.10.6 + - hdmedians=0.14.2 + - hmmer=3.1b2 + - 
htslib=1.16 + - icu=70.1 + - idna=3.4 + - ijson=3.2.0.post0 + - importlib-metadata=4.8.3 + - importlib_metadata=4.8.3 + - importlib_resources=5.12.0 + - iniconfig=2.0.0 + - interface_meta=1.3.0 + - iow=1.0.5 + - ipykernel=6.21.2 + - ipython=8.10.0 + - ipython_genutils=0.2.0 + - ipywidgets=8.0.4 + - iqtree=2.2.0.3 + - isa-l=2.30.0 + - jack=1.9.22 + - jedi=0.18.2 + - jinja2=3.1.2 + - joblib=1.2.0 + - jpeg=9e + - jq=1.6 + - jsonschema=4.17.3 + - jupyter_client=8.0.3 + - jupyter_core=5.2.0 + - jupyter_events=0.6.3 + - jupyter_server=2.3.0 + - jupyter_server_terminals=0.4.4 + - jupyterlab_pygments=0.2.2 + - jupyterlab_widgets=3.0.5 + - kernel-headers_linux-64=2.6.32 + - keyutils=1.6.1 + - kiwisolver=1.4.4 + - krb5=1.20.1 + - lame=3.100 + - lcms2=2.14 + - ld_impl_linux-64=2.40 + - lerc=4.0.0 + - libblas=3.9.0 + - libbrotlicommon=1.0.9 + - libbrotlidec=1.0.9 + - libbrotlienc=1.0.9 + - libcap=2.66 + - libcblas=3.9.0 + - libclang=15.0.7 + - libclang13=15.0.7 + - libcups=2.3.3 + - libcurl=7.87.0 + - libdb=6.2.32 + - libdeflate=1.13 + - libedit=3.1.20191231 + - libev=4.33 + - libevent=2.1.10 + - libffi=3.4.2 + - libflac=1.4.2 + - libgcc-devel_linux-64=12.2.0 + - libgcc-ng=12.2.0 + - libgcrypt=1.10.1 + - libgfortran-ng=12.2.0 + - libgfortran5=12.2.0 + - libglib=2.74.1 + - libgomp=12.2.0 + - libgpg-error=1.46 + - libhwloc=2.9.0 + - libiconv=1.17 + - libidn2=2.3.4 + - liblapack=3.9.0 + - liblapacke=3.9.0 + - libllvm11=11.1.0 + - libllvm15=15.0.7 + - libnghttp2=1.51.0 + - libnsl=2.0.0 + - libogg=1.3.4 + - libopenblas=0.3.21 + - libopus=1.3.1 + - libpng=1.6.39 + - libpq=15.1 + - libsanitizer=12.2.0 + - libsndfile=1.2.0 + - libsodium=1.0.18 + - libsqlite=3.40.0 + - libssh2=1.10.0 + - libstdcxx-devel_linux-64=12.2.0 + - libstdcxx-ng=12.2.0 + - libsystemd0=252 + - libtiff=4.4.0 + - libtool=2.4.7 + - libudev1=253 + - libunistring=0.9.10 + - libuuid=2.32.1 + - libvorbis=1.3.7 + - libwebp-base=1.2.4 + - libxcb=1.13 + - libxkbcommon=1.5.0 + - libxml2=2.10.3 + - libxslt=1.1.37 + - 
libzlib=1.2.13 + - llvmlite=0.39.1 + - lockfile=0.12.2 + - lxml=4.9.2 + - lz4=4.3.2 + - lz4-c=1.9.4 + - mafft=7.515 + - make=4.3 + - markupsafe=2.1.2 + - matplotlib=3.6.0 + - matplotlib-base=3.6.0 + - matplotlib-inline=0.1.6 + - mistune=2.0.5 + - mpfr=4.1.0 + - mpg123=1.31.2 + - msgpack-python=1.0.4 + - munkres=1.1.4 + - mysql-common=8.0.32 + - mysql-libs=8.0.32 + - natsort=8.3.0 + - nbclassic=0.5.2 + - nbclient=0.7.2 + - nbconvert=7.2.9 + - nbconvert-core=7.2.9 + - nbconvert-pandoc=7.2.9 + - nbformat=5.7.3 + - ncurses=6.3 + - nest-asyncio=1.5.6 + - networkx=3.0 + - nlopt=2.7.1 + - nose=1.3.7 + - notebook=6.5.2 + - notebook-shim=0.2.2 + - nspr=4.35 + - nss=3.88 + - numba=0.56.4 + - numpy=1.23.5 + - oniguruma=6.9.8 + - openjdk=17.0.3 + - openjpeg=2.5.0 + - openssl=1.1.1t + - packaging=23.0 + - pandas=1.5.3 + - pandoc=2.19.2 + - pandocfilters=1.5.0 + - pango=1.50.13 + - parso=0.8.3 + - patsy=0.5.3 + - pbzip2=1.1.13 + - pcre=8.45 + - pcre2=10.40 + - perl=5.32.1 + - perl-archive-tar=2.40 + - perl-carp=1.50 + - perl-common-sense=3.75 + - perl-compress-raw-bzip2=2.201 + - perl-compress-raw-zlib=2.202 + - perl-encode=3.19 + - perl-exporter=5.74 + - perl-exporter-tiny=1.002002 + - perl-extutils-makemaker=7.66 + - perl-io-compress=2.201 + - perl-io-zlib=1.14 + - perl-json=4.10 + - perl-json-xs=2.34 + - perl-list-moreutils=0.430 + - perl-list-moreutils-xs=0.430 + - perl-parent=0.241 + - perl-pathtools=3.75 + - perl-scalar-list-utils=1.63 + - perl-storable=3.15 + - perl-types-serialiser=1.01 + - pexpect=4.8.0 + - pickleshare=0.7.5 + - pigz=2.6 + - pillow=9.2.0 + - pip=23.0.1 + - pixman=0.40.0 + - pkgutil-resolve-name=1.3.10 + - platformdirs=3.0.0 + - pluggy=1.0.0 + - ply=3.11 + - prometheus_client=0.16.0 + - prompt-toolkit=3.0.36 + - psutil=5.9.4 + - pthread-stubs=0.4 + - ptyprocess=0.7.0 + - pulseaudio=16.1 + - pure_eval=0.2.2 + - pycparser=2.21 + - pygments=2.14.0 + - pynndescent=0.5.8 + - pyopenssl=23.0.0 + - pyparsing=3.0.9 + - pyqt=5.15.7 + - pyqt5-sip=12.11.0 + - 
pyrsistent=0.19.3 + - pysocks=1.7.1 + - pytest=7.2.1 + - python=3.8.15 + - python-dateutil=2.8.2 + - python-fastjsonschema=2.16.3 + - python-isal=1.1.0 + - python-json-logger=2.0.7 + - python_abi=3.8 + - pytz=2022.7.1 + - pyyaml=6.0 + - pyzmq=25.0.0 + - q2-alignment=2023.2.0 + - q2-composition=2023.2.0 + - q2-cutadapt=2023.2.0 + - q2-dada2=2023.2.0 + - q2-deblur=2023.2.0 + - q2-demux=2023.2.0 + - q2-diversity=2023.2.0 + - q2-diversity-lib=2023.2.0 + - q2-emperor=2023.2.0 + - q2-feature-classifier=2023.2.0 + - q2-feature-table=2023.2.0 + - q2-fragment-insertion=2023.2.0 + - q2-gneiss=2023.2.0 + - q2-longitudinal=2023.2.0 + - q2-metadata=2023.2.0 + - q2-mystery-stew=2023.2.0 + - q2-phylogeny=2023.2.0 + - q2-quality-control=2023.2.0 + - q2-quality-filter=2023.2.0 + - q2-sample-classifier=2023.2.0 + - q2-taxa=2023.2.0 + - q2-types=2023.2.0 + - q2-vsearch=2023.2.0 + - q2cli=2023.2.0 + - q2galaxy=2023.2.0 + - q2templates=2023.2.0 + - qiime2=2023.2.0 + - qt-main=5.15.6 + - r-acepack=1.4.1 + - r-ade4=1.7_22 + - r-ape=5.7 + - r-askpass=1.1 + - r-assertthat=0.2.1 + - r-backports=1.4.1 + - r-base=4.2.2 + - r-base64enc=0.1_3 + - r-beeswarm=0.4.0 + - r-bh=1.81.0_1 + - r-bibtex=0.5.1 + - r-bit=4.0.5 + - r-bit64=4.0.5 + - r-bitops=1.0_7 + - r-blob=1.2.3 + - r-boot=1.3_28.1 + - r-broom=1.0.3 + - r-bslib=0.4.2 + - r-cachem=1.0.7 + - r-cairo=1.6_0 + - r-callr=3.7.3 + - r-cellranger=1.1.0 + - r-checkmate=2.1.0 + - r-class=7.3_21 + - r-cli=3.6.0 + - r-clipr=0.8.0 + - r-cluster=2.1.4 + - r-codetools=0.2_19 + - r-colorspace=2.1_0 + - r-cpp11=0.4.3 + - r-crayon=1.5.2 + - r-curl=4.3.3 + - r-cvxr=1.0_11 + - r-data.table=1.14.8 + - r-dbi=1.1.3 + - r-dbplyr=2.3.1 + - r-deldir=1.0_6 + - r-desctools=0.99.48 + - r-digest=0.6.31 + - r-doparallel=1.0.17 + - r-dorng=1.8.6 + - r-dplyr=1.1.0 + - r-dqrng=0.3.0 + - r-dtplyr=1.3.0 + - r-e1071=1.7_13 + - r-ecosolver=0.5.4 + - r-ellipsis=0.3.2 + - r-emmeans=1.8.4_1 + - r-energy=1.7_11 + - r-estimability=1.4.1 + - r-evaluate=0.20 + - r-exact=3.2 + - 
r-expm=0.999_7 + - r-fansi=1.0.4 + - r-farver=2.1.1 + - r-fastmap=1.1.1 + - r-fnn=1.1.3.1 + - r-forcats=1.0.0 + - r-foreach=1.5.2 + - r-foreign=0.8_84 + - r-formatr=1.14 + - r-formula=1.2_5 + - r-frictionless=1.0.2 + - r-fs=1.6.1 + - r-futile.logger=1.4.3 + - r-futile.options=1.0.1 + - r-gargle=1.3.0 + - r-gbrd=0.4_11 + - r-generics=0.1.3 + - r-getopt=1.20.3 + - r-ggbeeswarm=0.7.1 + - r-ggplot2=3.4.1 + - r-ggrastr=1.0.1 + - r-ggrepel=0.9.3 + - r-gld=2.6.6 + - r-glue=1.6.2 + - r-gmp=0.7_1 + - r-googledrive=2.0.0 + - r-googlesheets4=1.0.1 + - r-gridextra=2.3 + - r-gsl=2.1_8 + - r-gtable=0.3.1 + - r-haven=2.5.1 + - r-highr=0.10 + - r-hmisc=4.8_0 + - r-hms=1.1.2 + - r-htmltable=2.4.1 + - r-htmltools=0.5.4 + - r-htmlwidgets=1.6.1 + - r-httr=1.4.5 + - r-hwriter=1.3.2.1 + - r-ids=1.0.1 + - r-igraph=1.4.1 + - r-interp=1.1_3 + - r-irlba=2.3.5.1 + - r-isoband=0.2.7 + - r-iterators=1.0.14 + - r-jpeg=0.1_10 + - r-jquerylib=0.1.4 + - r-jsonlite=1.8.4 + - r-knitr=1.42 + - r-labeling=0.4.2 + - r-lambda.r=1.2.4 + - r-lattice=0.20_45 + - r-latticeextra=0.6_30 + - r-lazyeval=0.2.2 + - r-lifecycle=1.0.3 + - r-lme4=1.1_31 + - r-lmertest=3.1_3 + - r-lmom=2.9 + - r-lubridate=1.9.2 + - r-magrittr=2.0.3 + - r-mass=7.3_58.2 + - r-matrix=1.5_3 + - r-matrixstats=0.63.0 + - r-memoise=2.0.1 + - r-mgcv=1.8_41 + - r-mime=0.12 + - r-minqa=1.2.5 + - r-modelr=0.1.10 + - r-munsell=0.5.0 + - r-mvtnorm=1.1_3 + - r-nlme=3.1_162 + - r-nloptr=2.0.3 + - r-nnet=7.3_18 + - r-numderiv=2016.8_1.1 + - r-openssl=2.0.5 + - r-optparse=1.7.3 + - r-osqp=0.6.0.8 + - r-permute=0.9_7 + - r-pheatmap=1.0.12 + - r-pillar=1.8.1 + - r-pixmap=0.4_12 + - r-pkgconfig=2.0.3 + - r-pkgmaker=0.32.8 + - r-plogr=0.2.0 + - r-plyr=1.8.8 + - r-png=0.1_8 + - r-prettyunits=1.1.1 + - r-processx=3.8.0 + - r-progress=1.2.2 + - r-proxy=0.4_27 + - r-ps=1.7.2 + - r-purrr=1.0.1 + - r-r6=2.5.1 + - r-ragg=1.2.4 + - r-rappdirs=0.3.3 + - r-rbibutils=2.2.13 + - r-rcolorbrewer=1.1_3 + - r-rcpp=1.0.10 + - r-rcppannoy=0.0.20 + - r-rcppeigen=0.3.3.9.3 
+ - r-rcpphnsw=0.4.1 + - r-rcppml=0.3.7 + - r-rcppparallel=5.1.6 + - r-rcppprogress=0.4.2 + - r-rcurl=1.98_1.10 + - r-rdpack=2.4 + - r-readr=2.1.4 + - r-readxl=1.4.2 + - r-registry=0.5_1 + - r-rematch=1.0.1 + - r-rematch2=2.1.2 + - r-reprex=2.0.2 + - r-reshape2=1.4.4 + - r-rlang=1.0.6 + - r-rmarkdown=2.20 + - r-rmpfr=0.9_1 + - r-rngtools=1.5.2 + - r-rootsolve=1.8.2.3 + - r-rpart=4.1.19 + - r-rspectra=0.16_1 + - r-rsqlite=2.2.20 + - r-rstudioapi=0.14 + - r-rsvd=1.0.5 + - r-rtsne=0.16 + - r-rvest=1.0.3 + - r-sass=0.4.5 + - r-scales=1.2.1 + - r-scs=3.0_1 + - r-selectr=0.4_2 + - r-sitmo=2.0.2 + - r-snow=0.4_4 + - r-sp=1.6_0 + - r-statmod=1.5.0 + - r-stringi=1.7.12 + - r-stringr=1.5.0 + - r-survival=3.5_3 + - r-sys=3.4.1 + - r-systemfonts=1.0.4 + - r-textshaping=0.3.6 + - r-tibble=3.1.8 + - r-tidyr=1.3.0 + - r-tidyselect=1.2.0 + - r-tidytree=0.4.2 + - r-tidyverse=1.3.2 + - r-timechange=0.2.0 + - r-tinytex=0.44 + - r-tzdb=0.3.0 + - r-utf8=1.2.3 + - r-uuid=1.1_0 + - r-uwot=0.1.14 + - r-vctrs=0.5.2 + - r-vegan=2.6_4 + - r-vipor=0.4.5 + - r-viridis=0.6.2 + - r-viridislite=0.4.1 + - r-vroom=1.6.1 + - r-withr=2.5.0 + - r-xfun=0.37 + - r-xml2=1.3.3 + - r-xtable=1.8_4 + - r-yaml=2.3.7 + - r-yulab.utils=0.0.6 + - raxml=8.2.12 + - readline=8.1.2 + - requests=2.28.2 + - rfc3339-validator=0.1.4 + - rfc3986-validator=0.1.1 + - samtools=1.16.1 + - scikit-bio=0.5.7 + - scikit-learn=0.24.1 + - scipy=1.8.1 + - seaborn=0.12.2 + - seaborn-base=0.12.2 + - sed=4.8 + - send2trash=1.8.0 + - sepp=4.3.10 + - setuptools=67.4.0 + - sip=6.7.7 + - six=1.16.0 + - sniffio=1.3.0 + - sortmerna=2.0 + - soupsieve=2.3.2.post1 + - stack_data=0.6.2 + - statsmodels=0.13.5 + - sysroot_linux-64=2.12 + - tbb=2021.8.0 + - terminado=0.17.1 + - threadpoolctl=3.1.0 + - tinycss2=1.2.1 + - tk=8.6.12 + - tktable=2.10 + - toml=0.10.2 + - tomli=2.0.1 + - toolz=0.12.0 + - tornado=6.2 + - tqdm=4.64.1 + - traitlets=5.9.0 + - typing-extensions=4.4.0 + - typing_extensions=4.4.0 + - tzlocal=2.1 + - umap-learn=0.5.3 + - 
unicodedata2=15.0.0 + - unifrac=1.1.1 + - unifrac-binaries=1.1.1 + - urllib3=1.26.14 + - vsearch=2.22.1 + - wcwidth=0.2.6 + - webencodings=0.5.1 + - websocket-client=1.5.1 + - wget=1.20.3 + - wheel=0.38.4 + - widgetsnbextension=4.0.5 + - wrapt=1.15.0 + - xcb-util=0.4.0 + - xcb-util-image=0.4.0 + - xcb-util-keysyms=0.4.0 + - xcb-util-renderutil=0.3.9 + - xcb-util-wm=0.4.1 + - xmltodict=0.13.0 + - xopen=1.7.0 + - xorg-fixesproto=5.0 + - xorg-inputproto=2.3.2 + - xorg-kbproto=1.0.7 + - xorg-libice=1.0.10 + - xorg-libsm=1.2.3 + - xorg-libx11=1.7.2 + - xorg-libxau=1.0.9 + - xorg-libxdmcp=1.1.3 + - xorg-libxext=1.3.4 + - xorg-libxfixes=5.0.3 + - xorg-libxi=1.7.10 + - xorg-libxrender=0.9.10 + - xorg-libxt=1.2.1 + - xorg-libxtst=1.2.3 + - xorg-recordproto=1.14.2 + - xorg-renderproto=0.11.1 + - xorg-xextproto=7.3.0 + - xorg-xproto=7.0.31 + - xyzservices=2023.2.0 + - xz=5.2.6 + - yaml=0.2.5 + - yq=3.1.1 + - zeromq=4.3.4 + - zipp=3.15.0 + - zlib=1.2.13 + - zstandard=0.19.0 + - zstd=1.5.2 + - krona=2.8.1 + - bc=1.07.1 + - git=2.39.1 + - jq=1.6 diff --git a/main.nf b/main.nf index 28d4a2d..68da920 100644 --- a/main.nf +++ b/main.nf @@ -7,7 +7,7 @@ in demultiplexed 16S amplicon sequencing FASTQ file. =============================================================================== Author: Khi Pin, Chua -Updated: 2023-1-25 +Updated: 2023-3-31 */ nextflow.enable.dsl=2 @@ -32,6 +32,7 @@ def helpMessage() { --front_p Forward primer sequence. Default to F27. (default: AGRGTTYGATYMTGGCTCAG) --adapter_p Reverse primer sequence. Default to R1492. (default: AAGTCGTAACAAGGTARCY) --filterQ Filter input reads above this Q value (default: 20). + --downsample Limit reads to a maximum of N reads if there are more than N reads (default: off) --max_ee DADA2 max_EE parameter. Reads with number of expected errors higher than this value will be discarded (default: 2) --minQ DADA2 minQ parameter. 
Reads with any base lower than this score @@ -86,7 +87,7 @@ def helpMessage() { params.help = false if (params.help) exit 0, helpMessage() params.version = false -version = "0.5" +version = "0.6" if (params.version) exit 0, log.info("$version") params.download_db = false params.skip_primer_trim = false @@ -98,6 +99,8 @@ params.max_len = 1600 params.colorby = "condition" params.skip_phylotree = false params.minQ = 0 +// Downsample to N reads, default to 0 which means disable +params.downsample = 0 // Check input params.input = false if (params.input){ @@ -150,7 +153,7 @@ params.rmd_vis_biom_script= "$projectDir/scripts/visualize_biom.Rmd" params.rmd_helper = "$projectDir/scripts/import_biom.R" params.cutadapt_cpu = 16 params.primer_fasta = "$projectDir/scripts/16S_primers.fasta" -params.dadaCCS_script = "$projectDir/scripts/run_dada_ccs.R" +params.dadaCCS_script = "$projectDir/scripts/run_dada_2023.2.R" params.dadaAssign_script = "$projectDir/scripts/dada2_assign_tax.R" params.publish_dir_mode = "symlink" @@ -160,6 +163,7 @@ log_text = """ Number of samples in samples TSV: $n_sample Filter input reads above Q: $params.filterQ Trim primers with cutadapt: $trim_cutadapt + Limit to N reads if exceeding N reads (0 = disabled): $params.downsample Forward primer: $params.front_p Reverse primer: $params.adapter_p Minimum amplicon length filtered in DADA2: $params.min_len @@ -194,6 +198,7 @@ process write_log{ conda (params.enable_conda ? 
"$projectDir/env/pb-16s-pbtools.yml" : null) container "kpinpb/pb-16s-nf-tools:latest" publishDir "$params.outdir", mode: params.publish_dir_mode + label 'cpu_def' input: val(logs) @@ -224,6 +229,16 @@ process QC_fastq { path("${sampleID}.filterQ${params.filterQ}.fastq.gz"), emit: filtered_fastq_files script: + if (params.downsample > 0){ + """ + seqkit fx2tab -j $task.cpus -q --gc -l -H -n -i $sampleFASTQ |\ + csvtk mutate2 -C '%' -t -n sample -e '"${sampleID}"' > ${sampleID}.seqkit.readstats.tsv + seqkit stats -T -j $task.cpus -a ${sampleFASTQ} |\ + csvtk mutate2 -C '%' -t -n sample -e '"${sampleID}"' > ${sampleID}.seqkit.summarystats.tsv + seqkit seq -j $task.cpus --min-qual $params.filterQ $sampleFASTQ |\ + seqkit head -n $params.downsample --out-file ${sampleID}.filterQ${params.filterQ}.fastq.gz + """ + } else { """ seqkit fx2tab -j $task.cpus -q --gc -l -H -n -i $sampleFASTQ |\ csvtk mutate2 -C '%' -t -n sample -e '"${sampleID}"' > ${sampleID}.seqkit.readstats.tsv @@ -231,12 +246,13 @@ process QC_fastq { csvtk mutate2 -C '%' -t -n sample -e '"${sampleID}"' > ${sampleID}.seqkit.summarystats.tsv seqkit seq -j $task.cpus --min-qual $params.filterQ $sampleFASTQ --out-file ${sampleID}.filterQ${params.filterQ}.fastq.gz """ + } } // Trim full length 16S primers with cutadapt process cutadapt { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? 
"$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/trimmed_primers_FASTQ", pattern: '*.fastq.gz', mode: params.publish_dir_mode publishDir "$params.outdir/cutadapt_summary", pattern: '*.report', mode: params.publish_dir_mode cpus params.cutadapt_cpu @@ -389,7 +405,7 @@ process prepare_qiime2_manifest_skip_cutadapt { path(metadata) output: - path "samplefile.txt", emit: sample_trimmed_file + path "samplefile*.txt", emit: sample_trimmed_file """ echo -e "sample-id\tabsolute-filepath" > samplefile.txt @@ -412,8 +428,8 @@ process prepare_qiime2_manifest_skip_cutadapt { // Import data into QIIME 2, note the `path *` in input is staging all the input // FASTQ into the process so it'll work on AWS that has no notion of local directory path process import_qiime2 { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/import_qiime", mode: params.publish_dir_mode label 'cpu_def' @@ -455,8 +471,8 @@ process merge_sample_manifest { // Summarize demultiplex statistics process demux_summarize { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/summary_demux", mode: params.publish_dir_mode label 'cpu_def' @@ -480,8 +496,8 @@ process demux_summarize { // Denoise into ASVs process dada2_denoise { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? 
"$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/dada2", mode: params.publish_dir_mode cpus params.dada2_cpu @@ -501,12 +517,12 @@ process dada2_denoise { """ # Use custom script that can skip primer trimming mkdir -p dada2_custom_script - cp $dada_ccs_script dada2_custom_script/run_dada_ccs_original.R - sed 's/minQ\\ =\\ 0/minQ=$minQ/g' dada2_custom_script/run_dada_ccs_original.R > \ - dada2_custom_script/run_dada_ccs.R - chmod +x dada2_custom_script/run_dada_ccs.R + cp $dada_ccs_script dada2_custom_script/run_dada.R + # sed 's/minQ\\ =\\ 0/minQ=$minQ/g' dada2_custom_script/run_dada_ccs_original.R > \ + # dada2_custom_script/run_dada_ccs.R + chmod +x dada2_custom_script/run_dada.R export PATH="./dada2_custom_script:\$PATH" - which run_dada_ccs.R + which run_dada.R qiime dada2 denoise-ccs --i-demultiplexed-seqs $samples_qza \ --o-table dada2-ccs_table.qza \ --o-representative-sequences dada2-ccs_rep.qza \ @@ -525,8 +541,8 @@ process dada2_denoise { // Take note of the output above! There will only be one set of out from // different group in dada2 folder after dada2_denoise process mergeASV { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/dada2", mode: params.publish_dir_mode cpus params.dada2_cpu @@ -564,8 +580,8 @@ process mergeASV { // Filter DADA2 ASVs using minimum number of samples and frequency of ASVs process filter_dada2 { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? 
"$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/dada2", mode: params.publish_dir_mode cpus params.dada2_cpu @@ -621,8 +637,8 @@ process filter_dada2 { // Assign taxonomies to SILVA, GTDB and RefSeq using DADA2 // assignTaxonomy function based on Naive Bayes classifier process dada2_assignTax { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/results", pattern: 'best_tax*', mode: params.publish_dir_mode publishDir "$params.outdir/nb_tax", mode: params.publish_dir_mode cpus params.vsearch_cpu @@ -670,8 +686,8 @@ process dada2_assignTax { // QC summary for dada2 process dada2_qc { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/results", mode: params.publish_dir_mode label 'cpu_def' @@ -748,8 +764,8 @@ process dada2_qc { } process qiime2_phylogeny_diversity { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/results/phylogeny_diversity", mode: params.publish_dir_mode label 'cpu8' @@ -826,8 +842,8 @@ process qiime2_phylogeny_diversity { // Rarefaction visualization process dada2_rarefaction { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? 
"$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/results", mode: params.publish_dir_mode label 'cpu_def' @@ -850,8 +866,8 @@ process dada2_rarefaction { // Classify taxonomy and export table using VSEARCH approach process class_tax { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/results", mode: params.publish_dir_mode cpus params.vsearch_cpu @@ -866,6 +882,7 @@ process class_tax { path "tax_export/taxonomy.tsv", emit: tax_tsv path "merged_freq_tax.qzv", emit: tax_freq_tab path "vsearch_merged_freq_tax.tsv", emit: tax_freq_tab_tsv + path "taxonomy.vsearch.blast_results.qza", emit: tax_vsearch_blast script: """ @@ -877,7 +894,8 @@ process class_tax { --p-maxrejects $params.maxreject \ --p-maxaccepts $params.maxaccept \ --p-perc-identity $params.vsearch_identity \ - --p-top-hits-only + --p-top-hits-only \ + --o-search-results taxonomy.vsearch.blast_results.qza qiime tools export --input-path taxonomy.vsearch.qza --output-path tax_export @@ -898,8 +916,8 @@ process class_tax { // Export results into biom for use with phyloseq process export_biom { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/results", mode: params.publish_dir_mode label 'cpu_def' @@ -936,8 +954,8 @@ process export_biom { // Export results into biom for use with phyloseq process export_biom_skip_nb { - conda (params.enable_conda ? 
"$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/results", mode: params.publish_dir_mode label 'cpu_def' @@ -985,8 +1003,8 @@ process picrust2 { } process barplot { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/results", mode: params.publish_dir_mode label 'cpu_def' @@ -1008,8 +1026,8 @@ process barplot { } process barplot_nb { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/results", mode: params.publish_dir_mode label 'cpu_def' @@ -1122,8 +1140,8 @@ process html_rep_skip_cutadapt { // Krona plot process krona_plot { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? "$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$params.outdir/results", mode: params.publish_dir_mode label 'cpu_def' // Ignore if this fail @@ -1153,8 +1171,8 @@ process krona_plot { } process download_db { - conda (params.enable_conda ? "$projectDir/env/qiime2-2022.2-py38-linux-conda.yml" : null) - container "kpinpb/pb-16s-nf-qiime:latest" + conda (params.enable_conda ? 
"$projectDir/env/qiime2-2023.2-py38-linux-conda.yml" : null) + container "kpinpb/pb-16s-nf-qiime:v0.7" publishDir "$projectDir/databases", mode: "copy" label 'cpu_def' @@ -1171,8 +1189,8 @@ process download_db { wget -N --content-disposition 'https://zenodo.org/record/4735821/files/RefSeq_16S_6-11-20_RDPv16_fullTaxo.fa.gz?download=1' echo "Downloading SILVA sequences and taxonomies for Naive Bayes classifier" - wget -N --content-disposition 'https://data.qiime2.org/2022.2/common/silva-138-99-seqs.qza' - wget -N --content-disposition 'https://data.qiime2.org/2022.2/common/silva-138-99-tax.qza' + wget -N --content-disposition 'https://data.qiime2.org/2023.2/common/silva-138-99-seqs.qza' + wget -N --content-disposition 'https://data.qiime2.org/2023.2/common/silva-138-99-tax.qza' echo "Downloading GTDB database" wget -N --content-disposition --no-check-certificate 'https://zenodo.org/record/6912512/files/GTDB_ssu_all_r207.taxonomy.qza?download=1' diff --git a/nextflow.config b/nextflow.config index 75a3006..5680b67 100644 --- a/nextflow.config +++ b/nextflow.config @@ -104,4 +104,5 @@ timeline { dag { enabled = true file = "report_$params.outdir/dag.html" + overwrite = true } diff --git a/scripts/run_dada_2023.2.R b/scripts/run_dada_2023.2.R new file mode 100755 index 0000000..2d8be72 --- /dev/null +++ b/scripts/run_dada_2023.2.R @@ -0,0 +1,527 @@ +#!/usr/bin/env Rscript + +################################################### +# This R script takes an input directory of .fastq.gz files +# and outputs a tsv file of the dada2 processed sequence +# table. It is intended for use with the QIIME2 plugin +# for DADA2. 
+# +# Ex: Rscript run_dada_single.R input_dir output.tsv track.tsv filtered_dir 200 0 2.0 2 Inf pooled 1.0 0 1000000 NULL 32 +#################################################### + +#################################################### +# DESCRIPTION OF ARGUMENTS # +#################################################### +# NOTE: All numeric arguments should be zero or positive. +# NOTE: All numeric arguments save maxEE are expected to be integers. +# NOTE: Currently the filtered_dir must already exist. +# NOTE: ALL ARGUMENTS ARE POSITIONAL! +# +### FILE SYSTEM ARGUMENTS ### +# +# 1) input_directory - File path to directory with the .fastq.gz files to be processed. +# Ex: path/to/dir/with/fastqgzs +# +# 2) output_path - File path to output tsv file. If already exists, will be overwritten. +# Ex: path/to/output_file.tsv +# +# 3) output_track - File path to tracking tsv file. If already exists, will be overwritten. +# Ex: path/to/tracking_stats.tsv +# +# 4) removed_primer_directory - File path to directory in which to write the primer.removed .fastq.gz +# files. These files are intermediate for the full workflow. +# Currently they remain after the script finishes. +# Directory must already exist. +# Ex: path/to/dir/with/fastqgzs/primerremoved +# +# 5) filtered_directory - File path to directory in which to write the filtered .fastq.gz files. +# These files are intermediate for the full workflow. +# Currently they remain after the script finishes. +# Directory must already exist. +# Ex: path/to/dir/with/fastqgzs/filtered +# +### PRIMER REMOVING ARGUMENTS ### +# +# 6) forward_primer - Primer front of Pacbio CCS sequences. +# Ex: 'AGRGTTYGATYMTGGCTCAG' +# +# 7) reverse_primer - Primer adapter of Pacbio CCS sequences. +# Ex: 'RGYTACCTTGTTACGACTT' +# +# 8) max_mismatch - The number of mismatches to tolerate when matching +# reads to primer sequences. +# Ex: 2 +# +# 9) indels - Allow insertions or deletions of bases when matching adapters. 
+# Ex: FALSE +# +### FILTERING ARGUMENTS ### +# +# 10) truncation_length - The position at which to truncate reads. Reads shorter +# than truncation_length will be discarded. +# Special values: 0 - no truncation or length filtering. +# Ex: 150 +# +# 11) trim_left - The number of nucleotides to remove from the start of +# each read. Should be less than truncation_length for obvious reasons. +# Ex: 0 +# +# 12) max_expected_errors - Reads with expected errors higher than maxEE are discarded. +# Ex: 2.0 +# +# 13) truncation_quality_score - Reads are truncated at the first instance of quality score truncation_quality_score. +# If the read is then shorter than truncation_length , it is discarded. +# Ex: 2 +# +# 14) min_length - Remove reads with length shorter than min_length. min_length is enforced +# after trimming and truncation. +# Default Inf - no maximum. +# +# 15) max_length - Remove reads with length greater than max_length. max_length is enforced on the raw reads. +# Default Inf - no maximum. +# Ex: 300 +# +### SENSITIVITY ARGUMENTS ### +# +# 16) pooling_method- The method used to pool (or not) samples during denoising. +# Valid options are: +# independent: (Default) No pooling, samples are denoised indpendently. +# pseudo: Samples are "pseudo-pooled" for denoising. +# Ex: independent +# +# +### CHIMERA ARGUMENTS ### +# +# 17) chimera_method - The method used to remove chimeras. Valid options are: +# none: No chimera removal is performed. +# pooled: All reads are pooled prior to chimera detection. +# consensus: Chimeras are detected in samples individually, and a consensus decision +# is made for each sequence variant. +# Ex: consensus +# +# 18) min_parental_fold - The minimum abundance of potential "parents" of a sequence being +# tested as chimeric, expressed as a fold-change versus the abundance of the sequence being +# tested. Values should be greater than or equal to 1 (i.e. parents should be more +# abundant than the sequence being tested). 
+# Ex: 1.0 +# +### SPEED ARGUMENTS ### +# +# 19) num_threads - The number of threads to use. +# Special values: 0 - detect available cores and use all. +# Ex: 1 +# +# 20) learn_min_reads - The minimum number of reads to learn the error model from. +# Special values: 0 - Use all input reads. +# Ex: 1000000 +# +### GLOBAL OPTION ARGUMENTS ### +# +# 21) homopolymer_gap_penalty - The cost of gaps in homopolymer regions (>=3 repeated bases). +# Default is NULL, which causes homopolymer gaps +# to be treated as normal gaps. +# Ex: -1 +# +# 22) band_size - When set, banded Needleman-Wunsch alignments are performed. +# The default value of band_size is 16. Setting BAND_SIZE to a negative +# number turns off banding (i.e. full Needleman-Wunsch). +# Ex: 32 +# + +# error handling ----------------- +options(error = function() { + sink(stderr()) + on.exit(sink(NULL)) + traceback(3) + if (!interactive()) { + q(status = 1) + } +}) + +library("optparse") + +cat(R.version$version.string, "\n") +errQuit <- function(mesg, status=1) { message("Error: ", mesg); q(status=status) } +getN <- function(x) sum(getUniques(x)) #Function added from paired read processing + +option_list = list( + make_option(c("--input_directory"), action="store", default='NULL', type='character', + help="File path to directory with the .fastq.gz files to be processed"), + make_option(c("--input_directory_reverse"), action="store", default='NULL', type='character', + help="File path to reverse directory with the .fastq.gz files to be processed. Only used in paired-end processing"), + make_option(c("--output_path"), action="store", default='NULL', type='character', + help="File path to output tsv file. If already exists, will be overwritten"), + make_option(c("--output_track"), action="store", default='NULL', type='character', + help="File path to tracking tsv file. 
If already exists, will be overwritten"), + make_option(c("--removed_primer_directory"), action="store", default='NULL', type='character', + help="File path to directory in which to write the primer.removed .fastq.gz files"), + make_option(c("--filtered_directory"), action="store", default='NULL', type='character', + help="File path to directory in which to write the filtered .fastq.gz files. These files are intermediate"), + make_option(c("--filtered_directory_reverse"), action="store", default='NULL', type='character', + help="File path to directory in which to write the reverse filtered .fastq.gz files. These files are intermediate. Only used in paired-end processing"), + make_option(c("--forward_primer"), action="store", default='NULL', type='character', + help="Primer front of Pacbio CCS sequences"), + make_option(c("--reverse_primer"), action="store", default='NULL', type='character', + help="Primer adapter of Pacbio CCS sequences"), + make_option(c("--max_mismatch"), action="store", default='NULL', type='character', + help="The number of mismatches to tolerate when matching reads to primer sequences."), + make_option(c("--indels"), action="store", default='NULL', type='character', + help="Allow insertions or deletions of bases when matching adapters"), + make_option(c("--truncation_length"), action="store", default='NULL', type='character', + help="The position at which to truncate reads. Reads shorter then truncation_length will be discarded."), + make_option(c("--truncation_length_reverse"), action="store", default='NULL', type='character', + help="The position at which to truncate reverse reads. Reads shorter then truncation_length will be discarded. Only used in paired-end processing"), + make_option(c("--trim_left"), action="store", default='NULL', type='character', + help="The number of nucleotides to remove from the start of each read. 
Should be less than truncation_length for obvious reasons"), + make_option(c("--trim_left_reverse"), action="store", default='NULL', type='character', + help="The number of nucleotides to remove from the start of each reverse read. Should be less than truncation_length for obvious reasons. Only used in paired-end processing"), + make_option(c("--max_expected_errors"), action="store", default='NULL', type='character', + help="Reads with expected errors higher than max_expected_errors are discarded"), + make_option(c("--max_expected_errors_reverse"), action="store", default='NULL', type='character', + help="Reverse reads with expected errors higher than max_expected_errors are discarded. Only used in paired-end processing"), + make_option(c("--truncation_quality_score"), action="store", default='NULL', type='character', + help="Reads are truncated at the first instance of quality score truncation_quality_score.If the read is then shorter than truncation_length, it is discarded"), + make_option(c("--min_length"), action="store", default='NULL', type='character', + help="Remove reads with length shorter than min_length. min_length is enforced after trimming and truncation."), + make_option(c("--max_length"), action="store", default='NULL', type='character', + help="Remove reads with length greater than max_length. 
max_length is enforced on the raw reads."), + make_option(c("--min_overlap"), action="store", default='NULL', type='character', + help="Minimum overlap allowed between reads"), + make_option(c("--pooling_method"), action="store", default='NULL', type='character', + help="The method used to pool (or not) samples during denoising (independent/pseudo)"), + make_option(c("--chimera_method"), action="store", default='NULL', type='character', + help="The method used to remove chimeras (none/pooled/consensus)"), + make_option(c("--min_parental_fold"), action="store", default='NULL', type='character', + help="The minimum abundance of potential parents of a sequence being tested as chimeric, expressed as a fold-change versus the abundance of the sequence being tested. Values should be greater than or equal to 1"), + make_option(c("--allow_one_off"), action="store", default='NULL', type='character', + help="Bimeras that are one-off (one mismatch/indel away from an exact bimera) are also identified if allow_one_off is TRUE."), + make_option(c("--num_threads"), action="store", default='NULL', type='character', + help="The number of threads to use"), + make_option(c("--learn_min_reads"), action="store", default='NULL', type='character', + help="The minimum number of reads to learn the error model from"), + make_option(c("--homopolymer_gap_penalty"), action="store", default='NULL', type='character', + help="The cost of gaps in homopolymer regions (>=3 repeated bases).Default is NULL, which causes homopolymer gaps to be treated as normal gaps."), + make_option(c("--band_size"), action="store", default='NULL', type='character', + help="When set, banded Needleman-Wunsch alignments are performed.") +) +opt = parse_args(OptionParser(option_list=option_list)) + +# Added by Khi Pin 2023-4-21 to save all outputs from scripts +con <- file("dada2.log") +sink(con, append=TRUE) +sink(con, append=TRUE, type="message") + + +# Assign each of the arguments, in positional order, to an 
appropriately named R variable +inp.dir <- opt$input_directory +inp.dirR <- opt$input_directory_reverse #added from paired arguments +out.path <- opt$output_path +out.track <- opt$output_track +primer.removed.dir <- opt$removed_primer_directory #added from CCS arguments +filtered.dir <- opt$filtered_directory +filtered.dirR<- opt$filtered_directory_reverse #added from paired arguments +primerF <- opt$forward_primer #added from CCS arguments +primerR <- opt$reverse_primer #added from CCS arguments +maxMismatch <- if(opt$max_mismatch=='NULL') NULL else as.numeric(opt$max_mismatch) #added from CCS arguments +indels <- if(opt$indels=='NULL') NULL else as.logical(opt$indels) #added from CCS arguments +truncLen <- if(opt$truncation_length=='NULL') NULL else as.integer(opt$truncation_length) +truncLenR <- if(opt$truncation_length_reverse=='NULL') NULL else as.integer(opt$truncation_length_reverse) #added from paired arguments +trimLeft <- if(opt$trim_left=='NULL') NULL else as.integer(opt$trim_left) +trimLeftR<-if(opt$trim_left_reverse=='NULL') NULL else as.integer(opt$trim_left_reverse) #added from paired arguments +maxEE <- if(opt$max_expected_errors=='NULL') NULL else as.numeric(opt$max_expected_errors) +maxEER <- if(opt$max_expected_errors_reverse=='NULL') NULL else as.numeric(opt$max_expected_errors_reverse) #added from paired arguments +truncQ <- if(opt$truncation_quality_score=='NULL') NULL else as.integer(opt$truncation_quality_score) +minLen <- if(opt$min_length=='NULL') NULL else as.numeric(opt$min_length) #added from CCS arguments +maxLen <- if(opt$max_length=='NULL') NULL else as.numeric(opt$max_length) # Allows Inf +minOverlap <- if(opt$min_overlap=='NULL') NULL else as.integer(opt$min_overlap) #added from paired arguments +poolMethod <- opt$pooling_method +chimeraMethod <- opt$chimera_method +minParentFold <- if(opt$min_parental_fold=='NULL') NULL else as.numeric(opt$min_parental_fold) +allowOneOff <-if(opt$allow_one_off=='NULL') NULL else 
as.logical(opt$allow_one_off) +nthreads <- if(opt$num_threads=='NULL') NULL else as.integer(opt$num_threads) +nreads.learn <- if(opt$learn_min_reads=='NULL') NULL else as.integer(opt$learn_min_reads) +# The following args are not directly exposed to end users in q2-dada2, +# but rather indirectly, via the methods `denoise-single` and `denoise-pyro`. +if (opt$homopolymer_gap_penalty=='NULL'){ + HOMOPOLYMER_GAP_PENALTY<-NULL +}else{ + HOMOPOLYMER_GAP_PENALTY<-as.integer(opt$homopolymer_gap_penalty) + if(HOMOPOLYMER_GAP_PENALTY>0){ + HOMOPOLYMER_GAP_PENALTY<-HOMOPOLYMER_GAP_PENALTY*(-1) #negative numbers cannot be passed using optparse, so we convert it here + } +} +BAND_SIZE <- if(opt$band_size=='NULL') NULL else as.integer(opt$band_size) + +### VALIDATE ARGUMENTS ### +# Input directory is expected to contain .fastq.gz file(s) +# that have not yet been filtered and globally trimmed +# to the same length. +if(!dir.exists(inp.dir)) { + errQuit("Input directory does not exist.") +} else { + unfilts <- list.files(inp.dir, pattern=".fastq.gz$", full.names=TRUE) + if(length(unfilts) == 0) { + errQuit("No input files with the expected filename format found.") + } + if(inp.dirR!='NULL'){ + unfiltsR <- list.files(inp.dirR, pattern=".fastq.gz$", full.names=TRUE) + if(length(unfiltsR) == 0) { + errQuit("No input reverse files with the expected filename format found.") + } + if(length(unfilts) != length(unfiltsR)) { + errQuit("Different numbers of forward and reverse .fastq.gz files.") + } + + } + +} + +# Output files are to be filenames (not directories) and are to be +# removed and replaced if already present. 
+for(fn in c(out.path, out.track)) {
+  if(dir.exists(fn)) {
+    errQuit("Output filename ", fn, " is a directory.")
+  } else if(file.exists(fn)) {
+    invisible(file.remove(fn))
+  }
+}
+
+# Convert nthreads to the logical/numeric expected by dada2
+if(nthreads < 0) {
+  errQuit("nthreads must be non-negative.")
+} else if(nthreads == 0) {
+  multithread <- TRUE # detect and use all
+} else if(nthreads == 1) {
+  multithread <- FALSE
+} else {
+  multithread <- nthreads
+}
+
+### LOAD LIBRARIES ###
+suppressWarnings(library(methods))
+suppressWarnings(library(dada2))
+cat("DADA2:", as.character(packageVersion("dada2")), "/",
+    "Rcpp:", as.character(packageVersion("Rcpp")), "/",
+    "RcppParallel:", as.character(packageVersion("RcppParallel")), "\n")
+
+### Remove Primers ###
+if(primer.removed.dir!='NULL'){ #for CCS read analysis
+  cat("1) Removing Primers\n")
+  nop <- file.path(primer.removed.dir, basename(unfilts))
+
+  if(primerF == 'none' && primerR == 'none'){
+    nop <- unfilts
+  } else {
+    prim <- suppressWarnings(removePrimers(unfilts, nop, primerF, dada2:::rc(primerR), # BUGFIX: forward primer is primerF; `primer` was an undefined name
+                                           max.mismatch = maxMismatch, allow.indels = indels,
+                                           orient = TRUE, verbose = TRUE))
+    cat(ifelse(file.exists(nop), ".", "x"), sep="")
+    nop <- list.files(primer.removed.dir, pattern=".fastq.gz$", full.names=TRUE)
+    cat("\n")
+    if(length(nop) == 0) { # All reads were filtered out
+      errQuit("No reads passed the Removing Primers step (Did you select the right primers?)", status=2)
+    }
+  }
+}
+
+### TRIM AND FILTER ###
+cat("2) Filtering ")
+if(primer.removed.dir!='NULL'){ #for CCS read analysis
+  filts <- file.path(filtered.dir, basename(nop))
+  out <- suppressWarnings(filterAndTrim(nop, filts, truncLen = truncLen, trimLeft = trimLeft,
+                                        maxEE = maxEE, truncQ = truncQ, rm.phix = FALSE,
+                                        multithread = multithread, maxLen = maxLen, minLen = minLen, minQ = 0))
+}else{
+  filts <- file.path(filtered.dir, basename(unfilts))
+  if(inp.dirR!='NULL'){#for paired read analysis
+    filtsR <- file.path(filtered.dirR,
basename(unfiltsR))
+    out <- suppressWarnings(filterAndTrim(unfilts, filts, unfiltsR, filtsR,
+                            truncLen=c(truncLen, truncLenR), trimLeft=c(trimLeft, trimLeftR),
+                            maxEE=c(maxEE, maxEER), truncQ=truncQ, rm.phix=TRUE,
+                            multithread=multithread))
+  }else{#for single/pyro read analysis
+    out <- suppressWarnings(filterAndTrim(unfilts, filts, truncLen=truncLen, trimLeft=trimLeft,
+                            maxEE=maxEE, truncQ=truncQ, rm.phix=TRUE,
+                            multithread=multithread, maxLen=maxLen))
+  }
+}
+
+cat(ifelse(file.exists(filts), ".", "x"), sep="")
+filts <- list.files(filtered.dir, pattern=".fastq.gz$", full.names=TRUE)
+
+if(inp.dirR!='NULL'){
+  filtsR <- list.files(filtered.dirR, pattern=".fastq.gz$", full.names=TRUE)
+}
+cat("\n")
+if(length(filts) == 0) { # All reads were filtered out
+  errQuit("No reads passed the filter (was truncLen longer than the read length?)", status=2)
+}
+
+### LEARN ERROR RATES ###
+# Dereplicate enough samples to get nreads.learn total reads
+cat("3) Learning Error Rates\n")
+if(primer.removed.dir!='NULL'){#for CCS read analysis
+  err <- suppressWarnings(learnErrors(filts, nreads=nreads.learn,
+                                      errorEstimationFunction=dada2:::PacBioErrfun,
+                                      multithread=multithread, BAND_SIZE=BAND_SIZE))
+  err_plot <- plotErrors(err)
+  pdf("plot_error_model.pdf", width=12, height=8, useDingbats=FALSE)
+  print(err_plot) # BUGFIX: bare `err_plot` inside a braced block is not auto-printed, leaving the PDF empty; ggplot objects must be print()ed explicitly
+  dev.off()
+
+}else if(inp.dirR!='NULL'){#for paired read analysis
+
+  err <- suppressWarnings(learnErrors(filts, nreads=nreads.learn, multithread=multithread))
+  errR <- suppressWarnings(learnErrors(filtsR, nreads=nreads.learn, multithread=multithread))
+
+}else{#for single/pyro read analysis
+  err <- suppressWarnings(learnErrors(filts, nreads=nreads.learn, multithread=multithread,
+                                      HOMOPOLYMER_GAP_PENALTY=HOMOPOLYMER_GAP_PENALTY, BAND_SIZE=BAND_SIZE))
+}
+
+### PROCESS ALL SAMPLES ###
+# Loop over rest in streaming fashion with learned error rates
+
+if(inp.dirR =='NULL'){#for CCS/single/pyro read analysis
+  dds <- vector("list", length(filts))
+  cat("4) Denoise 
samples ")
+  cat("\n")
+  for(j in seq(length(filts))) {
+    drp <- derepFastq(filts[[j]])
+    dds[[j]] <- dada(drp, err=err, multithread=multithread,HOMOPOLYMER_GAP_PENALTY=HOMOPOLYMER_GAP_PENALTY,
+                     BAND_SIZE=BAND_SIZE, verbose=FALSE)
+    cat(".")
+  }
+  cat("\n")
+  if(poolMethod == "pseudo") {
+    cat(" Pseudo-pool step ")
+    ### TEMPORARY, to be removed once 1.12 makes its way to Q2
+    ### Needed for now to manage pseudo-pooling memory, as 1.10 didn't do this appropriately.
+    ### pseudo_priors code copied from dada2.R
+    st <- makeSequenceTable(dds)
+    pseudo_priors <- colnames(st)[colSums(st>0) >= 2 | colSums(st) >= Inf]
+    rm(st)
+    ### \pseudo_priors code copied from dada2.R
+    ### code copied from previous loop through samples in this script
+    for(j in seq(length(filts))) {
+      drp <- derepFastq(filts[[j]])
+      dds[[j]] <- dada(drp, err=err, multithread=multithread,
+                       priors=pseudo_priors, HOMOPOLYMER_GAP_PENALTY=HOMOPOLYMER_GAP_PENALTY,
+                       BAND_SIZE=BAND_SIZE, verbose=FALSE)
+      cat(".")
+    }
+    cat("\n")
+    ### \code copied from previous loop through samples in this script
+  }
+  # Make sequence table
+  seqtab <- makeSequenceTable(dds)
+}else{#for paired read analysis
+  denoisedF <- rep(0, length(filts))
+  ddsF <- vector("list", length(filts))
+  ddsR <- vector("list", length(filts))
+  mergers <- vector("list", length(filts))
+  cat("4) Denoise samples ") # FIX: was "3)"; renumbered to match the single-end branch and the following "5) Remove chimeras" step
+
+  for(j in seq(length(filts))) {
+    drpF <- derepFastq(filts[[j]])
+    ddsF[[j]] <- dada(drpF, err=err, multithread=multithread, verbose=FALSE)
+    drpR <- derepFastq(filtsR[[j]])
+    ddsR[[j]] <- dada(drpR, err=errR, multithread=multithread, verbose=FALSE)
+    cat(".")
+  }
+  cat("\n")
+  if(poolMethod == "pseudo") {
+    cat(" Pseudo-pool step ")
+    ### TEMPORARY, to be removed once 1.12 makes its way to Q2
+    ### Needed for now to manage pseudo-pooling memory, as 1.10 didn't do this appropriately. 
+ ### pseudo_priors code copied from dada2.R + stF <- makeSequenceTable(ddsF) + pseudo_priorsF <- colnames(stF)[colSums(stF>0) >= 2 | colSums(stF) >= Inf] + rm(stF) + stR <- makeSequenceTable(ddsR) + pseudo_priorsR <- colnames(stR)[colSums(stR>0) >= 2 | colSums(stR) >= Inf] + rm(stR) + ### \pseudo_priors code copied from dada2.R + ### code copied from previous loop through samples in this script + for(j in seq(length(filts))) { + drpF <- derepFastq(filts[[j]]) + ddsF[[j]] <- dada(drpF, err=err, priors=pseudo_priorsF, + multithread=multithread, verbose=FALSE) + drpR <- derepFastq(filtsR[[j]]) + ddsR[[j]] <- dada(drpR, err=errR, priors=pseudo_priorsR, + multithread=multithread, verbose=FALSE) + cat(".") + } + cat("\n") + ### \code copied from previous loop through samples in this script + } + + ### Now loop through and do merging + for(j in seq(length(filts))) { + drpF <- derepFastq(filts[[j]]) + drpR <- derepFastq(filtsR[[j]]) + mergers[[j]] <- mergePairs(ddsF[[j]], drpF, ddsR[[j]], drpR, minOverlap=minOverlap) + denoisedF[[j]] <- getN(ddsF[[j]]) + cat(".") + } + cat("\n") + # Make sequence table + seqtab <- makeSequenceTable(mergers) + +} + + +### Remove chimeras +cat("5) Remove chimeras (method = ", chimeraMethod, ")\n", sep="") +if(chimeraMethod %in% c("pooled", "consensus")) { + seqtab.nochim <- removeBimeraDenovo(seqtab, method=chimeraMethod, minFoldParentOverAbundance=minParentFold, allowOneOff=allowOneOff, multithread=multithread) +} else { # No chimera removal, copy seqtab to seqtab.nochim + seqtab.nochim <- seqtab +} + +### REPORT READ FRACTIONS THROUGH PIPELINE ### +cat("6) Report read numbers through the pipeline\n") +if(inp.dirR =='NULL'){ + # Handle edge cases: Samples lost in filtering; One sample + if(primer.removed.dir!='NULL'){ #for CCS read analysis + if (primerF == 'none' && primerR == 'none'){ + track <- cbind(out[ ,1:2,drop=FALSE], matrix(0, nrow=nrow(out), ncol=2)) + colnames(track) <- c("input", "filtered", "denoised", "non-chimeric") + 
passed.filtering <- track[,"filtered"] > 0 + track[passed.filtering,"denoised"] <- rowSums(seqtab) + track[passed.filtering,"non-chimeric"] <- rowSums(seqtab.nochim) + write.table(track, out.track, sep="\t", row.names=TRUE, col.names=NA, + quote=FALSE) + } else { + track <- cbind(prim,out[ ,2], matrix(0, nrow=nrow(out), ncol=2)) + colnames(track) <- c("input", "primer-removed","filtered", "denoised", "non-chimeric") + passed.filtering <- track[,"filtered"] > 0 + track[passed.filtering,"denoised"] <- rowSums(seqtab) + track[passed.filtering,"non-chimeric"] <- rowSums(seqtab.nochim) + write.table(track, out.track, sep="\t", row.names=TRUE, col.names=NA, + quote=FALSE) + } + }else{#for paired end reads + # Handle edge cases: Samples lost in filtering; One sample + track <- cbind(out, matrix(0, nrow=nrow(out), ncol=3)) + colnames(track) <- c("input", "filtered", "denoised", "merged", "non-chimeric") + passed.filtering <- track[,"filtered"] > 0 + track[passed.filtering,"denoised"] <- denoisedF + track[passed.filtering,"merged"] <- rowSums(seqtab) + track[passed.filtering,"non-chimeric"] <- rowSums(seqtab.nochim) + write.table(track, out.track, sep="\t", row.names=TRUE, col.names=NA, + quote=FALSE) + } +} + +### WRITE OUTPUT AND QUIT ### +# Formatting as tsv plain-text sequence table table +cat("7) Write output\n") +seqtab.nochim <- t(seqtab.nochim) # QIIME has OTUs as rows +col.names <- basename(filts) +col.names[[1]] <- paste0("#OTU ID\t", col.names[[1]]) +write.table(seqtab.nochim, out.path, sep="\t", + row.names=TRUE, col.names=col.names, quote=FALSE) +saveRDS(seqtab.nochim, "seqtab_nochim.rds") ### TESTING + +q(status=0) diff --git a/scripts/visualize_biom.Rmd b/scripts/visualize_biom.Rmd index 5feefdd..4a408f7 100644 --- a/scripts/visualize_biom.Rmd +++ b/scripts/visualize_biom.Rmd @@ -220,6 +220,8 @@ dada2_qc <- dada2_qc %>% * Missing samples (Not enough reads, do not pass QC, etc): `r removed_sample` * Total number of CCS reads before filtering and primers 
trimming: `r nrow(reads_qc)` * Was primers trimmed prior to DADA2? `r paste0(" ", trim_cutadapt)` +* Note that if downsampling was enabled, the reads will be filtered first, then restricted + to a maximum of N reads. * Total number of reads after quality filtering: `r if (skip_cutadapt != "Yes") {sum(all_read_count$input_reads)} else {sum(as.numeric(dada2_qc$input))}` (`r if (skip_cutadapt != "Yes") {paste0(round(sum(all_read_count$input_reads)/nrow(reads_qc), 4)*100, "%")} else {paste0(round(sum(as.numeric(dada2_qc$input))/nrow(reads_qc), 4)*100, "%")}`) * Total number of reads after primers trimming (DADA2 input): `r if (skip_cutadapt != "Yes") {sum(all_read_count$demuxed_reads)} else {paste0("Skipped cutadapt trimming")}` (`r if (skip_cutadapt != "Yes") {paste0(round(sum(all_read_count$demuxed_reads)/sum(all_read_count$input_reads), 4)*100, "%")} else {paste0("Skipped cutadapt trimming")}`) * Total number of ASVs found: `r total_asv`