From b021738258f4586dd84cb6ab4a60853258bb52e6 Mon Sep 17 00:00:00 2001 From: misialq Date: Fri, 29 Nov 2024 16:05:23 +0000 Subject: [PATCH] deploy: 71da24a8ea2d0a794c9971bb88ec7f33c2433a46 --- .../02_mag_reconstruction/reconstruction.md | 1 + .../chapters/05_interoperability/export.md | 53 ++ .../chapters/05_interoperability/import.md | 89 +++ .../chapters/05_interoperability/intro.md | 18 + chapters/00_data_retrieval.html | 5 + chapters/00_setup.html | 5 + chapters/01_filtering/host-filtering.html | 5 + chapters/01_filtering/intro.html | 5 + chapters/01_filtering/quality-filtering.html | 5 + chapters/02_mag_reconstruction/abundance.html | 5 + .../02_mag_reconstruction/dereplication.html | 5 + chapters/02_mag_reconstruction/intro.html | 5 + .../02_mag_reconstruction/reconstruction.html | 7 +- .../03_taxonomic_classification/intro.html | 5 + .../03_taxonomic_classification/mags.html | 5 + .../03_taxonomic_classification/reads.html | 5 + chapters/04_functional_annotation/intro.html | 5 + chapters/04_functional_annotation/mags.html | 15 + chapters/05_interoperability/export.html | 500 ++++++++++++++++ chapters/05_interoperability/import.html | 549 ++++++++++++++++++ chapters/05_interoperability/intro.html | 447 ++++++++++++++ genindex.html | 5 + intro.html | 7 +- objects.inv | Bin 717 -> 832 bytes search.html | 5 + searchindex.js | 2 +- 26 files changed, 1755 insertions(+), 3 deletions(-) create mode 100644 _sources/chapters/05_interoperability/export.md create mode 100644 _sources/chapters/05_interoperability/import.md create mode 100644 _sources/chapters/05_interoperability/intro.md create mode 100644 chapters/05_interoperability/export.html create mode 100644 chapters/05_interoperability/import.html create mode 100644 chapters/05_interoperability/intro.html diff --git a/_sources/chapters/02_mag_reconstruction/reconstruction.md b/_sources/chapters/02_mag_reconstruction/reconstruction.md index 8670ef9..b087c0c 100644 --- 
a/_sources/chapters/02_mag_reconstruction/reconstruction.md +++ b/_sources/chapters/02_mag_reconstruction/reconstruction.md @@ -11,6 +11,7 @@ kernelspec: language: python name: python3 --- +(mag-recovery)= # Recovery of MAGs In this part of the tutorial we will go thorugh the steps required to recover metagenome-assembled genomes (MAGs) from metagenomic data. The workflow is divided into several steps, from contig assembly to binning and quality control. diff --git a/_sources/chapters/05_interoperability/export.md b/_sources/chapters/05_interoperability/export.md new file mode 100644 index 0000000..1348713 --- /dev/null +++ b/_sources/chapters/05_interoperability/export.md @@ -0,0 +1,53 @@ +--- +jupytext: + formats: md:myst + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.11.5 +kernelspec: + display_name: Python 3 + language: python + name: python3 +--- +(data-export)= +# Exporting data and connecting with other tools +QIIME 2 offers various ways of visualizing and processing your data further, but sometimes you may want to use other tools +that are not (yet) available through QIIME 2. This is, of course, possible and very easy to do: you can export your data +from any QIIME 2 artifact and use it with any of your other favourite tools, as long as the underlying format is compatible. +The formats that QIIME 2 supports are common and should be readable by most bioinformatics tools - most of the time, the +artifacts will contain data in the original format that the underlying tool uses. Below are some examples of how you can +export data from QIIME 2 and connect it with other tools. + +```{warning} +QIIME 2 does not yet support exporting data from the cache. This means that you will need to manually copy the data from the +cache directory to a location where you can access it with other tools. 
In our examples, the cache directory is located directly +in the working directory and that is where we will copy the data from. Keep in mind that you should never temper with the files +in the cache directory directly, as this may lead to broken artifacts and failed analyses. +``` + +## Visualizing Kraken 2 reports with Pavian +If you have used Kraken 2 to [classify your reads](kraken-reads), you can export the resulting reports from the corresponding +QIIME 2 artifact and visualize them with [Pavian](https://github.com/fbreitwieser/pavian) which will allow you to explore the +taxonomic composition of your samples in an interactive way. To export the Kraken 2 reports, you can use the following commands: +```bash +UUID=$(cat ./cache/keys/kraken_reports_reads | grep 'data' | awk '{print $2}') +mkdir exported_reports +cp -r ./cache/data/$UUID/data/* exported_reports/ +``` +This will find the UUID of the reports artifact, use it to locate the data within the cache directory, create a directory +for the exported data and copy the files from the cache into it. You can then use those files (within the `exported_reports` +directory) with Pavian. To give it a quick try, navigate to [Pavian's demo site](https://fbreitwieser.shinyapps.io/pavian/) +and upload the exported files. + +## Microbial pangenomics with Anvi'o +Another suite of tools you may be familiar with is the [Anvi'o](http://anvio.org/) platform. One of the workflows that Anvi'o +provides is the microbial pangenomics analysis, which can be used to explore the gene clusters within your samples. You +could export the MAGs obtained from the [binning step](mag-recovery) and use them as input to the `anvi-pan-genome` workflow, as +described [here](https://merenlab.org/2016/11/08/pangenomics-v2/). 
To export the MAGs, you can use the following command: +```bash +UUID=$(cat ./cache/keys/mags | grep 'data' | awk '{print $2}') +mkdir exported_mags +cp -r ./cache/data/$UUID/data/* exported_mags/ +``` diff --git a/_sources/chapters/05_interoperability/import.md b/_sources/chapters/05_interoperability/import.md new file mode 100644 index 0000000..0461c5f --- /dev/null +++ b/_sources/chapters/05_interoperability/import.md @@ -0,0 +1,89 @@ +--- +jupytext: + formats: md:myst + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.11.5 +kernelspec: + display_name: Python 3 + language: python + name: python3 +--- +(data-import)= +# Importing data from other tools +The MOSHPIT pipeline allows you to start working directly with the NGS reads, which you can take through various analysis, +like contig assembly, binning, and annotation. However, if you have already performed some of these steps outside of QIIME 2, +you can import the results into an appropriate QIIME 2 artifact and continue from there. Below you can see some examples and +use cases where this may be relevant. + +## Working with exisiting contigs +In case you already have contigs assembled from your metagenomic data, you can import them into a `SampleData[Contigs]` +artifact. This should not differ much from the typical import process (see [here](https://docs.qiime2.org/2024.10/tutorials/importing/) +for more details on importing data), but the command may look like: +```bash +qiime tools cache-import \ + --cache ./cache \ + --key contigs \ + --type "SampleData[Contigs]" \ + --input-path ./ +``` +Some actions in the MOSHPIT pipeline assume that contig IDs are unique across your entire sample set. 
If this is not the case, +you may use the `qiime assembly rename-contigs` action to rename contigs with unique identifiers: +```bash +qiime assembly rename-contigs \ + --i-contigs ./cache:contigs \ + --p-uuid-type shortuuid \ + --o-renamed-contigs ./cache:contigs_renamed +``` +From here, you should be able to continue with the rest of the MOSHPIT pipeline as described in our tutorials. + +## Working with existing MAGs +You may also be interested in continuing your analysis with MAGs that you have already recovered using other tools. +In this case, you can import the MAGs into a `SampleData[MAGs]` (non-dereplicated) or `FeatureData[MAG]` (dereplicated) +artifact. Before you do that, you will need to rename each MAG's FASTA file using the [UUID4](https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)) +format: this is required to ensure that MAG IDs are unique across your entire sample set. Here is a sample Python script +which could be used for that purpose: +```python +import os +from uuid import uuid4 +path = 'path/to/your/mag/directory/' + +for file in os.listdir(path): + os.rename(os.path.join(path, file), os.path.join(path, f'{uuid4()}.fa'))) +``` +Once you have renamed the MAGs, you can import them into a QIIME 2 artifact: +```bash +qiime tools cache-import \ + --cache ./cache \ + --key mags \ + --type "SampleData[MAGs]" \ + --input-path ./ +``` +for MAGs-per-sample, or: +```bash +qiime tools cache-import \ + --cache ./cache \ + --key mags \ + --type "FeatureData[MAG]" \ + --input-path ./ +``` +for dereplicated MAGs. From here, you should be able to continue with the rest of the MOSHPIT pipeline as described in our tutorials. + +## Importing other data +If you have other data that you would like to import into QIIME 2, you can use the `qiime tools cache-import` command - no +additional steps should be required. 
For example, you can import a set of Kraken 2 reports into a `SampleData[Kraken2Report % Properties('reads')]` +like this: +```bash +qiime tools cache-import \ + --cache ./cache \ + --key kraken2_reports_reads \ + --type "SampleData[Kraken2Report % reads]" \ + --input-path ./ +``` + +```{note} +Remember: you can import any existing data into QIIME 2 artifacts, as long as it matches the format required by the respective +QIIME 2 semantic type. +``` diff --git a/_sources/chapters/05_interoperability/intro.md b/_sources/chapters/05_interoperability/intro.md new file mode 100644 index 0000000..39cf3d0 --- /dev/null +++ b/_sources/chapters/05_interoperability/intro.md @@ -0,0 +1,18 @@ +--- +jupytext: + formats: md:myst + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.11.5 +kernelspec: + display_name: Python 3 + language: python + name: python3 +--- +(interoperability)= +# Interoperability with other tools +While most of the typical steps in a metagenomic analysis can be performed within QIIME 2, there are cases where you +might want to use other tools to perform certain tasks. In this chapter, we will show you how you can get some data in +and out of the QIIME 2 artifacts to continue your analysis workflow elsewhere. diff --git a/chapters/00_data_retrieval.html b/chapters/00_data_retrieval.html index 902773f..54e8390 100644 --- a/chapters/00_data_retrieval.html +++ b/chapters/00_data_retrieval.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/00_setup.html b/chapters/00_setup.html index 00da078..0368be6 100644 --- a/chapters/00_setup.html +++ b/chapters/00_setup.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/01_filtering/host-filtering.html b/chapters/01_filtering/host-filtering.html index 51d2828..2b0e73f 100644 --- a/chapters/01_filtering/host-filtering.html +++ b/chapters/01_filtering/host-filtering.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/01_filtering/intro.html b/chapters/01_filtering/intro.html index 2eb93b7..6a80921 100644 --- a/chapters/01_filtering/intro.html +++ b/chapters/01_filtering/intro.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/01_filtering/quality-filtering.html b/chapters/01_filtering/quality-filtering.html index f082263..93ec596 100644 --- a/chapters/01_filtering/quality-filtering.html +++ b/chapters/01_filtering/quality-filtering.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/02_mag_reconstruction/abundance.html b/chapters/02_mag_reconstruction/abundance.html index 636d650..8bbcd2d 100644 --- a/chapters/02_mag_reconstruction/abundance.html +++ b/chapters/02_mag_reconstruction/abundance.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/02_mag_reconstruction/dereplication.html b/chapters/02_mag_reconstruction/dereplication.html index 54d2738..f038461 100644 --- a/chapters/02_mag_reconstruction/dereplication.html +++ b/chapters/02_mag_reconstruction/dereplication.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/02_mag_reconstruction/intro.html b/chapters/02_mag_reconstruction/intro.html index 2037ae6..f1acbfe 100644 --- a/chapters/02_mag_reconstruction/intro.html +++ b/chapters/02_mag_reconstruction/intro.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/02_mag_reconstruction/reconstruction.html b/chapters/02_mag_reconstruction/reconstruction.html index 8f53827..1db870e 100644 --- a/chapters/02_mag_reconstruction/reconstruction.html +++ b/chapters/02_mag_reconstruction/reconstruction.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • @@ -343,7 +348,7 @@

    Contents

    -

    Recovery of MAGs#

    +

    Recovery of MAGs#

    In this part of the tutorial we will go thorugh the steps required to recover metagenome-assembled genomes (MAGs) from metagenomic data. The workflow is divided into several steps, from contig assembly to binning and quality control.

    diff --git a/chapters/03_taxonomic_classification/intro.html b/chapters/03_taxonomic_classification/intro.html index 463e1db..5533241 100644 --- a/chapters/03_taxonomic_classification/intro.html +++ b/chapters/03_taxonomic_classification/intro.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/03_taxonomic_classification/mags.html b/chapters/03_taxonomic_classification/mags.html index 40d96b6..95729ff 100644 --- a/chapters/03_taxonomic_classification/mags.html +++ b/chapters/03_taxonomic_classification/mags.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/03_taxonomic_classification/reads.html b/chapters/03_taxonomic_classification/reads.html index 6d60ac8..b2bff90 100644 --- a/chapters/03_taxonomic_classification/reads.html +++ b/chapters/03_taxonomic_classification/reads.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/04_functional_annotation/intro.html b/chapters/04_functional_annotation/intro.html index ad36490..191c047 100644 --- a/chapters/04_functional_annotation/intro.html +++ b/chapters/04_functional_annotation/intro.html @@ -184,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/chapters/04_functional_annotation/mags.html b/chapters/04_functional_annotation/mags.html index 096249f..9282767 100644 --- a/chapters/04_functional_annotation/mags.html +++ b/chapters/04_functional_annotation/mags.html @@ -64,6 +64,7 @@ + @@ -183,6 +184,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • @@ -529,6 +535,15 @@

    Let’s have a look at our CAZymes functional diversity!Functional annotation

    + +
    +

    next

    +

    Interoperability with other tools

    +
    + +
    diff --git a/chapters/05_interoperability/export.html b/chapters/05_interoperability/export.html new file mode 100644 index 0000000..5662833 --- /dev/null +++ b/chapters/05_interoperability/export.html @@ -0,0 +1,500 @@ + + + + + + + + + + + Exporting data and connecting with other tools — MOSHPIT plugin suite + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + +
    + +
    + + + + + +
    +
    + + + + + + +
    + + + +
    + +
    +
    + +
    +
    + +
    + +
    + +
    + + +
    + +
    + +
    + + + + + + + + + + + + + + + + + + + +
    + +
    + +
    +
    + + + +
    +

    Exporting data and connecting with other tools

    + +
    + +
    +
    + + + + +
    + +
    +

    Exporting data and connecting with other tools#

    +

    QIIME 2 offers various ways of visualizing and processing your data further, but sometimes you may want to use other tools +that are not (yet) available through QIIME 2. This is, of course, possible and very easy to do: you can export your data +from any QIIME 2 artifact and use it with any of your other favourite tools, as long as the underlying format is compatible. +The formats that QIIME 2 supports are common and should be readable by most bioinformatics tools - most of the time, the +artifacts will contain data in the original format that the underlying tool uses. Below are some examples of how you can +export data from QIIME 2 and connect it with other tools.

    +
    +

    Warning

    +

    QIIME 2 does not yet support exporting data from the cache. This means that you will need to manually copy the data from the +cache directory to a location where you can access it with other tools. In our examples, the cache directory is located directly +in the working directory and that is where we will copy the data from. Keep in mind that you should never tamper with the files +in the cache directory directly, as this may lead to broken artifacts and failed analyses.

    +
    +
    +

    Visualizing Kraken 2 reports with Pavian#

    +

    If you have used Kraken 2 to classify your reads, you can export the resulting reports from the corresponding +QIIME 2 artifact and visualize them with Pavian which will allow you to explore the +taxonomic composition of your samples in an interactive way. To export the Kraken 2 reports, you can use the following commands:

    +
    UUID=$(cat ./cache/keys/kraken_reports_reads | grep 'data' | awk '{print $2}')
    +mkdir exported_reports
    +cp -r ./cache/data/$UUID/data/* exported_reports/
    +
    +
    +

    This will find the UUID of the reports artifact, use it to locate the data within the cache directory, create a directory +for the exported data and copy the files from the cache into it. You can then use those files (within the exported_reports +directory) with Pavian. To give it a quick try, navigate to Pavian’s demo site +and upload the exported files.

    +
    +
    +

    Microbial pangenomics with Anvi’o#

    +

    Another suite of tools you may be familiar with is the Anvi’o platform. One of the workflows that Anvi’o +provides is the microbial pangenomics analysis, which can be used to explore the gene clusters within your samples. You +could export the MAGs obtained from the binning step and use them as input to the anvi-pan-genome workflow, as +described here. To export the MAGs, you can use the following command:

    +
    UUID=$(cat ./cache/keys/mags | grep 'data' | awk '{print $2}')
    +mkdir exported_mags
    +cp -r ./cache/data/$UUID/data/* exported_mags/
    +
    +
    +
    +
    + + + + +
    + + + + + + + + +
    + + + + + + + +
    +
    + + +
    + + +
    +
    +
    + + + + + +
    +
    + + \ No newline at end of file diff --git a/chapters/05_interoperability/import.html b/chapters/05_interoperability/import.html new file mode 100644 index 0000000..ac46ed1 --- /dev/null +++ b/chapters/05_interoperability/import.html @@ -0,0 +1,549 @@ + + + + + + + + + + + Importing data from other tools — MOSHPIT plugin suite + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + +
    + +
    + + + + + +
    +
    + + + + + + +
    + + + +
    + +
    +
    + +
    +
    + +
    + +
    + +
    + + +
    + +
    + +
    + + + + + + + + + + + + + + + + + + + +
    + +
    + +
    +
    + + + +
    +

    Importing data from other tools

    + +
    + +
    +
    + + + + +
    + +
    +

    Importing data from other tools#

    +

    The MOSHPIT pipeline allows you to start working directly with the NGS reads, which you can take through various analyses, +like contig assembly, binning, and annotation. However, if you have already performed some of these steps outside of QIIME 2, +you can import the results into an appropriate QIIME 2 artifact and continue from there. Below you can see some examples and +use cases where this may be relevant.

    +
    +

    Working with existing contigs#

    +

    In case you already have contigs assembled from your metagenomic data, you can import them into a SampleData[Contigs] +artifact. This should not differ much from the typical import process (see here +for more details on importing data), but the command may look like:

    +
    qiime tools cache-import \
    +    --cache ./cache \
    +    --key contigs \
    +    --type "SampleData[Contigs]" \
    +    --input-path ./<directory with contig FASTA files>
    +
    +
    +

    Some actions in the MOSHPIT pipeline assume that contig IDs are unique across your entire sample set. If this is not the case, +you may use the qiime assembly rename-contigs action to rename contigs with unique identifiers:

    +
    qiime assembly rename-contigs \
    +    --i-contigs ./cache:contigs \
    +    --p-uuid-type shortuuid \
    +    --o-renamed-contigs ./cache:contigs_renamed
    +
    +
    +

    From here, you should be able to continue with the rest of the MOSHPIT pipeline as described in our tutorials.

    +
    +
    +

    Working with existing MAGs#

    +

    You may also be interested in continuing your analysis with MAGs that you have already recovered using other tools. +In this case, you can import the MAGs into a SampleData[MAGs] (non-dereplicated) or FeatureData[MAG] (dereplicated) +artifact. Before you do that, you will need to rename each MAG’s FASTA file using the UUID4 +format: this is required to ensure that MAG IDs are unique across your entire sample set. Here is a sample Python script +which could be used for that purpose:

    +
    import os
    +from uuid import uuid4
    +path = 'path/to/your/mag/directory/'
    +
    +for file in os.listdir(path):
    +    os.rename(os.path.join(path, file), os.path.join(path, f'{uuid4()}.fa'))
    +
    +
    +

    Once you have renamed the MAGs, you can import them into a QIIME 2 artifact:

    +
    qiime tools cache-import \
    +    --cache ./cache \
    +    --key mags \
    +    --type "SampleData[MAGs]" \
    +    --input-path ./<directory with MAG FASTA files per sample>
    +
    +
    +

    for MAGs-per-sample, or:

    +
    qiime tools cache-import \
    +    --cache ./cache \
    +    --key mags \
    +    --type "FeatureData[MAG]" \
    +    --input-path ./<directory with MAG FASTA files>
    +
    +
    +

    for dereplicated MAGs. From here, you should be able to continue with the rest of the MOSHPIT pipeline as described in our tutorials.

    +
    +
    +

    Importing other data#

    +

    If you have other data that you would like to import into QIIME 2, you can use the qiime tools cache-import command - no +additional steps should be required. For example, you can import a set of Kraken 2 reports into a SampleData[Kraken2Report % Properties('reads')] +like this:

    +
    qiime tools cache-import \
    +    --cache ./cache \
    +    --key kraken2_reports_reads \
    +    --type "SampleData[Kraken2Report % Properties('reads')]" \
    +    --input-path ./<directory with Kraken 2 reports>
    +
    +
    +
    +

    Note

    +

    Remember: you can import any existing data into QIIME 2 artifacts, as long as it matches the format required by the respective +QIIME 2 semantic type.

    +
    +
    +
    + + + + +
    + + + + + + + + +
    + + + + + + + +
    +
    + + +
    + + +
    +
    +
    + + + + + +
    +
    + + \ No newline at end of file diff --git a/chapters/05_interoperability/intro.html b/chapters/05_interoperability/intro.html new file mode 100644 index 0000000..44ce4fd --- /dev/null +++ b/chapters/05_interoperability/intro.html @@ -0,0 +1,447 @@ + + + + + + + + + + + Interoperability with other tools — MOSHPIT plugin suite + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + +
    + +
    + + + + + +
    +
    + + + + + + +
    + + + +
    + +
    +
    + +
    +
    + +
    + +
    + +
    + + +
    + +
    + +
    + + + + + + + + + + + + + + + + + + + +
    + +
    + +
    +
    + + + +
    +

    Interoperability with other tools

    + +
    +
    + +
    +
    +
    + + + + +
    + +
    +

    Interoperability with other tools#

    +

    While most of the typical steps in a metagenomic analysis can be performed within QIIME 2, there are cases where you +might want to use other tools to perform certain tasks. In this chapter, we will show you how you can get some data in +and out of the QIIME 2 artifacts to continue your analysis workflow elsewhere.

    +
    +
    +
    + + + + +
    + + + + + + + + +
    + + + + +
    +
    + + +
    + + +
    +
    +
    + + + + + +
    +
    + + \ No newline at end of file diff --git a/genindex.html b/genindex.html index 5923ecd..028096d 100644 --- a/genindex.html +++ b/genindex.html @@ -183,6 +183,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/intro.html b/intro.html index 24a0859..0f7f099 100644 --- a/intro.html +++ b/intro.html @@ -185,6 +185,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • @@ -353,7 +358,7 @@

    MOSHPIT tutorialSetup and Data retrieval). Then, we will move to quality control and filtering of the raw reads (see Quality control). Once we have our clean dataset, we can start by recovering metagenome-assembled -genomes (MAGs) (see here), followed by taxonomic classification of reads and MAGs themselves (see +genomes (MAGs) (see here), followed by taxonomic classification of reads and MAGs themselves (see Taxonomic classification). Finally, we will estimate perform functional annotation of the dereplicated MAGs (see Functional annotation).

    Let’s dive in!

    diff --git a/objects.inv b/objects.inv index 0b38c526e85a01b2666e35b916e993d5d551d450..dd94be98f3718dfb0225ac2159b3f02a54239eec 100644 GIT binary patch delta 713 zcmV;)0yh241;7T7gMXGybK4*ghVS_m%(Pe4Nk1;VWu~o@gQsqjy9P_hVvK|eD7OE7 zS0Gs+BVne!cx2e!=Vh0V%}U%TBIN4+KFzVhsUS*F@`f3bD*u}E3?&aJo3;8-s*0g+ zForH;cec4Cs%_?C=}8mfrTerzAJT#{_@woIRdT74%U^%6d4GEo{|UxGBF^Eb;;u^D zm)1(~FhZM|$iG_5s5+yJcVlyL{}`*7Wn8(qyvwjf{FqjF57hxJQi(QGl-Db~YwH}> z8Sx{ReGGH+xJF|bA{DK0XBR@}M39EjOdChw9lucm>=)!rh4>5AoVPc;-!St=#2N7d zZHU5qQuB%=Sbs`V?HI|?S`WyO1ov-*7Wc15)Z^NQe@YcTagao_G-Du>7JZVIvJmnz z*w?`Zxi+`qVQn&+-Z?G9<8sg){dg7HD=b5sSvo$aMO*8#VV2^$<|G9~ zO66@xE17@{rBgH9zQcp@b{-IY(tBLLIG$KHzBv%x|LTmVCK*q~94vgYf#W}C@aE7u zCnq=bFn>C18~D7NmLR5BwvO;7yxQN^7T)}PdD=d|BGoD`C{*Jh@PX9Hc=>uH-E{qe zwD1KCMNP!;LdKNd^V9aneO?kPuePKJp-3p{Tb=?i1c2Hdu1Rl;f<@1X_kd&{{+ z`8!is!n8M)1dFV6!a#{{J6DG=MB(x7ik8f%dY$S|-h*2h(Thyu|v~JdWJIQ0zIxUG!_j delta 597 zcmV-b0;>JM2F(SKgMU^_kJ}&+zUNmUwO5+mwx`}!s+!$H)U>;Ii(nk1U`PS)uK#_< z#xD|VY_%5)4>R+9^MHZoF%7WcnlXVd!5|9>OMfFeoZQz)P1*p^GH zRUD#GbDn>dL~vUns%qJk-#<=kO&B*OK3*>#A%0E@a)Rmr7h`ovEmv|&j-^bAq$~_M zZ{slc4?8rtVaD=;)Os=KlxcPmoHoiU;61y60`v>=wqp1*)tuIMc)wximFWtp4DFdE zCnijb0nggRjd$ee~C}XsqHi8nqFRuSr(QrfftcL`r3&g_t#Q41V+qXi!*y zI-UPHi#g1C24H_u2y$d%!L0c3@p@X^-Vp&`3vUVV9)G?khB)yM#}rt+7Jw7Z4BQf2 zrtIB!-AP?epX%l2rWZz)-v4}lJUqQ1TUw<#l-Q6^6 z3gRVb@da9_G?24Pg}aS5DJm@mYqK}njhG!On?rD(bvIetkv$b-c;G{5*CXFBqG|5C zakB1l(@yLkAP1Y_K@>bP!;=Vb+@q*3E50h&`bO}gn7q~Bls;!dy&2q{19*d+{{Lw~ j9nc7|?!6hXc-z(I|7zw2VA>2-b6EeH#{u^*RF*plk#Z*I diff --git a/search.html b/search.html index 8ec659c..ec69d81 100644 --- a/search.html +++ b/search.html @@ -185,6 +185,11 @@
  • Dereplicated MAGs
  • +
  • Interoperability with other tools
    +
  • diff --git a/searchindex.js b/searchindex.js index b04e3ad..9cc9529 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles": {"Annotate orthologs against eggNOG database": [[13, "annotate-orthologs-against-eggnog-database"]], "Approach 1: Kraken 2": [[11, "approach-1-kraken-2"]], "Approach 2: Kaiju": [[11, "approach-2-kaiju"]], "Assemble contigs with MEGAHIT": [[8, "assemble-contigs-with-megahit"]], "Bin contigs with MetaBAT": [[8, "bin-contigs-with-metabat"]], "Compare MinHash signatures": [[6, "compare-minhash-signatures"]], "Compute MinHash signatures with Sourmash": [[6, "compute-minhash-signatures-with-sourmash"]], "Contig QC with QUAST": [[8, "contig-qc-with-quast"]], "Data retrieval": [[0, null]], "Dereplicate MAGs": [[6, "dereplicate-mags"]], "EggNOG search using Diamond aligner": [[13, "eggnog-search-using-diamond-aligner"]], "Estimate MAG abundance": [[5, "estimate-mag-abundance"]], "Evaluate bins with BUSCO": [[8, "evaluate-bins-with-busco"]], "Extract annotations": [[13, "extract-annotations"]], "Filter MAGs": [[8, "filter-mags"]], "Functional annotation": [[12, null], [13, null]], "Get MAG lengths": [[5, "get-mag-lengths"]], "Host read removal": [[2, null]], "Human host reads": [[2, "human-host-reads"]], "Index contigs": [[8, "index-contigs"]], "Index dereplicated MAGs": [[5, "index-dereplicated-mags"]], "Kaiju: protein-based classification": [[9, "kaiju-protein-based-classification"]], "Kraken 2: DNA-to-DNA classification": [[9, "kraken-2-dna-to-dna-classification"]], "Let\u2019s have a look at our CAZymes functional diversity!": [[13, "let-s-have-a-look-at-our-cazymes-functional-diversity"]], "Let\u2019s have a look at our estimated MAG abundance!": [[5, "let-s-have-a-look-at-our-estimated-mag-abundance"]], "MAG abundance estimation": [[5, null]], "MAG set dereplication": [[6, null]], "MOSHPIT tutorial": [[14, null]], "Map reads to contigs": [[8, "map-reads-to-contigs"]], "Map reads to dereplicated MAGs": [[5, 
"map-reads-to-dereplicated-mags"]], "Multiply tables": [[13, "multiply-tables"]], "Note on parallelization": [[1, "note-on-parallelization"]], "Quality control": [[3, null]], "Quality filtering": [[4, null]], "Quality overview": [[4, "quality-overview"]], "Read trimming and quality filtering": [[4, "read-trimming-and-quality-filtering"]], "Read-based classification overview": [[9, "read-based-classification-overview"]], "Recovery of MAGs": [[8, null]], "Recovery of Metagenome-assembled Genomes": [[7, null]], "Removal of contaminating reads": [[2, "removal-of-contaminating-reads"]], "Required databases": [[13, "required-databases"]], "Setup": [[1, null]], "Taxonomic classification": [[9, null]], "Taxonomic classification of MAGs": [[10, null]], "Taxonomic classification of reads": [[11, null]], "Visualization": [[11, "visualization"]]}, "docnames": ["chapters/00_data_retrieval", "chapters/00_setup", "chapters/01_filtering/host-filtering", "chapters/01_filtering/intro", "chapters/01_filtering/quality-filtering", "chapters/02_mag_reconstruction/abundance", "chapters/02_mag_reconstruction/dereplication", "chapters/02_mag_reconstruction/intro", "chapters/02_mag_reconstruction/reconstruction", "chapters/03_taxonomic_classification/intro", "chapters/03_taxonomic_classification/mags", "chapters/03_taxonomic_classification/reads", "chapters/04_functional_annotation/intro", "chapters/04_functional_annotation/mags", "intro"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9}, "filenames": ["chapters/00_data_retrieval.md", "chapters/00_setup.md", "chapters/01_filtering/host-filtering.md", "chapters/01_filtering/intro.md", "chapters/01_filtering/quality-filtering.md", 
"chapters/02_mag_reconstruction/abundance.md", "chapters/02_mag_reconstruction/dereplication.md", "chapters/02_mag_reconstruction/intro.md", "chapters/02_mag_reconstruction/reconstruction.md", "chapters/03_taxonomic_classification/intro.md", "chapters/03_taxonomic_classification/mags.md", "chapters/03_taxonomic_classification/reads.md", "chapters/04_functional_annotation/intro.md", "chapters/04_functional_annotation/mags.md", "intro.md"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"": [1, 8, 14], "0": [5, 6, 10, 11, 13], "00": 1, "0001": 13, "1": [1, 10], "10": [5, 6], "100": [5, 8], "128": 8, "14": 1, "150": 11, "16": [8, 11, 13], "2": [0, 1, 2, 3, 4, 5, 7, 10, 12, 14], "200": 8, "2016": 9, "2019": 9, "2024": 1, "24": [1, 8], "31": 9, "35": 6, "40": 5, "42": 5, "4g": 1, "5": [0, 1, 5, 10, 11], "50": 8, "64": 8, "72": [10, 11], "8": [5, 8], "90": 4, "99": 6, "As": 3, "For": [5, 7, 9, 12], "In": [1, 2, 4, 5, 6, 8, 10, 11, 12, 13], "It": 8, "One": 9, "The": [0, 3, 5, 8, 9, 13, 14], "Then": [5, 13, 14], "There": [2, 5, 8], "These": [9, 14], "To": [0, 1, 4, 11], "__cit": 2, "abl": 8, "ablab": 7, "about": [1, 4], "abov": 2, "abund": [8, 11, 13], "access": 0, "accur": 9, "across": 13, "action": [0, 1, 2, 4, 11, 14], "activ": 1, "actual": 8, "ad": 12, "adapt": 3, "addit": 8, "addition": [11, 14], "address": [0, 3], "adjust": 11, "advantag": 9, "affect": 9, "against": [2, 8, 9], "al": 9, "algorithm": 9, "align": [5, 8, 9, 12], "all": [0, 1, 2, 4, 5, 8, 9, 13, 14], "allow": [2, 5, 6, 7, 9, 12, 13], "alreadi": 3, "also": [2, 6, 8, 10], "altern": [8, 13], "amrfinderplu": 14, "an": [0, 1, 4, 8], "analys": [5, 9], "analysi": [3, 8, 13, 14], "analyz": [8, 14], "ani": [0, 2, 3, 6, 8], "anim": 13, "annot": 14, "antimicrobi": 14, "applic": [6, 14], "approach": [2, 7, 10], "appropri": 8, "ar": [2, 4, 5, 6, 7, 8, 9, 12, 13, 14], "archaea": [8, 11], "archiv": 0, "arg": 14, "artifact": [0, 2, 5, 8, 10, 11, 14], "aspect": 14, "assembl": [1, 10, 12, 14], 
"assembli": [5, 8, 9, 14], "assess": 9, "assign": [8, 9], "avail": [0, 1, 8, 14], "back": 5, "bacteri": 8, "bacteria": [8, 11], "bacteria_odb10": 8, "bar": [5, 11], "barplot": [5, 11], "base": [4, 5, 6, 8, 13, 14], "bashrc": 1, "bbuchfink": 12, "befor": [1, 2, 3, 8, 9, 11, 12], "begin": 14, "belong": [8, 11], "below": [2, 8, 13], "benchmark": 9, "benefit": 12, "berkeleylab": 7, "beta": 13, "better": 8, "between": [6, 8, 13], "bin": [7, 14], "biodivers": 9, "bioinformat": 9, "bitbucket": 7, "blastx": 9, "bokulich": 0, "both": [2, 11], "bowti": 2, "bowtie2": [2, 5], "bp": 4, "bracken": 11, "bracken_db": 11, "bracken_ft": 11, "bracken_ft_filt": 11, "bracken_report": 11, "bracken_taxonomi": 11, "brai": 13, "braycurti": 13, "brief": 3, "broader": 9, "bruijn": 8, "build": [2, 11, 13], "built": 2, "burrow": 9, "busco": 7, "busco_db": 8, "busco_result": 8, "bwt": 9, "bypass": 12, "c": 11, "cach": [0, 1, 2, 4, 5, 6, 8, 10, 11, 13], "calcul": [5, 8, 13], "call": [1, 8], "can": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "cannot": 11, "capabl": 14, "card": 14, "care": 0, "case": [0, 6, 8], "categori": 13, "caz": 13, "caz_annot_ft": 13, "caz_braycurtis_dist": 13, "caz_braycurtis_pcoa": 13, "caz_ft": 13, "centr": 9, "certain": 9, "chapter": 14, "check": [8, 14], "choos": 13, "class": 1, "classif": [5, 12, 14], "classifi": [5, 9, 10, 11], "clean": 14, "click": 13, "cluster": 6, "co": 8, "coassembl": 8, "cocoa": 5, "code": 12, "cog": 13, "collect": [2, 8, 11], "color": 13, "com": 0, "combin": [2, 13], "command": [0, 1, 4, 7, 8, 9, 10, 11, 12], "commonli": 9, "commun": [7, 9, 12], "compar": 9, "complet": [8, 12], "complex": [7, 12], "compon": 1, "composit": 9, "comprehens": 9, "compress": 6, "compris": 14, "comput": 14, "conda": 1, "confid": [5, 10, 11], "config": 1, "configur": 1, "connect": 12, "construct": [2, 8], "consult": [1, 9], "contact": 0, "contain": [0, 2, 3, 10, 14], "contig": [1, 7, 10, 12, 14], "contig_map": 8, "contigs_index": 8, "contigu": [8, 12], 
"continu": 11, "contribut": 12, "control": [2, 8, 14], "convert": 5, "coordin": 13, "copi": 8, "core": 14, "cores_per_nod": 1, "correspond": 0, "could": [6, 8, 9], "coupl": [5, 13], "cover": [2, 8, 11, 14], "coverag": 10, "cpu": [1, 8, 9, 12, 13], "creat": [1, 6, 8, 13], "critic": 12, "cultur": [7, 12], "current": [1, 5], "curti": 13, "custom": 13, "cutadapt": 4, "cycl": 12, "data": [1, 3, 4, 5, 8, 10, 11, 13, 14], "databas": [2, 5, 8, 9, 11], "dataset": [0, 2, 6, 8, 14], "db": [5, 8, 10, 11, 13], "de": 8, "default": 1, "defin": [1, 5, 8], "demonstr": [13, 14], "demultiplex": [2, 4], "demux": 4, "depend": [3, 6, 9], "derep": [5, 8], "derepl": [7, 8, 10, 13, 14], "derrickwood": [7, 9], "describ": 7, "detail": 5, "detect": 14, "determin": [9, 13], "develop": 8, "diamond": 12, "diamond_db": 13, "dib": 7, "differ": [2, 11, 13, 14], "directli": [7, 9, 12], "directori": 1, "disabl": 8, "diseas": 12, "dissimilar": 13, "distanc": [6, 13], "distribut": [1, 13], "dive": [1, 14], "diverg": 9, "divers": [2, 7], "divid": [8, 14], "dna": [3, 7, 8, 12], "do": [1, 5], "doc": 0, "document": [1, 4, 7, 9, 12], "domain": 0, "done": [2, 3, 8], "dot": 13, "download": [0, 8, 11, 13], "downstream": [3, 5, 8, 9], "draft": 7, "dure": 8, "e": [0, 9, 13], "each": [4, 5, 7, 11, 13, 14], "earli": 12, "ecolog": [7, 12], "ecosystem": 12, "effici": [5, 6, 8], "eggnog": 12, "eggnog_annot": 13, "eggnog_db": 13, "eggnog_ft": 13, "eggnog_hit": 13, "eggnogdb": 12, "either": 11, "element": 13, "email": 0, "emperor": 13, "enabl": 14, "end": [5, 8, 11], "enough": 2, "ensur": [0, 6, 8, 9, 12], "entir": [8, 10], "environ": [1, 7, 14], "enzym": 12, "estim": [11, 13, 14], "et": 9, "eukaryot": [8, 11], "evalu": [6, 7, 13], "even": 12, "exact": 9, "examin": 1, "exampl": [1, 4, 8, 12], "exclud": [8, 11], "exclus": 1, "execut": 1, "executor": 1, "expand": 3, "experi": [2, 3], "explan": 7, "explicitli": 1, "explor": 7, "extract": [10, 12], "ezlab": 7, "factor": 9, "fail": 0, "failed_run": 0, "fals": [1, 8, 10, 
11], "fast": 9, "fasta": [2, 13], "featur": [5, 6, 8, 10, 13], "featuredata": 2, "fetch": [2, 8, 11, 13, 14], "few": [2, 12], "file": [0, 1, 2, 5, 6, 8, 11, 13], "filter": [2, 3, 6, 11, 14], "final": [2, 11, 14], "find": [6, 13], "first": [2, 5, 8, 11, 13], "fix": 9, "flexibl": 14, "fm": 9, "focu": [8, 11, 12, 13], "focus": [3, 14], "follow": [1, 14], "fondu": 0, "format": 13, "forum": 1, "fraction": 11, "fragment": 9, "framework": 14, "frequenc": 13, "from": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13], "fulli": [12, 14], "function": [3, 7, 14], "fungi": 11, "further": [7, 8], "g": [9, 13], "gene": [8, 12, 13, 14], "gener": [1, 2, 3, 4, 5, 6, 8, 11, 13], "genera": 11, "genet": [2, 7, 8], "genom": [2, 6, 8, 10, 11, 12, 14], "get": [0, 4], "github": 14, "githubusercont": 0, "given": [8, 9], "go": [4, 8], "goal": 9, "gradient": 13, "graph": 8, "grch38": 2, "group": [5, 8, 9], "guid": 14, "ha": [8, 9, 12], "hash": 6, "have": [1, 2, 14], "heavili": 9, "help": [5, 8], "here": [1, 4, 5, 6, 8, 11, 14], "high": 8, "higher": 8, "highli": [6, 8, 9, 12], "highthroughputexecutor": 1, "hit": [5, 10, 11, 13], "host": [3, 14], "how": [1, 2, 12, 13], "http": [0, 7], "human": [11, 14], "human_reference_index": 2, "i": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 13, 14], "id": [0, 8], "identif": 12, "identifi": [6, 8, 9, 13], "import": [0, 2, 12], "includ": [3, 6, 7, 8, 9, 11], "index": [2, 9, 14], "indic": 5, "influenc": 9, "info": 11, "inform": [7, 8, 9, 12, 13], "input": [0, 2, 5, 8, 13], "insight": [7, 12, 13], "instal": [1, 14], "instruct": [1, 13, 14], "integr": 14, "intens": [8, 9, 12], "interest": 5, "involv": [7, 8, 9, 12], "issu": 0, "its": 13, "itself": 8, "job": 0, "k": 9, "kaiju_barplot": 11, "kaiju_ft": 11, "kaiju_ft_filt": 11, "kaiju_nr_euk": 11, "kaiju_taxonomi": 11, "keep": [5, 8], "kegg": 13, "kei": [0, 2, 9], "kept": 8, "kilobas": 5, "known": 9, "kraken": [5, 10], "kraken2": [5, 7, 9, 10, 11], "kraken2_db": [5, 10, 11], "kraken_hits_derep_50": 10, "kraken_hits_mags_derep": 5, 
"kraken_hits_read": 11, "kraken_reports_mags_derep": 5, "kraken_reports_mags_derep_50": 10, "kraken_reports_read": 11, "ksize": 6, "l50": 8, "lab": [0, 7], "label": [1, 9], "laboratori": 7, "larg": 6, "later": 8, "learn": [1, 2, 4], "len": [8, 11], "length": [4, 8, 9], "let": [1, 14], "like": [8, 12], "lineag": 8, "list": 0, "local": 8, "longer": 8, "longest": 6, "look": [8, 11], "low": [3, 4], "m": [5, 8, 11, 13], "mag": [7, 9, 12, 13, 14], "mags_derep": [5, 13], "mags_derep_50": [6, 10], "mags_derep_ft": [5, 13], "mags_derep_index": 5, "mags_derep_length": 5, "mags_derep_taxonomi": 5, "mags_derep_taxonomy_50": 10, "mags_dist_matrix_50": 6, "mags_filtered_50": [6, 8], "mags_ft_50": 6, "mags_minhash_50": 6, "mai": [3, 5, 6, 9, 14], "mail": 0, "main": [0, 14], "major": 9, "make": 1, "makeup": 7, "mani": [1, 8], "map": [2, 10, 11, 14], "mapper": 12, "mapq": 5, "match": [5, 9], "matrix": [6, 13], "max": 13, "max_block": 1, "max_work": 1, "mean": 9, "measur": 13, "megahit": [1, 7], "mem": 1, "memori": [8, 9, 10, 11, 12, 13], "mention": 1, "menzel": 9, "mer": 9, "merg": 6, "meta": [8, 14], "metabat": 7, "metadata": [0, 5, 8, 11, 13], "metagenom": [1, 3, 5, 8, 10, 11, 12, 14], "method": [6, 9, 13, 14], "metric": [5, 8, 13], "microb": 12, "microbi": [7, 9, 12], "microbiom": 14, "microorgan": 7, "might": 9, "million": 5, "min": [5, 6, 8], "minim": [5, 10, 11], "minimum": [4, 5, 8], "mixtur": 7, "mode": 8, "modular": 14, "more": [1, 2, 3, 4, 5, 7, 9, 11, 12], "moreov": 3, "moshpit": [0, 2, 3, 5, 6, 8, 10, 11, 13], "moshpit_doc": 0, "most": [6, 8, 14], "move": 14, "multi": 14, "n": 0, "n50": 8, "name": 12, "ncbi": 0, "ncbiaccessionid": 0, "necessari": [1, 5, 6, 14], "need": [0, 1, 2, 7, 11, 12, 13], "new": 2, "next": [2, 3, 5, 8, 13], "ng": 3, "nodes_per_block": 1, "non": 7, "now": [5, 8, 10, 11, 13], "nr_euk": 11, "nucleotid": 9, "num": [8, 13], "number": 8, "nutrient": 12, "o": [0, 2, 4, 5, 6, 8, 10, 11, 13], "observ": 13, "obtain": [11, 13], "offici": [1, 7, 9, 12], 
"omic": 14, "onc": [5, 8, 13, 14], "one": [2, 4, 5, 8, 11, 13], "onli": [0, 5, 6, 8, 12], "optim": 8, "option": [2, 5, 7], "order": [4, 11, 13], "org": 7, "organ": [7, 9, 11, 12], "origin": [5, 8, 11], "ortholog": 8, "other": [2, 3, 6, 8, 9, 12, 13, 14], "our": [6, 8, 12, 14], "out": [2, 3, 8, 14], "outcom": 9, "outlin": 12, "output": 6, "over": 13, "overview": 3, "own": 14, "p": [0, 4, 5, 6, 8, 10, 11, 13], "page": 14, "pair": [0, 4, 5, 8, 9, 11], "pangenom": 2, "paramet": [7, 8], "parsl": 1, "part": [8, 14], "particular": 8, "partit": 1, "path": [0, 2], "pathwai": 13, "pcoa": 13, "per": [1, 5, 8], "perfect": 5, "perform": [2, 11, 12, 13, 14], "phred": 5, "pipelin": 14, "place": 1, "plai": 13, "plasmid": 11, "platform": 14, "pleas": 1, "plot": [5, 11, 13], "plugin": [0, 2, 3, 4, 8, 11, 14], "pluspf": [5, 11], "popular": 8, "posit": 4, "post": 1, "potenti": [8, 9, 12], "precompil": 8, "prepar": 3, "presenc": 8, "present": [9, 13], "preset": 8, "press": 13, "previou": [8, 11], "previous": 12, "princip": 13, "procedur": 3, "process": [7, 8, 12, 14], "produc": [8, 14], "product": 13, "programmat": 0, "progress": 13, "prok": 8, "prokaryot": [8, 11], "protein": 12, "protozoa": 11, "proven": 14, "provid": [0, 1, 2, 3, 5, 7, 8, 9, 12, 13, 14], "publish": 14, "purpos": 8, "put": 8, "q2": [0, 1, 8, 11, 14], "qiim": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14], "qiime2": [7, 12], "qualiti": [2, 5, 7, 8, 9, 14], "quast": 7, "queri": [9, 11], "qzv": [4, 5, 8, 11, 13], "raw": [0, 14], "read": [0, 3, 7, 12, 14], "readi": [5, 13], "reads_filt": [2, 5, 8, 11], "reads_pair": [0, 11], "reads_singl": 0, "reads_to_contig": 8, "reads_to_derep_mag": 5, "reads_trim": [2, 4], "recom": 8, "recommend": 2, "reconstruct": [7, 12], "recov": [5, 7, 8, 12, 14], "recoveri": 12, "red": 14, "reduc": 6, "redund": [6, 7], "refer": [2, 5, 7, 8, 9, 11, 12, 13], "reference_index": 2, "reference_seeq": 2, "reference_seq": 2, "refseq": 11, "rel": 11, "relat": 11, "remov": [3, 4, 6, 11, 14], "report": 
[5, 10, 11], "repositori": 14, "repres": [2, 6, 8], "represent": 6, "reproduc": 14, "requir": [0, 5, 8, 11], "research": [7, 9], "resist": 14, "resourc": [1, 8, 9, 12], "respect": 14, "rest": 0, "result": [5, 8, 9, 11, 13], "retain": [5, 6], "retri": 0, "retriev": [11, 14], "return": 2, "rgi": 14, "right": 13, "role": [7, 12], "rpkm": 5, "run": [0, 1, 8, 9, 11, 12], "same": 10, "sampl": [3, 5, 8, 9, 13], "sampledata": 10, "sbatch": 1, "scaffold": 8, "scale": 6, "scatter": 13, "scheduler_opt": 1, "scienc": 14, "scope": 9, "score": [4, 5], "section": [3, 5, 11, 12], "see": [4, 5, 12, 13, 14], "seed": [5, 8, 13], "select": [9, 13], "sensit": 8, "seq": [5, 8, 10, 11], "sequenc": [0, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14], "set": [7, 8, 10, 14], "setup": 14, "sever": [8, 14], "short": [8, 9], "shorter": 4, "shotgun": [11, 14], "should": [1, 3, 5, 8, 11, 13], "shown": 2, "signific": 9, "similar": [5, 6, 8, 9, 11, 13], "similarli": 11, "simpli": 13, "simplifi": 8, "sinc": 2, "singl": [0, 1, 2, 8, 12], "size": [8, 9, 11], "slurmprovid": 1, "small": 14, "smaller": [1, 8], "so": 2, "some": [9, 14], "sourc": 1, "sourmash": 7, "spade": [7, 8], "speci": 11, "special": 12, "specif": [2, 6, 8, 9, 13], "specifi": [8, 13], "sra": 0, "standard": 8, "start": [2, 13, 14], "statist": 8, "step": [3, 5, 6, 7, 8, 11, 12, 13], "still": 8, "store": 1, "structur": 8, "studi": 12, "subset": [1, 10], "suffici": [8, 9, 12], "suit": [3, 14], "summar": 4, "support": 5, "sure": 1, "surviv": 12, "system": [8, 9, 12], "tab": 13, "tabl": [5, 6, 8, 11], "table1": 13, "table2": 13, "tailor": 8, "take": [0, 8], "taxa": [5, 11], "taxon": 13, "taxonom": [5, 14], "taxonomi": [5, 10, 11], "techniqu": 9, "than": 4, "thei": [0, 14], "them": 2, "themselv": 14, "thi": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], "thorugh": 8, "those": 9, "thread": [5, 8, 10, 11], "threshold": [6, 10, 11], "through": [0, 1, 8, 14], "time": 13, "timepoint": 13, "too": 9, "tool": [0, 1, 2, 6, 7, 8, 9, 10, 11, 12, 14], "top": 13, 
"total": [8, 13], "tpm": 5, "track": 14, "trajectori": 13, "transcript": 5, "transform": 9, "translat": 9, "transport": 12, "trim": 3, "true": 8, "try": [8, 11], "tsv": [0, 5, 11, 13], "tutori": [0, 1, 2, 8, 10, 12, 13], "two": [8, 11], "type": [0, 2, 3, 9, 11, 13], "typic": 9, "u": 13, "unbin": 8, "unbinned_contig": 8, "unclassifi": 11, "uncultur": 7, "under": 8, "understand": 12, "uniqu": 6, "univec_cor": 11, "up": 14, "us": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14], "usag": 8, "valuabl": 8, "vari": 9, "variou": [7, 8, 12], "verbos": [0, 5, 6, 8, 10, 11, 13], "view": 1, "viral": [11, 14], "virom": 14, "virus": 8, "visual": [1, 4, 5, 8, 13], "voutcn": 7, "wa": [1, 11], "wai": [0, 5], "walltim": 1, "want": [8, 14], "we": [0, 1, 2, 4, 5, 6, 8, 10, 11, 12, 13, 14], "well": 8, "wget": 0, "wheeler": 9, "when": [0, 9], "where": 8, "which": [1, 2, 3, 5, 8, 9, 11], "while": [1, 9], "whole": 14, "wiki": 14, "wish": 8, "within": [9, 12], "without": 7, "wood": 9, "work": 1, "worker_init": 1, "workflow": [6, 7, 8, 9, 12, 14], "would": 8, "ye": 9, "you": [0, 1, 2, 4, 8, 11, 13, 14], "your": [0, 1, 5, 8, 9, 11, 12, 13], "z": 11}, "titles": ["Data retrieval", "Setup", "Host read removal", "Quality control", "Quality filtering", "MAG abundance estimation", "MAG set dereplication", "Recovery of Metagenome-assembled Genomes", "Recovery of MAGs", "Taxonomic classification", "Taxonomic classification of MAGs", "Taxonomic classification of reads", "Functional annotation", "Functional annotation", "MOSHPIT tutorial"], "titleterms": {"": [5, 13], "1": 11, "2": [9, 11], "abund": 5, "against": 13, "align": 13, "annot": [12, 13], "approach": 11, "assembl": [7, 8], "base": 9, "bin": 8, "busco": 8, "cazym": 13, "classif": [9, 10, 11], "compar": 6, "comput": 6, "contamin": 2, "contig": 8, "control": 3, "data": 0, "databas": 13, "derepl": [5, 6], "diamond": 13, "divers": 13, "dna": 9, "eggnog": 13, "estim": 5, "evalu": 8, "extract": 13, "filter": [4, 8], "function": [12, 13], "genom": 7, 
"get": 5, "have": [5, 13], "host": 2, "human": 2, "index": [5, 8], "kaiju": [9, 11], "kraken": [9, 11], "length": 5, "let": [5, 13], "look": [5, 13], "mag": [5, 6, 8, 10], "map": [5, 8], "megahit": 8, "metabat": 8, "metagenom": 7, "minhash": 6, "moshpit": 14, "multipli": 13, "note": 1, "ortholog": 13, "our": [5, 13], "overview": [4, 9], "parallel": 1, "protein": 9, "qc": 8, "qualiti": [3, 4], "quast": 8, "read": [2, 4, 5, 8, 9, 11], "recoveri": [7, 8], "remov": 2, "requir": 13, "retriev": 0, "search": 13, "set": 6, "setup": 1, "signatur": 6, "sourmash": 6, "tabl": 13, "taxonom": [9, 10, 11], "trim": 4, "tutori": 14, "us": 13, "visual": 11}}) \ No newline at end of file +Search.setIndex({"alltitles": {"Annotate orthologs against eggNOG database": [[13, "annotate-orthologs-against-eggnog-database"]], "Approach 1: Kraken 2": [[11, "approach-1-kraken-2"]], "Approach 2: Kaiju": [[11, "approach-2-kaiju"]], "Assemble contigs with MEGAHIT": [[8, "assemble-contigs-with-megahit"]], "Bin contigs with MetaBAT": [[8, "bin-contigs-with-metabat"]], "Compare MinHash signatures": [[6, "compare-minhash-signatures"]], "Compute MinHash signatures with Sourmash": [[6, "compute-minhash-signatures-with-sourmash"]], "Contig QC with QUAST": [[8, "contig-qc-with-quast"]], "Data retrieval": [[0, null]], "Dereplicate MAGs": [[6, "dereplicate-mags"]], "EggNOG search using Diamond aligner": [[13, "eggnog-search-using-diamond-aligner"]], "Estimate MAG abundance": [[5, "estimate-mag-abundance"]], "Evaluate bins with BUSCO": [[8, "evaluate-bins-with-busco"]], "Exporting data and connecting with other tools": [[14, null]], "Extract annotations": [[13, "extract-annotations"]], "Filter MAGs": [[8, "filter-mags"]], "Functional annotation": [[12, null], [13, null]], "Get MAG lengths": [[5, "get-mag-lengths"]], "Host read removal": [[2, null]], "Human host reads": [[2, "human-host-reads"]], "Importing data from other tools": [[15, null]], "Importing other data": [[15, "importing-other-data"]], "Index 
contigs": [[8, "index-contigs"]], "Index dereplicated MAGs": [[5, "index-dereplicated-mags"]], "Interoperability with other tools": [[16, null]], "Kaiju: protein-based classification": [[9, "kaiju-protein-based-classification"]], "Kraken 2: DNA-to-DNA classification": [[9, "kraken-2-dna-to-dna-classification"]], "Let\u2019s have a look at our CAZymes functional diversity!": [[13, "let-s-have-a-look-at-our-cazymes-functional-diversity"]], "Let\u2019s have a look at our estimated MAG abundance!": [[5, "let-s-have-a-look-at-our-estimated-mag-abundance"]], "MAG abundance estimation": [[5, null]], "MAG set dereplication": [[6, null]], "MOSHPIT tutorial": [[17, null]], "Map reads to contigs": [[8, "map-reads-to-contigs"]], "Map reads to dereplicated MAGs": [[5, "map-reads-to-dereplicated-mags"]], "Microbial pangenomics with Anvi\u2019o": [[14, "microbial-pangenomics-with-anvi-o"]], "Multiply tables": [[13, "multiply-tables"]], "Note on parallelization": [[1, "note-on-parallelization"]], "Quality control": [[3, null]], "Quality filtering": [[4, null]], "Quality overview": [[4, "quality-overview"]], "Read trimming and quality filtering": [[4, "read-trimming-and-quality-filtering"]], "Read-based classification overview": [[9, "read-based-classification-overview"]], "Recovery of MAGs": [[8, null]], "Recovery of Metagenome-assembled Genomes": [[7, null]], "Removal of contaminating reads": [[2, "removal-of-contaminating-reads"]], "Required databases": [[13, "required-databases"]], "Setup": [[1, null]], "Taxonomic classification": [[9, null]], "Taxonomic classification of MAGs": [[10, null]], "Taxonomic classification of reads": [[11, null]], "Visualization": [[11, "visualization"]], "Visualizing Kraken 2 reports with Pavian": [[14, "visualizing-kraken-2-reports-with-pavian"]], "Working with exisiting contigs": [[15, "working-with-exisiting-contigs"]], "Working with existing MAGs": [[15, "working-with-existing-mags"]]}, "docnames": ["chapters/00_data_retrieval", 
"chapters/00_setup", "chapters/01_filtering/host-filtering", "chapters/01_filtering/intro", "chapters/01_filtering/quality-filtering", "chapters/02_mag_reconstruction/abundance", "chapters/02_mag_reconstruction/dereplication", "chapters/02_mag_reconstruction/intro", "chapters/02_mag_reconstruction/reconstruction", "chapters/03_taxonomic_classification/intro", "chapters/03_taxonomic_classification/mags", "chapters/03_taxonomic_classification/reads", "chapters/04_functional_annotation/intro", "chapters/04_functional_annotation/mags", "chapters/05_interoperability/export", "chapters/05_interoperability/import", "chapters/05_interoperability/intro", "intro"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9}, "filenames": ["chapters/00_data_retrieval.md", "chapters/00_setup.md", "chapters/01_filtering/host-filtering.md", "chapters/01_filtering/intro.md", "chapters/01_filtering/quality-filtering.md", "chapters/02_mag_reconstruction/abundance.md", "chapters/02_mag_reconstruction/dereplication.md", "chapters/02_mag_reconstruction/intro.md", "chapters/02_mag_reconstruction/reconstruction.md", "chapters/03_taxonomic_classification/intro.md", "chapters/03_taxonomic_classification/mags.md", "chapters/03_taxonomic_classification/reads.md", "chapters/04_functional_annotation/intro.md", "chapters/04_functional_annotation/mags.md", "chapters/05_interoperability/export.md", "chapters/05_interoperability/import.md", "chapters/05_interoperability/intro.md", "intro.md"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"": [1, 8, 14, 15, 17], "0": [5, 6, 10, 11, 13], "00": 1, "0001": 13, "1": [1, 10], "10": [5, 6], "100": [5, 8], "128": 8, "14": 1, "150": 
11, "16": [8, 11, 13], "2": [0, 1, 2, 3, 4, 5, 7, 10, 12, 15, 16, 17], "200": 8, "2016": 9, "2019": 9, "2024": 1, "24": [1, 8], "31": 9, "35": 6, "40": 5, "42": 5, "4g": 1, "5": [0, 1, 5, 10, 11], "50": 8, "64": 8, "72": [10, 11], "8": [5, 8], "90": 4, "99": 6, "As": 3, "For": [5, 7, 9, 12, 15], "If": [14, 15], "In": [1, 2, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16], "It": 8, "One": [9, 14], "The": [0, 3, 5, 8, 9, 13, 14, 15, 17], "Then": [5, 13, 17], "There": [2, 5, 8], "These": [9, 17], "To": [0, 1, 4, 11, 14], "__cit": 2, "abl": [8, 15], "ablab": 7, "about": [1, 4], "abov": 2, "abund": [8, 11, 13], "access": [0, 14], "accur": 9, "across": [13, 15], "action": [0, 1, 2, 4, 11, 15, 17], "activ": 1, "actual": 8, "ad": 12, "adapt": 3, "addit": [8, 15], "addition": [11, 17], "address": [0, 3], "adjust": 11, "advantag": 9, "affect": 9, "against": [2, 8, 9], "al": 9, "algorithm": 9, "align": [5, 8, 9, 12], "all": [0, 1, 2, 4, 5, 8, 9, 13, 17], "allow": [2, 5, 6, 7, 9, 12, 13, 14, 15], "alreadi": [3, 15], "also": [2, 6, 8, 10, 15], "altern": [8, 13], "amrfinderplu": 17, "an": [0, 1, 4, 8, 14, 15], "analys": [5, 9, 14], "analysi": [3, 8, 13, 14, 15, 16, 17], "analyz": [8, 17], "ani": [0, 2, 3, 6, 8, 14, 15], "anim": 13, "annot": [15, 17], "anoth": 14, "antimicrobi": 17, "applic": [6, 17], "approach": [2, 7, 10], "appropri": [8, 15], "ar": [2, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17], "archaea": [8, 11], "archiv": 0, "arg": 17, "artifact": [0, 2, 5, 8, 10, 11, 14, 15, 16, 17], "aspect": 17, "assembl": [1, 10, 12, 15, 17], "assembli": [5, 8, 9, 15, 17], "assess": 9, "assign": [8, 9], "assum": 15, "avail": [0, 1, 8, 14, 17], "awk": 14, "back": 5, "bacteri": 8, "bacteria": [8, 11], "bacteria_odb10": 8, "bar": [5, 11], "barplot": [5, 11], "base": [4, 5, 6, 8, 13, 17], "bashrc": 1, "bbuchfink": 12, "befor": [1, 2, 3, 8, 9, 11, 12, 15], "begin": 17, "belong": [8, 11], "below": [2, 8, 13, 14, 15], "benchmark": 9, "benefit": 12, "berkeleylab": 7, "beta": 13, "better": 8, "between": 
[6, 8, 13], "bin": [7, 14, 15, 17], "biodivers": 9, "bioinformat": [9, 14], "bitbucket": 7, "blastx": 9, "bokulich": 0, "both": [2, 11], "bowti": 2, "bowtie2": [2, 5], "bp": 4, "bracken": 11, "bracken_db": 11, "bracken_ft": 11, "bracken_ft_filt": 11, "bracken_report": 11, "bracken_taxonomi": 11, "brai": 13, "braycurti": 13, "brief": 3, "broader": 9, "broken": 14, "bruijn": 8, "build": [2, 11, 13], "built": 2, "burrow": 9, "busco": 7, "busco_db": 8, "busco_result": 8, "bwt": 9, "bypass": 12, "c": 11, "cach": [0, 1, 2, 4, 5, 6, 8, 10, 11, 13, 14, 15], "calcul": [5, 8, 13], "call": [1, 8], "can": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], "cannot": 11, "capabl": 17, "card": 17, "care": 0, "case": [0, 6, 8, 15, 16], "cat": 14, "categori": 13, "caz": 13, "caz_annot_ft": 13, "caz_braycurtis_dist": 13, "caz_braycurtis_pcoa": 13, "caz_ft": 13, "centr": 9, "certain": [9, 16], "chapter": [16, 17], "check": [8, 17], "choos": 13, "class": 1, "classif": [5, 12, 17], "classifi": [5, 9, 10, 11, 14], "clean": 17, "click": 13, "cluster": [6, 14], "co": 8, "coassembl": 8, "cocoa": 5, "code": 12, "cog": 13, "collect": [2, 8, 11], "color": 13, "com": 0, "combin": [2, 13], "command": [0, 1, 4, 7, 8, 9, 10, 11, 12, 14, 15], "common": 14, "commonli": 9, "commun": [7, 9, 12], "compar": 9, "compat": 14, "complet": [8, 12], "complex": [7, 12], "compon": 1, "composit": [9, 14], "comprehens": 9, "compress": 6, "compris": 17, "comput": 17, "conda": 1, "confid": [5, 10, 11], "config": 1, "configur": 1, "connect": 12, "construct": [2, 8], "consult": [1, 9], "contact": 0, "contain": [0, 2, 3, 10, 14, 17], "contig": [1, 7, 10, 12, 17], "contig_map": 8, "contigs_index": 8, "contigs_renam": 15, "contigu": [8, 12], "continu": [11, 15, 16], "contribut": 12, "control": [2, 8, 17], "convert": 5, "coordin": 13, "copi": [8, 14], "core": 17, "cores_per_nod": 1, "correspond": [0, 14], "could": [6, 8, 9, 14, 15], "coupl": [5, 13], "cours": 14, "cover": [2, 8, 11, 17], "coverag": 10, 
"cp": 14, "cpu": [1, 8, 9, 12, 13], "creat": [1, 6, 8, 13, 14], "critic": 12, "cultur": [7, 12], "current": [1, 5], "curti": 13, "custom": 13, "cutadapt": 4, "cycl": 12, "data": [1, 3, 4, 5, 8, 10, 11, 13, 16, 17], "databas": [2, 5, 8, 9, 11], "dataset": [0, 2, 6, 8, 17], "db": [5, 8, 10, 11, 13], "de": 8, "default": 1, "defin": [1, 5, 8], "demo": 14, "demonstr": [13, 17], "demultiplex": [2, 4], "demux": 4, "depend": [3, 6, 9], "derep": [5, 8], "derepl": [7, 8, 10, 13, 15, 17], "derrickwood": [7, 9], "describ": [7, 14, 15], "detail": [5, 15], "detect": 17, "determin": [9, 13], "develop": 8, "diamond": 12, "diamond_db": 13, "dib": 7, "differ": [2, 11, 13, 15, 17], "directli": [7, 9, 12, 14, 15], "directori": [1, 14, 15], "disabl": 8, "diseas": 12, "dissimilar": 13, "distanc": [6, 13], "distribut": [1, 13], "dive": [1, 17], "diverg": 9, "divers": [2, 7], "divid": [8, 17], "dna": [3, 7, 8, 12], "do": [1, 5, 14, 15], "doc": 0, "document": [1, 4, 7, 9, 12], "doe": 14, "domain": 0, "done": [2, 3, 8], "dot": 13, "download": [0, 8, 11, 13], "downstream": [3, 5, 8, 9], "draft": 7, "dure": 8, "e": [0, 9, 13], "each": [4, 5, 7, 11, 13, 15, 17], "earli": 12, "easi": 14, "ecolog": [7, 12], "ecosystem": 12, "effici": [5, 6, 8], "eggnog": 12, "eggnog_annot": 13, "eggnog_db": 13, "eggnog_ft": 13, "eggnog_hit": 13, "eggnogdb": 12, "either": 11, "element": 13, "elsewher": 16, "email": 0, "emperor": 13, "enabl": 17, "end": [5, 8, 11], "enough": 2, "ensur": [0, 6, 8, 9, 12, 15], "entir": [8, 10, 15], "environ": [1, 7, 17], "enzym": 12, "estim": [11, 13, 17], "et": 9, "eukaryot": [8, 11], "evalu": [6, 7, 13], "even": 12, "exact": 9, "examin": 1, "exampl": [1, 4, 8, 12, 14, 15], "exclud": [8, 11], "exclus": 1, "execut": 1, "executor": 1, "expand": 3, "experi": [2, 3], "explan": 7, "explicitli": 1, "explor": [7, 14], "exported_mag": 14, "exported_report": 14, "extract": [10, 12], "ezlab": 7, "f": 15, "fa": 15, "factor": 9, "fail": [0, 14], "failed_run": 0, "fals": [1, 8, 10, 11], 
"familiar": 14, "fast": 9, "fasta": [2, 13, 15], "favourit": 14, "featur": [5, 6, 8, 10, 13], "featuredata": [2, 15], "fetch": [2, 8, 11, 13, 17], "few": [2, 12], "file": [0, 1, 2, 5, 6, 8, 11, 13, 14, 15], "filter": [2, 3, 6, 11, 17], "final": [2, 11, 17], "find": [6, 13, 14], "first": [2, 5, 8, 11, 13], "fix": 9, "flexibl": 17, "fm": 9, "focu": [8, 11, 12, 13], "focus": [3, 17], "follow": [1, 14, 17], "fondu": 0, "format": [13, 14, 15], "forum": 1, "fraction": 11, "fragment": 9, "framework": 17, "frequenc": 13, "from": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14], "fulli": [12, 17], "function": [3, 7, 17], "fungi": 11, "further": [7, 8, 14], "g": [9, 13], "gene": [8, 12, 13, 14, 17], "gener": [1, 2, 3, 4, 5, 6, 8, 11, 13], "genera": 11, "genet": [2, 7, 8], "genom": [2, 6, 8, 10, 11, 12, 14, 17], "get": [0, 4, 16], "github": 17, "githubusercont": 0, "give": 14, "given": [8, 9], "go": [4, 8], "goal": 9, "gradient": 13, "graph": 8, "grch38": 2, "grep": 14, "group": [5, 8, 9], "guid": 17, "ha": [8, 9, 12], "hash": 6, "have": [1, 2, 14, 15, 17], "heavili": 9, "help": [5, 8], "here": [1, 4, 5, 6, 8, 11, 14, 15, 17], "high": 8, "higher": 8, "highli": [6, 8, 9, 12], "highthroughputexecutor": 1, "hit": [5, 10, 11, 13], "host": [3, 17], "how": [1, 2, 12, 13, 14, 16], "howev": 15, "http": [0, 7], "human": [11, 17], "human_reference_index": 2, "i": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 17], "id": [0, 8, 15], "identif": 12, "identifi": [6, 8, 9, 13, 15], "import": [0, 2, 12], "includ": [3, 6, 7, 8, 9, 11], "index": [2, 9, 17], "indic": 5, "influenc": 9, "info": 11, "inform": [7, 8, 9, 12, 13], "input": [0, 2, 5, 8, 13, 14, 15], "insight": [7, 12, 13], "instal": [1, 17], "instruct": [1, 13, 17], "integr": 17, "intens": [8, 9, 12], "interact": 14, "interest": [5, 15], "involv": [7, 8, 9, 12], "issu": 0, "its": 13, "itself": 8, "job": 0, "join": 15, "k": 9, "kaiju_barplot": 11, "kaiju_ft": 11, "kaiju_ft_filt": 11, "kaiju_nr_euk": 11, "kaiju_taxonomi": 11, "keep": [5, 
8, 14], "kegg": 13, "kei": [0, 2, 9, 14, 15], "kept": 8, "kilobas": 5, "known": 9, "kraken": [5, 10, 15], "kraken2": [5, 7, 9, 10, 11], "kraken2_db": [5, 10, 11], "kraken2_reports_read": 15, "kraken2report": 15, "kraken_hits_derep_50": 10, "kraken_hits_mags_derep": 5, "kraken_hits_read": 11, "kraken_reports_mags_derep": 5, "kraken_reports_mags_derep_50": 10, "kraken_reports_read": [11, 14], "ksize": 6, "l50": 8, "lab": [0, 7], "label": [1, 9], "laboratori": 7, "larg": 6, "later": 8, "lead": 14, "learn": [1, 2, 4], "len": [8, 11], "length": [4, 8, 9], "let": [1, 17], "like": [8, 12, 15], "lineag": 8, "list": 0, "listdir": 15, "local": 8, "locat": 14, "long": [14, 15], "longer": 8, "longest": 6, "look": [8, 11, 15], "low": [3, 4], "m": [5, 8, 11, 13], "mag": [7, 9, 12, 13, 14, 17], "mags_derep": [5, 13], "mags_derep_50": [6, 10], "mags_derep_ft": [5, 13], "mags_derep_index": 5, "mags_derep_length": 5, "mags_derep_taxonomi": 5, "mags_derep_taxonomy_50": 10, "mags_dist_matrix_50": 6, "mags_filtered_50": [6, 8], "mags_ft_50": 6, "mags_minhash_50": 6, "mai": [3, 5, 6, 9, 14, 15, 17], "mail": 0, "main": [0, 17], "major": 9, "make": 1, "makeup": 7, "mani": [1, 8], "manual": 14, "map": [2, 10, 11, 17], "mapper": 12, "mapq": 5, "match": [5, 9, 15], "matrix": [6, 13], "max": 13, "max_block": 1, "max_work": 1, "mean": [9, 14], "measur": 13, "megahit": [1, 7], "mem": 1, "memori": [8, 9, 10, 11, 12, 13], "mention": 1, "menzel": 9, "mer": 9, "merg": 6, "meta": [8, 17], "metabat": 7, "metadata": [0, 5, 8, 11, 13], "metagenom": [1, 3, 5, 8, 10, 11, 12, 15, 16, 17], "method": [6, 9, 13, 17], "metric": [5, 8, 13], "microb": 12, "microbi": [7, 9, 12], "microbiom": 17, "microorgan": 7, "might": [9, 16], "million": 5, "min": [5, 6, 8], "mind": 14, "minim": [5, 10, 11], "minimum": [4, 5, 8], "mixtur": 7, "mkdir": 14, "mode": 8, "modular": 17, "more": [1, 2, 3, 4, 5, 7, 9, 11, 12, 15], "moreov": 3, "moshpit": [0, 2, 3, 5, 6, 8, 10, 11, 13, 15], "moshpit_doc": 0, "most": [6, 8, 14, 16, 
17], "move": 17, "much": 15, "multi": 17, "n": 0, "n50": 8, "name": 12, "navig": 14, "ncbi": 0, "ncbiaccessionid": 0, "necessari": [1, 5, 6, 17], "need": [0, 1, 2, 7, 11, 12, 13, 14, 15], "never": 14, "new": 2, "next": [2, 3, 5, 8, 13], "ng": [3, 15], "nodes_per_block": 1, "non": [7, 15], "now": [5, 8, 10, 11, 13], "nr_euk": 11, "nucleotid": 9, "num": [8, 13], "number": 8, "nutrient": 12, "o": [0, 2, 4, 5, 6, 8, 10, 11, 13, 15], "observ": 13, "obtain": [11, 13, 14], "offer": 14, "offici": [1, 7, 9, 12], "omic": 17, "onc": [5, 8, 13, 15, 17], "one": [2, 4, 5, 8, 11, 13], "onli": [0, 5, 6, 8, 12], "optim": 8, "option": [2, 5, 7], "order": [4, 11, 13], "org": 7, "organ": [7, 9, 11, 12], "origin": [5, 8, 11, 14], "ortholog": 8, "other": [2, 3, 6, 8, 9, 12, 13, 17], "our": [6, 8, 12, 14, 15, 17], "out": [2, 3, 8, 16, 17], "outcom": 9, "outlin": 12, "output": 6, "outsid": 15, "over": 13, "overview": 3, "own": 17, "p": [0, 4, 5, 6, 8, 10, 11, 13, 15], "page": 17, "pair": [0, 4, 5, 8, 9, 11], "pan": 14, "pangenom": 2, "paramet": [7, 8], "parsl": 1, "part": [8, 17], "particular": 8, "partit": 1, "path": [0, 2, 15], "pathwai": 13, "pcoa": 13, "per": [1, 5, 8, 15], "perfect": 5, "perform": [2, 11, 12, 13, 15, 16, 17], "phred": 5, "pipelin": [15, 17], "place": 1, "plai": 13, "plasmid": 11, "platform": [14, 17], "pleas": 1, "plot": [5, 11, 13], "plugin": [0, 2, 3, 4, 8, 11, 17], "pluspf": [5, 11], "popular": 8, "posit": 4, "possibl": 14, "post": 1, "potenti": [8, 9, 12], "precompil": 8, "prepar": 3, "presenc": 8, "present": [9, 13], "preset": 8, "press": 13, "previou": [8, 11], "previous": 12, "princip": 13, "print": 14, "procedur": 3, "process": [7, 8, 12, 14, 15, 17], "produc": [8, 17], "product": 13, "programmat": 0, "progress": 13, "prok": 8, "prokaryot": [8, 11], "properti": 15, "protein": 12, "protozoa": 11, "proven": 17, "provid": [0, 1, 2, 3, 5, 7, 8, 9, 12, 13, 14, 17], "publish": 17, "purpos": [8, 15], "put": 8, "python": 15, "q2": [0, 1, 8, 11, 17], "qiim": [0, 1, 2, 
3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17], "qiime2": [7, 12], "qualiti": [2, 5, 7, 8, 9, 17], "quast": 7, "queri": [9, 11], "quick": 14, "qzv": [4, 5, 8, 11, 13], "r": 14, "raw": [0, 17], "read": [0, 3, 7, 12, 14, 15, 17], "readabl": 14, "readi": [5, 13], "reads_filt": [2, 5, 8, 11], "reads_pair": [0, 11], "reads_singl": 0, "reads_to_contig": 8, "reads_to_derep_mag": 5, "reads_trim": [2, 4], "recom": 8, "recommend": 2, "reconstruct": [7, 12], "recov": [5, 7, 8, 12, 15, 17], "recoveri": 12, "red": 17, "reduc": 6, "redund": [6, 7], "refer": [2, 5, 7, 8, 9, 11, 12, 13], "reference_index": 2, "reference_seeq": 2, "reference_seq": 2, "refseq": 11, "rel": 11, "relat": 11, "relev": 15, "rememb": 15, "remov": [3, 4, 6, 11, 17], "renam": 15, "report": [5, 10, 11, 15], "repositori": 17, "repres": [2, 6, 8], "represent": 6, "reproduc": 17, "requir": [0, 5, 8, 11, 15], "research": [7, 9], "resist": 17, "resourc": [1, 8, 9, 12], "respect": [15, 17], "rest": [0, 15], "result": [5, 8, 9, 11, 13, 14, 15], "retain": [5, 6], "retri": 0, "retriev": [11, 17], "return": 2, "rgi": 17, "right": 13, "role": [7, 12], "rpkm": 5, "run": [0, 1, 8, 9, 11, 12], "same": 10, "sampl": [3, 5, 8, 9, 13, 14, 15], "sampledata": [10, 15], "sbatch": 1, "scaffold": 8, "scale": 6, "scatter": 13, "scheduler_opt": 1, "scienc": 17, "scope": 9, "score": [4, 5], "script": 15, "section": [3, 5, 11, 12], "see": [4, 5, 12, 13, 15, 17], "seed": [5, 8, 13], "select": [9, 13], "semant": 15, "sensit": 8, "seq": [5, 8, 10, 11], "sequenc": [0, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 17], "set": [7, 8, 10, 15, 17], "setup": 17, "sever": [8, 17], "short": [8, 9], "shorter": 4, "shortuuid": 15, "shotgun": [11, 17], "should": [1, 3, 5, 8, 11, 13, 14, 15], "show": 16, "shown": 2, "signific": 9, "similar": [5, 6, 8, 9, 11, 13], "similarli": 11, "simpli": 13, "simplifi": 8, "sinc": 2, "singl": [0, 1, 2, 8, 12], "site": 14, "size": [8, 9, 11], "slurmprovid": 1, "small": 17, "smaller": [1, 8], "so": 2, "some": [9, 14, 15, 16, 
17], "sometim": 14, "sourc": 1, "sourmash": 7, "spade": [7, 8], "speci": 11, "special": 12, "specif": [2, 6, 8, 9, 13], "specifi": [8, 13], "sra": 0, "standard": 8, "start": [2, 13, 15, 17], "statist": 8, "step": [3, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16], "still": 8, "store": 1, "structur": 8, "studi": 12, "subset": [1, 10], "suffici": [8, 9, 12], "suit": [3, 14, 17], "summar": 4, "support": [5, 14], "sure": 1, "surviv": 12, "system": [8, 9, 12], "tab": 13, "tabl": [5, 6, 8, 11], "table1": 13, "table2": 13, "tailor": 8, "take": [0, 8, 15], "task": 16, "taxa": [5, 11], "taxon": 13, "taxonom": [5, 14, 17], "taxonomi": [5, 10, 11], "techniqu": 9, "temper": 14, "than": 4, "thei": [0, 17], "them": [2, 14, 15], "themselv": 17, "thi": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], "thorugh": 8, "those": [9, 14], "thread": [5, 8, 10, 11], "threshold": [6, 10, 11], "through": [0, 1, 8, 14, 15, 17], "time": [13, 14], "timepoint": 13, "too": 9, "tool": [0, 1, 2, 6, 7, 8, 9, 10, 11, 12, 17], "top": 13, "total": [8, 13], "tpm": 5, "track": 17, "trajectori": 13, "transcript": 5, "transform": 9, "translat": 9, "transport": 12, "trim": 3, "true": 8, "try": [8, 11, 14], "tsv": [0, 5, 11, 13], "tutori": [0, 1, 2, 8, 10, 12, 13, 15], "two": [8, 11], "type": [0, 2, 3, 9, 11, 13, 15], "typic": [9, 15, 16], "u": 13, "unbin": 8, "unbinned_contig": 8, "unclassifi": 11, "uncultur": 7, "under": 8, "underli": 14, "understand": 12, "uniqu": [6, 15], "univec_cor": 11, "up": 17, "upload": 14, "us": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17], "usag": 8, "uuid": [14, 15], "uuid4": 15, "valuabl": 8, "vari": 9, "variou": [7, 8, 12, 14, 15], "verbos": [0, 5, 6, 8, 10, 11, 13], "veri": 14, "view": 1, "viral": [11, 17], "virom": 17, "virus": 8, "visual": [1, 4, 5, 8, 13], "voutcn": 7, "wa": [1, 11], "wai": [0, 5, 14], "walltim": 1, "want": [8, 14, 16, 17], "we": [0, 1, 2, 4, 5, 6, 8, 10, 11, 12, 13, 14, 16, 17], "well": 8, "wget": 0, "wheeler": 9, "when": [0, 9], "where": 
[8, 14, 15, 16], "which": [1, 2, 3, 5, 8, 9, 11, 14, 15], "while": [1, 9, 16], "whole": 17, "wiki": 17, "wish": 8, "within": [9, 12, 14, 16], "without": 7, "wood": 9, "work": [1, 14], "worker_init": 1, "workflow": [6, 7, 8, 9, 12, 14, 16, 17], "would": [8, 15], "ye": 9, "yet": 14, "you": [0, 1, 2, 4, 8, 11, 13, 14, 15, 16, 17], "your": [0, 1, 5, 8, 9, 11, 12, 13, 14, 15, 16], "z": 11}, "titles": ["Data retrieval", "Setup", "Host read removal", "Quality control", "Quality filtering", "MAG abundance estimation", "MAG set dereplication", "Recovery of Metagenome-assembled Genomes", "Recovery of MAGs", "Taxonomic classification", "Taxonomic classification of MAGs", "Taxonomic classification of reads", "Functional annotation", "Functional annotation", "Exporting data and connecting with other tools", "Importing data from other tools", "Interoperability with other tools", "MOSHPIT tutorial"], "titleterms": {"": [5, 13], "1": 11, "2": [9, 11, 14], "abund": 5, "against": 13, "align": 13, "annot": [12, 13], "anvi": 14, "approach": 11, "assembl": [7, 8], "base": 9, "bin": 8, "busco": 8, "cazym": 13, "classif": [9, 10, 11], "compar": 6, "comput": 6, "connect": 14, "contamin": 2, "contig": [8, 15], "control": 3, "data": [0, 14, 15], "databas": 13, "derepl": [5, 6], "diamond": 13, "divers": 13, "dna": 9, "eggnog": 13, "estim": 5, "evalu": 8, "exisit": 15, "exist": 15, "export": 14, "extract": 13, "filter": [4, 8], "from": 15, "function": [12, 13], "genom": 7, "get": 5, "have": [5, 13], "host": 2, "human": 2, "import": 15, "index": [5, 8], "interoper": 16, "kaiju": [9, 11], "kraken": [9, 11, 14], "length": 5, "let": [5, 13], "look": [5, 13], "mag": [5, 6, 8, 10, 15], "map": [5, 8], "megahit": 8, "metabat": 8, "metagenom": 7, "microbi": 14, "minhash": 6, "moshpit": 17, "multipli": 13, "note": 1, "o": 14, "ortholog": 13, "other": [14, 15, 16], "our": [5, 13], "overview": [4, 9], "pangenom": 14, "parallel": 1, "pavian": 14, "protein": 9, "qc": 8, "qualiti": [3, 4], "quast": 8, 
"read": [2, 4, 5, 8, 9, 11], "recoveri": [7, 8], "remov": 2, "report": 14, "requir": 13, "retriev": 0, "search": 13, "set": 6, "setup": 1, "signatur": 6, "sourmash": 6, "tabl": 13, "taxonom": [9, 10, 11], "tool": [14, 15, 16], "trim": 4, "tutori": 17, "us": 13, "visual": [11, 14], "work": 15}}) \ No newline at end of file