diff --git a/.github/workflows/push-main.yml b/.github/workflows/pypi-publish.yml
similarity index 60%
rename from .github/workflows/push-main.yml
rename to .github/workflows/pypi-publish.yml
index d6ccafd..694df95 100644
--- a/.github/workflows/push-main.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -1,25 +1,14 @@
-name: Release
+name: PyPI publish
 on:
-  push:
-    branches:
-      - main
+  release:
+    types: [ published ]
 
 jobs:
-  release:
-    name: release
+  pypi-publish:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
-      - uses: actions/setup-node@v2
-        with:
-          cache: npm
-          node-version: 16
-      - run: npm install
-      - run: npx semantic-release
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Python
+      - name: Setup Python
         uses: actions/setup-python@v2
         with:
           python-version: '3.6'
@@ -33,4 +22,4 @@ jobs:
           TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
         run: |
           python setup.py sdist bdist_wheel
-          twine upload dist/*
\ No newline at end of file
+          twine upload dist/*
diff --git a/.github/workflows/semantic-release.yml b/.github/workflows/semantic-release.yml
new file mode 100644
index 0000000..de67e9f
--- /dev/null
+++ b/.github/workflows/semantic-release.yml
@@ -0,0 +1,63 @@
+name: Semantic release
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  semantic-release:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-node@v2
+        with:
+          cache: npm
+          node-version: 16
+      - run: npm install
+      - run: npx semantic-release
+        env:
+          GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
+
+  documentation:
+    runs-on: ubuntu-latest
+    needs: ['semantic-release']
+    steps:
+      - uses: actions/setup-python@v2
+        with:
+          python-version: '3.8'
+
+      # Checkout the main branch to /main
+      - uses: actions/checkout@v2
+        with:
+          path: main
+
+      # Checkout the GitHub pages branch to /gh-pages
+      - uses: actions/checkout@v2
+        with:
+          ref: gh-pages
+          path: gh-pages
+
+      - name: Install Sphinx requirements
+        working-directory: ${{ github.workspace }}/main/docs
+        run: |
+          sudo apt-get update -y
+          python -m pip install --upgrade pip
+          python -m pip install -r requirements.txt
+
+      - name: Build documentation
+        working-directory: ${{ github.workspace }}/main/docs
+        run: make html
+
+      - name: Move documentation
+        run: |
+          touch main/docs/build/html/.nojekyll
+          cp -a main/docs/build/html/. gh-pages/
+
+      - name: Upload documentation
+        working-directory: ${{ github.workspace }}/gh-pages
+        run: |
+          git config --local user.email "action@github.com"
+          git config --local user.name "GitHub Action"
+          git add --all
+          git diff-index --quiet HEAD || git commit -m "docs update" -a
+          git push
diff --git a/.releaserc b/.releaserc
index bbea15c..b65e40c 100644
--- a/.releaserc
+++ b/.releaserc
@@ -20,7 +20,8 @@
     [
       "@semantic-release/changelog",
       {
-        "changelogFile": "CHANGELOG.md"
+        "changelogFile": "CHANGELOG.md",
+        "changelogTitle": "# Changelog"
       }
     ],
   [
diff --git a/README.md b/README.md
index 07d03b3..780798e 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,17 @@
 # magna
-The magna CLI package is a collection of commands that I frequently use in bioinformatics and day-to-day life.
+
+[![PyPI](https://img.shields.io/pypi/v/magna.svg)](https://pypi.python.org/pypi/magna)
+
+Magna is a collection of bioinformatic datasets and utilities I use in my everyday life.
+
+This has been written with the intention of personal use, but feel free to use/contribute.
+
+**Documentation:** [https://aaronmussig.github.io/magna/](https://aaronmussig.github.io/magna/)
+
+
+## Contributing
+
+This project uses [Semantic Versioning](http://semver.org/) and [Conventional Commits](https://conventionalcommits.org/)
+to automatically generate release notes using [Semantic Release](https://semantic-release.gitbook.io/semantic-release/).
+
+Please ensure that your commits are properly formatted.
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d0c3cbf
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..88d11cc
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,4 @@
+sphinx-rtd-theme ~= 1.0.0
+sphinx ~= 4.4.0
+sphinx-autodoc-typehints ~= 1.12.0
+myst-parser ~= 0.16.1
diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css
new file mode 100644
index 0000000..f0c8365
--- /dev/null
+++ b/docs/source/_static/css/custom.css
@@ -0,0 +1,13 @@
+@import 'theme.css';
+
+.wy-nav-content {
+    max-width: 100% !important;
+}
+
+.wy-side-nav-search {
+    background-color: #2a518f !important;
+}
+
+.rst-content img {
+    margin-right: 5px;
+}
\ No newline at end of file
diff --git a/docs/source/changelog.md b/docs/source/changelog.md
new file mode 120000
index 0000000..699cc9e
--- /dev/null
+++ b/docs/source/changelog.md
@@ -0,0 +1 @@
+../../CHANGELOG.md
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..88773de
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,109 @@
+import os
+import sys
+from datetime import datetime
+import sphinx_rtd_theme
+
+sys.path.insert(0, os.path.abspath('../..'))
+from magna import __author__, __version__, __title__, __url__
+
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+
+# -- Project information -----------------------------------------------------
+
+
+# The full version, including alpha/beta/rc tags
+release = __version__
+project = __title__
+copyright = f'{datetime.now().year}, {__author__}'
+author = __author__
+
+# The full version, including alpha/beta/rc tags
+version = __version__
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+github_url = __url__
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon',
+              'sphinx_rtd_theme', 'sphinx_autodoc_typehints',
+              'myst_parser']
+
+# Napoleon settings
+napoleon_google_docstring = True
+napoleon_numpy_docstring = False
+napoleon_include_init_with_doc = False
+napoleon_include_private_with_doc = False
+napoleon_include_special_with_doc = True
+napoleon_use_admonition_for_examples = False
+napoleon_use_admonition_for_notes = False
+napoleon_use_admonition_for_references = False
+napoleon_use_ivar = False
+napoleon_use_param = True
+napoleon_use_rtype = True
+napoleon_preprocess_types = False
+napoleon_type_aliases = None
+napoleon_attr_annotations = True
+
+# autodoc_typehints = 'None'
+autodoc_typehints_format = 'short'
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+
+html_theme_options = {# 'analytics_id': 'G-XXXXXXXXXX',  #  Provided by Google in your dashboard
+    'analytics_anonymize_ip': False, 'logo_only': False, 'display_version': True,
+    'prev_next_buttons_location': 'bottom', 'style_external_links': False, 'vcs_pageview_mode': '',
+    'style_nav_header_background': 'white', # Toc options
+    'collapse_navigation': True, 'sticky_navigation': True, 'navigation_depth': 4, 'includehidden': True,
+    'titles_only': False}
+
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+html_style = 'css/custom.css'
+
+html_context = {'display_github': True, 'github_user': 'aaronmussig', 'github_repo': 'magna',
+    'github_version': 'main/docs/source/'}  # branch + docs path used for the "Edit on GitHub" links
+
+
+source_suffix = {
+    '.rst': 'restructuredtext',
+    '.txt': 'markdown',
+    '.md': 'markdown',
+}
diff --git a/docs/source/gtdb/enums.rst b/docs/source/gtdb/enums.rst
new file mode 100644
index 0000000..d9049cc
--- /dev/null
+++ b/docs/source/gtdb/enums.rst
@@ -0,0 +1,8 @@
+*****
+Enums
+*****
+
+.. autoclass:: magna.gtdb.enums.GtdbRelease
+    :members:
+    :undoc-members:
+    :member-order: bysource
diff --git a/docs/source/gtdb/genome.rst b/docs/source/gtdb/genome.rst
new file mode 100644
index 0000000..a03e1bc
--- /dev/null
+++ b/docs/source/gtdb/genome.rst
@@ -0,0 +1,11 @@
+******
+Genome
+******
+
+.. autoclass:: magna.gtdb.genome.Genome
+    :members:
+
+
+.. autoclass:: magna.gtdb.genome.GenomeDirs
+    :members:
+    :special-members: __init__
diff --git a/docs/source/gtdb/metadata.rst b/docs/source/gtdb/metadata.rst
new file mode 100644
index 0000000..44150de
--- /dev/null
+++ b/docs/source/gtdb/metadata.rst
@@ -0,0 +1,33 @@
+********
+Metadata
+********
+
+.. autoclass:: magna.gtdb.metadata.GtdbMetadata
+   :members:
+
+
+Release 95
+----------
+
+.. autoclass:: magna.gtdb.metadata.GtdbMetadataR95
+    :members:
+
+.. autoclass:: magna.gtdb.metadata.GtdbMetadataR95Arc
+    :show-inheritance:
+
+.. autoclass:: magna.gtdb.metadata.GtdbMetadataR95Bac
+    :show-inheritance:
+
+
+
+Release 202
+-----------
+
+.. autoclass:: magna.gtdb.metadata.GtdbMetadataR202
+    :members:
+
+.. autoclass:: magna.gtdb.metadata.GtdbMetadataR202Arc
+    :show-inheritance:
+
+.. autoclass:: magna.gtdb.metadata.GtdbMetadataR202Bac
+    :show-inheritance:
diff --git a/docs/source/gtdb/tree.rst b/docs/source/gtdb/tree.rst
new file mode 100644
index 0000000..1537803
--- /dev/null
+++ b/docs/source/gtdb/tree.rst
@@ -0,0 +1,26 @@
+****
+Tree
+****
+
+.. autoclass:: magna.gtdb.tree.GtdbTree
+   :members:
+
+
+Release 95
+----------
+
+.. autoclass:: magna.gtdb.tree.GtdbTreeR95Arc
+    :show-inheritance:
+
+.. autoclass:: magna.gtdb.tree.GtdbTreeR95Bac
+    :show-inheritance:
+
+
+Release 202
+-----------
+
+.. autoclass:: magna.gtdb.tree.GtdbTreeR202Arc
+    :show-inheritance:
+
+.. autoclass:: magna.gtdb.tree.GtdbTreeR202Bac
+    :show-inheritance:
diff --git a/docs/source/gunc/index.rst b/docs/source/gunc/index.rst
new file mode 100644
index 0000000..054d283
--- /dev/null
+++ b/docs/source/gunc/index.rst
@@ -0,0 +1,24 @@
+****
+GUNC
+****
+
+GTDB R95 reference DB
+---------------------
+
+Note: The following methods have source data that is not publicly available:
+
+.. autofunction:: magna.gunc.gunc_max_css_scores_gtdb_r95
+
+.. autofunction:: magna.gunc.gunc_contig_assignment_gtdb_r95
+
+.. autofunction:: magna.gunc.gunc_all_levels_gtdb_r95
+
+
+proGenomes reference DB
+-----------------------
+
+.. autoclass:: magna.gunc.GuncMaxCssScores
+    :members:
+
+.. autoclass:: magna.gunc.GuncAllScores
+    :members:
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..25273b0
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,36 @@
+*****
+magna
+*****
+
+.. image:: https://img.shields.io/pypi/v/magna.svg
+    :alt: PyPI version
+
+To build and preview the documentation locally: ``cd ~/git/magna/docs && make clean && make html && cd ~/git/magna/docs/build/html && http-server``
+
+
+.. toctree::
+    :caption: GTDB
+
+    gtdb/tree
+    gtdb/metadata
+    gtdb/genome
+    gtdb/enums
+
+.. toctree::
+    :caption: GUNC
+
+    gunc/index
+
+
+.. toctree::
+   :caption: Utility
+
+   util/io
+   util/accession
+
+
+.. toctree::
+   :caption: Changelog
+   :maxdepth: 1
+
+   changelog
diff --git a/docs/source/util/accession.rst b/docs/source/util/accession.rst
new file mode 100644
index 0000000..c01b4e5
--- /dev/null
+++ b/docs/source/util/accession.rst
@@ -0,0 +1,5 @@
+*********
+Accession
+*********
+
+.. autofunction:: magna.util.accession.canonical_gid
diff --git a/docs/source/util/io.rst b/docs/source/util/io.rst
new file mode 100644
index 0000000..be17946
--- /dev/null
+++ b/docs/source/util/io.rst
@@ -0,0 +1,16 @@
+************
+Input/Output
+************
+
+.. autofunction:: magna.util.io.untar
+
+
+.. autofunction:: magna.util.io.md5sum
+
+
+.. autofunction:: magna.util.io.download_file
+
+
+.. autofunction:: magna.util.io.cache_file
+
+
diff --git a/magna/dataset/gunc/__init__.py b/magna/dataset/gunc/__init__.py
deleted file mode 100644
index 1e853e0..0000000
--- a/magna/dataset/gunc/__init__.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import os
-import tempfile
-
-import numpy as np
-import pandas as pd
-
-from magna.config import MAGNA_DIR
-from magna.io import download_file
-
-
-class GuncMaxCssScores:
-    source = 'https://swifter.embl.de/~fullam/gunc/paper_supplementary_files/All_Datasets.GUNC.scores.maxCSS_level.tsv'
-    path = os.path.join(MAGNA_DIR, 'dataset', 'gunc', 'All_Datasets.GUNC.scores.maxCSS_level.feather')
-    md5 = 'dd91aa177b9112c361b9503e132f1c06'
-
-    def __init__(self):
-        if not os.path.isfile(self.path):
-            self._download()
-        self.df = self._read()
-
-    def _read(self):
-        print('Note: Only the RefSeq and GenBank results are used.')
-        return pd.read_feather(self.path)
-
-    @staticmethod
-    def _read_tsv(path):
-        dtype = {
-            'genome': np.object,
-            'n_genes_called': np.uintc,
-            'n_genes_mapped': np.uintc,
-            'n_contigs': np.uintc,
-            'taxonomic_level': np.object,
-            'proportion_genes_retained_in_major_clades': np.float16,
-            'genes_retained_index': np.float16,
-            'clade_separation_score': np.float16,
-            'contamination_portion': np.float16,
-            'n_effective_surplus_clades': np.float16,
-            'mean_hit_identity': np.float16,
-            'reference_representation_score': np.float16,
-            'pass.GUNC': np.object,
-            'study': np.object,
-            'CheckM_completeness': np.object,
-            'CheckM_contamination': np.object,
-        }
-        rows = list()
-        allowed_studies = frozenset({'GenBank', 'RefSeq'})
-        with open(path, 'r') as f:
-            header = {k: i for i, k in enumerate(
-                f.readline().strip().split('\t'))}
-            study_idx = header['study']
-            for line in f.readlines():
-                cols = line.strip().split('\t')
-                if cols[study_idx] in allowed_studies:
-                    rows.append(cols)
-        return pd.DataFrame(rows, columns=dtype)
-
-    def _download(self):
-        with tempfile.TemporaryDirectory() as tmpdir:
-            # Download the file
-            tmp_path = os.path.join(tmpdir, 'download.tsv')
-            download_file(self.source, tmp_path, self.md5)
-
-            df = self._read_tsv(tmp_path)
-            os.makedirs(os.path.dirname(self.path), exist_ok=True)
-            df.to_feather(path=self.path, compression='lz4')
-
-
-class GuncAllScores:
-    source = 'https://swifter.embl.de/~fullam/gunc/paper_supplementary_files/All_Datasets.GUNC.scores.all_levels.specI2species.tsv'
-    path = os.path.join(MAGNA_DIR, 'dataset', 'gunc', 'All_Datasets.GUNC.scores.all_levels.specI2species.feather')
-    md5 = 'a54e3719221a42a5f96b267412827d27'
-
-    def __init__(self):
-        if not os.path.isfile(self.path):
-            self._download()
-        self.df = self._read()
-
-    def _read(self):
-        print('Note: Only the RefSeq and GenBank results are used.')
-        return pd.read_feather(self.path)
-
-    @staticmethod
-    def _read_tsv(path):
-        dtype = {
-            'genome': np.object,
-            'n_genes_called': np.uintc,
-            'n_genes_mapped': np.uintc,
-            'n_contigs': np.uintc,
-            'taxonomic_level': np.object,
-            'proportion_genes_retained_in_major_clades': np.float16,
-            'genes_retained_index': np.float16,
-            'clade_separation_score': np.float16,
-            'contamination_portion': np.float16,
-            'n_effective_surplus_clades': np.float16,
-            'mean_hit_identity': np.float16,
-            'reference_representation_score': np.float16,
-            'pass.GUNC': np.object,
-            'study': np.object,
-        }
-        rows = list()
-        allowed_studies = frozenset({'GenBank', 'RefSeq'})
-        with open(path, 'r') as f:
-            header = {k: i for i, k in enumerate(
-                f.readline().strip().split('\t'))}
-            study_idx = header['study']
-            for line in f.readlines():
-                cols = line.strip().split('\t')
-                if cols[study_idx] in allowed_studies:
-                    rows.append(cols)
-        return pd.DataFrame(rows, columns=dtype)
-
-    def _download(self):
-        with tempfile.TemporaryDirectory() as tmpdir:
-            # Download the file
-            tmp_path = os.path.join(tmpdir, 'download.tsv')
-            download_file(self.source, tmp_path, self.md5)
-
-            df = self._read_tsv(tmp_path)
-            os.makedirs(os.path.dirname(self.path), exist_ok=True)
-            df.to_feather(path=self.path, compression='lz4')
diff --git a/magna/gtdb/enums.py b/magna/gtdb/enums.py
index fb8d217..c96cba3 100644
--- a/magna/gtdb/enums.py
+++ b/magna/gtdb/enums.py
@@ -2,6 +2,8 @@
 
 
 class GtdbRelease(Enum):
+    """All available GTDB releases."""
+
     R80 = '80'
     R83 = '83'
     R86 = '86'
diff --git a/magna/gtdb/genome.py b/magna/gtdb/genome.py
index c21d6eb..2bfe2f1 100644
--- a/magna/gtdb/genome.py
+++ b/magna/gtdb/genome.py
@@ -5,39 +5,50 @@
 from Bio.SeqRecord import SeqRecord
 
 from magna.gtdb.enums import GtdbRelease
-from magna.io import cache_file
+from magna.util.io import cache_file
 
 
 class Genome:
+    """A wrapper to a GTDB genome."""
 
     def __init__(self, accession: str, root: str):
+        #: The short accession of the genome.
         self.accession: str = accession
+        #: The root directory where this genome is stored.
         self.root: str = root
 
         # Generate paths
         base = os.path.basename(self.root)
-        self.cds_path = os.path.join(self.root, f'{base}_cds_from_genomic.fna')
-        self.fna_path = os.path.join(self.root, f'{base}_genomic.fna')
+        #: The path to the CDS file.
+        self.cds_path: str = os.path.join(self.root, f'{base}_cds_from_genomic.fna')
+        #: The path to the FNA file.
+        self.fna_path: str = os.path.join(self.root, f'{base}_genomic.fna')
 
     def __repr__(self):
         return str(self.accession)
 
     def cds_seqio(self) -> Tuple[SeqRecord, ...]:
-        # Returns the CDS generated from the FNA
+        """Read and return the CDS file as a SeqIO object."""
         with open(self.cds_path, 'r') as f:
             out = tuple(SeqIO.parse(f, 'fasta'))
         return out
 
     def fna_seqio(self) -> Tuple[SeqRecord, ...]:
-        # Returns the FNA
+        """Read and return the FNA file as a SeqIO object."""
         with open(self.fna_path, 'r') as f:
             out = tuple(SeqIO.parse(f, 'fasta'))
         return out
 
 
 class GenomeDirs:
+    """An interface to the :obj:`GtdbRelease` accession to :obj:`Genome` mapping."""
 
     def __init__(self, release: GtdbRelease):
+        """Initialise the GenomeDirs class for a given release.
+
+        Args:
+            release: The release of GTDB to use.
+        """
         self.release = release
 
         # Create the paths
@@ -57,4 +68,9 @@ def read(path: str) -> Dict[str, str]:
         return out
 
     def get(self, accession: str) -> Genome:
+        """Return the :obj:`Genome` for the given accession.
+
+        Args:
+            accession: The short accession of the genome (e.g. ``GCA_123456789.1``).
+        """
         return Genome(accession=accession, root=self._data[accession])
diff --git a/magna/gtdb/metadata.py b/magna/gtdb/metadata.py
index 364a281..871d9fd 100644
--- a/magna/gtdb/metadata.py
+++ b/magna/gtdb/metadata.py
@@ -4,18 +4,22 @@
 import pandas as pd
 
 from magna.config import MAGNA_DIR
-from magna.io import download_file, md5sum, untar
+from magna.util.io import download_file, md5sum, untar
 
 
-class _GtdbMetadata:
+class GtdbMetadata:
 
     def __init__(self, source: str, path: str, md5: str):
-        self.source = source
-        self.path = path
-        self.md5 = md5
+        #: The source URL
+        self.source: str = source
+        #: The path to the metadata file.
+        self.path: str = path
+        #: The MD5 checksum of the downloaded file.
+        self.md5: str = md5
         if not os.path.isfile(self.path):
             self._download()
-        self.df = self._read()
+        #: The metadata as a pandas DataFrame.
+        self.df: pd.DataFrame = self._read()
 
     def _read(self):
         df = pd.read_feather(self.path)
@@ -45,7 +49,9 @@ def _download(self):
 
 # ----------------------------------------------------------------------------------------------------------------------
 
-class GtdbMetadataR95Arc(_GtdbMetadata):
+class GtdbMetadataR95Arc(GtdbMetadata):
+    """The archaeal metadata (release 95)."""
+
     source = 'https://data.gtdb.ecogenomic.org/releases/release95/95.0/ar122_metadata_r95.tar.gz'
     path = os.path.join(MAGNA_DIR, 'dataset', 'gtdb', 'metadata', 'ar122_metadata_r95.feather')
     md5 = '110ad5daa2dbed2ee904b10c295da5dc'
@@ -54,7 +60,9 @@ def __init__(self):
         super().__init__(self.source, self.path, self.md5)
 
 
-class GtdbMetadataR95Bac(_GtdbMetadata):
+class GtdbMetadataR95Bac(GtdbMetadata):
+    """The bacterial metadata (release 95)."""
+
     source = 'https://data.gtdb.ecogenomic.org/releases/release95/95.0/bac120_metadata_r95.tar.gz'
     path = os.path.join(MAGNA_DIR, 'dataset', 'gtdb', 'metadata', 'bac120_metadata_r95.feather')
     md5 = '223ada02ffca4d1a2dda6edb9a164dcd'
@@ -64,14 +72,18 @@ def __init__(self):
 
 
 class GtdbMetadataR95:
+    """The combined archaeal and bacterial metadata (release 95)."""
 
     def __init__(self):
-        self.df = pd.concat([GtdbMetadataR95Arc().df, GtdbMetadataR95Bac().df])
+        #: The combined dataframe.
+        self.df: pd.DataFrame = pd.concat([GtdbMetadataR95Arc().df, GtdbMetadataR95Bac().df])
 
 
 # ----------------------------------------------------------------------------------------------------------------------
 
-class GtdbMetadataR202Arc(_GtdbMetadata):
+class GtdbMetadataR202Arc(GtdbMetadata):
+    """The archaeal metadata (release 202)."""
+
     source = 'https://data.gtdb.ecogenomic.org/releases/release202/202.0/ar122_metadata_r202.tar.gz'
     path = os.path.join(MAGNA_DIR, 'dataset', 'gtdb', 'metadata', 'ar122_metadata_r202.feather')
     md5 = '0607728ae1f56bdb1a7cc24d238185c3'
@@ -80,7 +92,8 @@ def __init__(self):
         super().__init__(self.source, self.path, self.md5)
 
 
-class GtdbMetadataR202Bac(_GtdbMetadata):
+class GtdbMetadataR202Bac(GtdbMetadata):
+    """The bacterial metadata (release 202)."""
     source = 'https://data.gtdb.ecogenomic.org/releases/release202/202.0/bac120_metadata_r202.tar.gz'
     path = os.path.join(MAGNA_DIR, 'dataset', 'gtdb', 'metadata', 'bac120_metadata_r202.feather')
     md5 = '68fed11eb688982edb6f4669476c2a10'
@@ -90,8 +103,10 @@ def __init__(self):
 
 
 class GtdbMetadataR202:
+    """The combined archaeal and bacterial metadata (release 202)."""
 
     def __init__(self):
-        self.df = pd.concat([GtdbMetadataR202Arc().df, GtdbMetadataR202Bac().df])
+        #: The combined dataframe.
+        self.df: pd.DataFrame = pd.concat([GtdbMetadataR202Arc().df, GtdbMetadataR202Bac().df])
 
 # ----------------------------------------------------------------------------------------------------------------------
diff --git a/magna/gtdb/tree.py b/magna/gtdb/tree.py
index 40b6f04..c1c87e2 100644
--- a/magna/gtdb/tree.py
+++ b/magna/gtdb/tree.py
@@ -5,18 +5,25 @@
 import dendropy
 
 from magna.config import MAGNA_DIR
-from magna.io import download_file, md5sum
+from magna.util.io import download_file, md5sum
 
 
-class _GtdbTree:
+class GtdbTree:
+    """The base class that all GTDB tree objects inherit."""
+
+    __slots__ = ('source', 'path', 'md5', 'tree')
 
     def __init__(self, source: str, path: str, md5: str):
-        self.source = source
-        self.path = path
-        self.md5 = md5
+        #: The source URL.
+        self.source: str = source
+        #: The path to the downloaded file.
+        self.path: str = path
+        #: The expected MD5 checksum of the file.
+        self.md5: str = md5
         if not os.path.isfile(self.path):
             self._download()
-        self.tree = self._read()
+        #: The dendropy tree.
+        self.tree: dendropy.Tree = self._read()
 
     def _read(self):
         return dendropy.Tree.get(path=self.path, schema='newick', preserve_underscores=True)
@@ -35,7 +42,8 @@ def _download(self):
             shutil.copyfile(tmp_path, self.path)
 
 
-class GtdbTreeR95Arc(_GtdbTree):
+class GtdbTreeR95Arc(GtdbTree):
+    """The GTDB archaeal tree (release 95)."""
     source = 'https://data.gtdb.ecogenomic.org/releases/release95/95.0/ar122_r95.tree'
     path = os.path.join(MAGNA_DIR, 'dataset', 'gtdb', 'tree', 'ar122_r95.tree')
     md5 = '2f5e072b9095617e7b5cff09653f8bec'
@@ -44,7 +52,8 @@ def __init__(self):
         super().__init__(self.source, self.path, self.md5)
 
 
-class GtdbTreeR95Bac(_GtdbTree):
+class GtdbTreeR95Bac(GtdbTree):
+    """The GTDB bacterial tree (release 95)."""
     source = 'https://data.gtdb.ecogenomic.org/releases/release95/95.0/bac120_r95.tree'
     path = os.path.join(MAGNA_DIR, 'dataset', 'gtdb', 'tree', 'bac120_r95.tree')
     md5 = 'c896d0eece01b281e09bd38534cd072e'
@@ -53,7 +62,8 @@ def __init__(self):
         super().__init__(self.source, self.path, self.md5)
 
 
-class GtdbTreeR202Arc(_GtdbTree):
+class GtdbTreeR202Arc(GtdbTree):
+    """The GTDB archaeal tree (release 202)."""
     source = 'https://data.gtdb.ecogenomic.org/releases/release202/202.0/ar122_r202.tree'
     path = os.path.join(MAGNA_DIR, 'dataset', 'gtdb', 'tree', 'ar122_r202.tree')
     md5 = '5b2dd87b0836fd63a223a556eae2906d'
@@ -62,7 +72,8 @@ def __init__(self):
         super().__init__(self.source, self.path, self.md5)
 
 
-class GtdbTreeR202Bac(_GtdbTree):
+class GtdbTreeR202Bac(GtdbTree):
+    """The GTDB bacterial tree (release 202)."""
     source = 'https://data.gtdb.ecogenomic.org/releases/release202/202.0/bac120_r202.tree'
     path = os.path.join(MAGNA_DIR, 'dataset', 'gtdb', 'tree', 'bac120_r202.tree')
     md5 = 'aebfc092ff6f2d81ef1226da6f1477c9'
diff --git a/magna/gunc/__init__.py b/magna/gunc/__init__.py
index b9e014d..0146392 100644
--- a/magna/gunc/__init__.py
+++ b/magna/gunc/__init__.py
@@ -1,11 +1,15 @@
 import os
+import tempfile
 
+import numpy as np
 import pandas as pd
 
 from magna.config import MAGNA_DIR
+from magna.util.io import download_file
 
 
 def gunc_max_css_scores_gtdb_r95() -> pd.DataFrame:
+    """Return the max clade separation score (CSS) for the R95 GTDB."""
     path = os.path.join(MAGNA_DIR, 'dataset', 'gunc', 'GUNC.gtdb_95.maxCSS_level.feather')
     if not os.path.isfile(path):
         raise IOError(f'{path} does not exist.')
@@ -13,6 +17,7 @@ def gunc_max_css_scores_gtdb_r95() -> pd.DataFrame:
 
 
 def gunc_contig_assignment_gtdb_r95() -> pd.DataFrame:
+    """Return the contig assignment for the R95 GTDB."""
     path = os.path.join(MAGNA_DIR, 'dataset', 'gunc', 'GUNC.gtdb_95.contig_assignments.feather')
     if not os.path.isfile(path):
         raise IOError(f'{path} does not exist.')
@@ -20,7 +25,126 @@ def gunc_contig_assignment_gtdb_r95() -> pd.DataFrame:
 
 
 def gunc_all_levels_gtdb_r95() -> pd.DataFrame:
+    """Return GUNC output at all levels for the R95 GTDB."""
     path = os.path.join(MAGNA_DIR, 'dataset', 'gunc', 'gtdb_95.all_levels.tsv')
     if not os.path.isfile(path):
         raise IOError(f'{path} does not exist.')
     return pd.read_feather(path)
+
+
+class GuncMaxCssScores:
+    """Return the max clade separation score (CSS) for the R95 GTDB (progenes db)."""
+
+    source = 'https://swifter.embl.de/~fullam/gunc/paper_supplementary_files/All_Datasets.GUNC.scores.maxCSS_level.tsv'
+    path = os.path.join(MAGNA_DIR, 'dataset', 'gunc', 'All_Datasets.GUNC.scores.maxCSS_level.feather')
+    md5 = 'dd91aa177b9112c361b9503e132f1c06'
+
+    def __init__(self):
+        if not os.path.isfile(self.path):
+            self._download()
+        #: The dataframe.
+        self.df: pd.DataFrame = self._read()
+
+    def _read(self):
+        """Load the cached feather file at ``self.path`` into a dataframe."""
+        print('Note: Only the RefSeq and GenBank results are used.')
+        return pd.read_feather(self.path)
+
+    @staticmethod
+    def _read_tsv(path):
+        """Parse the TSV at ``path``, keeping GenBank/RefSeq rows as strings."""
+        # Only the keys (column names) are used; np.object was removed in NumPy 1.24.
+        dtype = {
+            'genome': object,
+            'n_genes_called': np.uintc,
+            'n_genes_mapped': np.uintc,
+            'n_contigs': np.uintc,
+            'taxonomic_level': object,
+            'proportion_genes_retained_in_major_clades': np.float16,
+            'genes_retained_index': np.float16,
+            'clade_separation_score': np.float16,
+            'contamination_portion': np.float16,
+            'n_effective_surplus_clades': np.float16,
+            'mean_hit_identity': np.float16,
+            'reference_representation_score': np.float16,
+            'pass.GUNC': object,
+            'study': object,
+            'CheckM_completeness': object,
+            'CheckM_contamination': object,
+        }
+        allowed_studies = frozenset({'GenBank', 'RefSeq'})
+        with open(path, 'r') as f:
+            header = {k: i for i, k in enumerate(
+                f.readline().strip().split('\t'))}
+            study_idx = header['study']
+            # Iterate lazily; readlines() would hold the whole file in memory.
+            split_rows = (line.strip().split('\t') for line in f)
+            rows = [c for c in split_rows if c[study_idx] in allowed_studies]
+        return pd.DataFrame(rows, columns=dtype)
+
+    def _download(self):
+        """Download the TSV, filter it and cache the result as feather."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            tmp_path = os.path.join(tmpdir, 'download.tsv')
+            download_file(self.source, tmp_path, self.md5)
+            df = self._read_tsv(tmp_path)
+            os.makedirs(os.path.dirname(self.path), exist_ok=True)
+            df.to_feather(path=self.path, compression='lz4')
+
+
+class GuncAllScores:
+    """Return GUNC output at all levels for the R95 GTDB (progenes db)."""
+
+    source = 'https://swifter.embl.de/~fullam/gunc/paper_supplementary_files/All_Datasets.GUNC.scores.all_levels.specI2species.tsv'
+    path = os.path.join(MAGNA_DIR, 'dataset', 'gunc', 'All_Datasets.GUNC.scores.all_levels.specI2species.feather')
+    md5 = 'a54e3719221a42a5f96b267412827d27'
+
+    def __init__(self):
+        if not os.path.isfile(self.path):
+            self._download()
+        #: The dataframe.
+        self.df: pd.DataFrame = self._read()
+
+    def _read(self):
+        """Load the cached feather file at ``self.path`` into a dataframe."""
+        print('Note: Only the RefSeq and GenBank results are used.')
+        return pd.read_feather(self.path)
+
+    @staticmethod
+    def _read_tsv(path):
+        """Parse the TSV at ``path``, keeping GenBank/RefSeq rows as strings."""
+        # Only the keys (column names) are used; np.object was removed in NumPy 1.24.
+        dtype = {
+            'genome': object,
+            'n_genes_called': np.uintc,
+            'n_genes_mapped': np.uintc,
+            'n_contigs': np.uintc,
+            'taxonomic_level': object,
+            'proportion_genes_retained_in_major_clades': np.float16,
+            'genes_retained_index': np.float16,
+            'clade_separation_score': np.float16,
+            'contamination_portion': np.float16,
+            'n_effective_surplus_clades': np.float16,
+            'mean_hit_identity': np.float16,
+            'reference_representation_score': np.float16,
+            'pass.GUNC': object,
+            'study': object,
+        }
+        allowed_studies = frozenset({'GenBank', 'RefSeq'})
+        with open(path, 'r') as f:
+            header = {k: i for i, k in enumerate(
+                f.readline().strip().split('\t'))}
+            study_idx = header['study']
+            # Iterate lazily; readlines() would hold the whole file in memory.
+            split_rows = (line.strip().split('\t') for line in f)
+            rows = [c for c in split_rows if c[study_idx] in allowed_studies]
+        return pd.DataFrame(rows, columns=dtype)
+
+    def _download(self):
+        """Download the TSV, filter it and cache the result as feather."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            tmp_path = os.path.join(tmpdir, 'download.tsv')
+            download_file(self.source, tmp_path, self.md5)
+            df = self._read_tsv(tmp_path)
+            os.makedirs(os.path.dirname(self.path), exist_ok=True)
+            df.to_feather(path=self.path, compression='lz4')
diff --git a/magna/dataset/__init__.py b/magna/util/__init__.py
similarity index 100%
rename from magna/dataset/__init__.py
rename to magna/util/__init__.py
diff --git a/magna/accession.py b/magna/util/accession.py
similarity index 61%
rename from magna/accession.py
rename to magna/util/accession.py
index 3492cbd..5186c0f 100644
--- a/magna/accession.py
+++ b/magna/util/accession.py
@@ -1,12 +1,12 @@
 def canonical_gid(gid: str) -> str:
     """Get canonical form of NCBI genome accession.
 
+    Args:
+        gid: The NCBI genome accession.
+
     Example:
-        G005435135 -> G005435135
-        GCF_005435135.1 -> G005435135
-        GCF_005435135.1_ASM543513v1_genomic -> G005435135
-        RS_GCF_005435135.1 -> G005435135
-        GB_GCA_005435135.1 -> G005435135
+        >>> canonical_gid('GCF_005435135.1_ASM543513v1_genomic')
+        'G005435135'
     """
 
     if gid.startswith('U'):
diff --git a/magna/io.py b/magna/util/io.py
similarity index 53%
rename from magna/io.py
rename to magna/util/io.py
index 93195ac..af3cc10 100644
--- a/magna/io.py
+++ b/magna/util/io.py
@@ -11,15 +11,6 @@
 from magna.config import CACHE_DIR
 
 
-def untar(file_path, dir_name):
-    """
-    Extracts the contents of the tar file at file_path into the directory
-    dest_path.
-    """
-    with tarfile.open(file_path) as tar:
-        tar.extractall(dir_name)
-
-
 class TqdmUpTo(tqdm):
     """Provides `update_to(n)` which uses `tqdm.update(delta_n)`."""
 
@@ -37,7 +28,30 @@ def update_to(self, b=1, bsize=1, tsize=None):
         return self.update(b * bsize - self.n)  # also sets self.n = b * bsize
 
 
+def untar(file_path: str, dir_name: str):
+    """Unpack every member of a tar archive into a directory.
+
+    The target directory is created first when it does not already exist.
+
+    Args:
+        file_path: Location of the tar archive to read.
+        dir_name: Directory that receives the extracted members.
+
+    Examples:
+        >>> untar('/tmp/data.tar.gz', '/tmp/data')
+    """
+    os.makedirs(dir_name, exist_ok=True)
+    archive = tarfile.open(file_path)
+    with archive:
+        archive.extractall(path=dir_name)
+
+
 def md5sum(path: str) -> str:
+    """Returns the md5 hash of a file.
+
+    Args:
+        path: The path to the file.
+    """
     block_size = 65536
     hasher = hashlib.md5()
     with open(path, 'rb') as f:
@@ -49,19 +63,45 @@ def md5sum(path: str) -> str:
 
 
 def download_file(url: str, path: str, md5: Optional[str] = None):
-    """Downloads a file to disk with tqdm progress bar."""
+    """Downloads a file to disk, optionally validating the md5 hash.
+
+    Args:
+        url: The url to download from.
+        path: The path to save the file to.
+        md5: The expected md5 hash of the file.
+
+    Raises:
+        IOError: If the md5 hash doesn't match.
+
+    Examples:
+        Download the file at ``https://www.example.com/data.csv`` to ``/tmp/data.csv``.
+
+        >>> download_file('https://www.example.com/data.csv', '/tmp/data.csv')
+    """
     with TqdmUpTo(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, desc=url.split('/')[-1]) as t:
         urllib.request.urlretrieve(url, filename=path, reporthook=t.update_to, data=None)
         t.total = t.n
 
     if md5 and md5 != md5sum(path):
-        raise ValueError('Hash mismatch')
+        raise IOError('Hash mismatch')
 
 
 def cache_file(srv_path: str, local_name: str) -> str:
-    """Copies a remote file to the local machine."""
-    if not os.path.isdir(CACHE_DIR):
-        os.makedirs(CACHE_DIR)
+    """Copies a file to the magna cache (doesn't auto-remove).
+
+    Args:
+        srv_path: The remote path of the file.
+        local_name: The key to cache this file with.
+
+    Returns:
+        The path to the cached file.
+
+    Examples:
+        Cache the file at ``/srv/data.csv`` as ``data.csv``.
+
+        >>> cache_file('/srv/data.csv', 'data.csv')
+    """
+    os.makedirs(CACHE_DIR, exist_ok=True)
     local_path = os.path.join(CACHE_DIR, local_name)
     if not os.path.isfile(local_path):
         shutil.copyfile(srv_path, local_path)
diff --git a/package.json b/package.json
index 45395d0..bb02ecf 100644
--- a/package.json
+++ b/package.json
@@ -1,23 +1,5 @@
 {
   "name": "magna",
-  "version": "0.0.1",
-  "description": "Magna is a collection of commands that I frequently use in bioinformatics and day-to-day life.",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "repository": {
-    "type": "git",
-    "url": "git+https://github.com/aaronmussig/magna.git"
-  },
-  "author": "",
-  "license": "ISC",
-  "bugs": {
-    "url": "https://github.com/aaronmussig/magna/issues"
-  },
-  "homepage": "https://github.com/aaronmussig/magna#readme",
-  "dependencies": {
-  },
   "devDependencies": {
     "@semantic-release/github": "^8.0.2",
     "@semantic-release/changelog": "^6.0.1",
diff --git a/setup.py b/setup.py
index 9e231dd..92ffb93 100644
--- a/setup.py
+++ b/setup.py
@@ -43,6 +43,7 @@ def readme():
       ],
       packages=find_packages(),
       include_package_data=True,
-      install_requires=['tqdm', 'pandas>=1.1.0', 'pyarrow', 'numpy', 'dendropy', 'biopython'],
+      install_requires=['tqdm', 'pandas>=1.1.0', 'pyarrow', 'numpy',
+                        'dendropy', 'biopython'],
       python_requires='>=3.6',
       )