From aff82f65eca0a4224a36bb647598ba1c4915ecd5 Mon Sep 17 00:00:00 2001
From: Aaron Mussig
Date: Wed, 9 Mar 2022 17:09:01 +1000
Subject: [PATCH] feat(GUNC): Added GTDB R95 dataset methods.

---
Review notes (this section is ignored when the patch is applied):
- gunc_all_levels_gtdb_r95 now reads its '.tsv' file with pd.read_csv(sep='\t')
  instead of pd.read_feather, which cannot parse tab-separated text.
- Docstrings were added to the three loaders; the blob hash on the 'index'
  line below predates these changes and should be regenerated.

 magna/gunc/__init__.py | 30 ++++++++++++++++++++++++++++++
 setup.py               |  2 +-
 2 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 magna/gunc/__init__.py

diff --git a/magna/gunc/__init__.py b/magna/gunc/__init__.py
new file mode 100644
index 0000000..b9e014d
--- /dev/null
+++ b/magna/gunc/__init__.py
@@ -0,0 +1,30 @@
+import os
+
+import pandas as pd
+
+from magna.config import MAGNA_DIR
+
+
+def gunc_max_css_scores_gtdb_r95() -> pd.DataFrame:
+    """Load the GUNC GTDB R95 maxCSS-level feather file; raise IOError if it is missing."""
+    path = os.path.join(MAGNA_DIR, 'dataset', 'gunc', 'GUNC.gtdb_95.maxCSS_level.feather')
+    if not os.path.isfile(path):
+        raise IOError(f'{path} does not exist.')
+    return pd.read_feather(path)
+
+
+def gunc_contig_assignment_gtdb_r95() -> pd.DataFrame:
+    """Load the GUNC GTDB R95 contig-assignments feather file; raise IOError if it is missing."""
+    path = os.path.join(MAGNA_DIR, 'dataset', 'gunc', 'GUNC.gtdb_95.contig_assignments.feather')
+    if not os.path.isfile(path):
+        raise IOError(f'{path} does not exist.')
+    return pd.read_feather(path)
+
+
+def gunc_all_levels_gtdb_r95() -> pd.DataFrame:
+    """Load the GUNC GTDB R95 all-levels TSV file; raise IOError if it is missing."""
+    path = os.path.join(MAGNA_DIR, 'dataset', 'gunc', 'gtdb_95.all_levels.tsv')
+    if not os.path.isfile(path):
+        raise IOError(f'{path} does not exist.')
+    # The file is tab-separated text, not feather, so it needs the delimited-text reader.
+    return pd.read_csv(path, sep='\t')
diff --git a/setup.py b/setup.py
index 56feacd..9e231dd 100644
--- a/setup.py
+++ b/setup.py
@@ -43,6 +43,6 @@ def readme():
     ],
     packages=find_packages(),
     include_package_data=True,
-    install_requires=['tqdm', 'pandas', 'pyarrow', 'numpy', 'dendropy'],
+    install_requires=['tqdm', 'pandas>=1.1.0', 'pyarrow', 'numpy', 'dendropy', 'biopython'],
     python_requires='>=3.6',
 )