diff --git a/docs/gettingstarted/quickstart.ipynb b/docs/gettingstarted/quickstart.ipynb
index 5832cbd..bf86212 100644
--- a/docs/gettingstarted/quickstart.ipynb
+++ b/docs/gettingstarted/quickstart.ipynb
@@ -38,7 +38,7 @@
"metadata": {},
"outputs": [],
"source": [
- "from dask_nested.datasets import generate_data\n",
+ "from nested_dask.datasets import generate_data\n",
"\n",
"# generate_data creates some toy data\n",
"ndf = generate_data(10, 100) # 10 rows, 100 nested rows per row\n",
diff --git a/docs/index.rst b/docs/index.rst
index a3804c9..6b0ab87 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -5,6 +5,15 @@
Nested-Dask
========================================================================================
+A `dask <https://www.dask.org/>`_ extension of
+`nested-pandas <https://nested-pandas.readthedocs.io/en/latest/>`_.
+
+Nested-pandas is a pandas extension package that empowers efficient analysis
+of nested associated datasets. This package wraps the majority of the
+nested-pandas API with Dask, which enables easy parallelization and capacity
+for work at scale.
+
+
Dev Guide - Getting Started
---------------------------
diff --git a/docs/tutorials/nest_accessor.ipynb b/docs/tutorials/nest_accessor.ipynb
index c3e20d8..5d80489 100644
--- a/docs/tutorials/nest_accessor.ipynb
+++ b/docs/tutorials/nest_accessor.ipynb
@@ -18,62 +18,11 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
Dask DataFrame Structure:
\n",
- "\n",
- " \n",
- " \n",
- " | \n",
- " a | \n",
- " b | \n",
- " nested | \n",
- "
\n",
- " \n",
- " npartitions=1 | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " float64 | \n",
- " float64 | \n",
- " nested<t: [double], flux: [double], band: [string]> | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- "
\n",
- "Dask Name: repartition, 3 expressions
"
- ],
- "text/plain": [
- "Dask NestedFrame Structure:\n",
- " a b nested\n",
- "npartitions=1 \n",
- "0 float64 float64 nested\n",
- "9 ... ... ...\n",
- "Dask Name: repartition, 3 expressions\n",
- "Expr=Repartition(frame=MapPartitions(NestedFrame), new_partitions=1)"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "from dask_nested.datasets import generate_data\n",
+ "from nested_dask.datasets import generate_data\n",
"\n",
"# generate_data creates some toy data\n",
"ndf = generate_data(10, 5) # 10 rows, 5 nested rows per row\n",
@@ -89,20 +38,9 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"ndf[\"nested\"].nest"
]
@@ -116,20 +54,9 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['t', 'flux', 'band']"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"ndf[\"nested\"].nest.fields"
]
@@ -145,60 +72,9 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "Dask DataFrame Structure:
\n",
- "\n",
- " \n",
- " \n",
- " | \n",
- " t | \n",
- " flux | \n",
- " band | \n",
- "
\n",
- " \n",
- " npartitions=1 | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " double[pyarrow] | \n",
- " double[pyarrow] | \n",
- " string[pyarrow] | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- "
\n",
- "Dask Name: lambda, 5 expressions
"
- ],
- "text/plain": [
- "Dask DataFrame Structure:\n",
- " t flux band\n",
- "npartitions=1 \n",
- "0 double[pyarrow] double[pyarrow] string[pyarrow]\n",
- "9 ... ... ...\n",
- "Dask Name: lambda, 5 expressions\n",
- "Expr=MapPartitions(lambda)"
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"flat_nested = ndf[\"nested\"].nest.to_flat()\n",
"flat_nested"
@@ -206,189 +82,9 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " t | \n",
- " flux | \n",
- " band | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 13.509629 | \n",
- " 88.914048 | \n",
- " r | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 8.634948 | \n",
- " 79.231053 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 17.119157 | \n",
- " 5.928274 | \n",
- " r | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 19.561653 | \n",
- " 46.766275 | \n",
- " r | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 13.746892 | \n",
- " 84.034938 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 10.561542 | \n",
- " 81.163032 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 1.354972 | \n",
- " 3.397109 | \n",
- " r | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 9.6219 | \n",
- " 75.977237 | \n",
- " r | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0.07304 | \n",
- " 43.562784 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 7.657719 | \n",
- " 12.362825 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 5.652833 | \n",
- " 46.674879 | \n",
- " r | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 4.813034 | \n",
- " 30.925827 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0.800736 | \n",
- " 93.787431 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 13.317202 | \n",
- " 36.264783 | \n",
- " r | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3.159 | \n",
- " 47.166825 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 4.214378 | \n",
- " 23.836587 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 13.1752 | \n",
- " 38.680537 | \n",
- " r | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 6.579436 | \n",
- " 61.964227 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 8.143109 | \n",
- " 25.909146 | \n",
- " g | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1.839672 | \n",
- " 86.719744 | \n",
- " r | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " t flux band\n",
- "0 13.509629 88.914048 r\n",
- "0 8.634948 79.231053 g\n",
- "0 17.119157 5.928274 r\n",
- "0 19.561653 46.766275 r\n",
- "0 13.746892 84.034938 g\n",
- "1 10.561542 81.163032 g\n",
- "1 1.354972 3.397109 r\n",
- "1 9.6219 75.977237 r\n",
- "1 0.07304 43.562784 g\n",
- "1 7.657719 12.362825 g\n",
- "2 5.652833 46.674879 r\n",
- "2 4.813034 30.925827 g\n",
- "2 0.800736 93.787431 g\n",
- "2 13.317202 36.264783 r\n",
- "2 3.159 47.166825 g\n",
- "3 4.214378 23.836587 g\n",
- "3 13.1752 38.680537 r\n",
- "3 6.579436 61.964227 g\n",
- "3 8.143109 25.909146 g\n",
- "3 1.839672 86.719744 r"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"flat_nested.head(20)"
]
@@ -404,121 +100,9 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " t | \n",
- " flux | \n",
- " band | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " [13.5096288 8.63494758 17.11915696 19.561652... | \n",
- " [88.91404805 79.23105261 5.92827401 46.766274... | \n",
- " ['r' 'g' 'r' 'r' 'g'] | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " [10.5615423 1.35497198 9.62190035 0.073040... | \n",
- " [81.16303204 3.39710897 75.97723713 43.562784... | \n",
- " ['g' 'r' 'r' 'g' 'g'] | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " [ 5.65283252 4.81303449 0.80073596 13.317201... | \n",
- " [46.6748786 30.92582712 93.78743066 36.264783... | \n",
- " ['r' 'g' 'g' 'r' 'g'] | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " [ 4.214378 13.17520009 6.57943592 8.143109... | \n",
- " [23.83658733 38.68053664 61.96422735 25.909146... | \n",
- " ['g' 'r' 'g' 'g' 'r'] | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " [4.31790223 9.2989414 0.19071925 7.55345992 7... | \n",
- " [97.32244264 32.95566652 15.17553499 36.674948... | \n",
- " ['r' 'r' 'g' 'g' 'r'] | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " [ 0.8441804 17.12893578 5.99104788 16.905202... | \n",
- " [ 4.58329024 35.91586029 20.12656116 43.806012... | \n",
- " ['g' 'g' 'g' 'r' 'r'] | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " [ 2.17272952 11.47100691 19.78062851 12.968281... | \n",
- " [52.14204136 84.87265098 25.26807129 94.230023... | \n",
- " ['g' 'g' 'r' 'r' 'r'] | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " [ 7.31546622 14.96282356 15.17099992 15.028434... | \n",
- " [41.17012344 89.94693463 84.09613648 53.773103... | \n",
- " ['g' 'r' 'r' 'r' 'g'] | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " [ 1.69038072 11.05890727 11.28588246 8.962195... | \n",
- " [25.24952237 11.87511229 95.16037222 83.882477... | \n",
- " ['r' 'r' 'r' 'r' 'g'] | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " [ 1.20415091 18.31529619 16.92607067 15.473383... | \n",
- " [43.42305605 76.56653572 6.47013062 13.418778... | \n",
- " ['g' 'r' 'r' 'r' 'r'] | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " t ... band\n",
- "0 [13.5096288 8.63494758 17.11915696 19.561652... ... ['r' 'g' 'r' 'r' 'g']\n",
- "1 [10.5615423 1.35497198 9.62190035 0.073040... ... ['g' 'r' 'r' 'g' 'g']\n",
- "2 [ 5.65283252 4.81303449 0.80073596 13.317201... ... ['r' 'g' 'g' 'r' 'g']\n",
- "3 [ 4.214378 13.17520009 6.57943592 8.143109... ... ['g' 'r' 'g' 'g' 'r']\n",
- "4 [4.31790223 9.2989414 0.19071925 7.55345992 7... ... ['r' 'r' 'g' 'g' 'r']\n",
- "5 [ 0.8441804 17.12893578 5.99104788 16.905202... ... ['g' 'g' 'g' 'r' 'r']\n",
- "6 [ 2.17272952 11.47100691 19.78062851 12.968281... ... ['g' 'g' 'r' 'r' 'r']\n",
- "7 [ 7.31546622 14.96282356 15.17099992 15.028434... ... ['g' 'r' 'r' 'r' 'g']\n",
- "8 [ 1.69038072 11.05890727 11.28588246 8.962195... ... ['r' 'r' 'r' 'r' 'g']\n",
- "9 [ 1.20415091 18.31529619 16.92607067 15.473383... ... ['g' 'r' 'r' 'r' 'r']\n",
- "\n",
- "[10 rows x 3 columns]"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"list_nested = ndf[\"nested\"].nest.to_lists()\n",
"list_nested.compute()"