From 356e5d502e01f49c0be4302b18c9dca2f4b1ffe5 Mon Sep 17 00:00:00 2001
From: Doug Branton
Date: Tue, 21 May 2024 13:41:07 -0700
Subject: [PATCH] doc tweaks

---
 docs/gettingstarted/quickstart.ipynb |   2 +-
 docs/index.rst                       |   9 +
 docs/tutorials/nest_accessor.ipynb   | 442 +--------------------------
 3 files changed, 23 insertions(+), 430 deletions(-)

diff --git a/docs/gettingstarted/quickstart.ipynb b/docs/gettingstarted/quickstart.ipynb
index 5832cbd..bf86212 100644
--- a/docs/gettingstarted/quickstart.ipynb
+++ b/docs/gettingstarted/quickstart.ipynb
@@ -38,7 +38,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from dask_nested.datasets import generate_data\n",
+    "from nested_dask.datasets import generate_data\n",
     "\n",
     "# generate_data creates some toy data\n",
     "ndf = generate_data(10, 100) # 10 rows, 100 nested rows per row\n",
diff --git a/docs/index.rst b/docs/index.rst
index a3804c9..6b0ab87 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -5,6 +5,15 @@
 Nested-Dask
 ========================================================================================
 
+A `dask <https://www.dask.org/>`_ extension of
+`nested-pandas <https://nested-pandas.readthedocs.io/en/latest/>`_.
+
+Nested-pandas is a pandas extension package that empowers efficient analysis
+of nested associated datasets. This package wraps the majority of the
+nested-pandas API with Dask, which enables easy parallelization and capacity
+for work at scale.
+
+
 Dev Guide - Getting Started
 ---------------------------
 
diff --git a/docs/tutorials/nest_accessor.ipynb b/docs/tutorials/nest_accessor.ipynb
index c3e20d8..5d80489 100644
--- a/docs/tutorials/nest_accessor.ipynb
+++ b/docs/tutorials/nest_accessor.ipynb
@@ -18,62 +18,11 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 16,
+    "execution_count": null,
     "metadata": {},
-    "outputs": [
-     {
-      "data": {
-       "text/html": [
" - ], - "text/plain": [ - "Dask NestedFrame Structure:\n", - " a b nested\n", - "npartitions=1 \n", - "0 float64 float64 nested\n", - "9 ... ... ...\n", - "Dask Name: repartition, 3 expressions\n", - "Expr=Repartition(frame=MapPartitions(NestedFrame), new_partitions=1)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "from dask_nested.datasets import generate_data\n", + "from nested_dask.datasets import generate_data\n", "\n", "# generate_data creates some toy data\n", "ndf = generate_data(10, 5) # 10 rows, 5 nested rows per row\n", @@ -89,20 +38,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ndf[\"nested\"].nest" ] @@ -116,20 +54,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['t', 'flux', 'band']" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ndf[\"nested\"].nest.fields" ] @@ -145,60 +72,9 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - "Dask DataFrame Structure:\n", - " t flux band\n", - "npartitions=1 \n", - "0 double[pyarrow] double[pyarrow] string[pyarrow]\n", - "9 ... ... ...\n", - "Dask Name: lambda, 5 expressions\n", - "Expr=MapPartitions(lambda)" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flat_nested = ndf[\"nested\"].nest.to_flat()\n", "flat_nested" @@ -206,189 +82,9 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tfluxband
013.50962988.914048r
08.63494879.231053g
017.1191575.928274r
019.56165346.766275r
013.74689284.034938g
110.56154281.163032g
11.3549723.397109r
19.621975.977237r
10.0730443.562784g
17.65771912.362825g
25.65283346.674879r
24.81303430.925827g
20.80073693.787431g
213.31720236.264783r
23.15947.166825g
34.21437823.836587g
313.175238.680537r
36.57943661.964227g
38.14310925.909146g
31.83967286.719744r
\n", - "
" - ], - "text/plain": [ - " t flux band\n", - "0 13.509629 88.914048 r\n", - "0 8.634948 79.231053 g\n", - "0 17.119157 5.928274 r\n", - "0 19.561653 46.766275 r\n", - "0 13.746892 84.034938 g\n", - "1 10.561542 81.163032 g\n", - "1 1.354972 3.397109 r\n", - "1 9.6219 75.977237 r\n", - "1 0.07304 43.562784 g\n", - "1 7.657719 12.362825 g\n", - "2 5.652833 46.674879 r\n", - "2 4.813034 30.925827 g\n", - "2 0.800736 93.787431 g\n", - "2 13.317202 36.264783 r\n", - "2 3.159 47.166825 g\n", - "3 4.214378 23.836587 g\n", - "3 13.1752 38.680537 r\n", - "3 6.579436 61.964227 g\n", - "3 8.143109 25.909146 g\n", - "3 1.839672 86.719744 r" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "flat_nested.head(20)" ] @@ -404,121 +100,9 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tfluxband
0[13.5096288 8.63494758 17.11915696 19.561652...[88.91404805 79.23105261 5.92827401 46.766274...['r' 'g' 'r' 'r' 'g']
1[10.5615423 1.35497198 9.62190035 0.073040...[81.16303204 3.39710897 75.97723713 43.562784...['g' 'r' 'r' 'g' 'g']
2[ 5.65283252 4.81303449 0.80073596 13.317201...[46.6748786 30.92582712 93.78743066 36.264783...['r' 'g' 'g' 'r' 'g']
3[ 4.214378 13.17520009 6.57943592 8.143109...[23.83658733 38.68053664 61.96422735 25.909146...['g' 'r' 'g' 'g' 'r']
4[4.31790223 9.2989414 0.19071925 7.55345992 7...[97.32244264 32.95566652 15.17553499 36.674948...['r' 'r' 'g' 'g' 'r']
5[ 0.8441804 17.12893578 5.99104788 16.905202...[ 4.58329024 35.91586029 20.12656116 43.806012...['g' 'g' 'g' 'r' 'r']
6[ 2.17272952 11.47100691 19.78062851 12.968281...[52.14204136 84.87265098 25.26807129 94.230023...['g' 'g' 'r' 'r' 'r']
7[ 7.31546622 14.96282356 15.17099992 15.028434...[41.17012344 89.94693463 84.09613648 53.773103...['g' 'r' 'r' 'r' 'g']
8[ 1.69038072 11.05890727 11.28588246 8.962195...[25.24952237 11.87511229 95.16037222 83.882477...['r' 'r' 'r' 'r' 'g']
9[ 1.20415091 18.31529619 16.92607067 15.473383...[43.42305605 76.56653572 6.47013062 13.418778...['g' 'r' 'r' 'r' 'r']
\n", - "
" - ], - "text/plain": [ - " t ... band\n", - "0 [13.5096288 8.63494758 17.11915696 19.561652... ... ['r' 'g' 'r' 'r' 'g']\n", - "1 [10.5615423 1.35497198 9.62190035 0.073040... ... ['g' 'r' 'r' 'g' 'g']\n", - "2 [ 5.65283252 4.81303449 0.80073596 13.317201... ... ['r' 'g' 'g' 'r' 'g']\n", - "3 [ 4.214378 13.17520009 6.57943592 8.143109... ... ['g' 'r' 'g' 'g' 'r']\n", - "4 [4.31790223 9.2989414 0.19071925 7.55345992 7... ... ['r' 'r' 'g' 'g' 'r']\n", - "5 [ 0.8441804 17.12893578 5.99104788 16.905202... ... ['g' 'g' 'g' 'r' 'r']\n", - "6 [ 2.17272952 11.47100691 19.78062851 12.968281... ... ['g' 'g' 'r' 'r' 'r']\n", - "7 [ 7.31546622 14.96282356 15.17099992 15.028434... ... ['g' 'r' 'r' 'r' 'g']\n", - "8 [ 1.69038072 11.05890727 11.28588246 8.962195... ... ['r' 'r' 'r' 'r' 'g']\n", - "9 [ 1.20415091 18.31529619 16.92607067 15.473383... ... ['g' 'r' 'r' 'r' 'r']\n", - "\n", - "[10 rows x 3 columns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "list_nested = ndf[\"nested\"].nest.to_lists()\n", "list_nested.compute()"