diff --git a/parquet-demo-short-range-18files_feature_id.ipynb b/parquet-demo-short-range-18files_feature_id.ipynb index c5d75e2..2f7d190 100644 --- a/parquet-demo-short-range-18files_feature_id.ipynb +++ b/parquet-demo-short-range-18files_feature_id.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 10, + "execution_count": 71, "metadata": { "collapsed": true, "pycharm": { @@ -11,38 +11,19 @@ }, "outputs": [], "source": [ - "import pyarrow.parquet as pq\n", "import pandas as pd\n", - "import numpy as np\n", - "import pyarrow as pa\n", - "import dask\n", - "import fsspec\n", - "from datetime import datetime, timedelta\n", "from pyarrow.parquet import ParquetFile\n", - "\n", "import dask.dataframe as dd\n", - "import spark as spark\n", - "from dask.distributed import Client\n", "import os\n", "import xarray as xr\n", - "import glob\n", "\n", - "import spark\n", - "#plot\n", - "import data\n", "import matplotlib.pyplot as plt\n", - "import hvplot\n", - "import hvplot.pandas\n", - "import hvplot.xarray\n", - "\n", - "%matplotlib inline\n", - "from kerchunk.hdf import SingleHdf5ToZarr\n", - "from kerchunk.combine import MultiZarrToZarr\n" + "%matplotlib inline\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 72, "outputs": [ { "name": "stdout", @@ -66,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 73, "outputs": [], "source": [ "# Create list for\n", @@ -99,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 74, "outputs": [], "source": [ "df = pd.Series.to_frame(df)\n", @@ -118,13 +99,13 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 75, "outputs": [ { "data": { - "text/plain": "\n created_by: fastparquet-python version 0.8.3 (build 0)\n num_columns: 4\n num_rows: 49981284\n num_row_groups: 1\n format_version: 1.0\n serialized_size: 1591" + "text/plain": "\n created_by: fastparquet-python version 0.8.3 (build 0)\n num_columns: 4\n num_rows: 49981284\n num_row_groups: 1\n format_version: 1.0\n serialized_size: 1591" }, - "execution_count": 65, + "execution_count": 75, "metadata": {}, "output_type": "execute_result" } @@ -141,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 76, "outputs": [ { "name": "stdout", @@ -158,7 +139,7 @@ "text/plain": " streamflow\ntime reference_time feature_id \n2022-09-11 01:00:00 2022-09-11 101 0.18\n 179 0.01\n 181 0.01\n 183 0.01\n 185 0.01\n... ...\n2022-09-11 18:00:00 2022-09-11 1180001800 0.00\n 1180001801 0.00\n 1180001802 0.00\n 1180001803 0.00\n 1180001804 0.00\n\n[49981284 rows x 1 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
streamflow
timereference_timefeature_id
2022-09-11 01:00:002022-09-111010.18
1790.01
1810.01
1830.01
1850.01
............
2022-09-11 18:00:002022-09-1111800018000.00
11800018010.00
11800018020.00
11800018030.00
11800018040.00
\n

49981284 rows × 1 columns

\n
" }, - "execution_count": 69, + "execution_count": 76, "metadata": {}, "output_type": "execute_result" } @@ -186,22 +167,30 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 77, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: total: 78.1 ms\n", - "Wall time: 71.1 ms\n", "CPU times: total: 62.5 ms\n", - "Wall time: 67 ms\n", - "CPU times: total: 31.2 ms\n", - "Wall time: 25 ms\n", + "Wall time: 73 ms\n", + "CPU times: total: 78.1 ms\n", + "Wall time: 68 ms\n", + "CPU times: total: 46.9 ms\n", + "Wall time: 53 ms\n", "CPU times: total: 31.2 ms\n", - "Wall time: 36 ms\n" + "Wall time: 37 ms\n" ] }, + { + "data": { + "text/plain": "" + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + }, { "data": { "text/plain": "
", @@ -229,8 +218,8 @@ "#result\n", "%time result.plot(kind=\"line\", label=\"feature_id=101\")\n", "%time result1.plot(kind=\"bar\", label=\"feature_id=987\")\n", - "plt.legend()\n", - "plt.show()" + "#plt.legend()\n", + "#plt.show()" ], "metadata": { "collapsed": false,