Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
arpita0911patel authored Sep 21, 2022
1 parent 406f84e commit 8bbcaee
Showing 1 changed file with 27 additions and 38 deletions.
65 changes: 27 additions & 38 deletions parquet-demo-short-range-18files_feature_id.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 71,
"metadata": {
"collapsed": true,
"pycharm": {
Expand All @@ -11,38 +11,19 @@
},
"outputs": [],
"source": [
"import pyarrow.parquet as pq\n",
"import pandas as pd\n",
"import numpy as np\n",
"import pyarrow as pa\n",
"import dask\n",
"import fsspec\n",
"from datetime import datetime, timedelta\n",
"from pyarrow.parquet import ParquetFile\n",
"\n",
"import dask.dataframe as dd\n",
"import spark as spark\n",
"from dask.distributed import Client\n",
"import os\n",
"import xarray as xr\n",
"import glob\n",
"\n",
"import spark\n",
"#plot\n",
"import data\n",
"import matplotlib.pyplot as plt\n",
"import hvplot\n",
"import hvplot.pandas\n",
"import hvplot.xarray\n",
"\n",
"%matplotlib inline\n",
"from kerchunk.hdf import SingleHdf5ToZarr\n",
"from kerchunk.combine import MultiZarrToZarr\n"
"%matplotlib inline\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 72,
"outputs": [
{
"name": "stdout",
Expand All @@ -66,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": 63,
"execution_count": 73,
"outputs": [],
"source": [
"# Create list for\n",
Expand Down Expand Up @@ -99,7 +80,7 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 74,
"outputs": [],
"source": [
"df = pd.Series.to_frame(df)\n",
Expand All @@ -118,13 +99,13 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": 75,
"outputs": [
{
"data": {
"text/plain": "<pyarrow._parquet.FileMetaData object at 0x000001AA2019A070>\n created_by: fastparquet-python version 0.8.3 (build 0)\n num_columns: 4\n num_rows: 49981284\n num_row_groups: 1\n format_version: 1.0\n serialized_size: 1591"
"text/plain": "<pyarrow._parquet.FileMetaData object at 0x000001AA2CC0E700>\n created_by: fastparquet-python version 0.8.3 (build 0)\n num_columns: 4\n num_rows: 49981284\n num_row_groups: 1\n format_version: 1.0\n serialized_size: 1591"
},
"execution_count": 65,
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -141,7 +122,7 @@
},
{
"cell_type": "code",
"execution_count": 69,
"execution_count": 76,
"outputs": [
{
"name": "stdout",
Expand All @@ -158,7 +139,7 @@
"text/plain": " streamflow\ntime reference_time feature_id \n2022-09-11 01:00:00 2022-09-11 101 0.18\n 179 0.01\n 181 0.01\n 183 0.01\n 185 0.01\n... ...\n2022-09-11 18:00:00 2022-09-11 1180001800 0.00\n 1180001801 0.00\n 1180001802 0.00\n 1180001803 0.00\n 1180001804 0.00\n\n[49981284 rows x 1 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th></th>\n <th></th>\n <th>streamflow</th>\n </tr>\n <tr>\n <th>time</th>\n <th>reference_time</th>\n <th>feature_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th rowspan=\"5\" valign=\"top\">2022-09-11 01:00:00</th>\n <th rowspan=\"5\" valign=\"top\">2022-09-11</th>\n <th>101</th>\n <td>0.18</td>\n </tr>\n <tr>\n <th>179</th>\n <td>0.01</td>\n </tr>\n <tr>\n <th>181</th>\n <td>0.01</td>\n </tr>\n <tr>\n <th>183</th>\n <td>0.01</td>\n </tr>\n <tr>\n <th>185</th>\n <td>0.01</td>\n </tr>\n <tr>\n <th>...</th>\n <th>...</th>\n <th>...</th>\n <td>...</td>\n </tr>\n <tr>\n <th rowspan=\"5\" valign=\"top\">2022-09-11 18:00:00</th>\n <th rowspan=\"5\" valign=\"top\">2022-09-11</th>\n <th>1180001800</th>\n <td>0.00</td>\n </tr>\n <tr>\n <th>1180001801</th>\n <td>0.00</td>\n </tr>\n <tr>\n <th>1180001802</th>\n <td>0.00</td>\n </tr>\n <tr>\n <th>1180001803</th>\n <td>0.00</td>\n </tr>\n <tr>\n <th>1180001804</th>\n <td>0.00</td>\n </tr>\n </tbody>\n</table>\n<p>49981284 rows × 1 columns</p>\n</div>"
},
"execution_count": 69,
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -186,22 +167,30 @@
},
{
"cell_type": "code",
"execution_count": 70,
"execution_count": 77,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 78.1 ms\n",
"Wall time: 71.1 ms\n",
"CPU times: total: 62.5 ms\n",
"Wall time: 67 ms\n",
"CPU times: total: 31.2 ms\n",
"Wall time: 25 ms\n",
"Wall time: 73 ms\n",
"CPU times: total: 78.1 ms\n",
"Wall time: 68 ms\n",
"CPU times: total: 46.9 ms\n",
"Wall time: 53 ms\n",
"CPU times: total: 31.2 ms\n",
"Wall time: 36 ms\n"
"Wall time: 37 ms\n"
]
},
{
"data": {
"text/plain": "<AxesSubplot:xlabel='time,reference_time'>"
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": "<Figure size 640x480 with 1 Axes>",
Expand Down Expand Up @@ -229,8 +218,8 @@
"#result\n",
"%time result.plot(kind=\"line\", label=\"feature_id=101\")\n",
"%time result1.plot(kind=\"bar\", label=\"feature_id=987\")\n",
"plt.legend()\n",
"plt.show()"
"#plt.legend()\n",
"#plt.show()"
],
"metadata": {
"collapsed": false,
Expand Down

0 comments on commit 8bbcaee

Please sign in to comment.