From 4a3b256b65ac3652a3d7aadc248d4bae2d0a380c Mon Sep 17 00:00:00 2001 From: MatevzVremec Date: Mon, 18 Mar 2024 12:36:52 +0100 Subject: [PATCH] Added computation time to Notebook 10. --- docs/examples/10_example_paper.ipynb | 359 ++++++++++++++++++++++++++- pyet/version.py | 2 +- 2 files changed, 357 insertions(+), 4 deletions(-) diff --git a/docs/examples/10_example_paper.ipynb b/docs/examples/10_example_paper.ipynb index 15a029d..ab1b903 100644 --- a/docs/examples/10_example_paper.ipynb +++ b/docs/examples/10_example_paper.ipynb @@ -122,7 +122,7 @@ "#edit default values for plots\n", "params = {\n", " \"legend.fontsize\": \"15\",\n", - " \"axes.labelsize\": \"16\",\n", + " \"axes.labelsize\": \"15\",\n", " \"xtick.labelsize\": \"15\",\n", " \"ytick.labelsize\": \"15\",\n", " \"xtick.direction\": \"in\",\n", @@ -134,6 +134,7 @@ "cell_type": "markdown", "id": "dc0ebed8", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -781,6 +782,7 @@ "cell_type": "markdown", "id": "38a45751", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -880,6 +882,7 @@ "cell_type": "markdown", "id": "bf7f5fd0", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -892,7 +895,11 @@ "cell_type": "code", "execution_count": null, "id": "d31888e0", - "metadata": {}, + "metadata": { + "jupyter": { + "source_hidden": true + } + }, "outputs": [], "source": [ "# Load E-OBS data\n", @@ -1016,6 +1023,7 @@ "cell_type": "markdown", "id": "84918f2b", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -1295,6 +1303,7 @@ "cell_type": "markdown", "id": "6f46ea03", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -1497,7 +1506,9 @@ { "cell_type": "markdown", "id": "abfb2272", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "## 7. Code example in paper" ] @@ -1597,6 +1608,348 @@ "pe_hargreaves = pyet.hargreaves(tmean, tmax, tmin, lat=lat)" ] }, + { + "cell_type": "markdown", + "id": "7ea54c53-23d9-446a-ba49-b954d10f15f6", + "metadata": {}, + "source": [ + "# 8. Computation time\n", + "\n", + "Comparison of the computational time and time series length and xarray grid size for different *pyet* PET modelds. The plot shows (a) the mean computational time and 95 % confidence interval for each model against the time series length, represented by a line and corresponding colored region, respectively. (b) the mean computational time and 95 % confidence interval for each model against the grid size of the xarray with 30 years time series length, represented by a line and corresponding colored region, respectively." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5af6625-88d4-448c-9611-0ca14559ab63", + "metadata": {}, + "outputs": [], + "source": [ + "# Reading meteorological data\n", + "df_10_days = pd.read_csv(\"data//example_10//10_example_meteo.csv\", index_col=0, parse_dates=True).iloc[:10, :-1]\n", + "\n", + "# Determining the necessary input data\n", + "lat = 0.91 # define latitude [radians]\n", + "elevation = 4 # define elevation [meters above sea-level]\n", + "\n", + "# Function to replicate DataFrame to N days using the same values, maintaining a continuous date index\n", + "def extend_data(df, n_days):\n", + " n_replications = n_days // len(df) # Calculate how many times to replicate the DataFrame\n", + " extended_data = np.tile(df.values, (n_replications, 1)) # Replicate the data values\n", + " start_date = df.index[0]\n", + " end_date = start_date + pd.Timedelta(days=n_days - 1)\n", + " new_index = pd.date_range(start=start_date, end=end_date, freq='D') # New continuous date index\n", + " extended_df = pd.DataFrame(extended_data[:len(new_index)], index=new_index, columns=df.columns) # DataFrame with new index\n", + " return extended_df\n", + "\n", + "# Extending the DataFrame to 100, 1000, and 10000 days...\n", + "dfs_time = {}\n", + "dfs_time[\"10\"] = df_10_days\n", + "dfs_time[\"100\"] = extend_data(df_10_days, 100)\n", + "dfs_time[\"1000\"] = extend_data(df_10_days, 1000)\n", + "dfs_time[\"10000\"] = extend_data(df_10_days, 10000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "594a1268-5fb9-41c1-b2d9-93d2ff7a9f1a", + "metadata": {}, + "outputs": [], + "source": [ + "import time \n", + "\n", + "# Compute times\n", + "def compute_pet_and_time(method, length):\n", + " # Determining the necessary input data\n", + " tmean, tmax, tmin, rh, rs, wind = (dfs_time[length][col] for col in df_10_days.columns)\n", + " start_time = time.time()\n", + " if method == \"pm\":\n", + " pet = pyet.pm(tmean, wind, rs=rs, tmax=tmax, tmin=tmin, rh=rh, lat=lat, elevation=elevation)\n", + " elif method == \"oudin\":\n", + " pet = pyet.oudin(tmean, lat)\n", + " elif method == \"hamon\":\n", + " pet = pyet.hamon(tmean, lat)\n", + " elif method == \"hargreaves\":\n", + " pet = pyet.hargreaves(tmean, tmax, tmin, lat)\n", + " elif method == \"priestley_taylor\":\n", + " pet = pyet.priestley_taylor(tmean, rs=rs, tmax=tmax, tmin=tmin, lat=lat, elevation=elevation, rh=rh)\n", + " elif method == \"romanenko\":\n", + " pet = pyet.romanenko(tmean, rh)\n", + " elif method == \"makkink\":\n", + " pet = pyet.makkink(tmean, rs, elevation=elevation)\n", + " elif method == \"jensen_haise\":\n", + " pet = pyet.jensen_haise(tmean, rs=rs, lat=lat)\n", + " elif method == \"turc\":\n", + " pet = pyet.turc(tmean, rs, rh)\n", + " else:\n", + " raise ValueError(\"Invalid method\")\n", + " end_time = time.time()\n", + " return end_time - start_time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1645bb32-b525-49ac-b677-b8d43348798b", + "metadata": {}, + "outputs": [], + "source": [ + "# Define dictionary to store the computed times\n", + "mean_times = {\"pm\": [], \"oudin\": [], \"hamon\": [], \"hargreaves\": [], \"priestley_taylor\": [], \n", + " \"romanenko\": [], \"makkink\": [], \"jensen_haise\": [], \"turc\": []}\n", + "times_25= {\"pm\": [], \"oudin\": [], \"hamon\": [], \"hargreaves\": [], \"priestley_taylor\": [], \n", + " \"romanenko\": [], \"makkink\": [], \"jensen_haise\": [], \"turc\": []}\n", + "times_975 = {\"pm\": [], \"oudin\": [], \"hamon\": [], \"hargreaves\": [], \"priestley_taylor\": [], \n", + " \"romanenko\": [], \"makkink\": [], \"jensen_haise\": [], \"turc\": []}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab9a5c5c-3401-4262-86cf-3af8eb5d0ef9", + "metadata": {}, + "outputs": [], + "source": [ + "# Loop over different time series lengths and methods\n", + "lengths = [10, 100, 1000, 10000] \n", + "for length in lengths:\n", + " for method in [\"pm\", \"oudin\", \"hamon\", \"hargreaves\", \"priestley_taylor\", \"romanenko\", \"makkink\", \"jensen_haise\", \"turc\"]:\n", + " times_100 = []\n", + " for i in range(100):\n", + " times_100.append(compute_pet_and_time(method, str(length)))\n", + " mean_times[method].append(np.mean(times_100))\n", + " #times_25[method].append(np.quantile(times_100, 0.025))\n", + " #times_975[method].append(np.quantile(times_100, 0.975))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f55f0f56-4185-4268-9913-7f8f0935b5d9", + "metadata": {}, + "outputs": [], + "source": [ + "# Load xarray dataset\n", + "wind = xr.open_dataset(\"data/example_10/fg_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n", + " engine=\"netcdf4\")[\"fg\"]\n", + "tmax = xr.open_dataset(\"data/example_10/tx_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n", + " engine=\"netcdf4\").sel(longitude=slice(wind.longitude.min(), wind.longitude.max()), \n", + " latitude=slice(wind.latitude.min(), wind.latitude.max()))[\"tx\"]\n", + "tmin = xr.open_dataset(\"data/example_10/tn_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n", + " engine=\"netcdf4\").sel(longitude=slice(wind.longitude.min(), wind.longitude.max()), \n", + " latitude=slice(wind.latitude.min(), wind.latitude.max()))[\"tn\"]\n", + "tmean = xr.open_dataset(\"data/example_10/tg_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n", + " engine=\"netcdf4\").sel(longitude=slice(wind.longitude.min(), wind.longitude.max()), \n", + " latitude=slice(wind.latitude.min(), wind.latitude.max()))[\"tg\"]\n", + "rs = xr.open_dataset(\"data/example_10/qq_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n", + " engine=\"netcdf4\").sel(lon=slice(wind.longitude.min(), wind.longitude.max()), \n", + " lat=slice(wind.latitude.min(), wind.latitude.max()))\n", + "rs = rs.rename_dims({\"lon\":\"longitude\", \n", + " \"lat\":\"latitude\"}).rename({\"lon\":\"longitude\", \n", + " \"lat\":\"latitude\"}).sel(ensemble=10)[\"qq\"] * 86400 / 1000000 # concert to [MJ/m2day]\n", + "rh = xr.open_dataset(\"data/example_10/hu_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n", + " engine=\"netcdf4\").sel(lon=slice(wind.longitude.min(), wind.longitude.max()), \n", + " lat=slice(wind.latitude.min(), wind.latitude.max()))\n", + "rh = rh.rename_dims({\"lon\":\"longitude\", \"lat\":\"latitude\"}).rename({\"lon\":\"longitude\", \"lat\":\"latitude\"})[\"hu\"]\n", + "wind = wind.sel(longitude=slice(rh.longitude.min(), rh.longitude.max()), latitude=slice(rh.latitude.min(), rh.latitude.max()))\n", + "elevation = xr.open_dataset(\"data/example_10/elev_ens_0.25deg_reg_v25.0e.nc\", \n", + " engine=\"netcdf4\").sel(longitude=slice(wind.longitude.min(), wind.longitude.max()), \n", + " latitude=slice(wind.latitude.min(), wind.latitude.max()))[\"elevation\"].fillna(0)\n", + "lat = tmean.latitude * np.pi / 180\n", + "lat = lat.expand_dims(dim={\"longitude\":tmean.longitude}, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "889660f3-3569-4af4-a83c-ecd887732b47", + "metadata": {}, + "outputs": [], + "source": [ + "# Combine into a single dataset\n", + "combined_ds = xr.Dataset({\"wind\": wind,\"tmax\": tmax, \"tmin\": tmin, \"tmean\":tmean,\n", + " 'rs': rs, 'rh': rh,'elevation': elevation, \"lat\":lat}).isel(latitude=slice(50, 60), longitude=slice(100, 110))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5c9fcb2-8a33-4a32-8561-756b403ef671", + "metadata": {}, + "outputs": [], + "source": [ + "new_time = pd.date_range('2018-06-06', '2019-06-06')\n", + "meteo_100 = combined_ds.reindex(time=new_time, method='ffill').rio.write_crs(\"EPSG:4326\")\n", + "meteo_10 = meteo_100.isel(latitude=slice(0, 2), longitude=slice(0, 5))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "061e2ce6-c915-4bb5-a780-818724388689", + "metadata": {}, + "outputs": [], + "source": [ + "# Print resolution\n", + "print(abs(meteo_100.latitude.diff(\"latitude\").mean().values))\n", + "print(abs(meteo_100.longitude.diff(\"longitude\").mean().values))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee868a38-32bc-4145-8f3d-3ea2046f4b7c", + "metadata": {}, + "outputs": [], + "source": [ + "# create xarray.datarray with 100x100, 1000x1000\n", + "meteo_10000 = meteo_100.rio.reproject(dst_crs=meteo_100.rio.crs, resolution=0.025).rename({'x': 'longitude', 'y': 'latitude'})\n", + "meteo_1000 = meteo_10000.isel(latitude=slice(0, 20), longitude=slice(0, 50))\n", + "meteo_1000000 = meteo_100.rio.reproject(dst_crs=meteo_100.rio.crs, resolution=0.0025).rename({'x': 'longitude', 'y': 'latitude'})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8e9bb5b-d449-49a3-b540-d36d2033d4f2", + "metadata": {}, + "outputs": [], + "source": [ + "meteo_100000 = meteo_1000000.isel(latitude=slice(0, 200), longitude=slice(0, 500))\n", + "meteo_10000 = meteo_1000000.isel(latitude=slice(0, 20), longitude=slice(0, 500))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ac09e2b-0eb3-4a4e-9b58-c8d02fea2e26", + "metadata": {}, + "outputs": [], + "source": [ + "# Create dictionary\n", + "meteo_xr_dic = {\"10\":meteo_10, \"100\":meteo_100, \"1000\":meteo_1000, \"10000\":meteo_10000, \"100000\":meteo_100000, \"1000000\":meteo_1000000}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc1e362d-59bb-4310-acd9-9c6f6a502078", + "metadata": {}, + "outputs": [], + "source": [ + "# Compute times\n", + "def compute_pet_and_time_xr(method, length):\n", + " # Determining the necessary input data\n", + " wind, tmax, tmin, tmean, rs, rh, elevation, lat = (meteo_xr_dic[str(length)][col] for col in meteo_10.data_vars)\n", + " start_time = time.time()\n", + " if method == \"pm\":\n", + " #print(rh.min(), length)\n", + " pet = pyet.pm(tmean, wind, rs=rs, tmax=tmax, tmin=tmin, rh=rh, lat=lat, elevation=elevation)\n", + " elif method == \"oudin\":\n", + " pet = pyet.oudin(tmean, lat)\n", + " elif method == \"hamon\":\n", + " pet = pyet.hamon(tmean, lat)\n", + " elif method == \"hargreaves\":\n", + " pet = pyet.hargreaves(tmean, tmax, tmin, lat)\n", + " elif method == \"priestley_taylor\":\n", + " pet = pyet.priestley_taylor(tmean, rs=rs, tmax=tmax, tmin=tmin, lat=lat, elevation=elevation, rh=rh)\n", + " elif method == \"romanenko\":\n", + " pet = pyet.romanenko(tmean, rh)\n", + " elif method == \"makkink\":\n", + " pet = pyet.makkink(tmean, rs, elevation=elevation)\n", + " elif method == \"jensen_haise\":\n", + " pet = pyet.jensen_haise(tmean, rs=rs, lat=lat)\n", + " elif method == \"turc\":\n", + " pet = pyet.turc(tmean, rs, rh)\n", + " else:\n", + " raise ValueError(\"Invalid method\")\n", + " end_time = time.time()\n", + " return end_time - start_time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "291952b3-1afb-4982-b53d-590ac9337cde", + "metadata": {}, + "outputs": [], + "source": [ + "# Define dictionary to store the computed times\n", + "mean_times_xr = {\"pm\": [], \"oudin\": [], \"hamon\": [], \"hargreaves\": [], \"priestley_taylor\": [], \n", + " \"romanenko\": [], \"makkink\": [], \"jensen_haise\": [], \"turc\": []}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "835bd1f9-0173-48f9-9b06-6161e84e652e", + "metadata": {}, + "outputs": [], + "source": [ + "# Loop over different time series lengths and methods\n", + "lengths = [10, 100, 1000, 10000, 100000] \n", + "for length in lengths:\n", + " for method in [\"pm\", \"oudin\", \"hamon\", \"hargreaves\", \"priestley_taylor\", \"romanenko\", \"makkink\", \"jensen_haise\", \"turc\"]:\n", + " times_100 = []\n", + " for i in range(10):\n", + " times_100.append(compute_pet_and_time_xr(method, str(length)))\n", + " mean_times_xr[method].append(np.mean(times_100))\n", + " #times_25[method].append(np.quantile(times_100, 0.025))\n", + " #times_975[method].append(np.quantile(times_100, 0.975))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e6d66ea-3a6d-445c-aab0-075bf31c0ea3", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Plot figure\n", + "cmap_colrs = cm.get_cmap('Paired', len(mean_times.keys()))\n", + "colors = [cmap_colrs(i) for i in range(0, len(mean_times.keys()))]\n", + "\n", + "fig, axs = plt.subplots(2, 1, figsize=(6,5), layout=\"constrained\")\n", + "for method, color in zip(mean_times.keys(), colors):\n", + " axs[0].plot(mean_times[method], label=method, color=color)\n", + " #axs[0].fill_between(np.arange(0, len(lengths)), times_25[method], times_975[method], alpha=0.1)\n", + "for method, color in zip(mean_times_xr.keys(), colors):\n", + " axs[1].plot(mean_times_xr[method], label=method, color=color)\n", + "\n", + "axs[0].set_xlabel(\"Time series length [days]\", size=12)\n", + "for i in (0,1):\n", + " axs[i].set_ylabel(\"\")\n", + " axs[i].set_yscale(\"log\")\n", + " \n", + "axs[0].set_xticks(np.arange(0, 4))\n", + "axs[0].set_xticklabels([\"10$^1$\", \"10$^2$\", \"10$^3$\", \"10$^4$\"]);\n", + "axs[0].set_xlim(-0.1, 3.1)\n", + "\n", + "axs[1].set_xticks(np.arange(0, 5))\n", + "axs[1].set_xlim(-0.1, 4.1)\n", + "axs[1].set_xticklabels([\"10$^1$\", \"10$^2$\", \"10$^3$\", \"10$^4$\", \"10$^5$\"]);\n", + "\n", + "fig.text(-0.04, 0.5, 'Computation time [s]', va='center', rotation='vertical', fontsize=14)\n", + "axs[1].set_xlabel(\"Cell number [n]\", size=12)\n", + "\n", + "axs[0].legend(bbox_to_anchor=(0.5, 1.55), fontsize=11, ncol=3, loc=\"upper center\");\n", + "plt.subplots_adjust(left=0.15, top=0.85);\n", + "axs[0].text(0.02, 0.87, \"(a)\", fontsize=13, transform=axs[0].transAxes)\n", + "axs[1].text(0.02, 0.87, \"(b)\", fontsize=13, transform=axs[1].transAxes)\n", + "fig.savefig(\"figure6.png\", dpi=600, bbox_inches=\"tight\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2e9318e-61ca-4034-b4f8-7b030b860576", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "d43d1f5e-0bbc-43f9-ac94-6f952278245e", diff --git a/pyet/version.py b/pyet/version.py index ff5fea7..ed0847b 100644 --- a/pyet/version.py +++ b/pyet/version.py @@ -1,3 +1,3 @@ # This is the only location where the version will be written and changed. # Based on https://packaging.python.org/single_source_version/ -__version__ = "1.3.1" +__version__ = "1.3.2"