From 4a3b256b65ac3652a3d7aadc248d4bae2d0a380c Mon Sep 17 00:00:00 2001
From: MatevzVremec <matevz.vremec@gmail.com>
Date: Mon, 18 Mar 2024 12:36:52 +0100
Subject: [PATCH] Added computation time to Notebook 10.

---
 docs/examples/10_example_paper.ipynb | 359 ++++++++++++++++++++++++++-
 pyet/version.py                      |   2 +-
 2 files changed, 357 insertions(+), 4 deletions(-)

diff --git a/docs/examples/10_example_paper.ipynb b/docs/examples/10_example_paper.ipynb
index 15a029d..ab1b903 100644
--- a/docs/examples/10_example_paper.ipynb
+++ b/docs/examples/10_example_paper.ipynb
@@ -122,7 +122,7 @@
     "#edit default values for plots\n",
     "params = {\n",
     "    \"legend.fontsize\": \"15\",\n",
-    "    \"axes.labelsize\": \"16\",\n",
+    "    \"axes.labelsize\": \"15\",\n",
     "    \"xtick.labelsize\": \"15\",\n",
     "    \"ytick.labelsize\": \"15\",\n",
     "    \"xtick.direction\": \"in\",\n",
@@ -134,6 +134,7 @@
    "cell_type": "markdown",
    "id": "dc0ebed8",
    "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
     "tags": []
    },
    "source": [
@@ -781,6 +782,7 @@
    "cell_type": "markdown",
    "id": "38a45751",
    "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
     "tags": []
    },
    "source": [
@@ -880,6 +882,7 @@
    "cell_type": "markdown",
    "id": "bf7f5fd0",
    "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
     "tags": []
    },
    "source": [
@@ -892,7 +895,11 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "d31888e0",
-   "metadata": {},
+   "metadata": {
+    "jupyter": {
+     "source_hidden": true
+    }
+   },
    "outputs": [],
    "source": [
     "# Load E-OBS data\n",
@@ -1016,6 +1023,7 @@
    "cell_type": "markdown",
    "id": "84918f2b",
    "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
     "tags": []
    },
    "source": [
@@ -1295,6 +1303,7 @@
    "cell_type": "markdown",
    "id": "6f46ea03",
    "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
     "tags": []
    },
    "source": [
@@ -1497,7 +1506,9 @@
   {
    "cell_type": "markdown",
    "id": "abfb2272",
-   "metadata": {},
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true
+   },
    "source": [
     "## 7. Code example in paper"
    ]
@@ -1597,6 +1608,348 @@
     "pe_hargreaves = pyet.hargreaves(tmean, tmax, tmin, lat=lat)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "7ea54c53-23d9-446a-ba49-b954d10f15f6",
+   "metadata": {},
+   "source": [
+    "# 8. Computation time\n",
+    "\n",
+    "Comparison of the computational time and time series length  and xarray grid size for different *pyet* PET modelds. The plot shows (a) the mean computational time and 95 % confidence interval for each model against the time series length, represented by a line and corresponding colored region, respectively. (b) the mean computational time and 95 % confidence interval for each model against the grid size of the xarray with 30 years time series length, represented by a line and corresponding colored region, respectively."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5af6625-88d4-448c-9611-0ca14559ab63",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Reading meteorological data\n",
+    "df_10_days = pd.read_csv(\"data//example_10//10_example_meteo.csv\", index_col=0, parse_dates=True).iloc[:10, :-1]\n",
+    "\n",
+    "# Determining the necessary input data\n",
+    "lat = 0.91  # define latitude [radians]\n",
+    "elevation = 4  # define elevation [meters above sea-level]\n",
+    "\n",
+    "# Function to replicate DataFrame to N days using the same values, maintaining a continuous date index\n",
+    "def extend_data(df, n_days):\n",
+    "    n_replications = n_days // len(df)  # Calculate how many times to replicate the DataFrame\n",
+    "    extended_data = np.tile(df.values, (n_replications, 1))  # Replicate the data values\n",
+    "    start_date = df.index[0]\n",
+    "    end_date = start_date + pd.Timedelta(days=n_days - 1)\n",
+    "    new_index = pd.date_range(start=start_date, end=end_date, freq='D')  # New continuous date index\n",
+    "    extended_df = pd.DataFrame(extended_data[:len(new_index)], index=new_index, columns=df.columns)  # DataFrame with new index\n",
+    "    return extended_df\n",
+    "\n",
+    "# Extending the DataFrame to 100, 1000, and 10000 days...\n",
+    "dfs_time = {}\n",
+    "dfs_time[\"10\"] = df_10_days\n",
+    "dfs_time[\"100\"] = extend_data(df_10_days, 100)\n",
+    "dfs_time[\"1000\"] = extend_data(df_10_days, 1000)\n",
+    "dfs_time[\"10000\"] = extend_data(df_10_days, 10000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "594a1268-5fb9-41c1-b2d9-93d2ff7a9f1a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time \n",
+    "\n",
+    "# Compute times\n",
+    "def compute_pet_and_time(method, length):\n",
+    "    # Determining the necessary input data\n",
+    "    tmean, tmax, tmin, rh, rs, wind = (dfs_time[length][col] for col in df_10_days.columns)\n",
+    "    start_time = time.time()\n",
+    "    if method == \"pm\":\n",
+    "        pet = pyet.pm(tmean, wind, rs=rs, tmax=tmax, tmin=tmin, rh=rh, lat=lat, elevation=elevation)\n",
+    "    elif method == \"oudin\":\n",
+    "        pet = pyet.oudin(tmean, lat)\n",
+    "    elif method == \"hamon\":\n",
+    "        pet = pyet.hamon(tmean, lat)\n",
+    "    elif method == \"hargreaves\":\n",
+    "        pet = pyet.hargreaves(tmean, tmax, tmin, lat)\n",
+    "    elif method == \"priestley_taylor\":\n",
+    "        pet = pyet.priestley_taylor(tmean, rs=rs, tmax=tmax, tmin=tmin, lat=lat, elevation=elevation, rh=rh)\n",
+    "    elif method == \"romanenko\":\n",
+    "        pet = pyet.romanenko(tmean, rh)\n",
+    "    elif method == \"makkink\":\n",
+    "        pet = pyet.makkink(tmean, rs, elevation=elevation)\n",
+    "    elif method == \"jensen_haise\":\n",
+    "        pet = pyet.jensen_haise(tmean, rs=rs, lat=lat)\n",
+    "    elif method == \"turc\":\n",
+    "        pet = pyet.turc(tmean, rs, rh)\n",
+    "    else:\n",
+    "        raise ValueError(\"Invalid method\")\n",
+    "    end_time = time.time()\n",
+    "    return end_time - start_time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1645bb32-b525-49ac-b677-b8d43348798b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define dictionary to store the computed times\n",
+    "mean_times = {\"pm\": [], \"oudin\": [], \"hamon\": [], \"hargreaves\": [], \"priestley_taylor\": [], \n",
+    "         \"romanenko\": [], \"makkink\": [], \"jensen_haise\": [], \"turc\": []}\n",
+    "times_25= {\"pm\": [], \"oudin\": [], \"hamon\": [], \"hargreaves\": [], \"priestley_taylor\": [], \n",
+    "         \"romanenko\": [], \"makkink\": [], \"jensen_haise\": [], \"turc\": []}\n",
+    "times_975 = {\"pm\": [], \"oudin\": [], \"hamon\": [], \"hargreaves\": [], \"priestley_taylor\": [], \n",
+    "         \"romanenko\": [], \"makkink\": [], \"jensen_haise\": [], \"turc\": []}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab9a5c5c-3401-4262-86cf-3af8eb5d0ef9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Loop over different time series lengths and methods\n",
+    "lengths = [10, 100, 1000, 10000] \n",
+    "for length in lengths:\n",
+    "    for method in [\"pm\", \"oudin\", \"hamon\", \"hargreaves\", \"priestley_taylor\", \"romanenko\", \"makkink\", \"jensen_haise\", \"turc\"]:\n",
+    "        times_100 = []\n",
+    "        for i in range(100):\n",
+    "            times_100.append(compute_pet_and_time(method, str(length)))\n",
+    "        mean_times[method].append(np.mean(times_100))\n",
+    "        #times_25[method].append(np.quantile(times_100, 0.025))\n",
+    "        #times_975[method].append(np.quantile(times_100, 0.975))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f55f0f56-4185-4268-9913-7f8f0935b5d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load xarray dataset\n",
+    "wind = xr.open_dataset(\"data/example_10/fg_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n",
+    "                       engine=\"netcdf4\")[\"fg\"]\n",
+    "tmax = xr.open_dataset(\"data/example_10/tx_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n",
+    "                       engine=\"netcdf4\").sel(longitude=slice(wind.longitude.min(), wind.longitude.max()), \n",
+    "                                             latitude=slice(wind.latitude.min(), wind.latitude.max()))[\"tx\"]\n",
+    "tmin = xr.open_dataset(\"data/example_10/tn_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n",
+    "                       engine=\"netcdf4\").sel(longitude=slice(wind.longitude.min(), wind.longitude.max()), \n",
+    "                                             latitude=slice(wind.latitude.min(), wind.latitude.max()))[\"tn\"]\n",
+    "tmean = xr.open_dataset(\"data/example_10/tg_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n",
+    "                        engine=\"netcdf4\").sel(longitude=slice(wind.longitude.min(), wind.longitude.max()), \n",
+    "                                              latitude=slice(wind.latitude.min(), wind.latitude.max()))[\"tg\"]\n",
+    "rs = xr.open_dataset(\"data/example_10/qq_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n",
+    "                     engine=\"netcdf4\").sel(lon=slice(wind.longitude.min(), wind.longitude.max()), \n",
+    "                                           lat=slice(wind.latitude.min(), wind.latitude.max()))\n",
+    "rs = rs.rename_dims({\"lon\":\"longitude\", \n",
+    "                     \"lat\":\"latitude\"}).rename({\"lon\":\"longitude\", \n",
+    "                                                \"lat\":\"latitude\"}).sel(ensemble=10)[\"qq\"] * 86400 / 1000000  # concert to [MJ/m2day]\n",
+    "rh = xr.open_dataset(\"data/example_10/hu_ens_mean_0.25deg_reg_2018_v25.0e.nc\", \n",
+    "                     engine=\"netcdf4\").sel(lon=slice(wind.longitude.min(), wind.longitude.max()), \n",
+    "                                           lat=slice(wind.latitude.min(), wind.latitude.max()))\n",
+    "rh = rh.rename_dims({\"lon\":\"longitude\", \"lat\":\"latitude\"}).rename({\"lon\":\"longitude\", \"lat\":\"latitude\"})[\"hu\"]\n",
+    "wind = wind.sel(longitude=slice(rh.longitude.min(), rh.longitude.max()), latitude=slice(rh.latitude.min(), rh.latitude.max()))\n",
+    "elevation =  xr.open_dataset(\"data/example_10/elev_ens_0.25deg_reg_v25.0e.nc\", \n",
+    "                     engine=\"netcdf4\").sel(longitude=slice(wind.longitude.min(), wind.longitude.max()), \n",
+    "                                           latitude=slice(wind.latitude.min(), wind.latitude.max()))[\"elevation\"].fillna(0)\n",
+    "lat = tmean.latitude * np.pi / 180\n",
+    "lat = lat.expand_dims(dim={\"longitude\":tmean.longitude}, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "889660f3-3569-4af4-a83c-ecd887732b47",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Combine into a single dataset\n",
+    "combined_ds = xr.Dataset({\"wind\": wind,\"tmax\": tmax, \"tmin\": tmin, \"tmean\":tmean,\n",
+    "                          'rs': rs, 'rh': rh,'elevation': elevation, \"lat\":lat}).isel(latitude=slice(50, 60), longitude=slice(100, 110))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5c9fcb2-8a33-4a32-8561-756b403ef671",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_time = pd.date_range('2018-06-06', '2019-06-06')\n",
+    "meteo_100 = combined_ds.reindex(time=new_time, method='ffill').rio.write_crs(\"EPSG:4326\")\n",
+    "meteo_10 = meteo_100.isel(latitude=slice(0, 2), longitude=slice(0, 5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "061e2ce6-c915-4bb5-a780-818724388689",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Print resolution\n",
+    "print(abs(meteo_100.latitude.diff(\"latitude\").mean().values))\n",
+    "print(abs(meteo_100.longitude.diff(\"longitude\").mean().values))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee868a38-32bc-4145-8f3d-3ea2046f4b7c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create xarray.datarray with 100x100, 1000x1000\n",
+    "meteo_10000 = meteo_100.rio.reproject(dst_crs=meteo_100.rio.crs, resolution=0.025).rename({'x': 'longitude', 'y': 'latitude'})\n",
+    "meteo_1000 = meteo_10000.isel(latitude=slice(0, 20), longitude=slice(0, 50))\n",
+    "meteo_1000000 = meteo_100.rio.reproject(dst_crs=meteo_100.rio.crs, resolution=0.0025).rename({'x': 'longitude', 'y': 'latitude'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8e9bb5b-d449-49a3-b540-d36d2033d4f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "meteo_100000 = meteo_1000000.isel(latitude=slice(0, 200), longitude=slice(0, 500))\n",
+    "meteo_10000 = meteo_1000000.isel(latitude=slice(0, 20), longitude=slice(0, 500))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ac09e2b-0eb3-4a4e-9b58-c8d02fea2e26",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create dictionary\n",
+    "meteo_xr_dic = {\"10\":meteo_10, \"100\":meteo_100, \"1000\":meteo_1000, \"10000\":meteo_10000, \"100000\":meteo_100000, \"1000000\":meteo_1000000}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dc1e362d-59bb-4310-acd9-9c6f6a502078",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Compute times\n",
+    "def compute_pet_and_time_xr(method, length):\n",
+    "    # Determining the necessary input data\n",
+    "    wind, tmax, tmin, tmean, rs, rh, elevation, lat = (meteo_xr_dic[str(length)][col] for col in meteo_10.data_vars)\n",
+    "    start_time = time.time()\n",
+    "    if method == \"pm\":\n",
+    "        #print(rh.min(), length)\n",
+    "        pet = pyet.pm(tmean, wind, rs=rs, tmax=tmax, tmin=tmin, rh=rh, lat=lat, elevation=elevation)\n",
+    "    elif method == \"oudin\":\n",
+    "        pet = pyet.oudin(tmean, lat)\n",
+    "    elif method == \"hamon\":\n",
+    "        pet = pyet.hamon(tmean, lat)\n",
+    "    elif method == \"hargreaves\":\n",
+    "        pet = pyet.hargreaves(tmean, tmax, tmin, lat)\n",
+    "    elif method == \"priestley_taylor\":\n",
+    "        pet = pyet.priestley_taylor(tmean, rs=rs, tmax=tmax, tmin=tmin, lat=lat, elevation=elevation, rh=rh)\n",
+    "    elif method == \"romanenko\":\n",
+    "        pet = pyet.romanenko(tmean, rh)\n",
+    "    elif method == \"makkink\":\n",
+    "        pet = pyet.makkink(tmean, rs, elevation=elevation)\n",
+    "    elif method == \"jensen_haise\":\n",
+    "        pet = pyet.jensen_haise(tmean, rs=rs, lat=lat)\n",
+    "    elif method == \"turc\":\n",
+    "        pet = pyet.turc(tmean, rs, rh)\n",
+    "    else:\n",
+    "        raise ValueError(\"Invalid method\")\n",
+    "    end_time = time.time()\n",
+    "    return end_time - start_time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "291952b3-1afb-4982-b53d-590ac9337cde",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define dictionary to store the computed times\n",
+    "mean_times_xr = {\"pm\": [], \"oudin\": [], \"hamon\": [], \"hargreaves\": [], \"priestley_taylor\": [], \n",
+    "         \"romanenko\": [], \"makkink\": [], \"jensen_haise\": [], \"turc\": []}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "835bd1f9-0173-48f9-9b06-6161e84e652e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Loop over different time series lengths and methods\n",
+    "lengths = [10, 100, 1000, 10000, 100000] \n",
+    "for length in lengths:\n",
+    "    for method in [\"pm\", \"oudin\", \"hamon\", \"hargreaves\", \"priestley_taylor\", \"romanenko\", \"makkink\", \"jensen_haise\", \"turc\"]:\n",
+    "        times_100 = []\n",
+    "        for i in range(10):\n",
+    "            times_100.append(compute_pet_and_time_xr(method, str(length)))\n",
+    "        mean_times_xr[method].append(np.mean(times_100))\n",
+    "        #times_25[method].append(np.quantile(times_100, 0.025))\n",
+    "        #times_975[method].append(np.quantile(times_100, 0.975))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8e6d66ea-3a6d-445c-aab0-075bf31c0ea3",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# Plot figure\n",
+    "cmap_colrs = cm.get_cmap('Paired', len(mean_times.keys()))\n",
+    "colors = [cmap_colrs(i) for i in range(0, len(mean_times.keys()))]\n",
+    "\n",
+    "fig, axs = plt.subplots(2, 1, figsize=(6,5), layout=\"constrained\")\n",
+    "for method, color in zip(mean_times.keys(), colors):\n",
+    "    axs[0].plot(mean_times[method], label=method, color=color)\n",
+    "    #axs[0].fill_between(np.arange(0, len(lengths)), times_25[method], times_975[method], alpha=0.1)\n",
+    "for method, color in zip(mean_times_xr.keys(), colors):\n",
+    "    axs[1].plot(mean_times_xr[method], label=method, color=color)\n",
+    "\n",
+    "axs[0].set_xlabel(\"Time series length [days]\", size=12)\n",
+    "for i in (0,1):\n",
+    "    axs[i].set_ylabel(\"\")\n",
+    "    axs[i].set_yscale(\"log\")\n",
+    "    \n",
+    "axs[0].set_xticks(np.arange(0, 4))\n",
+    "axs[0].set_xticklabels([\"10$^1$\", \"10$^2$\", \"10$^3$\", \"10$^4$\"]);\n",
+    "axs[0].set_xlim(-0.1, 3.1)\n",
+    "\n",
+    "axs[1].set_xticks(np.arange(0, 5))\n",
+    "axs[1].set_xlim(-0.1, 4.1)\n",
+    "axs[1].set_xticklabels([\"10$^1$\", \"10$^2$\", \"10$^3$\", \"10$^4$\", \"10$^5$\"]);\n",
+    "\n",
+    "fig.text(-0.04, 0.5, 'Computation time [s]', va='center', rotation='vertical', fontsize=14)\n",
+    "axs[1].set_xlabel(\"Cell number [n]\", size=12)\n",
+    "\n",
+    "axs[0].legend(bbox_to_anchor=(0.5, 1.55), fontsize=11, ncol=3, loc=\"upper center\");\n",
+    "plt.subplots_adjust(left=0.15, top=0.85);\n",
+    "axs[0].text(0.02, 0.87, \"(a)\", fontsize=13, transform=axs[0].transAxes)\n",
+    "axs[1].text(0.02, 0.87, \"(b)\", fontsize=13, transform=axs[1].transAxes)\n",
+    "fig.savefig(\"figure6.png\", dpi=600, bbox_inches=\"tight\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c2e9318e-61ca-4034-b4f8-7b030b860576",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "id": "d43d1f5e-0bbc-43f9-ac94-6f952278245e",
diff --git a/pyet/version.py b/pyet/version.py
index ff5fea7..ed0847b 100644
--- a/pyet/version.py
+++ b/pyet/version.py
@@ -1,3 +1,3 @@
 # This is the only location where the version will be written and changed.
 # Based on https://packaging.python.org/single_source_version/
-__version__ = "1.3.1"
+__version__ = "1.3.2"