diff --git a/Loop_Over_Cities.ipynb b/Loop_Over_Cities.ipynb new file mode 100644 index 0000000..05881f6 --- /dev/null +++ b/Loop_Over_Cities.ipynb @@ -0,0 +1,717 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "58118ce0-6f51-49c7-a5da-7dee78f29d5d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0b9632b-efd9-451f-8f53-419d8ebf8cc0", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import sys\n", + "sys.path.append('/home/users/kp_epcc/aq_stripes/visualisation_code/')\n", + " \n", + "from plotting_routines_cities import (plot_absolute_bar_plot,\n", + " plot_aq_stripes_withline,\n", + " plot_aq_stripes_withline_withindicativecolourbar,\n", + " plot_summary_statistics)\n", + "\n", + "\n", + "import settings\n", + "\n", + "from settings import OUTPUT_DATA_DIR\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17920772-7b25-4739-aea9-65efe10a1255", + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib.colors import LinearSegmentedColormap\n", + "from matplotlib.colors import ListedColormap\n", + "import os\n", + "import pandas as pd \n", + "import textwrap\n", + "\n", + "\n", + "def create_custom_cmap():\n", + "\n", + " colors = [\n", + " '#a4ffff', # 1 Blue\n", + " '#b0dae9', # 2 Blue\n", + " '#b0ceed', # 3 Blue\n", + " '#F9E047', # 4 Yellow\n", + " '#f2c84b', # 5 Yellow\n", + " '#f1a63f', # 6 Orange \n", + " '#E98725', # 7 Orange\n", + " '#af4553', # 8 (Red)\n", + " '#863b47', # 9 (Dark Red)\n", + " '#673a3d', # 10 (Darker Red)\n", + " '#462f30', # 11 (Very Dark Brown)\n", + " '#252424', # 12 (Almost Black)\n", + " ]\n", + "\n", + " num_colors = len(colors)\n", + " custom_cmap = ListedColormap(colors) \n", + " return custom_cmap\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce2c9121-67f8-4a8e-b447-2f5f9af71c4d", + "metadata": {}, + "outputs": [], + "source": [ + "from collections import Counter\n", + "\n", + "# Define the path to cities data file\n", + "cities_file_path = os.path.join('List_of_Cities_Continent.txt')\n", + "\n", + "# Create an empty dictionary to store city data\n", + "cities_continent = {}\n", + "\n", + "# Load the cities dictionary\n", + "with open(cities_file_path, 'r') as file:\n", + " file_content = file.read()\n", + " exec(file_content) # Executes the cities_continent dictionary assignment\n", + "\n", + "# Count the number of unique entries in the dictionary\n", + "unique_entries_count = len(cities_continent)\n", + "\n", + "print(f\"Number of unique entries in cities_continent: {unique_entries_count}\")\n", + "\n", + "# Check for duplicate city entries\n", + "city_names = list(cities_continent.keys())\n", + "city_counts = Counter(city_names)\n", + "duplicates = {city: count for city, count in city_counts.items() if count > 1}\n", + "if duplicates:\n", + " print(\"Duplicate entries found:\")\n", + " for city, count in duplicates.items():\n", + " print(f\"{city} appears {count} times\")\n", + "else:\n", + " print(\"No duplicate entries found.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d61de6c-2146-4b72-bb65-22ca59b9bf96", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cf18bb9-fb4c-4c30-95fd-773ad3050385", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%reload_ext autoreload\n", + "\n", + "import xarray as xr\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Read in CMIP (model) and VAND (satellitte) data\n", + "dir_path = \"/home/users/kp_epcc/aq_stripes/input_data/\"\n", + "\n", + "ds_VAND = xr.open_dataset(dir_path + \"vand_annual_mean_output.nc\")\n", + "ds_CMIP = xr.open_dataset(dir_path + \"cmip_annual_mean_output.nc\")\n", + "\n", + "# Set the time coordinates to just the year value\n", + "ds_CMIP['time'] = ds_CMIP['time'].dt.year\n", + "ds_VAND['time'] = ds_VAND['time'].dt.year\n", + "\n", + "# Make a long time axis from start of CMIP to end of VAND\n", + "years = np.arange(ds_CMIP['time'].min(), ds_VAND['time'].max() + 1)\n", + "\n", + "# Define the time period used to average for the weighting\n", + "time_period = slice(2000, 2002) \n", + "time_period_list = [2000, 2001, 2002] # Only used for plotting\n", + "\n", + "# Call colour scheme\n", + "custom_cmap = create_custom_cmap()\n", + "\n", + "# Dictionary to hold combined data for each city\n", + "city_combined_data = {}\n", + "\n", + "# Create a DataFrame to store the city and corresponding data_ratio\n", + "df_data_ratio = pd.DataFrame(columns=['City', 'Data_Ratio'])\n", + "df_format1 = pd.DataFrame({'Year': years})\n", + "\n", + "count=0\n", + "for city, info in cities_continent.items():\n", + "\n", + " count += 1\n", + " print(f\"Processing {city}\", f\"Total cities processed: {count}\") \n", + " \n", + " latitude = info['lat']\n", + " longitude = info['lon']\n", + " continent = info['continent'].replace(' ', '_')\n", + "\n", + " print(\"\")\n", + " print(\"******************************************************\")\n", + " print(\"latitude\",latitude)\n", + " print(\"longitude\", longitude)\n", + " print(\"City = \",city)\n", + " print(\"Continent = \",continent)\n", + " print(\"\") \n", + " \n", + " # Extract the value at the correct lat / lon of interest\n", + " \n", + " # Use linear interpolation for CMIP model data and nearest for VAND satellitte data\n", + " data_CMIP = ds_CMIP['PM25_CMIP'].interp(latitude=latitude, longitude=longitude, method='linear') \n", + " data_VAND = ds_VAND['PM25_VAND'].sel(latitude=latitude, longitude=longitude, method='nearest')\n", + " \n", + " # Calculate the three year average value (time_period) for the lat / lon of interest\n", + " data_CMIP_mean = data_CMIP.sel(time=time_period).mean(dim='time')\n", + " data_VAND_mean = data_VAND.sel(time=time_period).mean(dim='time') \n", + " \n", + " # Calculate the ratio of the averaged CMIP data to the averaged VAND data for the lat / lon of interest \n", + " data_ratio = data_VAND_mean / data_CMIP_mean\n", + "\n", + " if data_ratio < 2:\n", + " uncertainty_classification = \"Low Uncertainty\"\n", + " elif data_ratio < 4:\n", + " uncertainty_classification = \"Moderate Uncertainty\"\n", + " else:\n", + " uncertainty_classification = \"High Uncertainty\"\n", + "\n", + " print(uncertainty_classification)\n", + "\n", + " \n", + " print(\"Calculated data ratio (VAND/CMIP):\")\n", + " print(data_ratio) \n", + "\n", + " # Weight the CMIP data by the rato of VAND / CMIP\n", + " CMIP_data_weighted = data_CMIP * data_ratio\n", + "\n", + " print(\"data_VAND_mean\",data_VAND_mean, \"data_CMIP_mean\",data_CMIP_mean,\" data_ratio = \",data_ratio)\n", + " print(\"CMIP_data_weighted size and dimensions before concatenation:\", CMIP_data_weighted.size, CMIP_data_weighted.dims)\n", + " print(\"CMIP_data_weighted time range:\", CMIP_data_weighted.time.min().values, \"to\", CMIP_data_weighted.time.max().values)\n", + " print(\"data_VAND size and dimensions before concatenation:\", data_VAND.size, data_VAND.dims)\n", + " print(\"data_VAND time range:\", data_VAND.time.min().values, \"to\", data_VAND.time.max().values)\n", + "\n", + " # Concatenate data_weighted and data_VAND at the year 2000\n", + " data_combined = xr.concat([CMIP_data_weighted.sel(time=slice(None, 2000)), data_VAND.sel(time=slice(2001, None))], dim='time')\n", + " \n", + " # Print the size and dimensions of the array after concatenation\n", + " print(\"data_combined size and dimensions after concatenation:\", data_combined.size, data_combined.dims)\n", + " print(\"data_combined time range:\", data_combined.time.min().values, \"to\", data_combined.time.max().values)\n", + " \n", + " # Store the combined data in the dictionary\n", + " city_combined_data[city] = data_combined\n", + "\n", + " # Calculate the maximum value of your data\n", + " max_value = max(data_combined.max(), data_CMIP.max(), data_VAND.max())\n", + "\n", + " # Code to create plots to check VAND, CMIMP and combined values \n", + " plt.ylim(0, max_value * 1.2) \n", + " plt.plot(years, data_combined, label=f\"{city} Combined\", color=\"darkgrey\", linewidth=4)\n", + " plt.plot(data_CMIP['time'], data_CMIP, label=f\"{city} Model\", linewidth=1.5, linestyle='-.', color=\"blue\")\n", + " plt.plot(data_VAND['time'], data_VAND, label=f\"{city} Satellite\", linewidth=1.5, linestyle='--', color=\"red\")\n", + " plt.scatter(time_period_list, [data_CMIP_mean] * len(time_period_list), color='darkblue', label=f\"{city} Model 3-year Mean\", zorder=5)\n", + " plt.scatter(time_period_list, [data_VAND_mean] * len(time_period_list), color='darkred', label=f\"{city} Satellite 3-year Mean\", zorder=5)\n", + " extra_info = f\"{uncertainty_classification}, Data Ratio = {data_ratio:.1f}\"\n", + " plt.plot([], [], label=extra_info, color='none') # Invisible plot for the extra legend entry\n", + "\n", + " plt.xlabel('Year')\n", + " plt.ylabel('Annual mean PM2.5 (ug m-3)')\n", + " plt.title('PM2.5 Timeseries')\n", + " plt.legend(fontsize=8)\n", + " plt.grid(True)\n", + " plot_title = city \n", + " plt.savefig(OUTPUT_DATA_DIR + plot_title + '_'+continent+ '_CMIP_VAND_May.png', dpi=300, bbox_inches='tight', pad_inches=0.05)\n", + " plt.show() \n", + " plt.close()\n", + " \n", + " # Stripes plotting routines\n", + " plot_absolute_bar_plot(data_combined.values, data_combined['time'].values, custom_cmap, plot_title, continent) \n", + " plot_aq_stripes_withline(data_combined.values, data_combined['time'].values, custom_cmap, plot_title, continent)\n", + " plot_aq_stripes_withline_withindicativecolourbar(data_combined.values, data_combined['time'].values, custom_cmap, plot_title, continent, uncertainty_classification, data_ratio) \n", + " plot_summary_statistics(data_combined.values, data_combined['time'].values, custom_cmap, plot_title, continent)\n", + "\n", + " # Append the city and its data_ratio to the DataFrame\n", + " df_data_ratio = pd.concat([df_data_ratio, pd.DataFrame({'City': [city], 'Data_Ratio': [data_ratio.values]})], ignore_index=True)\n", + "\n", + " # Add the city's data to the DataFrame in wide format\n", + " df_format1[city] = data_combined.sel(time=years).values\n", + "\n", + "# Write cities PM2.5 data and sat to model data ratio to file\n", + "df_data_ratio.to_excel(OUTPUT_DATA_DIR + \"Cities_Data_Ratios.xlsx\", index=False)\n", + "df_format1.to_excel(OUTPUT_DATA_DIR + \"Cities_Wide_Format_Data.xlsx\", index=False) \n", + "\n", + "# Count the number of files containing the word \"CMIP\"\n", + "cmip_file_count = sum(1 for file_name in os.listdir(OUTPUT_DATA_DIR) if \"CMIP\" in file_name)\n", + "\n", + "# Compare and print results\n", + "if cmip_file_count == unique_entries_count:\n", + " print(f\"The number of files with 'CMIP' ({cmip_file_count}) matches the unique city count ({unique_entries_count}).\")\n", + "else:\n", + " print(f\"Mismatch: {cmip_file_count} files with 'CMIP', but {unique_entries_count} unique city entries.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef6e7dbf-c6bc-429b-9da8-6249a393abb3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "992e2957-69b5-44c5-af22-3256b9b00d8b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "926c7709-955d-4908-b90f-7e82206dca41", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebdfa627-0298-4dcd-84dc-0e014fd05e73", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8981a12e-30d4-44f2-a960-57b394271275", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c808a9aa-03fd-45fa-974a-86210d33c1df", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6d5acf1-5b7d-4e8f-8d7f-8329dfbfb3a8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f492d52-38ec-4128-b257-4f6f3ace3bed", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfcceef6-8dc0-4dd0-abfc-4ccabaf8ae23", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57b71bac-1aa7-4af6-9403-9ebb923de263", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1d078e0-6ad2-4019-abb9-b6452b4a187a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f83a9f5-f0ea-41d6-b1a8-e0cf94d052f8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbf715ff-5cc0-4c11-a72f-f5db6962ac56", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63de0bfe-8862-4fc6-aa8e-66202c68fd06", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acde9002-4925-4a68-8672-07131d3cbe07", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5fe2ee3-21c6-4acb-90d5-404af10e1220", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e6a5413-feac-46d3-88a5-1d4aed5a5ac0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57e59d99-5f30-4140-bcfe-a17c99bbe9cc", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a06b934f-43fb-4a3c-ad53-2e567fe966c6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca9a9e61-5314-474a-b8eb-bd1bf18f4ae2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9106051b-2443-451b-9105-cce60ac30012", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d53c971-8573-46ca-9b62-e9ea87589e39", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60e975f0-15d2-4428-af66-747c4e58ce5d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c293fc63-494b-4aa4-b58f-1f06374eb41a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f98e89d-916f-4a81-9ffc-2485f1df74c3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7712a6f-dd13-4f0b-90f4-5f3845107d0b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30b61810-3a4b-4ee0-a719-2f954c52c653", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e746cd95-6121-46f8-be17-8774eea2c835", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c227e288-8889-4df5-bf78-8ec4f18f5c07", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f0ea628-2263-43a9-85ba-fd3a3bec734f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f350ae07-dbf8-41b0-9a5a-f188f9406024", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e71dbd9-a62f-47b9-bf60-3c0cc2efbd74", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebed05f9-7bba-4136-9d56-a0d35788f715", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e004b08-d01b-43d4-a124-310f5e76b2ae", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2382d56c-e027-4f01-981f-06b405e422ee", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8105705b-a867-479b-b898-adb0e677d95d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "672a75aa-8c87-4dcd-9cbb-ba68794c9514", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3de2c24c-5acf-4370-9bf7-5280ff7d0cea", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7b40444-d7cf-40f9-a806-c9ef6de56189", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a80ae854-379b-4ba8-935c-931cd8433c73", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7e1f1c1-fa82-4f5f-8146-9e0d5f3305b4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "972d33e1-89aa-40c6-87a5-4f1947b28776", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f78c515d-e9f6-40c9-b657-499c7111ed27", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e924c9-39a2-4a25-8537-03d1d86bac94", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "719d6136-e9d0-4c09-84d2-dd45841ba523", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f6c73c4-8666-4731-93d3-4e0043d01261", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bd091fe-c6f7-4827-bf18-830d56a2402a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0bf07c86-42ec-4059-b90d-a56910ee2f6e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6f1345f-8f65-415e-8b88-ae16b2ccdd10", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8056de8b-20e5-4a19-a8ee-92a66dcef9fa", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56e364af-0df6-4c2c-afb9-dceb427b3a3e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "684b244b-1247-4738-872e-4ca11279ccf5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8420588-cbae-46af-82ae-8b3ff652d863", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "aq_stripes", + "language": "python", + "name": "aq_stripes" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}