diff --git a/DataHackathon2019/Challenge_7_sort_of/Accidents and Lethal Accidents - Geographical Analysis.ipynb b/DataHackathon2019/Challenge_7_sort_of/Accidents and Lethal Accidents - Geographical Analysis.ipynb new file mode 100644 index 0000000..9d3b5bd --- /dev/null +++ b/DataHackathon2019/Challenge_7_sort_of/Accidents and Lethal Accidents - Geographical Analysis.ipynb @@ -0,0 +1,329 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Accidents and Lethal Accidents - Geographical Analysis\n", + "Ori Moisis, Tal Peleg, Aviram Stern, Noam Hershtig & Moran Neuhof" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "toc": true + }, + "source": [ + "

<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n", + "<div class=\"toc\"></div>
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "import os\n", + "import geopandas\n", + "from shapely.geometry import Point\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load file\n", + "df = pd.read_csv('anyway_tables_csv_updated/involved_markers_hebrew.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Preparing coordinates in dataframe\n", + "# One shapely Point per row, built as (x, y) = (longitude, latitude)\n", + "df['Coordinates'] = list(zip(df.longitude, df.latitude))\n", + "df['Coordinates'] = df['Coordinates'].apply(Point)\n", + "# Country polygons; the 'Israel' row is used as the basemap of every figure below\n", + "# NOTE(review): geopandas.datasets is deprecated since GeoPandas 0.14 and removed in 1.0 - confirm the pinned version\n", + "world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Looking at lethal accidents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Subsetting only lethal accidents\n", + "# (accident_severity == 1), keeping the first row of each provider_and_id\n", + "lethal_df = df[df['accident_severity']==1].reset_index().drop_duplicates('provider_and_id').reset_index()\n", + "lethal_gdf = geopandas.GeoDataFrame(lethal_df, geometry='Coordinates')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Accidents geographical distribution - by year" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# One map per year: lethal accidents as translucent red points over the outline of Israel\n", + "israel_outline = world[world.name == 'Israel']  # same basemap for every figure\n", + "for year in range(2008, 2019):\n", + "    ax = israel_outline.plot(color='white', edgecolor='black', figsize=(16, 9))\n", + "    lethal_df_year = lethal_gdf[lethal_gdf['accident_year'] == year]\n", + "    lethal_df_year.plot(ax=ax, color='red', alpha=0.3)\n", + "    ax.set_title(year)\n", + "    ax.figure.savefig(f\"{year}.png\")\n", + "    # Show the figure now and release it, so figures do not pile up until the cell ends\n", + "    plt.show()\n", + "    plt.close(ax.figure)" + ] + }, + { + "cell_type":
"markdown", + "metadata": {}, + "source": [ + "### Accidents geographical distribution - by hour" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# One map per hour of day (0-23): lethal accidents over the outline of Israel\n", + "israel_outline = world[world.name == 'Israel']  # same basemap for every figure\n", + "for hour in range(0, 24):\n", + "    ax = israel_outline.plot(color='white', edgecolor='black', figsize=(16, 9))\n", + "    lethal_gdf_hour = lethal_gdf[lethal_gdf['accident_hour'] == hour]\n", + "    lethal_gdf_hour.plot(ax=ax, color='red', alpha=0.3)\n", + "    ax.set_title(hour)\n", + "    ax.figure.savefig(f\"hour_of_day{hour}.png\")\n", + "    # 24 figures left open would exceed matplotlib's default 20-figure warning threshold\n", + "    plt.show()\n", + "    plt.close(ax.figure)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Looking at all accidents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# One row per accident, deduplicated on provider_and_id like the lethal subset,\n", + "# so repeated rows do not darken a point when drawn with alpha < 1\n", + "# NOTE(review): assumes the table can hold several rows per provider_and_id - confirm\n", + "gdf = geopandas.GeoDataFrame(df.drop_duplicates('provider_and_id'), geometry='Coordinates')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Accidents geographical distribution - by year" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# One map per year: all accidents over the outline of Israel\n", + "israel_outline = world[world.name == 'Israel']  # same basemap for every figure\n", + "for year in range(2008, 2019):\n", + "    ax = israel_outline.plot(color='white', edgecolor='black', figsize=(16, 9))\n", + "    gdf_year = gdf[gdf['accident_year'] == year]\n", + "    gdf_year.plot(ax=ax, color='red', alpha=0.3)\n", + "    ax.set_title(year)\n", + "    ax.figure.savefig(f\"all_accidents_year_{year}.png\")\n", + "    # Show the figure now and release it, so figures do not pile up until the cell ends\n", + "    plt.show()\n", + "    plt.close(ax.figure)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Accidents geographical distribution - by hour" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# One map per hour of day (0-23): all accidents over the outline of Israel\n", + "israel_outline = world[world.name == 'Israel']  # same basemap for every figure\n", + "for hour in range(0, 24):\n", + "    ax = israel_outline.plot(color='white', edgecolor='black', figsize=(16, 9))\n", + "    gdf_hour = gdf[gdf['accident_hour'] == hour]\n", + "    gdf_hour.plot(ax=ax, color='red', alpha=0.3)\n", + "    ax.set_title(hour)\n", + "    ax.figure.savefig(f\"all_accidents_hour_of_day{hour}.png\")\n", + "    # 24 figures left open would exceed matplotlib's default 20-figure warning threshold\n", + "    plt.show()\n", + "    plt.close(ax.figure)" + ] + }, + { +
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## Binned coordinates" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Binning coordinates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Binned distributions\n", + "# Accurate DF (with accuracy filter)\n", + "accurate_df = df[df['location_accuracy'] == 1].reset_index().drop_duplicates('provider_and_id').reset_index()\n", + "\n", + "N_LAT_EDGES = 30  # bin edges along latitude -> 29 bins\n", + "N_LON_EDGES = 20  # bin edges along longitude -> 19 bins\n", + "\n", + "\n", + "def bin_to_midpoints(values, n_edges):\n", + "    \"\"\"Snap each value to the midpoint of its bin.\n", + "\n", + "    Bins are delimited by n_edges equally spaced edges between the smallest and\n", + "    largest non-missing value of the Series; missing values stay NaN.\n", + "    Returns a float Series aligned with values.\n", + "    \"\"\"\n", + "    # Series.min()/max() skip NaN, unlike the builtin min()/max()\n", + "    edges = np.linspace(values.min(), values.max(), n_edges)\n", + "    midpoints = (edges[:-1] + edges[1:]) / 2\n", + "    # include_lowest keeps the minimum itself inside the first right-closed bin\n", + "    binned = pd.cut(values, edges, labels=midpoints, right=True, include_lowest=True)\n", + "    return binned.astype(float)\n", + "\n", + "\n", + "accurate_df['lon_cut'] = bin_to_midpoints(accurate_df.longitude, N_LON_EDGES)\n", + "accurate_df['lat_cut'] = bin_to_midpoints(accurate_df.latitude, N_LAT_EDGES)\n", + "\n", + "# One shapely Point per row at the bin midpoint, (x, y) = (longitude, latitude)\n", + "accurate_df['Binned_Coordinates'] = list(zip(accurate_df.lon_cut, accurate_df.lat_cut))\n", + "accurate_df['Binned_Coordinates'] = accurate_df['Binned_Coordinates'].apply(Point)\n", + "# Creating GDF\n", + "accurate_gdf = geopandas.GeoDataFrame(accurate_df, geometry='Binned_Coordinates')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Accidents geographical distribution - by year (binned)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Binned\n", + "# One map per year: binned accident locations over the outline of Israel\n", + "israel_outline = world[world.name == 'Israel']  # same basemap for every figure\n", + "for year in range(2008, 2019):\n", + "    ax = israel_outline.plot(color='white', edgecolor='black', figsize=(16, 9))\n", + "    gdf_year = accurate_gdf[accurate_gdf['accident_year'] == year]\n", + "    gdf_year.plot(ax=ax, color='red', alpha=0.3)\n", + "    ax.set_title(year)\n", + "    # Own file name: all_accidents_year_*.png is already written by the un-binned plots\n", + "    ax.figure.savefig(f\"binned_accidents_year_{year}.png\")\n", + "    # Show the figure now and release it, so figures do not pile up until the cell ends\n", + "    plt.show()\n", + "    plt.close(ax.figure)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Accidents geographical distribution - by hour (binned)" + ] + }, + { + "cell_type": "code", + "execution_count":
null, + "metadata": {}, + "outputs": [], + "source": [ + "# One map per hour of day (0-23): binned accident locations over the outline of Israel\n", + "israel_outline = world[world.name == 'Israel']  # same basemap for every figure\n", + "for hour in range(0, 24):\n", + "    ax = israel_outline.plot(color='white', edgecolor='black', figsize=(16, 9))\n", + "    gdf_hour = accurate_gdf[accurate_gdf['accident_hour'] == hour]\n", + "    gdf_hour.plot(ax=ax, color='red', alpha=0.3)\n", + "    ax.set_title(hour)\n", + "    # Own file name: all_accidents_hour_of_day*.png is already written by the un-binned plots\n", + "    ax.figure.savefig(f\"binned_accidents_hour_of_day{hour}.png\")\n", + "    # 24 figures left open would exceed matplotlib's default 20-figure warning threshold\n", + "    plt.show()\n", + "    plt.close(ax.figure)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The same can be done for lethal accidents only (by applying the `accident_severity == 1` filter from section 1.1, *Looking at lethal accidents*, before binning)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}