Skip to content

Commit

Permalink
Challenge 7 (sort of) upload 1
Browse files Browse the repository at this point in the history
  • Loading branch information
neuhofmo authored Mar 10, 2019
1 parent 7ca96cf commit 4318e4b
Showing 1 changed file with 329 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Accidents and Lethal Accidents - Geographical Analysis\n",
"Ori Moisis, Tal Peleg, Aviram Stern, Noam Hershtig & Moran Neuhof"
]
},
{
"cell_type": "markdown",
"metadata": {
"toc": true
},
"source": [
"<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
"<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Accidents-and-Lethal-Accidents---Geographical-Analysis\" data-toc-modified-id=\"Accidents-and-Lethal-Accidents---Geographical-Analysis-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Accidents and Lethal Accidents - Geographical Analysis</a></span><ul class=\"toc-item\"><li><span><a href=\"#Looking-at-lethal-accidents\" data-toc-modified-id=\"Looking-at-lethal-accidents-1.1\"><span class=\"toc-item-num\">1.1&nbsp;&nbsp;</span>Looking at lethal accidents</a></span><ul class=\"toc-item\"><li><span><a href=\"#Accidents-geographical-distribution---by-year\" data-toc-modified-id=\"Accidents-geographical-distribution---by-year-1.1.1\"><span class=\"toc-item-num\">1.1.1&nbsp;&nbsp;</span>Accidents geographical distribution - by year</a></span></li><li><span><a href=\"#Accidents-geographical-distribution---by-hour\" data-toc-modified-id=\"Accidents-geographical-distribution---by-hour-1.1.2\"><span class=\"toc-item-num\">1.1.2&nbsp;&nbsp;</span>Accidents geographical distribution - by hour</a></span></li></ul></li><li><span><a href=\"#Looking-at-all-accidents\" data-toc-modified-id=\"Looking-at-all-accidents-1.2\"><span class=\"toc-item-num\">1.2&nbsp;&nbsp;</span>Looking at all accidents</a></span><ul class=\"toc-item\"><li><span><a href=\"#Accidents-geographical-distribution---by-year\" data-toc-modified-id=\"Accidents-geographical-distribution---by-year-1.2.1\"><span class=\"toc-item-num\">1.2.1&nbsp;&nbsp;</span>Accidents geographical distribution - by year</a></span></li><li><span><a href=\"#Accidents-geographical-distribution---by-hour\" data-toc-modified-id=\"Accidents-geographical-distribution---by-hour-1.2.2\"><span class=\"toc-item-num\">1.2.2&nbsp;&nbsp;</span>Accidents geographical distribution - by hour</a></span></li></ul></li><li><span><a href=\"#Binned-coordinates\" data-toc-modified-id=\"Binned-coordinates-1.3\"><span class=\"toc-item-num\">1.3&nbsp;&nbsp;</span>Binned coordinates</a></span><ul class=\"toc-item\"><li><span><a href=\"#Binning-coordinated\" data-toc-modified-id=\"Binning-coordinated-1.3.1\"><span class=\"toc-item-num\">1.3.1&nbsp;&nbsp;</span>Binning coordinated</a></span></li><li><span><a href=\"#Accidents-geographical-distribution---by-year-(binned)\" data-toc-modified-id=\"Accidents-geographical-distribution---by-year-(binned)-1.3.2\"><span class=\"toc-item-num\">1.3.2&nbsp;&nbsp;</span>Accidents geographical distribution - by year (binned)</a></span></li><li><span><a href=\"#Accidents-geographical-distribution---by-hour-(binned)\" data-toc-modified-id=\"Accidents-geographical-distribution---by-hour-(binned)-1.3.3\"><span class=\"toc-item-num\">1.3.3&nbsp;&nbsp;</span>Accidents geographical distribution - by hour (binned)</a></span></li></ul></li></ul></li></ul></div>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Imports\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import pandas as pd\n",
"import os\n",
"import geopandas\n",
"from shapely.geometry import Point\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load file\n",
"df = pd.read_csv('anyway_tables_csv_updated/involved_markers_hebrew.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Preparing coordinates in dataframe\n",
"df['Coordinates'] = list(zip(df.longitude, df.latitude))\n",
"df['Coordinates'] = df['Coordinates'].apply(Point)\n",
"world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Looking at lethal accidents"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Subsetting only lethal accidents\n",
"lethal_df = df[df['accident_severity']==1].reset_index().drop_duplicates('provider_and_id').reset_index()\n",
"lethal_gdf = geopandas.GeoDataFrame(lethal_df, geometry='Coordinates')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accidents geographical distribution - by year"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i, year in enumerate(range(2008, 2019)):\n",
" ax = world[world.name == 'Israel'].plot(color='white', edgecolor='black', figsize=(16,9))\n",
" lethal_df_year = lethal_gdf[lethal_gdf['accident_year'] == year]\n",
" lethal_df_year.plot(ax=ax, color='red',alpha=0.3)\n",
" plt.title(year)\n",
" plt.savefig(f\"{year}.png\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accidents geographical distribution - by hour"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i, hour in enumerate(range(0, 24)):\n",
" ax = world[world.name == 'Israel'].plot(color='white', edgecolor='black', figsize=(16,9))\n",
" lethal_gdf_hour = lethal_gdf[lethal_gdf['accident_hour'] == hour]\n",
" lethal_gdf_hour.plot(ax=ax, color='red',alpha=0.3)\n",
" plt.title(hour)\n",
" plt.savefig(f\"hour_of_day{hour}.png\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Looking at all accidents"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gdf = geopandas.GeoDataFrame(df, geometry='Coordinates')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accidents geographical distribution - by year"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i, year in enumerate(range(2008, 2019)):\n",
" ax = world[world.name == 'Israel'].plot(color='white', edgecolor='black', figsize=(16,9))\n",
" gdf_year = gdf[gdf['accident_year'] == year]\n",
" gdf_year.plot(ax=ax, color='red',alpha=0.3)\n",
" plt.title(year)\n",
" plt.savefig(f\"all_accidents_year_{year}.png\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accidents geographical distribution - by hour"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i, hour in enumerate(range(0, 24)):\n",
" ax = world[world.name == 'Israel'].plot(color='white', edgecolor='black', figsize=(16,9))\n",
" gdf_hour = gdf[gdf['accident_hour'] == hour]\n",
" gdf_hour.plot(ax=ax, color='red',alpha=0.3)\n",
" plt.title(hour)\n",
" plt.savefig(f\"all_accidents_hour_of_day{hour}.png\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Binned coordinates"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Binning coordinated"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Binned distributions\n",
"# Accurate DF (with accuracy filter)\n",
"accurate_df = df[df['location_accuracy'] == 1].reset_index().drop_duplicates('provider_and_id').reset_index()\n",
"\n",
"lat_cut = pd.cut(accurate_df.latitude, np.linspace(min(accurate_df.latitude), max(accurate_df.latitude), 30), right=True).apply(lambda x: x.mid)\n",
"lon_cut = pd.cut(accurate_df.longitude, np.linspace(min(accurate_df.longitude), max(accurate_df.longitude), 20), right=True).apply(lambda x: x.mid)\n",
"\n",
"accurate_df['lon_cut'] = lon_cut\n",
"accurate_df['lat_cut'] = lat_cut\n",
"\n",
"accurate_df['Binned_Coordinates'] = list(zip(accurate_df.lon_cut, accurate_df.lat_cut))\n",
"accurate_df['Binned_Coordinates'] = accurate_df['Binned_Coordinates'].apply(Point)\n",
"# Creating GDF\n",
"accurate_gdf = geopandas.GeoDataFrame(accurate_df, geometry='Binned_Coordinates')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accidents geographical distribution - by year (binned)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Binned\n",
"for i, year in enumerate(range(2008, 2019)):\n",
" ax = world[world.name == 'Israel'].plot(color='white', edgecolor='black', figsize=(16,9))\n",
" gdf_year = accurate_gdf[accurate_gdf['accident_year'] == year]\n",
" gdf_year.plot(ax=ax, color='red',alpha=0.3)\n",
" plt.title(year)\n",
" plt.savefig(f\"all_accidents_year_{year}.png\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accidents geographical distribution - by hour (binned)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i, hour in enumerate(range(0, 24)):\n",
" ax = world[world.name == 'Israel'].plot(color='white', edgecolor='black', figsize=(16,9))\n",
" gdf_hour = accurate_gdf[accurate_gdf['accident_hour'] == hour]\n",
" gdf_hour.plot(ax=ax, color='red',alpha=0.3)\n",
" plt.title(hour)\n",
" plt.savefig(f\"all_accidents_hour_of_day{hour}.png\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same can be done for lethal accidents only (by repeating the filter in 1.1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": true,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 4318e4b

Please sign in to comment.