From a3526a71507f762d36b205275230d73b5e94cf18 Mon Sep 17 00:00:00 2001 From: ChayneAzriel <61120310+ChayneAzriel@users.noreply.github.com> Date: Tue, 24 Nov 2020 09:12:02 +0200 Subject: [PATCH] Create case_study_250020.ipynb --- case_study_250020.ipynb | 1047 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 1047 insertions(+) create mode 100644 case_study_250020.ipynb diff --git a/case_study_250020.ipynb b/case_study_250020.ipynb new file mode 100644 index 0000000..6fd657e --- /dev/null +++ b/case_study_250020.ipynb @@ -0,0 +1,1047 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np +import scipy +from scipy import stats +from scipy.stats import chi2 + +#importing data +file_path = 'involved_markers_hebrew.csv' +involved_markers_hebrew = pd.read_csv(file_path) +data = involved_markers_hebrew + +#data functions +def filtered_data(data, column): + data_filtered = data.copy() + data_filtered.sort_values(column) + data_filtered.drop_duplicates(subset = column, keep = 'first', inplace = True) + return data_filtered + +def road_section(data, section_no): + road_section = (data['road_segment_id']) == int(section_no) + data_section = data[road_section] + return data_section +def road_type(data,type_no): + type_no = (data['road_type']) == int(type_no) + data_road = data[type_no] + return data_road +def road_street(street_no): + road_street = (data['street1']) == int(street_no) + data_street = data[road_street] + return data_street + +def data_years(data,start_year, end_year): + data_years = (data['accident_year'] >= int(start_year)) & (data['accident_year'] <= int(end_year)) + return data[data_years] + +def data_fatal(data): + data_fatal = data[data['accident_severity'] == 1] + return data_fatal + +def fatal_road_section(section_no): + road_section = (data_fatal['road_segment_id']) == int(section_no) + data_fatal_section = data_fatal[road_section] + return data_fatal_section + + #creating different values for variables + +data['involve_vehicle_type_t'] = data['involve_vehicle_type'].replace(to_replace = {1:1,2:2,3:3,4:4,5:4,6:4,7:4,8:5,9:5,10:5,11:5,12:1,13:5,14:5,15:5,16:5,17:5,18:5,19:5,20:5,21:5,22:5,23:5,24:4,25:4}) + +'''1==Private car, 2==Truck, 3==Motorcycle, 4==Bus, 5==Cab, 6==Other''' +data['involve_vehicle_type'] = data['involve_vehicle_type'].replace(to_replace = {1:1,2:2,3:2,4:2,5:2,6:2,7:2,8:3,9:3,10:3,11:4,12:5,13:6,14:6,15:6,16:6,17:6,18:4,19:3,20:3,21:10,22:6,23:6,24:2,25:2}) + +'''1==day light, 2==night light, 3==night no sight, 4==night, 5==twilight''' +data['road_light'] = data['road_light'].replace(to_replace = {1:1,2:3,3:2,4:3,5:3,6:4,7:3,8:3,9:3,10:5,11:1}) + +data['injury_severity'] =data['injury_severity'].replace(to_replace={1:3,2:2,3:1}).astype('category') + +data['accident_severity'] =data['accident_severity'].replace(to_replace={1:3,2:2,3:1}).astype('category') + +data['medical_type'] = data['medical_type'].replace(to_replace={9:0}).astype('category') + +data['speed_limit'] =data['speed_limit'].replace(to_replace={0:0, 1:50, 2:60, 3:70, 4:80, 5:90, 6:100, 7:110, 8:120}) + +data['road_width'] = data['road_width'].replace(to_replace={0:0, 1:5, 2:7, 3:10.5, 4:14, 5:20}) + +data['engine_volume_hebrew'] = data['engine_volume_hebrew'].astype('category') + +#creating unique data for markers analysis (reduces from 1897266 to 771638 [duplicates = 1125628]) +data_filtered = filtered_data(data, 'provider_and_id') + +##datat for accident severity harsh & fatal +data_fatal = data_fatal(data) +data_filtered_fatal = filtered_data(data_fatal, 'provider_and_id') + +#data different types of the road +data_4 = road_type(data, 4) +data_3 = road_type(data, 3) + +data_filtered_4 = road_type(data_filtered, 4) +data_filtered_3 = road_type(data_filtered, 3) + +#create different data frames by road section and road type for involve +#road_segment_name: צומת דימונה - צומת בית אשל +data_4_250020 = road_section(data_4, 250020) + +road_section_drop_4 = data_4.road_segment_id != (250020) +data_4 = data_4[road_section_drop_4] + +data_3_250020 = road_section(data_3, 250020) + +road_section_drop_3 = data_3.road_segment_id != (250020) +data_3 = data_3[road_section_drop_3] + +data_250020 = road_section(data, 250020) + +road_section_drop = data.road_segment_id != (250020) +data = data[road_section_drop] + +#create different data filtered frames by road section and road type for markers +data_filtered_4_250020 = road_section(data_filtered_4, 250020) + +road_section_drop_4 = data_filtered_4.road_segment_id != (250020) +data_filtered_4 = data_filtered_4[road_section_drop_4] + +data_filtered_3_250020 = road_section(data_filtered_3, 250020) + +road_section_drop_3 = data_filtered_3.road_segment_id != (250020) +data_filtered_3 = data_filtered_3[road_section_drop_3] + +df1 = data_filtered_4_250020.groupby(['accident_severity']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['250020'] +# Data to plot +labels = 'minor','harsh','fatal' +sizes = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0]] +colors = ['#f9f998','#f9ac86','#fc999c'] +#cfac86 +# Plot +plt.pie(sizes, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +plt.title("accident severity on road 25", loc='left', fontsize=10.5, fontweight=0, color='k') + +#plt.savefig(r'slide_25.1.1.png') +plt.show() + +df1 = data_filtered_4.groupby(['accident_severity']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['4'] +# Data to plot +labels = 'minor','harsh','fatal' +sizes = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0]] +colors = ['#f9f998','#f9ac86','#fc999c'] +#cfac86 +# Plot +plt.pie(sizes, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +plt.title("accident severity on other interurban roads", loc='left', fontsize=10.5, fontweight=0, color='k') +#plt.savefig(r'slide_25.1.2.png') +plt.show() + +df1 = data_4_250020.groupby(['involve_vehicle_type']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['250020'] +df + +height = [df.iloc[0,0], df.iloc[1,0], df.iloc[3,0], df.iloc[4,0], df.iloc[2,0], df.iloc[5,0]] +bars = ('private','truck','bus', 'cab','motorcycle','other') +y_pos = np.arange(len(bars)) + +# Create horizontal bars +plt.barh(y_pos, height, color = '#f9ac86') + +# Create names on the y-axis +plt.yticks(y_pos, bars) + + +y = [0,1,2,3,4,5] +label = [916, 250,105,21,9, 68] +for i in range(0,6): + plt.text(x = label[i]+2 , y = y[i], s = label[i], size = 11, color = 'k') + +#title +plt.title("involve vehicle type", loc='left', fontsize=10.5, fontweight=0, color='k') + +plt.box(on=None) + +#plt.savefig(r'slide_25.2.png') + +# Show graphic +plt.show() + +df1 = data_4_250020.groupby(['population_type']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['250020'] +df.apply(lambda x: x/x.sum()).round(2) +height = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0], df.iloc[3,0]] +bars = ('jewish', 'arab', 'other', 'forien') +y_pos = np.arange(len(bars)) + +# Create horizontal bars +plt.barh(y_pos, height, color = '#f9f998') + +# Create names on the y-axis +plt.yticks(y_pos, bars) + +x = 680 +y = [0,1,2,3] +label = ['51%', '46%', '2%', '1%'] +for i in range(0,4): + plt.text(x = x , y = y[i], s = label[i], size = 11, color = 'k') + +#title +plt.title("road 25 population type", loc='left', fontsize=10.5, fontweight=0, color='k') + +plt.box(on=None) + +#plt.savefig(r'slide_25.3.png') + +# Show graphic +plt.show() + +df1 = data_4_250020.groupby(['accident_year', 'injury_severity']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = True) +df.columns =['year','severity','count'] +df = pd.pivot_table(df, values='count', index=['year'], + columns=['severity'], aggfunc=np.sum) +df.columns =['not harmed','Slightly', 'harsh', 'dead'] +df_idx = df.reset_index(inplace = True) +df.fillna(value = 0, inplace = True) + +df['sum_injured'] = df['harsh']+df['dead'] +N = 4 +l = [] +for x in df.sum_injured: + l.append(np.convolve(x, np.ones((N,))/N, mode='valid')[0]) + +ld = pd.DataFrame(l, columns =['moving_avg'] ) +df['4_yr_moving_avg'] = ld +df = df.drop('sum_injured', axis =1) +df + +plt.style.use('seaborn-white') + +fig, ax = plt.subplots(figsize=(8, 4)) + +# create a color palette +palette = ['#f9ac86','#fc999c','#cfac86'] + +#plt.get_cmap('Set2') + +# multiple line plot +num=0 +for column in df.iloc[:,3:6]: + num+=1 + plt.plot(df.iloc[:,0], df[column], marker='', color=palette[num-1], linewidth=1, alpha=0.9, label=column) + +plt.xticks([2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019]) + +ax.get_xaxis().get_major_formatter().set_useOffset(False) + + +# Add legend +plt.legend(loc='upper left') + +plt.box(on=None) + +# Add titles +plt.title("count of harshly injured and dead has increased in the last years", loc='left', fontsize=11, fontweight=0, color='k') +plt.xlabel("year") +plt.ylabel("injured count") + +#plt.savefig(r'slide_25.4.1.png') + +#graph 3 +data_08_10_250020 = data_years(data_4_250020, 2008, 2010) +data_11_13_250020 = data_years(data_4_250020, 2011, 2013) +data_14_16_250020 = data_years(data_4_250020, 2014, 2016) +data_17_19_250020 = data_years(data_4_250020, 2017, 2019) + +df1 = data_08_10_250020.groupby('injury_severity').size() +df2 = data_11_13_250020.groupby('injury_severity').size() +df3 = data_14_16_250020.groupby('injury_severity').size() +df4 = data_17_19_250020.groupby('injury_severity').size() +df1, df2, df3, df4 + +# Create bars +barWidth = 0.5 +bars1 = df1[:] +bars2 = df2[:] +bars3 = df3[:] +bars4 = df4[:] +bars5 = bars1 + bars2 + bars3 +bars4 + +# The X position of bars +r1 = [1,1.5] +r2 = [3,3.5] +r3 = [5,5.5] +r4 = [7,7.5] +r5 = r1 + r2 + r3 +r4 + +#figure size +plt.figure(figsize=(8,4)) + +# Create barplot +plt.bar(r1[0], bars1[2], width = barWidth, color = ('#f9f99f')) +plt.bar(r1[1], bars1[3], width = barWidth, color = ('#f9ac8f')) +plt.bar(r2[0], bars2[2], width = barWidth, color = ('#f9f99f')) +plt.bar(r2[1], bars2[3], width = barWidth, color = ('#f9ac8f')) +plt.bar(r3[0], bars3[2], width = barWidth, color = ('#f9f99f')) +plt.bar(r3[1], bars3[3], width = barWidth, color = ('#f9ac8f')) +plt.bar(r4[0], bars4[2], width = barWidth, color = ('#fbf981'), label='harsh') +plt.bar(r4[1], bars4[3], width = barWidth, color = ('#fbac71'), label='deadly') + +# Create legend +plt.legend(loc='upper left') + +# Text below each barplot +plt.xticks([r + barWidth*5 for r in [-1.2, 0.8, 2.8, 4.8]], [' 2008-10 ', ' 2011-13 ', ' 2014-16 ', '2017-19']) + +# Create labels +label = ['11', '5', '11', '5', '11', '8', '14', '9'] + +# Text on the top of each barplot +for i in range(len(r5)): + plt.text(x = r5[i]-0.1 , y = 3, s = label[i], size = 11) + +#Adjust the margins +plt.subplots_adjust(bottom= 0.5, top = 0.95) + +plt.title(" On road 25 harsh and deadly injured increased significantly over years", loc='left', fontsize=12, fontweight=0, color=('k')) + +plt.box(on=None) + +plt.savefig(r'slide_25.4.2.png') + +# Show graphic +plt.show() + +#graph 12 +''' +significant differencess were found for accident of collision with a still object +the folowing graphs are presenting the differences (in percents) between road 250020 to Inter-urban roads count of accidents +of collision with a still object. +collisions with a still object are twice more common on road 250020 +''' +data_filtered_4_250020['collision_still'] = data_filtered_4_250020['accident_type'].replace(to_replace = {1:1,2:1,3:1,4:1,5:1,7:1,8:2,9:1,10:1,11:1,12:1,13:1,15:1,17:1,19:1}) +df1 = data_filtered_4_250020.groupby(['collision_still']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['250020'] + +data_filtered_4['collision_still'] = data_filtered_4['accident_type'].replace(to_replace = {1:1,2:1,3:1,4:1,5:1,6:1,7:1,8:2,9:1,10:1,11:1,12:1,13:1,14:1,15:1,16:1,17:1,18:1,19:1,20:1}) +df2 = data_filtered_4.groupby(['collision_still']).size() +table = pd.DataFrame(df2) +df_idx = table.reset_index(inplace = False) +df.insert(1,'4',table) + +df + +# Data to plot +labels = 'collision_still_4', 'other_4' +sizes = [df.iloc[1,1], df.iloc[0,1]] +colors = ['#f9ac86','#cfac86'] +explode = (0.2, 0) # explode 1st slice + +# Plot +plt.pie(sizes, explode=explode, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +#plt.savefig(r'slide_25.5.1.png') + +plt.show() + +# Data to plot +labels = 'collision_still_250020', 'other_250020' +sizes = [df.iloc[1,0], df.iloc[0,0]] +colors = ['#f9ac86','#cfac86'] +explode = (0.2, 0) # explode 1st slice + +# Plot +plt.pie(sizes, explode=explode, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +#plt.savefig(r'slide25.5.2.png') + +plt.show() + +#graph 12 +df1 = data_filtered_4_250020.groupby(['road_object']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['250020'] +# Data to plot +labels = 'pole', 'road sign', 'buliding', 'safety fence', 'other' +sizes = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0], df.iloc[3,0], df.iloc[4,0]] +colors = ['#fc999f','#f9fc99','#f9ac86','#cfac86', '#afac86'] + + +# Plot +plt.pie(sizes, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +plt.title("accident collision objects road 25", loc='left', fontsize=10.5, fontweight=0, color='k') + +#plt.savefig(r'slide_25.5.3.png') + +plt.show() + +df1 = data_filtered_4.groupby(['road_object']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['250020'] +# Data to plot +labels = 'pole', 'road sign', 'buliding', 'safety fence', 'other' +sizes = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0], df.iloc[3,0], df.iloc[4,0]] +colors = ['#fc999f','#f9fc99','#f9ac86','#cfac86', '#afac86'] + + +# Plot +plt.pie(sizes, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +plt.title("accident collision objects all interurban", loc='left', fontsize=10.5, fontweight=0, color='k') + +#plt.savefig(r'slide_25.5.4.png') + +plt.show() + +df1 =data_filtered_4_250020.groupby(['object_distance']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['count_road_25'] +df2 = data_filtered_4.groupby(['object_distance', 'object_distance_hebrew']).size() +table = pd.DataFrame(df2) +df_idx = table.reset_index(inplace = True) +df.insert(1,'count_other_interurban',table[0]) +df.insert(0,'object_distance',table.object_distance_hebrew) +df + +#creating data of all roads including 25 +data_4_all = road_type(data, 4) +data_filtered_4_all = road_type(data_filtered, 4) + +lane1 = (data_filtered_4_all['one_lane'] > 1) & (data_filtered_4_all['one_lane'] < 4) +df1 = data_filtered_4_all[lane1].groupby(['accident_severity']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['accident_count'] +# Data to plot +labels = 'minor','harsh','fatal' +sizes = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0]] +colors = ['#bfa9a6','#cc99af','#f9a1a6'] + +# Plot +plt.pie(sizes, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +plt.title("accident severity on one lane roads - all interurban", loc='left', fontsize=10.5, fontweight=0, color='k') + +#plt.savefig(r'slide_25.6.1.png') +plt.show() + +lane2 = (data_filtered_4_all['multi_lane'] > 1) & (data_filtered_4_all['multi_lane'] < 5) +df1 = data_filtered_4_all[lane2].groupby(['accident_severity']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['accident_count'] +# Data to plot +labels = 'minor','harsh','fatal' +sizes = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0]] +colors = ['#bfa9a6','#cc99af','#f9a1a6'] + +# Plot +plt.pie(sizes, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +plt.title("accident severity on multi lane roads - all interurban", loc='left', fontsize=10.5, fontweight=0, color='k') +#plt.savefig(r'slide_25.6.2.png') +plt.show() + +data_filtered_4_all.groupby(['road_object','accident_severity']).size() + + +data_filtered_4_all['safety_fence'] = data_filtered_4_all['road_object'].replace(to_replace = {1:1,2:1,3:1,4:1,5:1,6:2,7:1,8:1,9:3}) +df1 = data_filtered_4_all.groupby(['safety_fence', 'accident_severity']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = True) +df.columns =['accident_type','accident_severity','accident_count'] + +df + +# Data +r = [0,1,2] +df1 = df.iloc[[0,1,2],2] +df1 = df1.reset_index(drop = True) +df2 = df.iloc[[3,4,5],2] +df2 = df2.reset_index(drop = True) +df3 = df.iloc[[6,7,8],2] +df3 = df3.reset_index(drop = True) +df1 = pd.DataFrame(df1) +df1.insert(1,'fatal',df3) +df1.insert(1,'harsh',df2) +raw_data = {'firstBars':df1.iloc[:,2],'secondBars': df1.iloc[:,0], 'thirdBars':df1.iloc[:,1]} +df4 = pd.DataFrame(raw_data) + +# From raw value to percentage +totals = [i+j+k for i,j,k in zip(df4['firstBars'], df4['secondBars'], df4['thirdBars'])] +firstBars = [i / j * 100 for i,j in zip(df4['firstBars'], totals)] +secondBars = [i / j * 100 for i,j in zip(df4['secondBars'], totals)] +thirdBars = [i / j * 100 for i,j in zip(df4['thirdBars'], totals)] + +# plot +barWidth = 0.85 +names = ('other_accidents','collision_other','collision_safety_fence') +# Create first Bars +plt.bar(r, firstBars, color='#bfa9a6', edgecolor='white', width=barWidth, label="minor") +# Create second Bars +plt.bar(r, secondBars, bottom=firstBars, color='#cc99af', edgecolor='white', width=barWidth, label="harsh") +# Create third Bars +plt.bar(r, secondBars, bottom=[i+j for i,j in zip(firstBars, secondBars)], color='#f9a1a6', edgecolor='white', width=barWidth, label="fatal") + +# Custom x axis +plt.xticks(r, names) +plt.xlabel("accident type") + +# Text of each barplot +label1 = ['95%','87%','90%'] +label2 = ['4%', '10%', '8%'] +label3 = ['1%', '3%', '2%'] + + +for i in range(len(r)): + plt.text(x = r[i]-0.1 , y = 60, s = label1[i], size = 10) + plt.text(x = r[i]-0.1 , y = 89, s = label2[i], size = 10) + plt.text(x = r[i]-0.1 , y =98, s = label3[i], size = 10) + +# Add a legend +plt.legend(loc='upper left', bbox_to_anchor=(1,1), ncol=1) + +plt.title("90% percent of the collision with safety fence accidents are minor \n all inter urban roads", loc='left', fontsize=12, fontweight=0, color=('k')) + +plt.box(on=None) +plt.yticks([]) + +#plt.savefig(r'slide_25.7.1.png', bbox_inches='tight') +# Show graphic +plt.show() + +#data_4_all = road_type(data, 4) +#data_filtered_4_all = road_type(data_filtered, 4) + +df1 = data_filtered_4_all.groupby(['accident_type','accident_severity']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = True) +df.columns =['accident_type','accident_severity','accident_count'] +df = pd.pivot_table(df, values='accident_count', index=['accident_type'], + columns=['accident_severity'], aggfunc=np.sum) +df.columns =['minor','harsh', 'fatal'] + +a = [] +for i in range(19): + b = df.iloc[i,:].sum() + for j in range(3): + c = df.iloc[i,j]/b + a.append(c.round(2)) +ar = np.array(a) +ar = ar.reshape(19, 3) + +df1 = pd.DataFrame(ar) +df1.index = df.index +df1.columns = ['minor', 'harsh', 'fatal'] + +df1 = data_filtered_4_all.groupby(['accident_type','accident_severity']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = True) +df.columns =['accident_type','accident_severity','accident_count'] +df = pd.pivot_table(df, values='accident_count', index=['accident_type'], + columns=['accident_severity'], aggfunc=np.sum) +df.columns =['minor','harsh', 'fatal'] +df = df.fillna(value = 0) + +# Data to plot +labels = ['pedestrian', 'front side collision', 'Front to back', 'Side-side collision','Frontal Collision', 'Collision stopped vehicle','Collision parked vehicle', 'still collision', 'down road or sidewalk', 'overturning', 'skided', 'Injury to a passenger inside the vehicle', 'fall moving vehicle', 'back to front', 'back to side', 'animal', "car's luggage"] +sizes = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0], df.iloc[3,0], df.iloc[4,0], df.iloc[5,0], df.iloc[6,0], df.iloc[7,0], df.iloc[8,0], df.iloc[9,0],df.iloc[10,0], df.iloc[11,0], df.iloc[12,0], df.iloc[15,0], df.iloc[16,0], df.iloc[17,0], df.iloc[18,0]] +colors = ['#9eab99','#f9ac86','#fc999f','#f9fc99','#afac86','#e9acd9','#cfac86','#bc999f','#b9fc99','#b9eff9','#a294f9','#fcac86','#fcc99f','#dc2c99','#bc1c86','#eeabb6','#9e2bb3'] + +# Plot +patches, texts, pct = plt.pie(sizes, colors=colors, autopct='%1d%%', startangle=90) +plt.legend(patches, labels, loc=(0.85, 0.12)) + +plt.axis('equal') + +plt.title("minor accident types", loc='left', fontsize=13, fontweight=0, color='k') + +#plt.savefig(r'slide_25.11.3.png') + +plt.show() + +# Data to plot +labels = ['pedestrian', 'front side collision', 'Front to back', 'Side-side collision','Frontal Collision', 'Collision stopped vehicle','Collision parked vehicle', 'still collision', 'down road or sidewalk', 'overturning', 'skided', 'Injury to a passenger inside the vehicle', 'fall moving vehicle', 'back to front', 'back to side', 'animal', "car's luggage"] +sizes = [df.iloc[0,1], df.iloc[1,1], df.iloc[2,1], df.iloc[3,1], df.iloc[4,1], df.iloc[5,1], df.iloc[6,1], df.iloc[7,1], df.iloc[8,1], df.iloc[9,1],df.iloc[10,1], df.iloc[11,1], df.iloc[12,1], df.iloc[15,1], df.iloc[16,1], df.iloc[17,1], df.iloc[18,1]] +colors = ['#9eab99','#f9ac86','#fc999f','#f9fc99','#afac86','#e9acd9','#cfac86','#bc999f','#b9fc99','#b9eff9','#a294f9','#fcac86','#fcc99f','#dc2c99','#bc1c86','#eeabb6','#9e2bb3'] + +# Plot +patches, texts, pct = plt.pie(sizes, colors=colors, autopct='%1d%%', startangle=90) +plt.legend(patches, labels, loc=(0.85, 0.12)) + +plt.axis('equal') + +plt.title("harsh accident types", loc='left', fontsize=13, fontweight=0, color='k') + +#plt.savefig(r'slide_25.11.3.png') + +plt.show() + +# Data to plot +labels = ['pedestrian', 'front side collision', 'Front to back', 'Side-side collision','Frontal Collision', 'Collision stopped vehicle','Collision parked vehicle', 'still collision', 'down road or sidewalk', 'overturning', 'skided', 'Injury to a passenger inside the vehicle', 'fall moving vehicle', 'back to front', 'back to side', 'animal', "car's luggage"] +sizes = [df.iloc[0,2], df.iloc[1,2], df.iloc[2,2], df.iloc[3,2], df.iloc[4,2], df.iloc[5,2], df.iloc[6,2], df.iloc[7,2], df.iloc[8,2], df.iloc[9,2],df.iloc[10,2], df.iloc[11,2], df.iloc[12,2], df.iloc[15,2], df.iloc[16,2], df.iloc[17,2], df.iloc[18,2]] +colors = ['#9eab99','#f9ac86','#fc999f','#f9fc99','#afac86','#e9acd9','#cfac86','#bc999f','#b9fc99','#b9eff9','#a294f9','#fcac86','#fcc99f','#dc2c99','#bc1c86','#eeabb6','#9e2bb3'] + +# Plot +patches, texts, pct = plt.pie(sizes, colors=colors, autopct='%1d%%', startangle=90) +plt.legend(patches, labels, loc=(0.85, 0.12)) + +plt.axis('equal') + +plt.title("fatal accident types", loc='left', fontsize=13, fontweight=0, color='k') + +#plt.savefig(r'slide_25.11.3.png') + +plt.show() + +''' +significant differencess were found for front to side collision. +the folowing graph is comparing the precents of accident front side collisions and other types of accidents +by accidents types on road 250020. +accident collisions relative part of harsh and fatal accidents is bigger than minor ones. +''' +#data_filtered_4_250020['front_side_collision'] = data_filtered_4_250020['accident_type'].replace(to_replace = {1:1,2:2,3:1,4:1,5:1,7:1,8:1,9:1,10:1,11:1,12:1,13:1,15:1,17:1,19:1}) +df1 = data_filtered_4_250020.groupby(['accident_severity','front_side_collision']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = True) +df.columns =['accident_severity','front_side_collision','count'] + +df + + +from matplotlib import rc +# Data +r = [0,1,2] +df1 = df.iloc[[1,3,5],2] +df1 = df1.reset_index(drop = True) +df2 = df.iloc[[0,2,4],2] +df2 = df2.reset_index(drop = True) +df1 = pd.DataFrame(df1) +df1.insert(1,'count all',df2) +raw_data = {'firstBars': df1.iloc[:,0], 'secondBars':df1.iloc[:,1]} +df3 = pd.DataFrame(raw_data) + +# From raw value to percentage +totals = [i+j for i,j in zip(df3['firstBars'], df3['secondBars'])] +firstBars = [i / j * 100 for i,j in zip(df3['firstBars'], totals)] +secondBars = [i / j * 100 for i,j in zip(df3['secondBars'], totals)] + +# plot +barWidth = 0.85 +names = ('minor','harsh','fatal') +# Create green Bars +plt.bar(r, firstBars, color='#f3bc87', edgecolor='white', width=barWidth, label="front-side collision") +# Create orange Bars +plt.bar(r, secondBars, bottom=firstBars, color='#f6ffb5', edgecolor='white', width=barWidth, label="other") + +# Custom x axis +plt.xticks(r, names) +plt.xlabel("accident severity") + +# Text of each barplot +label1 = ['22%','24%','52%'] +label2 = ['78%', '76%', '48%'] + +for i in range(len(r)): + plt.text(x = r[i]-0.1 , y = 9, s = label1[i], size = 10) + plt.text(x = r[i]-0.1 , y = 60, s = label2[i], size = 10) + +# Add a legend +plt.legend(loc='upper left', bbox_to_anchor=(1,1), ncol=1) + +plt.title("more than 50% of the fatal accidents are front-side collision on road 250020", loc='left', fontsize=12, fontweight=0, color=('k')) + +plt.box(on=None) +plt.yticks([]) + +#plt.savefig(r'slide_25.7.2.png', bbox_inches='tight') +# Show graphic +plt.show() + +#data_filtered_4_250020['front_side_collision'] = data_filtered_4_250020['accident_type'].replace(to_replace = {1:1,2:2,3:1,4:1,5:1,7:1,8:1,9:1,10:1,11:1,12:1,13:1,15:1,17:1,19:1}) +df1 = data_filtered_4_250020.groupby(['day_night','front_side_collision']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = True) +df.columns =['day_night','front_side_collision','count'] +df = pd.pivot_table(df, values='count', index=['day_night'], + columns=['front_side_collision'], aggfunc=np.sum) +df.columns =['Other_250020','front_side_collision_250020'] +#df = df.apply(lambda x: x/x.sum()).round(2) +df + +data_filtered_4['front_side_collision'] = data_filtered_4['accident_type'].replace(to_replace = {1:1,2:2,3:1,4:1,5:1,6:1,7:1,8:1,9:1,10:1,11:1,12:1,13:1,14:1,15:1,16:1,17:1,18:1,19:1,20:1}) +df2 = data_filtered_4.groupby(['day_night','front_side_collision']).size() +df3 = pd.DataFrame(df2) +df_idx = df3.reset_index(inplace = True) +df3.columns =['day_night','front_side_collision','count'] +df3 = pd.pivot_table(df3, values='count', index=['day_night'], + columns=['front_side_collision'], aggfunc=np.sum) +df3.columns =['Other_4','front_side_collision_4'] +#df3 = df3.apply(lambda x: x/x.sum()).round(2) +df3 + +# Data +r = [0,1] +raw_data = {'firstBars': df.front_side_collision_250020, 'secondBars':df3.front_side_collision_4} +df4 = pd.DataFrame(raw_data) +# From raw value to percentage +totals = [i+j for i,j in zip(df4['firstBars'], df4['secondBars'])] +firstBars = [i / j * 100 for i,j in zip(df4.firstBars, totals)] +secondBars = [i / j * 100 for i,j in zip(df4.secondBars, totals)] + +# plot +barWidth = 0.85 +names = ('road 25','inter urban roads') +# Create first Bars +plt.bar(r, firstBars, color='khaki', edgecolor='white', width=barWidth, label="day") +# Create second Bars +plt.bar(r, secondBars, bottom=firstBars, color='dimgray', edgecolor='white', width=barWidth, label="night") + +# Custom x axis +plt.xticks(r, names) +plt.xlabel("road") + +# Text of each barplot +label1 = ['63%','70%'] +label2 = ['37%', '30%'] + +for i in range(len(r)): + plt.text(x = r[i]-0.1 , y = 25, s = label1[i], size = 10) + plt.text(x = r[i]-0.1 , y = 60, s = label2[i], size = 10) + +# Add a legend +plt.legend(loc='upper left', bbox_to_anchor=(1,1), ncol=1) + +plt.title("on road 250020 front side collision accidents accurr more often at night", loc='left', fontsize=12, fontweight=0, color=('k')) + +plt.box(on=None) +plt.yticks([]) + +#plt.savefig(r'slide_25.8.png', bbox_inches='tight') +# Show graphic +plt.show() + +#map of front side accidents, more common near juctions +accu_frontside = (data_filtered_4_250020['accident_type'] == 2) & (data_filtered_4_250020['location_accuracy'] == 1) +data_filtered_4_250020[accu_frontside].groupby(['latitude','longitude']) + +import folium +m = folium.Map(location=[31.198543, 34.923482], zoom_start=12) +for row in data_filtered_4_250020[accu_frontside][['longitude','latitude']].values: + try: + folium.Circle(radius=10, + location = [row[1], row[0]] + ).add_to(m) + except ValueError: + continue + +display(m) + + +''' +significant differencess were found for overturn accidents +the folowing graphs are presenting the differences (in percents) between road 250020 to Inter-urban roads count of overturn accidents. +overturns are more common on road 250020 +''' +data_filtered_4_250020['overturned'] = data_filtered_4_250020['accident_type'].replace(to_replace = {1:1,2:1,3:1,4:1,5:1,7:1,8:1,9:1,10:2,11:1,12:1,13:1,15:1,17:1,19:1}) +df1 = data_filtered_4_250020.groupby(['overturned']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['250020'] + +data_filtered_4['overturned'] = data_filtered_4['accident_type'].replace(to_replace = {1:1,2:1,3:1,4:1,5:1,6:1,7:1,8:1,9:1,10:2,11:1,12:1,13:1,14:1,15:1,16:1,17:1,18:1,19:1,20:1}) +df2 = data_filtered_4.groupby(['overturned']).size() +table = pd.DataFrame(df2) +df_idx = table.reset_index(inplace = False) +df.insert(1,'4',table) + +df + +# prepare data +names='overturned_250020', 'other_250020' +size=[df.iloc[1,0],df.iloc[0,0].sum()] + +# Create a circle for the center of the plot +my_circle=plt.Circle((0,0), 0.7, color='white') + +# Give color names +plt.pie(size, labels=names,autopct='%1.1f%%', wedgeprops = { 'linewidth' : 0.5, 'edgecolor' : 'white' }, colors=['#fb999f','#f6ffb5']) +p=plt.gcf() +p.gca().add_artist(my_circle) + +#plt.savefig(r'slide_25.10.1.png', bbox_inches='tight') + +# prepare data +names='overturned_4', 'other_4' +size=[df.iloc[1,1],df.iloc[0,1].sum()] + +# Create a circle for the center of the plot +my_circle=plt.Circle((0,0), 0.7, color='white') + +# Give color names +plt.pie(size, labels=names,autopct='%1.1f%%', wedgeprops = { 'linewidth' : 0.5, 'edgecolor' : 'white' }, colors=['#fb999f','#f6ffb5']) +p=plt.gcf() +p.gca().add_artist(my_circle) + +#plt.savefig(r'slide_25.10.2.png', bbox_inches='tight') + +#7 +''' + +''' +truck = data_4_250020['accident_type'] == 10 +data_4_250020['involve_vehicle_type2'] = data_4_250020['involve_vehicle_type'].replace(to_replace = {1:1,2:2,3:3,4:3,5:1,6:3}) +df1 = data_4_250020[truck].groupby(['involve_vehicle_type2']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['accident_type_250020'] + +truck4 = data_4['accident_type'] == 10 +data_4['involve_vehicle_type2'] = data_4['involve_vehicle_type'].replace(to_replace = {1:1,2:2,3:3,4:3,5:3,6:3,7:3,8:3,9:3,10:3}) +df2 = data_4[truck4].groupby(['involve_vehicle_type2']).size() +table = pd.DataFrame(df2) +df_idx = table.reset_index(inplace = False) +df.insert(1,'accident_type_4',table) + +#df = df.apply(lambda x: x/x.sum()) + +df + +#graph 7 + +# Create bars +barWidth = 0.9 +bars1 = df.iloc[0,:] +bars2 = df.iloc[1,:] +bars3 = df.iloc[2,:] +bars4 = df + +# The X position of bars +r1 = [1,2] +r2 = [3,4] +r3 = [5,6] +r4 = r1 + r2 + r3 + + +# Create barplot +plt.bar(r1[0], bars1[0], width = barWidth, color = ('#f9f99f')) +plt.bar(r1[1], bars1[1], width = barWidth, color = ('#f9ac8f')) +plt.bar(r2[0], bars2[0], width = barWidth, color = ('#fbf970'), label='250020') +plt.bar(r2[1], bars2[1], width = barWidth, color = ('#fbac60'), label='4') +plt.bar(r3[0], bars3[0], width = barWidth, color = ('#f9f99f')) +plt.bar(r3[1], bars3[1], width = barWidth, color = ('#f9ac8f')) +# Note: the barplot could be created easily. See the barplot section for other examples. + +# Create legend +plt.legend(loc = 'upper right') + +# Text below each barplot with a rotation at 90° +plt.xticks([r + barWidth for r in range((6))], ['Private', 'Private', 'Truck', 'Truck', 'Other', 'Other'], rotation=90) + +# Create labels +label = ['61%', '62%', '30%', '21%', '8%', '16%'] +y = [0.61, 0.62, 0.30, 0.21, 0.08, 0.16] + +# Text on the top of each barplot +for i in range(len(r4)): + plt.text(x = r4[i]-0.3 , y = y[i]-0.03, s = label[i], size = 10, color = 'k') + +#Adjust the margins +plt.subplots_adjust(bottom= 0.2, top = 0.98) + +#title +plt.title("Trucks involved in overturned accidents on road 250020 more than on other inter urban roads", loc='left', fontsize=11, fontweight=0, color='k') + +plt.box(on=None) +plt.yticks([]) + +#plt.savefig(r'slide_25.11.1.png', bbox_inches='tight') + +# Show graphic +plt.show() + +overturn = data_filtered_4_250020['accident_type'] == 10 +df1 = data_filtered_4_250020[overturn].groupby(['involve_vehicle_type_t']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['accident_type_250020'] +# Data to plot +labels = 'private', 'Cargo up to 3.5 tons - Unified', 'Cargo up to 3.5 tons - not unified', 'Freight 3.6 to 34 tons','other' +sizes = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0], df.iloc[3,0], df.iloc[4,0] ] +colors = ['#cfac86','#f9ac86','#fc999f','#f9fc99','#afac86'] + + +# Plot +plt.pie(sizes, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +plt.title("Trucks types involved in overturned accidents on road 250020\n", loc='left', fontsize=11, fontweight=0, color='k') + +#plt.savefig(r'slide_25.11.2.png') + +plt.show() + +notoverturn = data_filtered_4_250020['accident_type'] != 10 +df1 = data_filtered_4_250020[notoverturn].groupby(['involve_vehicle_type_t']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['accident_type_250020'] +# Data to plot +labels = 'private', 'Cargo up to 3.5 tons - Unified', 'Cargo up to 3.5 tons - not unified', 'Freight 3.6 to 34 tons','other' +sizes = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0], df.iloc[3,0], df.iloc[4,0] ] +colors = ['#cfac86','#f9ac86','#fc999f','#f9fc99','#afac86'] + + +# Plot +plt.pie(sizes, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +plt.title("Trucks types involved in overturned accidents on other urban inter roads\n", loc='left', fontsize=11, fontweight=0, color='k') + +#plt.savefig(r'slide_25.11.3.png') + +plt.show() + +animal = data_filtered_4_250020['accident_type'] == 19 +df1 = data_filtered_4_250020[animal].groupby(['day_night']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['accident_type_250020'] + +animal4 = data_filtered_4['accident_type'] == 19 +df2 = data_filtered_4[animal4].groupby(['day_night']).size() +table = pd.DataFrame(df2) +df_idx = table.reset_index(inplace = False) +df.insert(1,'accident_type_4',table) + +df = df.apply(lambda x: x/x.sum()) + +df + +# Data +r = [0,1] +raw_data = {'firstBars': df.iloc[0,:], 'secondBars':df.iloc[1,:]} +df4 = pd.DataFrame(raw_data) + +# From raw value to percentage +totals = [i+j for i,j in zip(df4['firstBars'], df4['secondBars'])] +firstBars = [i / j * 100 for i,j in zip(df4['firstBars'], totals)] +secondBars = [i / j * 100 for i,j in zip(df4['secondBars'], totals)] + +# plot +barWidth = 0.85 +names = ('250020','inter urban roads') +# Create green Bars +plt.bar(r, firstBars, color='khaki', edgecolor='white', width=barWidth, label="day") +# Create orange Bars +plt.bar(r, secondBars, bottom=firstBars, color='dimgray', edgecolor='white', width=barWidth, label="night") + +# Custom x axis +plt.xticks(r, names) +plt.xlabel("road") + +# Text of each barplot +label1 = ['50%','16%'] +label2 = ['50%', '84%'] + +for i in range(len(r)): + plt.text(x = r[i]-0.1 , y = 10, s = label1[i], size = 10) + plt.text(x = r[i]-0.1 , y = 75, s = label2[i], size = 10) + +# Add a legend +plt.legend(loc='upper left', bbox_to_anchor=(1,1), ncol=1) + +plt.title("on road 25 collision with an animal accurr more often at day", loc='left', fontsize=12, fontweight=0, color=('k')) + +plt.box(on=None) +plt.yticks([]) + +#plt.savefig(r'slide_25.12.png', bbox_inches='tight') + +# Show graphic +plt.show() + +pedestrian = data_250020['accident_type'] == 1 +df1 = data_250020[pedestrian].groupby(['population_type']).size() +df = pd.DataFrame(df1) +df_idx = df.reset_index(inplace = False) +df.columns =['250020'] +# Data to plot +labels = 'Jewish', 'Arab', 'Other', 'Forighn' +sizes = [df.iloc[0,0], df.iloc[1,0], df.iloc[2,0], df.iloc[3,0]] +colors = ['#f9ac86','#fc999f','#cfac86','#f9fc99'] +#explode = (0.2, 0) # explode 1st slice + +# Plot +plt.pie(sizes, labels=labels, colors=colors, +autopct='%1d%%', shadow=True, startangle=140) + +plt.axis('equal') + +#plt.savefig(r'slide25.13.1.png', bbox_inches='tight') + +plt.show() + +#most of the pedestrian hits happened near beduian settelments junctions + +pedestrian = data_250020['accident_type'] == 1 +data_250020[pedestrian].groupby(['latitude','longitude']) + +import folium +m = folium.Map(location=[31.198543, 34.923482], zoom_start=11) +for row in data_250020[pedestrian][['longitude','latitude']].values: + try: + folium.Circle(radius=10, + location = [row[1], row[0]] + ).add_to(m) + except ValueError: + continue + +display(m) + + + +