-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
210 lines (179 loc) · 7.11 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
"""Module for Sprint 6 project"""
import streamlit as st
import pandas as pd
import plotly.express as px
# Load the used-vehicle listings; the path is relative to the working directory.
df = pd.read_csv('vehicles_us.csv')

# Derive 'manufacturer' from the first whitespace-separated word of 'model'.
df['manufacturer'] = df['model'].str.split().str[0]

# Impute missing 'model_year' and 'cylinders' with the median of the rows
# that share the same 'model'. The earlier groupby(...).fillna(<scalar>) form
# filled every row with the column-wide median regardless of group, so the
# per-group statistic is now computed explicitly with transform(). Rows whose
# whole group is missing fall back to the column-wide median.
for column in ('model_year', 'cylinders'):
    overall_median = df[column].median()
    group_median = df.groupby('model')[column].transform('median')
    df[column] = df[column].fillna(group_median).fillna(overall_median)

# Impute missing 'odometer' with the mean of rows sharing the same
# ('model_year', 'model') pair; this must run after 'model_year' is filled.
# Groups with no known odometer fall back to the truncated column-wide mean.
overall_odometer_mean = int(df['odometer'].mean())
group_odometer_mean = (
    df.groupby(['model_year', 'model'], sort=False)['odometer'].transform('mean')
)
df['odometer'] = (
    df['odometer'].fillna(group_odometer_mean).fillna(overall_odometer_mean)
)

# Missing 'is_4wd' values are replaced with 0.
df['is_4wd'] = df['is_4wd'].fillna(0)

# Rename 'price' to 'price_$' (done once; the repeated calls were no-ops).
df.rename(columns={'price': 'price_$'}, inplace=True)
# Page title.
st.title('Sprint 6 Project: Used Vehicles Application')
# Project summary, written as one st.write call per line of text.
for summary_line in (
    'This software development project explores the vehicles_us.csv dataset.',
    'Follow this link for the Github repository: https://github.com/peachykeenani/sprint_6_app.',
):
    st.write(summary_line)
# Two empty headers: the first carries the rainbow divider, the second is
# an empty heading below it (presumably used as vertical spacing).
st.header('', divider='rainbow')
st.header('')
# Section heading, then the full DataFrame, then a divider.
st.header('Data viewer')
st.dataframe(df)
st.divider()
# Section: vehicle types per manufacturer.
st.header('Vehicle types by manufacturer')
# Histogram over 'manufacturer', with bars colored by 'type'.
types_by_manufacturer = px.histogram(df, x='manufacturer', color='type')
# Render the figure, then close the section with a divider.
st.write(types_by_manufacturer)
st.divider()
# Section: how vehicle condition is distributed across model years.
st.header('Histogram of `condition` vs `model_year`')
# Histogram over 'model_year', with bars colored by 'condition'.
condition_by_year = px.histogram(df, x='model_year', color='condition')
# Render the figure, then close the section with a divider.
st.write(condition_by_year)
st.divider()
# Section: compare the price distribution of two user-selected manufacturers.
st.header('Compare price distribution between manufacturers')
# Alphabetically sorted manufacturers, offered in both dropdowns.
manufac_list = sorted(df['manufacturer'].unique())
# First dropdown. 'chevrolet' is pre-selected when present; otherwise fall
# back to the first option instead of raising ValueError from list.index().
manufacturer_1 = st.selectbox(
    label='Select manufacturer 1',  # Title of the select box
    options=manufac_list,  # Options listed in the select box
    index=manufac_list.index('chevrolet') if 'chevrolet' in manufac_list else 0,
)
# Second dropdown, defaulting to 'hyundai' with the same fallback.
manufacturer_2 = st.selectbox(
    label='Select manufacturer 2',
    options=manufac_list,
    index=manufac_list.index('hyundai') if 'hyundai' in manufac_list else 0,
)
# Keep only the rows that belong to either selected manufacturer.
mask_filter = (df['manufacturer'] == manufacturer_1) | (df['manufacturer'] == manufacturer_2)
df_filtered = df[mask_filter]
# Checkbox that lets the user normalize the histogram.
# `counter` provides a distinct key for each st.checkbox; it is incremented
# here and read again by the trendline checkbox later in the script.
counter = 0
normalize = st.checkbox('Normalize histogram', value=True, key=counter)
counter += 1
# 'percent' normalizes the bars; None leaves plain counts.
histnorm = 'percent' if normalize else None
# Overlaid price histograms, one color per manufacturer.
fig = px.histogram(
    df_filtered,
    x='price_$',
    nbins=30,
    color='manufacturer',
    histnorm=histnorm,
    barmode='overlay',
)
# Render the figure, then close the section with a divider.
st.write(fig)
st.divider()
# Section: vehicle condition per manufacturer.
st.header('Vehicle condition by manufacturer')
# Histogram over 'manufacturer', with bars colored by 'condition'.
condition_by_manufacturer = px.histogram(df, x='manufacturer', color='condition')
# Render the figure, then close the section with a divider.
st.write(condition_by_manufacturer)
st.divider()
# Section: price aggregated per vehicle condition.
st.header('Total sales by vehicle condition')
# Explicit left-to-right order of the condition categories on the x axis
# (presumably descending by total sales -- verify against the data).
condition_order = ['excellent', 'good', 'like new', 'fair', 'new', 'salvage']
# Histogram of 'price_$' per 'condition'; the y axis is relabeled 'sales USD'.
sales_by_condition = px.histogram(
    df,
    x='condition',
    y='price_$',
    color='condition',
    category_orders={'condition': condition_order},
    labels={'price_$': 'sales USD'},
)
# Render the figure, then close the section with a divider.
st.write(sales_by_condition)
st.divider()
# Section: scatter of listing price against vehicle condition.
st.header('Explore relationship between vehicle `price` and `condition`')
# One point per row: 'condition' on x, 'price_$' on y, colored by condition.
price_by_condition = px.scatter(df, x='condition', y='price_$', color='condition')
# Render the figure, then close the section with a divider.
st.write(price_by_condition)
st.divider()
# Section: price distribution per transmission, split by fuel type.
st.header('Comparing the price distribution of `fuel` types by vehicle `transmission`')
# Box plot of 'price_$' per 'transmission', one box color per 'fuel';
# only suspected outliers are drawn as individual points.
fuel_price_box = px.box(
    df,
    x='transmission',
    y='price_$',
    color='fuel',
    points='suspectedoutliers',
)
# Use Plotly's 'exclusive' quartile method (the median divides the ordered
# data into two halves and is excluded from both -- see Plotly's box docs).
fuel_price_box.update_traces(quartilemethod='exclusive')
# Render the figure, then close the section with a divider.
st.write(fuel_price_box)
st.divider()
# Section: price distribution per transmission, split by the 'is_4wd' flag.
st.header('Comparing the price distribution of `is_4wd` vehicles by `transmission`')
# Box plot of 'price_$' per 'transmission', one box color per 'is_4wd' value;
# only suspected outliers are drawn as individual points.
four_wd_price_box = px.box(
    df,
    x='transmission',
    y='price_$',
    color='is_4wd',
    points='suspectedoutliers',
)
# Use Plotly's 'exclusive' quartile method (the median divides the ordered
# data into two halves and is excluded from both -- see Plotly's box docs).
four_wd_price_box.update_traces(quartilemethod='exclusive')
# Render the figure, then close the section with a divider.
st.write(four_wd_price_box)
st.divider()
# Section: scatter of listing price against days listed.
st.header('Explore relationship between vehicle `price` and `days_listed`')
# Optional trendline, off by default. `counter` is the module-level widget
# key that was set up for the earlier 'Normalize histogram' checkbox.
show_trend_ln = st.checkbox('Show trendline', value=False, key=counter)
# 'expanding' enables the trendline; None draws the bare scatter.
trendline = 'expanding' if show_trend_ln else None
# 'price_$' on x, 'days_listed' on y, with a single overall trendline.
price_vs_days = px.scatter(
    df,
    x='price_$',
    y='days_listed',
    trendline=trendline,
    trendline_scope='overall',
)
# Render the figure, then close the section with a divider.
st.write(price_vs_days)
st.divider()