-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathplotting_code.R
88 lines (78 loc) · 2.85 KB
/
plotting_code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
library(dplyr)
library(ggplot2)
library(magrittr)
library(scales)
library(SparkR)
###################
# Plot Histograms #
###################
# Shared theme for the hourly-usage charts: centred 22pt title, 18pt axis
# titles, 12pt axis/legend text, and no legend title.
hist_theme <- local({
  # Each text element is built once and reused for every slot that shares it.
  title_text <- element_text(size = 22, hjust = 0.5)
  axis_title_text <- element_text(size = 18)
  small_text <- element_text(size = 12)

  theme(
    plot.title = title_text,
    axis.title.x = axis_title_text,
    axis.title.y = axis_title_text,
    axis.text.x = small_text,
    axis.text.y = small_text,
    legend.title = element_blank(),
    legend.text = small_text
  )
})
# Plot hourly usage for weekday
#
# Start counts are shifted 0.2 to the left and stop counts 0.2 to the right of
# each hour so the two 0.4-wide bars sit side by side instead of overlapping.
# The plot object is built before the PNG device is opened, so a failure while
# assembling the layers cannot leave a device open.
hourly_usage_weekday_plot <- ggplot() +
  geom_col(data = start_hour_weekday_group_df,
           aes(x = start_hour - 0.2,
               y = count,
               fill = 'Start'),
           width = 0.4) +
  geom_col(data = end_hour_weekday_group_df,
           aes(x = stop_hour + 0.2,
               y = count,
               fill = 'Stop'),
           width = 0.4) +
  # Dashed reference lines at 8.5 and 17.5 on the hour axis (presumably the
  # morning and evening commute peaks -- confirm with the analysis).
  # `size` is kept for compatibility with older ggplot2; releases from 3.4.0
  # prefer `linewidth` for line geoms.
  geom_vline(xintercept = 8.5, size = 1, linetype = 2) +
  geom_vline(xintercept = 17.5, size = 1, linetype = 2) +
  # Name the colours so they are bound to the legend keys explicitly rather
  # than by position.
  scale_fill_manual(values = c(Start = 'royalblue', Stop = 'orangered3')) +
  scale_y_continuous(labels = comma) +
  hist_theme +
  labs(title = 'Hourly Usage For Weekdays', x = 'Hour of Day', y = 'Frequency')

# png() cannot create missing directories, so make sure the target exists.
dir.create('plots', showWarnings = FALSE, recursive = TRUE)
png(filename = 'plots/hourly_usage_weekday.png', width = 700, height = 400)
# A ggplot object is only drawn when printed. Auto-printing does not happen
# under source(), so print explicitly; `finally` closes the device even if
# rendering fails. invisible() prevents a second auto-print at top level.
invisible(tryCatch(print(hourly_usage_weekday_plot), finally = dev.off()))
# Plot hourly usage for weekend
#
# Start counts are shifted 0.2 to the left and stop counts 0.2 to the right of
# each hour so the two 0.4-wide bars sit side by side instead of overlapping.
# The plot object is built before the PNG device is opened, so a failure while
# assembling the layers cannot leave a device open.
hourly_usage_weekend_plot <- ggplot() +
  geom_col(data = start_hour_weekend_group_df,
           aes(x = start_hour - 0.2,
               y = count,
               fill = 'Start'),
           width = 0.4) +
  geom_col(data = end_hour_weekend_group_df,
           aes(x = stop_hour + 0.2,
               y = count,
               fill = 'Stop'),
           width = 0.4) +
  # Name the colours so they are bound to the legend keys explicitly rather
  # than by position.
  scale_fill_manual(values = c(Start = 'royalblue', Stop = 'orangered3')) +
  scale_y_continuous(labels = comma) +
  hist_theme +
  labs(title = 'Hourly Usage for Weekends', x = 'Hour of Day', y = 'Frequency')

# png() cannot create missing directories, so make sure the target exists.
dir.create('plots', showWarnings = FALSE, recursive = TRUE)
png(filename = 'plots/hourly_usage_weekend.png', width = 700, height = 400)
# A ggplot object is only drawn when printed. Auto-printing does not happen
# under source(), so print explicitly; `finally` closes the device even if
# rendering fails. invisible() prevents a second auto-print at top level.
invisible(tryCatch(print(hourly_usage_weekend_plot), finally = dev.off()))
# Look at day of the week
# Count trips per day of week on the Spark side, then collect the small
# aggregated result into a local data frame.
weekday_count_df <- local({
  # Format each start time with the 'E' pattern (presumably an abbreviated day
  # name such as 'Mon' -- confirm against the Spark version in use) and name
  # the resulting column 'day_of_week'.
  day_of_week_col <- alias(date_format(column('starttime'), 'E'),
                           'day_of_week')

  trips_by_day <- select(citi_bike_trips_sdf, day_of_week_col)
  trips_by_day <- select(trips_by_day, 'day_of_week')

  # One row per day with its trip count, ordered by the day label.
  day_counts <- count(groupBy(trips_by_day, 'day_of_week'))
  collect(orderBy(day_counts, 'day_of_week'))
})
# Create a mapping from day of week to number (Sun = 1, ..., Sat = 7)
weekday_vec <- 1:7
names(weekday_vec) <- c('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat')

# Pin the factor levels to all seven ordinals. If the levels were inferred
# from the data, a weekday with no trips would drop out and the axis labels
# below would no longer line up with the bars.
# as.character() guards against the lookup indexing by factor codes.
weekday_count_df$ordinal_number <- factor(
  weekday_vec[as.character(weekday_count_df$day_of_week)],
  levels = unname(weekday_vec)
)

# Sort by day of week
weekday_count_df <- weekday_count_df %>%
  dplyr::arrange(ordinal_number)

# Labels are keyed by ordinal ('1'..'7') so each break gets its own day name
# regardless of which days are present; drop = FALSE keeps an empty slot for
# days without data. print() makes the plot render under source() as well.
print(
  ggplot(weekday_count_df, aes(x = ordinal_number, y = count)) +
    geom_col() +
    labs(x = 'Day of Week', y = 'Frequency') +
    scale_x_discrete(
      labels = setNames(names(weekday_vec), weekday_vec),
      drop = FALSE
    )
)