-
Notifications
You must be signed in to change notification settings - Fork 353
/
Build a Data Warehouse with BigQuery: Challenge Lab
174 lines (137 loc) · 4.26 KB
/
Build a Data Warehouse with BigQuery: Challenge Lab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
export DATASET_NAME_1=covid
export DATASET_NAME_2=covid_data
bq mk --dataset $DEVSHELL_PROJECT_ID:covid
bq query --use_legacy_sql=false \
"
CREATE OR REPLACE TABLE $DATASET_NAME_1.oxford_policy_tracker
PARTITION BY date
OPTIONS(
partition_expiration_days=1080,
description='oxford_policy_tracker table in the COVID 19 Government Response public dataset with an expiry time set to 90 days.'
) AS
SELECT
*
FROM
\`bigquery-public-data.covid19_govt_response.oxford_policy_tracker\`
WHERE
alpha_3_code NOT IN ('GBR', 'BRA', 'CAN','USA')
"
#TASK 2
bq query --use_legacy_sql=false \
"
ALTER TABLE $DATASET_NAME_2.global_mobility_tracker_data
ADD COLUMN population INT64,
ADD COLUMN country_area FLOAT64,
ADD COLUMN mobility STRUCT<
avg_retail FLOAT64,
avg_grocery FLOAT64,
avg_parks FLOAT64,
avg_transit FLOAT64,
avg_workplace FLOAT64,
avg_residential FLOAT64
>
"
#TASK 3
bq query --use_legacy_sql=false \
"
CREATE OR REPLACE TABLE $DATASET_NAME_2.pop_data_2019 AS
SELECT
country_territory_code,
pop_data_2019
FROM
\`bigquery-public-data.covid19_ecdc.covid_19_geographic_distribution_worldwide\`
GROUP BY
country_territory_code,
pop_data_2019
ORDER BY
country_territory_code
"
bq query --use_legacy_sql=false \
"
UPDATE
\`$DATASET_NAME_2.consolidate_covid_tracker_data\` t0
SET
population = t1.pop_data_2019
FROM
\`$DATASET_NAME_2.pop_data_2019\` t1
WHERE
CONCAT(t0.alpha_3_code) = CONCAT(t1.country_territory_code);
"
#TASK 4
bq query --use_legacy_sql=false \
"
UPDATE
\`$DATASET_NAME_2.consolidate_covid_tracker_data\` t0
SET
t0.country_area = t1.country_area
FROM
\`bigquery-public-data.census_bureau_international.country_names_area\` t1
WHERE
t0.country_name = t1.country_name
"
# BONUS_TASK
bq query --use_legacy_sql=false \
"
UPDATE
\`$DATASET_NAME_2.consolidate_covid_tracker_data\` t0
SET
t0.mobility.avg_retail = t1.avg_retail,
t0.mobility.avg_grocery = t1.avg_grocery,
t0.mobility.avg_parks = t1.avg_parks,
t0.mobility.avg_transit = t1.avg_transit,
t0.mobility.avg_workplace = t1.avg_workplace,
t0.mobility.avg_residential = t1.avg_residential
FROM
( SELECT country_region, date,
AVG(retail_and_recreation_percent_change_from_baseline) as avg_retail,
AVG(grocery_and_pharmacy_percent_change_from_baseline) as avg_grocery,
AVG(parks_percent_change_from_baseline) as avg_parks,
AVG(transit_stations_percent_change_from_baseline) as avg_transit,
AVG(workplaces_percent_change_from_baseline) as avg_workplace,
AVG(residential_percent_change_from_baseline) as avg_residential
FROM \`bigquery-public-data.covid19_google_mobility.mobility_report\`
GROUP BY country_region, date
) AS t1
WHERE
CONCAT(t0.country_name, t0.date) = CONCAT(t1.country_region, t1.date)
"
bq query --use_legacy_sql=false \
"
SELECT DISTINCT country_name
FROM \`$DATASET_NAME_2.oxford_policy_tracker_worldwide\`
WHERE population is NULL
UNION ALL
SELECT DISTINCT country_name
FROM \`$DATASET_NAME_2.oxford_policy_tracker_worldwide\`
WHERE country_area IS NULL
ORDER BY country_name ASC
"
# Create a new table for country area data
bq query --use_legacy_sql=false \
"
CREATE TABLE $DATASET_NAME_2.country_area_data AS
SELECT *
FROM \`bigquery-public-data.census_bureau_international.country_names_area\`;
"
# Create a new table 'mobility_data' in the 'covid_data' dataset
bq query --use_legacy_sql=false \
"CREATE TABLE $DATASET_NAME_2.mobility_data AS
SELECT *
FROM \`bigquery-public-data.covid19_google_mobility.mobility_report\`"
# Delete Null population and country area data from oxford_policy_tracker_by_countries table
bq query --use_legacy_sql=false \
"DELETE FROM covid_data.oxford_policy_tracker_by_countries
WHERE population IS NULL AND country_area IS NULL"
bq query --use_legacy_sql=false \
"
DELETE FROM \`covid_data.oxford_policy_tracker_by_countries\`
WHERE
\`population\` IS NULL AND
\`country_area\` IS NULL
"
# Delete rows with NULL population
bq query --use_legacy_sql=false \
"DELETE FROM \`covid_data.oxford_policy_tracker_by_countries\` WHERE population IS NULL;"
# Delete rows with NULL country_area
bq query --use_legacy_sql=false \
"DELETE FROM \`covid_data.oxford_policy_tracker_by_countries\` WHERE country_area IS NULL;"