forked from GoogleCloudPlatform/bigquery-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
query_audit.sql
140 lines (136 loc) · 5.9 KB
/
query_audit.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/*
* Copyright 2019 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * query_audit: a flattened, user-friendly projection of the exported audit
 * log records. Nested jobCompletedEvent fields are surfaced as plain columns
 * and a handful of runtime metrics are derived from the job timestamps.
 *
 * NOTE(review): `project-id.dataset` looks like a placeholder -- point it at
 * the dataset that actually receives the audit-log export.
 */
WITH audit_fields AS (
  /* Step 1: lift every required leaf field out of the nested payload once. */
  SELECT
    protopayload_auditlog.authenticationInfo.principalEmail AS principalEmail,
    protopayload_auditlog.requestMetadata.callerIp AS callerIp,
    protopayload_auditlog.serviceName AS serviceName,
    protopayload_auditlog.methodName AS methodName,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.eventName AS eventName,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobName.projectId AS projectId,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobName.jobId AS jobId,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.createTime AS createTime,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.startTime AS startTime,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.endTime AS endTime,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatus.error.code AS errorCode,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatus.error.message AS errorMessage,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalSlotMs AS totalSlotMs,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalTablesProcessed AS totalTablesProcessed,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalViewsProcessed AS totalViewsProcessed,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalProcessedBytes AS totalProcessedBytes,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalBilledBytes AS totalBilledBytes,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.billingTier AS billingTier,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobConfiguration.query AS query,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.referencedTables AS referencedTables,
    protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.referencedViews AS referencedViews
  FROM
    `project-id.dataset.cloudaudit_googleapis_com_data_access_*`
),
query_audit AS (
  /* Step 2: add the runtime metrics derived from the job timestamps. */
  SELECT
    principalEmail,
    callerIp,
    serviceName,
    methodName,
    eventName,
    projectId,
    jobId,
    createTime,
    startTime,
    endTime,
    errorCode,
    errorMessage,
    /* Wall-clock duration of the job, in milliseconds and in seconds. */
    TIMESTAMP_DIFF(endTime, startTime, MILLISECOND) AS runtimeMs,
    TIMESTAMP_DIFF(endTime, startTime, MILLISECOND) / 1000 AS runtimeSecs,
    /* Duration rounded up to whole minutes. */
    CAST(
      CEIL((TIMESTAMP_DIFF(endTime, startTime, MILLISECOND) / 1000) / 60) AS INT64
    ) AS executionMinuteBuckets,
    /* Flagged as cached when the job reports no processed bytes, no slot
     * time and no error. IS NULL never yields NULL, so this is always a
     * strict TRUE/FALSE. */
    (
      totalProcessedBytes IS NULL
      AND totalSlotMs IS NULL
      AND errorCode IS NULL
    ) AS cached,
    totalSlotMs,
    totalTablesProcessed,
    totalViewsProcessed,
    totalProcessedBytes,
    totalBilledBytes,
    billingTier,
    query,
    referencedTables,
    referencedViews
  FROM
    audit_fields
)
/*
 * Report one row per completed BigQuery query job found in query_audit,
 * adding classification flags, a calendar breakdown of the creation time,
 * slot-usage figures and rough billing estimates.
 */
SELECT
  principalEmail,
  callerIp,
  serviceName,
  methodName,
  eventName,
  projectId,
  jobId,
  /* REGEXP_CONTAINS returns NULL for a NULL input; IFNULL keeps each flag a
   * strict TRUE/FALSE. */
  IFNULL(REGEXP_CONTAINS(jobId, 'beam'), FALSE) AS isBeamJob,
  /* Flags queries whose SQL text mentions the audit export tables. */
  IFNULL(
    REGEXP_CONTAINS(query.query, 'cloudaudit_googleapis_com_data_access_'),
    FALSE
  ) AS isAuditDashboardQuery,
  errorCode,
  errorMessage,
  (errorCode IS NOT NULL) AS isError,
  IFNULL(REGEXP_CONTAINS(errorMessage, 'timeout'), FALSE) AS isTimeout,
  STRUCT(
    /* Despite the alias, EXTRACT(MINUTE ...) is the minute within the hour
     * (0-59). The name is kept so existing consumers keep working. */
    EXTRACT(MINUTE FROM createTime) AS minuteOfDay,
    EXTRACT(HOUR FROM createTime) AS hourOfDay,
    /* DAYOFWEEK is 1-7 starting on Sunday; shift it to 0-6. */
    EXTRACT(DAYOFWEEK FROM createTime) - 1 AS dayOfWeek,
    EXTRACT(DAYOFYEAR FROM createTime) AS dayOfYear,
    EXTRACT(ISOWEEK FROM createTime) AS week,
    EXTRACT(MONTH FROM createTime) AS month,
    EXTRACT(QUARTER FROM createTime) AS quarter,
    EXTRACT(YEAR FROM createTime) AS year
  ) AS date,
  createTime,
  startTime,
  endTime,
  runtimeMs,
  runtimeSecs,
  cached,
  totalSlotMs,
  /* SAFE_DIVIDE returns NULL instead of raising a division-by-zero error
   * when a job starts and ends on the same millisecond (runtimeMs = 0). */
  SAFE_DIVIDE(totalSlotMs, runtimeMs) AS avgSlots,
  /* Break the job into one-minute buckets, each carrying the job's average
   * slot usage. This is a crude way to retrieve the average slot utilization
   * for a particular minute across multiple queries.
   *
   * Offsets run from 0 to executionMinuteBuckets - 1 so that the first
   * bucket is the minute in which the job started, rather than the minute
   * after it. A zero or NULL bucket count still yields an empty array.
   */
  ARRAY(
    SELECT
      STRUCT(
        TIMESTAMP_TRUNC(
          TIMESTAMP_ADD(startTime, INTERVAL minute_offset MINUTE),
          MINUTE
        ) AS time,
        SAFE_DIVIDE(totalSlotMs, runtimeMs) AS avgSlotUsage
      )
    FROM
      UNNEST(GENERATE_ARRAY(0, executionMinuteBuckets - 1)) AS minute_offset
    ORDER BY
      minute_offset
  ) AS executionTimeline,
  totalTablesProcessed,
  totalViewsProcessed,
  totalProcessedBytes,
  totalBilledBytes,
  /* Decimal units: 10^9 bytes per gigabyte, 10^3 gigabytes per terabyte. */
  (totalBilledBytes / 1000000000) AS totalBilledGigabytes,
  (totalBilledBytes / 1000000000) / 1000 AS totalBilledTerabytes,
  /* 5 is a hard-coded USD-per-terabyte rate.
   * NOTE(review): confirm it matches the pricing that applies to your
   * project before relying on this estimate. */
  ((totalBilledBytes / 1000000000) / 1000) * 5 AS estimatedCostUsd,
  billingTier,
  query,
  referencedTables,
  referencedViews,
  /* Constant 1 per row; presumably there so dashboards can SUM it to count
   * queries -- verify against the consumer. */
  1 AS queries
FROM
  query_audit
WHERE
  /* Keep only completed BigQuery query jobs. */
  serviceName = 'bigquery.googleapis.com'
  AND methodName = 'jobservice.jobcompleted'
  AND eventName = 'query_job_completed'