Skip to content

Commit

Permalink
Merge pull request #448 from ww24/add-bigquery-routine
Browse files Browse the repository at this point in the history
feat(terraform): add with_geolocation function
  • Loading branch information
ww24 authored Dec 29, 2022
2 parents 44da480 + 1f07512 commit 039ddcb
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 1 deletion.
23 changes: 22 additions & 1 deletion terraform/access_log.tf
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ resource "google_bigquery_dataset" "access_log" {

resource "google_bigquery_table" "access_log" {
dataset_id = google_bigquery_dataset.access_log.dataset_id
table_id = "${local.name}_access_log"
table_id = "access_log"
clustering = ["timestamp"]
schema = file("access_log_schema/v1.json")

Expand All @@ -50,6 +50,27 @@ resource "google_bigquery_table" "access_log" {
}
}

# BigQuery table-valued function `with_geolocation(since, until)` in the
# access-log dataset. The SQL body is rendered from
# geolite2/function_with_geolocation.sql with the project ID and the dataset ID
# substituted into the template.
resource "google_bigquery_routine" "with_geolocation" {
  dataset_id   = google_bigquery_dataset.access_log.dataset_id
  routine_id   = "with_geolocation"
  routine_type = "TABLE_VALUED_FUNCTION"
  language     = "SQL"

  # Both parameters are fixed-type TIMESTAMPs. The list order is the order in
  # which the arguments are declared on the routine: since, then until.
  dynamic "arguments" {
    for_each = ["since", "until"]

    content {
      name          = arguments.value
      argument_kind = "FIXED_TYPE"
      data_type     = jsonencode({ typeKind = "TIMESTAMP" })
    }
  }

  definition_body = templatefile("geolite2/function_with_geolocation.sql", {
    project = var.project
    dataset = google_bigquery_dataset.access_log.dataset_id
  })
}

resource "google_storage_bucket" "geolite2" {
project = var.project
name = var.geolite2_bucket
Expand Down
28 changes: 28 additions & 0 deletions terraform/geolite2/function_with_geolocation.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-- with_geolocation(since TIMESTAMP, until TIMESTAMP)
--
-- Query body of a table function. Equivalent standalone DDL header:
--   CREATE OR REPLACE TABLE FUNCTION ${dataset}.with_geolocation(since TIMESTAMP, until TIMESTAMP) AS
--
-- Returns the access_log rows whose `timestamp` falls between `since` and
-- `until` (BETWEEN, so both bounds are inclusive), left-joined by `ip` to the
-- `country` and `city` of the matching GeoLite2 network. Rows whose ip matches
-- no network are kept, with NULL country/city.
WITH
  -- Access-log rows inside the requested window.
  access_logs AS (
    SELECT *
    FROM `${project}.${dataset}.access_log`
    WHERE `timestamp` BETWEEN since AND until
  ),

  -- GeoLite2 city table whose suffix is the date of `since` (YYYYMMDD).
  geolocations AS (
    SELECT *
    FROM `${project}.geolite2.GeoLite2_City_*`
    WHERE _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', DATE(since))
  ),

  -- Each client address only needs to be resolved once.
  ips AS (
    SELECT DISTINCT ip
    FROM access_logs
  ),

  -- Addresses containing '.' : one candidate network per prefix length 8..32.
  ipv4_candidates AS (
    SELECT
      NET.IP_TRUNC(NET.SAFE_IP_FROM_STRING(ip), mask) AS network,
      ip,
      mask
    FROM ips
    CROSS JOIN UNNEST(GENERATE_ARRAY(8, 32)) AS mask
    WHERE ip LIKE '%.%'
  ),

  -- Addresses containing ':' : one candidate network per prefix length 19..64.
  ipv6_candidates AS (
    SELECT
      NET.IP_TRUNC(NET.SAFE_IP_FROM_STRING(ip), mask) AS network,
      ip,
      mask
    FROM ips
    CROSS JOIN UNNEST(GENERATE_ARRAY(19, 64)) AS mask
    WHERE ip LIKE '%:%'
  ),

  -- Keep only the candidates that exist as a (network, mask) pair in GeoLite2.
  ip_geolocations AS (
    -- IPv4 address => country, city
    SELECT ip, country, city
    FROM ipv4_candidates
    INNER JOIN geolocations USING (network, mask)
    UNION ALL
    -- IPv6 address => country, city
    SELECT ip, country, city
    FROM ipv6_candidates
    INNER JOIN geolocations USING (network, mask)
  )

SELECT *
FROM access_logs
LEFT JOIN ip_geolocations USING (ip)

0 comments on commit 039ddcb

Please sign in to comment.