diff --git a/terraform/access_log.tf b/terraform/access_log.tf
index f6a74bb5..323bb05c 100644
--- a/terraform/access_log.tf
+++ b/terraform/access_log.tf
@@ -38,7 +38,7 @@ resource "google_bigquery_dataset" "access_log" {
 
 resource "google_bigquery_table" "access_log" {
   dataset_id = google_bigquery_dataset.access_log.dataset_id
-  table_id   = "${local.name}_access_log"
+  table_id   = "access_log"
   clustering = ["timestamp"]
 
   schema = file("access_log_schema/v1.json")
@@ -50,6 +50,29 @@ resource "google_bigquery_table" "access_log" {
   }
 }
 
+# Table function with_geolocation(since, until). Its SQL body is rendered from
+# geolite2/function_with_geolocation.sql with the project and dataset substituted.
+resource "google_bigquery_routine" "with_geolocation" {
+  dataset_id      = google_bigquery_dataset.access_log.dataset_id
+  routine_id      = "with_geolocation"
+  routine_type    = "TABLE_VALUED_FUNCTION"
+  language        = "SQL"
+  definition_body = templatefile("geolite2/function_with_geolocation.sql", {
+    project = var.project,
+    dataset = google_bigquery_dataset.access_log.dataset_id,
+  })
+  arguments {
+    name          = "since"
+    argument_kind = "FIXED_TYPE"
+    data_type     = jsonencode({ "typeKind" : "TIMESTAMP" })
+  }
+  arguments {
+    name          = "until"
+    argument_kind = "FIXED_TYPE"
+    data_type     = jsonencode({ "typeKind" : "TIMESTAMP" })
+  }
+}
+
 resource "google_storage_bucket" "geolite2" {
   project = var.project
   name    = var.geolite2_bucket
diff --git a/terraform/geolite2/function_with_geolocation.sql b/terraform/geolite2/function_with_geolocation.sql
new file mode 100644
index 00000000..6061564f
--- /dev/null
+++ b/terraform/geolite2/function_with_geolocation.sql
@@ -0,0 +1,45 @@
+-- with_geolocation: body of the BigQuery table function (rendered via Terraform templatefile).
+-- CREATE OR REPLACE TABLE FUNCTION ${dataset}.with_geolocation(since TIMESTAMP, until TIMESTAMP) AS
+--
+-- Returns the access_log rows whose `timestamp` is within since..until (BETWEEN, so
+-- both bounds are inclusive) joined with `country` and `city` looked up by ip.
+-- LEFT JOIN keeps rows whose ip matches no network; their country/city are NULL.
+WITH
+  -- Access-log rows inside the requested window.
+  access_logs AS (
+    SELECT *
+    FROM `${project}.${dataset}.access_log`
+    WHERE `timestamp` BETWEEN since AND until
+  ),
+  -- GeoLite2 City table whose suffix is the date of `since`.
+  geolocations AS (
+    SELECT *
+    FROM `${project}.geolite2.GeoLite2_City_*`
+    WHERE _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', DATE(since))
+  )
+SELECT *
+FROM access_logs
+LEFT JOIN (
+  -- Resolve each distinct ip once: truncate it at every candidate prefix length and
+  -- keep the (network, mask) pairs that exist in geolocations.
+  WITH ips AS (SELECT DISTINCT ip FROM access_logs)
+  -- IPv4 address => country, city
+  SELECT ip, country, city
+  FROM (
+    SELECT NET.IP_TRUNC(NET.SAFE_IP_FROM_STRING(ip), mask) AS network, *
+    FROM ips
+    CROSS JOIN UNNEST(GENERATE_ARRAY(8, 32)) AS mask
+    WHERE ip LIKE '%.%'
+  )
+  INNER JOIN geolocations USING (network, mask)
+  UNION ALL
+  -- IPv6 address => country, city
+  SELECT ip, country, city
+  FROM (
+    SELECT NET.IP_TRUNC(NET.SAFE_IP_FROM_STRING(ip), mask) AS network, *
+    FROM ips
+    CROSS JOIN UNNEST(GENERATE_ARRAY(19, 64)) AS mask
+    WHERE ip LIKE '%:%'
+  )
+  INNER JOIN geolocations USING (network, mask)
+) USING (ip)