-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
64 lines (53 loc) · 1.82 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import base64
import json
import os
from typing import Tuple

from google.cloud import bigquery

from render import convert_rows_to_tables, render
# Target GCP project, read from the environment; None when PROJECT_ID is unset.
project_id = os.environ.get("PROJECT_ID")

# Created once at import time and shared by every handler() call in this process.
client = bigquery.Client(project=project_id)
def handler(event: dict, _: dict) -> None:
    """Collect column metadata for a table's versions and run the rendered query.

    The dataset and base table name are decoded from ``event`` by
    ``_extract_data``. The handler then queries ``INFORMATION_SCHEMA.COLUMNS``
    for every table whose name starts with ``<table>_v_``, hands the result to
    ``convert_rows_to_tables``, renders a statement with ``render`` and waits
    for that statement to finish.

    Args:
        event: Trigger payload; must be in the shape ``_extract_data`` expects.
        _: Unused second argument (presumably the invocation context).
    """
    dataset_name, table_name = _extract_data(event)

    # "_" and "%" are LIKE wildcards; escape them (and the escape character
    # itself) so the base table name is matched literally.
    escaped_name = (
        table_name.replace("\\", "\\\\")
        .replace("_", "\\_")
        .replace("%", "\\%")
    )
    # Matches "<table>_v_<anything>"; the separator underscores are literal.
    version_pattern = escaped_name + "\\_v\\_%"

    # NOTE(review): dataset_name is still interpolated because identifiers
    # cannot be bound as query parameters. It comes straight from the event
    # payload, so confirm the publisher is trusted or validate it upstream.
    metadata_query = f'''
        WITH
        /* Get tables with all associated column names */
        tab2col AS (
            SELECT
                table_name,
                ARRAY_AGG(DISTINCT column_name) AS column_names
            FROM
                `{project_id}`.{dataset_name}.INFORMATION_SCHEMA.COLUMNS
            WHERE
                table_name LIKE @version_pattern
            GROUP BY
                table_name),
        /* Get columns with all associated data types */
        col2typ AS (
            SELECT
                column_name,
                ARRAY_AGG(DISTINCT data_type) AS data_types
            FROM
                `{project_id}`.{dataset_name}.INFORMATION_SCHEMA.COLUMNS
            WHERE
                table_name LIKE @version_pattern
            GROUP BY
                column_name)
        /* Get combination of table names with column definitions */
        SELECT
            tab2col.table_name,
            ARRAY(
                SELECT AS STRUCT
                    col2typ.column_name AS name,
                    col2typ.data_types AS types
                FROM
                    UNNEST(tab2col.column_names) AS col_name
                JOIN
                    col2typ ON col_name = col2typ.column_name) AS cols
        FROM
            tab2col;'''

    # Bind the pattern as a parameter so the payload cannot alter the SQL text.
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter(
                "version_pattern", "STRING", version_pattern
            ),
        ]
    )
    rows = client.query(metadata_query, job_config=job_config)
    table_versions = convert_rows_to_tables(rows)

    view_query = render(project_id, dataset_name, table_name, table_versions)
    # Block until the rendered statement completes so failures raise here.
    client.query(view_query).result()
def _extract_data(event: dict) -> Tuple[str, str]:
    """Decode the event payload and return ``(dataset, table name)``.

    ``event['data']`` is base64-decoded, read as UTF-8 and parsed as JSON.
    The parsed object's ``dataset`` value and its nested ``table.name`` value
    are returned, in that order.

    Args:
        event: Mapping with a base64-encoded JSON document under ``'data'``.

    Returns:
        A ``(dataset, table_name)`` pair taken from the decoded document.

    Raises:
        KeyError: If ``data``, ``dataset``, ``table`` or ``name`` is missing.
        ValueError: If the payload is not valid base64, UTF-8 or JSON.
    """
    payload = base64.b64decode(event['data']).decode('utf-8')
    data = json.loads(payload)
    return data['dataset'], data['table']['name']