-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathheavy_query.py
38 lines (34 loc) · 934 Bytes
/
heavy_query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import os
import sys
from base import run_query
# Allocator tuning published through the MALLOC_CONF environment variable
# (the option names match jemalloc's tuning options): one arena per CPU,
# background threads enabled, and 10000 ms decay for dirty and muzzy pages.
#
# os.cpu_count() may return None when the CPU count cannot be determined;
# fall back to a single arena instead of emitting the invalid "narenas:None".
#
# NOTE(review): allocators typically read MALLOC_CONF once, when they
# initialise. This assignment runs after the imports above, so confirm it is
# early enough to take effect for whatever run_query ends up loading.
_arena_count = os.cpu_count() or 1
os.environ["MALLOC_CONF"] = (
    f"narenas:{_arena_count},lg_chunk:21,background_thread:true,dirty_decay_ms:10000,muzzy_decay_ms:10000"
)
# The optional first command-line argument names the dataset sub-folder under
# ./data; when it is absent the folder name is the empty string.
try:
    folder_size = sys.argv[1]
except IndexError:
    folder_size = ''

# Locations of the two parquet inputs inside the selected dataset folder.
users_parquet_path = f'./data/{folder_size}/users.parquet'
orders_parquet_path = f'./data/{folder_size}/orders.parquet'
# SQL text for the benchmark query.
#
# The CTE `joined_data` inner-joins the users parquet file to the orders
# parquet file on u.id = o.user_id and projects columns from both sides.
# The outer SELECT returns every joined column plus `new_number`, a
# ROW_NUMBER() computed per user_id in order_timestamp order.
#
# The two parquet paths are interpolated straight into the SQL as
# single-quoted literals, so a path containing a single quote would break
# the statement.
# NOTE(review): reading a quoted file path directly in FROM/JOIN depends on
# the engine behind run_query — confirm against base.run_query.
query = f"""
WITH joined_data AS (
SELECT
u.id AS user_id,
u.uuid,
u.name,
u.birthdate,
u.address,
u.telephone,
o.user_id AS order_user_id,
o.product_id,
o.price,
o.quantity,
o.order_timestamp
FROM '{users_parquet_path}' AS u
JOIN '{orders_parquet_path}' AS o
ON u.id = o.user_id
)
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY order_timestamp) AS new_number
FROM joined_data
"""
# Print a label identifying this query variant, then pass the SQL text and
# the dataset folder name to run_query (imported from the project's `base`
# module; what it does with them is not visible in this file).
_banner = "With window function:"
print(_banner)
run_query(query, folder_size)