-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbase.py
28 lines (20 loc) · 892 Bytes
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import os
import time
import duckdb
def get_file_size_in_gb(file_path):
    """Return the size of the file at ``file_path`` in units of 1024**3 bytes.

    Any ``OSError`` raised while stat-ing the path propagates to the caller.
    """
    bytes_per_gb = 1024 ** 3
    return os.stat(file_path).st_size / bytes_per_gb
# Module-level DuckDB connection, created at import time; run_query executes
# every query through it.
con = duckdb.connect()
# Optional settings, currently disabled. Uncomment to apply them to `con`
# before any query is run.
# con.execute("SET memory_limit='15GB'")
# con.execute("SET threads=8")
# con.execute("SET partitioned_write_flush_threshold=1024")
def run_query(query: str, folder_size: str):
    """Run ``query`` on the shared connection and print benchmark details.

    Prints the DuckDB version, the sizes of ``users.parquet`` and
    ``orders.parquet`` under ``./data/<folder_size>/``, and the time spent
    executing the query and fetching its result with ``fetchdf()``.

    The Parquet paths are only used for the size report; ``query`` is passed
    to ``con.execute`` verbatim.

    Args:
        query: SQL text to execute.
        folder_size: Name of the sub-directory of ``./data`` that holds the
            Parquet files whose sizes are reported.

    Returns:
        The object returned by ``fetchdf()`` for the query, so callers can
        inspect the result if they wish.

    Raises:
        OSError: If the size of either Parquet file cannot be read.
    """
    print(f"DuckDB version: {duckdb.__version__}")

    for table in ("users", "orders"):
        parquet_path = f'./data/{folder_size}/{table}.parquet'
        size_gb = get_file_size_in_gb(parquet_path)
        print(f"Size of {table}.parquet: {size_gb:.2f} GB")

    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    # Keep the result bound while the clock is read so that releasing it is
    # not counted as part of the query time.
    result = con.execute(query).fetchdf()
    end_time = time.perf_counter()

    print(f"Query execution time: {end_time - start_time:.2f} seconds")
    return result