Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tpcds refresh queries for sf1 #3

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions benchmarks/tpc-ds/ds_refresh.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"description": "Sequentially run the first refresh run for tpc-ds and then verify the results.",
"query_files": [
"queries/refresh/LF_CR.sql",
"queries/refresh/LF_CS.sql",
"queries/refresh/LF_SR.sql",
"queries/refresh/LF_SS.sql",
"queries/refresh/LF_WR.sql",
"queries/refresh/LF_WS.sql",
"queries/refresh/LF_I.sql",
"queries/refresh/verify_LF_CR_query_01.sql",
"queries/refresh/verify_LF_CR_query_02.sql",
"queries/refresh/verify_LF_CS_query_03.sql",
"queries/refresh/verify_LF_CS_query_04.sql",
"queries/refresh/verify_LF_SR_query_05.sql",
"queries/refresh/verify_LF_SR_query_06.sql",
"queries/refresh/verify_LF_SS_query_07.sql",
"queries/refresh/verify_LF_SS_query_08.sql",
"queries/refresh/verify_LF_WR_query_09.sql",
"queries/refresh/verify_LF_WR_query_10.sql",
"queries/refresh/verify_LF_WS_query_11.sql",
"queries/refresh/verify_LF_WS_query_12.sql",
"queries/refresh/verify_LF_I_query_13.sql",
"queries/refresh/verify_LF_I_query_14.sql"
]
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any expected number of rows returned?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The expected number of rows is actually an expression like >=2. Do we currently support such expressions, @wanglinsong?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you tell if the verification is successful?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can decide based on the number of rows returned. The number of rows returned can be >=2, and I am not sure whether such an expression is currently supported.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a way to compare the output with a baseline result; it is currently used for the ds_orderby flavor. We will extend that support to the ds_refresh flavor as well.

94 changes: 94 additions & 0 deletions benchmarks/tpc-ds/queries/refresh/LF_CR.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
-- Rebuilds the crv view: rows from the s_catalog_returns_1 staging table with
-- their business ids (cret_*_id) resolved to surrogate keys (*_sk) through
-- left joins on the dimension tables, and the derived amounts
-- cr_return_amt_inc_tax and cr_net_loss computed from the staging amounts.
DROP VIEW IF EXISTS crv;

CREATE VIEW crv AS
SELECT
    d_date_sk             AS cr_return_date_sk,
    t_time_sk             AS cr_return_time_sk,
    i_item_sk             AS cr_item_sk,
    -- NOTE(review): c1 is joined on cret_return_customer_id yet feeds the
    -- cr_refunded_* columns, and c2 (joined on cret_refund_customer_id) feeds
    -- the cr_returning_* columns -- confirm this mapping is intended.
    c1.c_customer_sk      AS cr_refunded_customer_sk,
    c1.c_current_cdemo_sk AS cr_refunded_cdemo_sk,
    c1.c_current_hdemo_sk AS cr_refunded_hdemo_sk,
    c1.c_current_addr_sk  AS cr_refunded_addr_sk,
    c2.c_customer_sk      AS cr_returning_customer_sk,
    c2.c_current_cdemo_sk AS cr_returning_cdemo_sk,
    c2.c_current_hdemo_sk AS cr_returning_hdemo_sk,
    c2.c_current_addr_sk  AS cr_returning_addr_sk,
    cc_call_center_sk     AS cr_call_center_sk,
    cp_catalog_page_sk    AS cr_catalog_page_sk,
    sm_ship_mode_sk       AS cr_ship_mode_sk,
    w_warehouse_sk        AS cr_warehouse_sk,
    r_reason_sk           AS cr_reason_sk,
    cret_order_id         AS cr_order_number,
    cret_return_qty       AS cr_return_quantity,
    cret_return_amt       AS cr_return_amount,
    cret_return_tax       AS cr_return_tax,
    cret_return_amt + cret_return_tax AS cr_return_amt_inc_tax,
    cret_return_fee       AS cr_fee,
    cret_return_ship_cost AS cr_return_ship_cost,
    cret_refunded_cash    AS cr_refunded_cash,
    cret_reversed_charge  AS cr_reversed_charge,
    cret_merchant_credit  AS cr_merchant_credit,
    cret_return_amt + cret_return_tax + cret_return_fee - cret_refunded_cash-cret_reversed_charge-cret_merchant_credit
        AS cr_net_loss
FROM s_catalog_returns_1
LEFT JOIN date_dim
    ON ( CAST(cret_return_date AS DATE) = d_date )
-- t_time is matched against a seconds count built from character positions
-- 1-2, 4-5 and 7-8 of cret_return_time (presumably 'HH:MM:SS' -- confirm).
LEFT JOIN time_dim
    ON ( ( CAST(substr(cret_return_time, 1, 2) AS INTEGER) * 3600 +
           CAST(substr(cret_return_time, 4, 2) AS INTEGER) * 60 +
           CAST(substr(cret_return_time, 7, 2) AS INTEGER) ) = t_time )
LEFT JOIN item
    ON ( cret_item_id = i_item_id )
LEFT JOIN customer c1
    ON ( cret_return_customer_id = c1.c_customer_id )
LEFT JOIN customer c2
    ON ( cret_refund_customer_id = c2.c_customer_id )
LEFT JOIN reason
    ON ( cret_reason_id = r_reason_id )
LEFT JOIN call_center
    ON ( cret_call_center_id = cc_call_center_id )
LEFT JOIN catalog_page
    ON ( cret_catalog_page_id = cp_catalog_page_id )
LEFT JOIN ship_mode
    ON ( cret_shipmode_id = sm_ship_mode_id )
LEFT JOIN warehouse
    ON ( cret_warehouse_id = w_warehouse_id )
-- These filters run after the outer joins: a joined row survives only when
-- its item and call_center end dates are both NULL, which includes the case
-- where no item / call_center row matched at all.
WHERE i_rec_end_date IS NULL
  AND cc_rec_end_date IS NULL;

-- Appends every row of the crv view to catalog_returns. No target column list
-- is given, so values are assigned by position: the select order below must
-- follow the column order of catalog_returns.
INSERT INTO catalog_returns
SELECT
    cr_return_date_sk,
    cr_return_time_sk,
    cr_item_sk,
    cr_refunded_customer_sk,
    cr_refunded_cdemo_sk,
    cr_refunded_hdemo_sk,
    cr_refunded_addr_sk,
    cr_returning_customer_sk,
    cr_returning_cdemo_sk,
    cr_returning_hdemo_sk,
    cr_returning_addr_sk,
    cr_call_center_sk,
    cr_catalog_page_sk,
    cr_ship_mode_sk,
    cr_warehouse_sk,
    cr_reason_sk,
    cr_order_number,
    cr_return_quantity,
    cr_return_amount,
    cr_return_tax,
    -- Derived amounts are narrowed explicitly (presumably to the target
    -- column type -- confirm against the catalog_returns DDL).
    CAST(cr_return_amt_inc_tax AS DECIMAL(7,2)),
    cr_fee,
    cr_return_ship_cost,
    cr_refunded_cash,
    cr_reversed_charge,
    cr_merchant_credit,
    CAST(cr_net_loss AS DECIMAL(7,2))
FROM crv;
128 changes: 128 additions & 0 deletions benchmarks/tpc-ds/queries/refresh/LF_CS.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
-- Rebuilds the csv view: each s_catalog_order_1 staging order joined to its
-- s_catalog_order_lineitem_1 line items, with business ids resolved to
-- surrogate keys through left joins on the dimension tables and the extended
-- price / net amounts derived from item prices and line-item quantities.
DROP VIEW IF EXISTS csv;

CREATE VIEW csv AS
SELECT
    d1.d_date_sk          AS cs_sold_date_sk,
    t_time_sk             AS cs_sold_time_sk,
    d2.d_date_sk          AS cs_ship_date_sk,
    c1.c_customer_sk      AS cs_bill_customer_sk,
    c1.c_current_cdemo_sk AS cs_bill_cdemo_sk,
    c1.c_current_hdemo_sk AS cs_bill_hdemo_sk,
    c1.c_current_addr_sk  AS cs_bill_addr_sk,
    c2.c_customer_sk      AS cs_ship_customer_sk,
    c2.c_current_cdemo_sk AS cs_ship_cdemo_sk,
    c2.c_current_hdemo_sk AS cs_ship_hdemo_sk,
    c2.c_current_addr_sk  AS cs_ship_addr_sk,
    cc_call_center_sk     AS cs_call_center_sk,
    cp_catalog_page_sk    AS cs_catalog_page_sk,
    sm_ship_mode_sk       AS cs_ship_mode_sk,
    w_warehouse_sk        AS cs_warehouse_sk,
    i_item_sk             AS cs_item_sk,
    p_promo_sk            AS cs_promo_sk,
    cord_order_id         AS cs_order_number,
    clin_quantity         AS cs_quantity,
    i_wholesale_cost      AS cs_wholesale_cost,
    i_current_price       AS cs_list_price,
    clin_sales_price      AS cs_sales_price,
    ( i_current_price-clin_sales_price ) * clin_quantity
        AS cs_ext_discount_amt,
    clin_sales_price * clin_quantity
        AS cs_ext_sales_price,
    i_wholesale_cost * clin_quantity
        AS cs_ext_wholesale_cost,
    i_current_price * clin_quantity
        AS cs_ext_list_price,
    i_current_price * cc_tax_percentage
        AS cs_ext_tax,
    clin_coupon_amt       AS cs_coupon_amt,
    clin_ship_cost * clin_quantity
        AS cs_ext_ship_cost,
    ( clin_sales_price * clin_quantity ) - clin_coupon_amt
        AS cs_net_paid,
    ( ( clin_sales_price * clin_quantity ) - clin_coupon_amt ) * ( 1 + cc_tax_percentage )
        AS cs_net_paid_inc_tax,
    ( clin_sales_price * clin_quantity ) - clin_coupon_amt + ( clin_ship_cost * clin_quantity )
        AS cs_net_paid_inc_ship,
    ( clin_sales_price * clin_quantity ) - clin_coupon_amt + ( clin_ship_cost * clin_quantity ) + i_current_price * cc_tax_percentage
        AS cs_net_paid_inc_ship_tax,
    ( ( clin_sales_price * clin_quantity ) - clin_coupon_amt ) - ( clin_quantity * i_wholesale_cost )
        AS cs_net_profit
FROM s_catalog_order_1
LEFT JOIN date_dim d1
    ON ( CAST(cord_order_date AS DATE) = d1.d_date )
LEFT JOIN time_dim
    ON ( cord_order_time = t_time )
LEFT JOIN customer c1
    ON ( cord_bill_customer_id = c1.c_customer_id )
LEFT JOIN customer c2
    ON ( cord_ship_customer_id = c2.c_customer_id )
-- The end-date test sits in the ON clause, so an order with no call_center
-- row whose cc_rec_end_date is NULL is kept with NULL call-center columns.
LEFT JOIN call_center
    ON ( cord_call_center_id = cc_call_center_id
         AND cc_rec_end_date IS NULL )
LEFT JOIN ship_mode
    ON ( cord_ship_mode_id = sm_ship_mode_id )
-- Inner join: orders without any line item produce no rows.
INNER JOIN s_catalog_order_lineitem_1
    ON ( cord_order_id = clin_order_id )
LEFT JOIN date_dim d2
    ON ( CAST(clin_ship_date AS DATE) = d2.d_date )
LEFT JOIN catalog_page
    ON ( clin_catalog_page_number = cp_catalog_page_number
         AND clin_catalog_number = cp_catalog_number )
LEFT JOIN warehouse
    ON ( clin_warehouse_id = w_warehouse_id )
-- As with call_center, the end-date test is part of the join condition, so
-- line items without a matching item row are kept with NULL item columns.
LEFT JOIN item
    ON ( clin_item_id = i_item_id
         AND i_rec_end_date IS NULL )
LEFT JOIN promotion
    ON ( clin_promotion_id = p_promo_id );

-- Appends every row of the csv view to catalog_sales. No target column list
-- is given, so values are assigned by position: the select order below must
-- follow the column order of catalog_sales.
INSERT INTO catalog_sales
SELECT
    cs_sold_date_sk,
    cs_sold_time_sk,
    cs_ship_date_sk,
    cs_bill_customer_sk,
    cs_bill_cdemo_sk,
    cs_bill_hdemo_sk,
    cs_bill_addr_sk,
    cs_ship_customer_sk,
    cs_ship_cdemo_sk,
    cs_ship_hdemo_sk,
    cs_ship_addr_sk,
    cs_call_center_sk,
    cs_catalog_page_sk,
    cs_ship_mode_sk,
    cs_warehouse_sk,
    cs_item_sk,
    cs_promo_sk,
    cs_order_number,
    CAST(cs_quantity AS INTEGER),
    cs_wholesale_cost,
    cs_list_price,
    cs_sales_price,
    -- Every amount the view derives by arithmetic is narrowed explicitly
    -- (presumably to the target column type -- confirm against the
    -- catalog_sales DDL).
    CAST(cs_ext_discount_amt AS DECIMAL(7,2)),
    CAST(cs_ext_sales_price AS DECIMAL(7,2)),
    CAST(cs_ext_wholesale_cost AS DECIMAL(7,2)),
    CAST(cs_ext_list_price AS DECIMAL(7,2)),
    CAST(cs_ext_tax AS DECIMAL(7,2)),
    cs_coupon_amt,
    CAST(cs_ext_ship_cost AS DECIMAL(7,2)),
    CAST(cs_net_paid AS DECIMAL(7,2)),
    CAST(cs_net_paid_inc_tax AS DECIMAL(7,2)),
    CAST(cs_net_paid_inc_ship AS DECIMAL(7,2)),
    CAST(cs_net_paid_inc_ship_tax AS DECIMAL(7,2)),
    CAST(cs_net_profit AS DECIMAL(7,2))
FROM csv;
23 changes: 23 additions & 0 deletions benchmarks/tpc-ds/queries/refresh/LF_I.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
-- Rebuilds the iv view: rows from the s_inventory_1 staging table with the
-- warehouse, item and date business values resolved to surrogate keys
-- through left joins, plus the on-hand quantity passed through unchanged.
DROP VIEW IF EXISTS iv;

CREATE VIEW iv AS
SELECT
    d_date_sk        AS inv_date_sk,
    i_item_sk        AS inv_item_sk,
    w_warehouse_sk   AS inv_warehouse_sk,
    invn_qty_on_hand AS inv_quantity_on_hand
FROM s_inventory_1
LEFT JOIN warehouse
    ON ( invn_warehouse_id = w_warehouse_id )
-- The end-date test is part of the join condition, so staging rows without a
-- matching item row are kept with a NULL inv_item_sk.
LEFT JOIN item
    ON ( invn_item_id = i_item_id
         AND i_rec_end_date IS NULL )
-- NOTE(review): the date is matched as text (d_date cast to VARCHAR), so it
-- only matches when invn_date uses the same textual form the engine emits
-- for that cast -- confirm against the staging data.
LEFT JOIN date_dim
    ON ( CAST(d_date AS VARCHAR) = invn_date );

-- Appends every row of the iv view to inventory. No target column list is
-- given, so values are assigned by position: the select order below must
-- follow the column order of inventory.
INSERT INTO inventory
SELECT
    inv_date_sk,
    inv_item_sk,
    inv_warehouse_sk,
    inv_quantity_on_hand
FROM iv;
74 changes: 74 additions & 0 deletions benchmarks/tpc-ds/queries/refresh/LF_SR.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
-- Rebuilds the srv view: rows from the s_store_returns_1 staging table with
-- their business ids (sret_*_id) resolved to surrogate keys (*_sk) through
-- left joins on the dimension tables, and the derived amounts
-- sr_return_amt_inc_tax and sr_net_loss computed from the staging amounts.
DROP VIEW IF EXISTS srv;

CREATE VIEW srv AS
SELECT
    d_date_sk             AS sr_returned_date_sk,
    t_time_sk             AS sr_return_time_sk,
    i_item_sk             AS sr_item_sk,
    c_customer_sk         AS sr_customer_sk,
    c_current_cdemo_sk    AS sr_cdemo_sk,
    c_current_hdemo_sk    AS sr_hdemo_sk,
    c_current_addr_sk     AS sr_addr_sk,
    s_store_sk            AS sr_store_sk,
    r_reason_sk           AS sr_reason_sk,
    sret_ticket_number    AS sr_ticket_number,
    sret_return_qty       AS sr_return_quantity,
    sret_return_amt       AS sr_return_amt,
    sret_return_tax       AS sr_return_tax,
    sret_return_amt + sret_return_tax AS sr_return_amt_inc_tax,
    sret_return_fee       AS sr_fee,
    sret_return_ship_cost AS sr_return_ship_cost,
    sret_refunded_cash    AS sr_refunded_cash,
    sret_reversed_charge  AS sr_reversed_charge,
    sret_store_credit     AS sr_store_credit,
    sret_return_amt + sret_return_tax + sret_return_fee - sret_refunded_cash-sret_reversed_charge-sret_store_credit
        AS sr_net_loss
FROM s_store_returns_1
LEFT JOIN date_dim
    ON ( CAST(sret_return_date AS DATE) = d_date )
-- t_time is matched against a seconds count built from character positions
-- 1-2, 4-5 and 7-8 of sret_return_time (presumably 'HH:MM:SS' -- confirm).
LEFT JOIN time_dim
    ON ( ( CAST(substr(sret_return_time, 1, 2) AS INTEGER) * 3600 +
           CAST(substr(sret_return_time, 4, 2) AS INTEGER) * 60 +
           CAST(substr(sret_return_time, 7, 2) AS INTEGER) ) = t_time )
LEFT JOIN item
    ON ( sret_item_id = i_item_id )
LEFT JOIN customer
    ON ( sret_customer_id = c_customer_id )
LEFT JOIN store
    ON ( sret_store_id = s_store_id )
LEFT JOIN reason
    ON ( sret_reason_id = r_reason_id )
-- These filters run after the outer joins: a joined row survives only when
-- its item and store end dates are both NULL, which includes the case where
-- no item / store row matched at all.
WHERE i_rec_end_date IS NULL
  AND s_rec_end_date IS NULL;

-- Appends every row of the srv view to store_returns. No target column list
-- is given, so values are assigned by position: the select order below must
-- follow the column order of store_returns.
INSERT INTO store_returns
SELECT
    sr_returned_date_sk,
    sr_return_time_sk,
    sr_item_sk,
    sr_customer_sk,
    sr_cdemo_sk,
    sr_hdemo_sk,
    sr_addr_sk,
    sr_store_sk,
    sr_reason_sk,
    CAST(sr_ticket_number AS BIGINT),
    sr_return_quantity,
    sr_return_amt,
    sr_return_tax,
    -- Derived amounts are narrowed explicitly (presumably to the target
    -- column type -- confirm against the store_returns DDL).
    CAST(sr_return_amt_inc_tax AS DECIMAL(7,2)),
    sr_fee,
    sr_return_ship_cost,
    sr_refunded_cash,
    sr_reversed_charge,
    sr_store_credit,
    CAST(sr_net_loss AS DECIMAL(7,2))
FROM srv;
Loading