DUE TO SPAM, SIGN-UP IS DISABLED. Goto Selfserve wiki signup and request an account.
TPC-DS Benchmark
The TPC Benchmark DS (TPC-DS) is a decision support benchmark that models several generally applicable aspects of a decision support system, including queries and data maintenance. Although the underlying business model of TPC-DS is a retail product supplier, the database schema, data population, queries, data maintenance model and implementation rules have been designed to be broadly representative of modern decision support systems. (http://www.tpc.org/tpcds/)
This benchmark illustrates decision support systems that:
- Examine large volumes of data
- Give answers to real-world business questions
- Execute queries of various operational requirements and complexities (e.g., ad-hoc, reporting, iterative OLAP, data mining)
- Are characterized by high CPU and IO load
- Are periodically synchronized with source OLTP databases through database maintenance functions
Official scale factors of datasets are 100GB, 300GB, 1TB, 3TB, 10TB, 30TB and 100TB.
DDL for TPC-DS datasets
The TPC-DS schema models the sales and sales returns process for an organization that employs three primary sales channels: store, catalogs, and the Internet. The schema includes 7 fact tables and 17 dimension tables.
- Fact tables: store_sales, store_returns, catalog_sales, catalog_returns, web_sales, web_returns, inventory
- Dimension tables: store, call_center, catalog_page, web_site, web_page, warehouse, customer, customer_address, customer_demographics, date_dim, household_demographics, item, income_band, promotion, reason, ship_mode, time_dim
The following DDL statements are for them in alphabetical order.
create external table call_center (
cc_call_center_sk bigint,
cc_call_center_id text,
cc_rec_start_date date,
cc_rec_end_date date,
cc_closed_date_sk bigint,
cc_open_date_sk bigint,
cc_name text,
cc_class text,
cc_employees bigint,
cc_sq_ft bigint,
cc_hours text,
cc_manager text,
cc_mkt_id bigint,
cc_mkt_class text,
cc_mkt_desc text,
cc_market_manager text,
cc_division bigint,
cc_division_name text,
cc_company bigint,
cc_company_name text,
cc_street_number text,
cc_street_name text,
cc_street_type text,
cc_suite_number text,
cc_city text,
cc_county text,
cc_state text,
cc_zip text,
cc_country text,
cc_gmt_offset double,
cc_tax_percentage double
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/call_center';
create external table catalog_page (
cp_catalog_page_sk bigint,
cp_catalog_page_id text,
cp_start_date_sk bigint,
cp_end_date_sk bigint,
cp_department text,
cp_catalog_number bigint,
cp_catalog_page_number bigint,
cp_description text,
cp_type text
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/catalog_page';
create external table catalog_returns (
cr_returned_date_sk bigint,
cr_returned_time_sk bigint,
cr_item_sk bigint,
cr_refunded_customer_sk bigint,
cr_refunded_cdemo_sk bigint,
cr_refunded_hdemo_sk bigint,
cr_refunded_addr_sk bigint,
cr_returning_customer_sk bigint,
cr_returning_cdemo_sk bigint,
cr_returning_hdemo_sk bigint,
cr_returning_addr_sk bigint,
cr_call_center_sk bigint,
cr_catalog_page_sk bigint,
cr_ship_mode_sk bigint,
cr_warehouse_sk bigint,
cr_reason_sk bigint,
cr_order_number bigint,
cr_return_quantity bigint,
cr_return_amount double,
cr_return_tax double,
cr_return_amt_inc_tax double,
cr_fee double,
cr_return_ship_cost double,
cr_refunded_cash double,
cr_reversed_charge double,
cr_store_credit double,
cr_net_loss double
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/catalog_returns';
create external table catalog_sales (
cs_sold_date_sk bigint,
cs_sold_time_sk bigint,
cs_ship_date_sk bigint,
cs_bill_customer_sk bigint,
cs_bill_cdemo_sk bigint,
cs_bill_hdemo_sk bigint,
cs_bill_addr_sk bigint,
cs_ship_customer_sk bigint,
cs_ship_cdemo_sk bigint,
cs_ship_hdemo_sk bigint,
cs_ship_addr_sk bigint,
cs_call_center_sk bigint,
cs_catalog_page_sk bigint,
cs_ship_mode_sk bigint,
cs_warehouse_sk bigint,
cs_item_sk bigint,
cs_promo_sk bigint,
cs_order_number bigint,
cs_quantity bigint,
cs_wholesale_cost double,
cs_list_price double,
cs_sales_price double,
cs_ext_discount_amt double,
cs_ext_sales_price double,
cs_ext_wholesale_cost double,
cs_ext_list_price double,
cs_ext_tax double,
cs_coupon_amt double,
cs_ext_ship_cost double,
cs_net_paid double,
cs_net_paid_inc_tax double,
cs_net_paid_inc_ship double,
cs_net_paid_inc_ship_tax double,
cs_net_profit double
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/catalog_sales';
create external table customer (
c_customer_sk bigint,
c_customer_id text,
c_current_cdemo_sk bigint,
c_current_hdemo_sk bigint,
c_current_addr_sk bigint,
c_first_shipto_date_sk bigint,
c_first_sales_date_sk bigint,
c_salutation text,
c_first_name text,
c_last_name text,
c_preferred_cust_flag text,
c_birth_day bigint,
c_birth_month bigint,
c_birth_year bigint,
c_birth_country text,
c_login text,
c_email_address text,
c_last_review_date_sk bigint
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/customer';
create external table customer_address (
ca_address_sk bigint,
ca_address_id text,
ca_street_number text,
ca_street_name text,
ca_street_type text,
ca_suite_number text,
ca_city text,
ca_county text,
ca_state text,
ca_zip text,
ca_country text,
ca_gmt_offset double,
ca_location_type text
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/customer_address';
create external table customer_demographics (
cd_demo_sk bigint,
cd_gender text,
cd_marital_status text,
cd_education_status text,
cd_purchase_estimate bigint,
cd_credit_rating text,
cd_dep_count bigint,
cd_dep_employed_count bigint,
cd_dep_college_count bigint
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/customer_demographics';
create external table date_dim (
d_date_sk bigint,
d_date_id text,
d_date date,
d_month_seq bigint,
d_week_seq bigint,
d_quarter_seq bigint,
d_year bigint,
d_dow bigint,
d_moy bigint,
d_dom bigint,
d_qoy bigint,
d_fy_year bigint,
d_fy_quarter_seq bigint,
d_fy_week_seq bigint,
d_day_name text,
d_quarter_name text,
d_holiday text,
d_weekend text,
d_following_holiday text,
d_first_dom bigint,
d_last_dom bigint,
d_same_day_ly bigint,
d_same_day_lq bigint,
d_current_day text,
d_current_week text,
d_current_month text,
d_current_quarter text,
d_current_year text
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/date_dim';
create external table household_demographics (
hd_demo_sk bigint,
hd_income_band_sk bigint,
hd_buy_potential text,
hd_dep_count bigint,
hd_vehicle_count bigint
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/household_demographics';
create external table income_band (
ib_income_band_sk bigint,
ib_lower_bound bigint,
ib_upper_bound bigint
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/income_band';
create external table inventory (
inv_date_sk bigint,
inv_item_sk bigint,
inv_warehouse_sk bigint,
inv_quantity_on_hand bigint
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/inventory';
create external table item (
i_item_sk bigint,
i_item_id text,
i_rec_start_date date,
i_rec_end_date date,
i_item_desc text,
i_current_price double,
i_wholesale_cost double,
i_brand_id bigint,
i_brand text,
i_class_id bigint,
i_class text,
i_category_id bigint,
i_category text,
i_manufact_id bigint,
i_manufact text,
i_size text,
i_formulation text,
i_color text,
i_units text,
i_container text,
i_manager_id bigint,
i_product_name text
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/item';
create external table promotion (
p_promo_sk bigint,
p_promo_id text,
p_start_date_sk bigint,
p_end_date_sk bigint,
p_item_sk bigint,
p_cost double,
p_response_target bigint,
p_promo_name text,
p_channel_dmail text,
p_channel_email text,
p_channel_catalog text,
p_channel_tv text,
p_channel_radio text,
p_channel_press text,
p_channel_event text,
p_channel_demo text,
p_channel_details text,
p_purpose text,
p_discount_active text
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/promotion';
create external table reason (
r_reason_sk bigint,
r_reason_id text,
r_reason_desc text
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/reason';
create external table ship_mode (
sm_ship_mode_sk bigint,
sm_ship_mode_id text,
sm_type text,
sm_code text,
sm_carrier text,
sm_contract text
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/ship_mode';
create external table store (
s_store_sk bigint,
s_store_id text,
s_rec_start_date date,
s_rec_end_date date,
s_closed_date_sk bigint,
s_store_name text,
s_number_employees bigint,
s_floor_space bigint,
s_hours text,
s_manager text,
s_market_id bigint,
s_geography_class text,
s_market_desc text,
s_market_manager text,
s_division_id bigint,
s_division_name text,
s_company_id bigint,
s_company_name text,
s_street_number text,
s_street_name text,
s_street_type text,
s_suite_number text,
s_city text,
s_county text,
s_state text,
s_zip text,
s_country text,
s_gmt_offset double,
s_tax_precentage double
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/store';
create external table store_returns (
sr_returned_date_sk bigint,
sr_return_time_sk bigint,
sr_item_sk bigint,
sr_customer_sk bigint,
sr_cdemo_sk bigint,
sr_hdemo_sk bigint,
sr_addr_sk bigint,
sr_store_sk bigint,
sr_reason_sk bigint,
sr_ticket_number bigint,
sr_return_quantity bigint,
sr_return_amt double,
sr_return_tax double,
sr_return_amt_inc_tax double,
sr_fee double,
sr_return_ship_cost double,
sr_refunded_cash double,
sr_reversed_charge double,
sr_store_credit double,
sr_net_loss double
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/store_returns';
create external table store_sales (
ss_sold_date_sk bigint,
ss_sold_time_sk bigint,
ss_item_sk bigint,
ss_customer_sk bigint,
ss_cdemo_sk bigint,
ss_hdemo_sk bigint,
ss_addr_sk bigint,
ss_store_sk bigint,
ss_promo_sk bigint,
ss_ticket_number bigint,
ss_quantity bigint,
ss_wholesale_cost double,
ss_list_price double,
ss_sales_price double,
ss_ext_discount_amt double,
ss_ext_sales_price double,
ss_ext_wholesale_cost double,
ss_ext_list_price double,
ss_ext_tax double,
ss_coupon_amt double,
ss_net_paid double,
ss_net_paid_inc_tax double,
ss_net_profit double
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/store_sales';
create external table time_dim (
t_time_sk bigint,
t_time_id text,
t_time bigint,
t_hour bigint,
t_minute bigint,
t_second bigint,
t_am_pm text,
t_shift text,
t_sub_shift text,
t_meal_time text
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/time_dim';
create external table warehouse (
w_warehouse_sk bigint,
w_warehouse_id text,
w_warehouse_name text,
w_warehouse_sq_ft bigint,
w_street_number text,
w_street_name text,
w_street_type text,
w_suite_number text,
w_city text,
w_county text,
w_state text,
w_zip text,
w_country text,
w_gmt_offset double
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/warehouse';
create external table web_page (
wp_web_page_sk bigint,
wp_web_page_id text,
wp_rec_start_date date,
wp_rec_end_date date,
wp_creation_date_sk bigint,
wp_access_date_sk bigint,
wp_autogen_flag text,
wp_customer_sk bigint,
wp_url text,
wp_type text,
wp_char_count bigint,
wp_link_count bigint,
wp_image_count bigint,
wp_max_ad_count bigint
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/web_page';
create external table web_returns (
wr_returned_date_sk bigint,
wr_returned_time_sk bigint,
wr_item_sk bigint,
wr_refunded_customer_sk bigint,
wr_refunded_cdemo_sk bigint,
wr_refunded_hdemo_sk bigint,
wr_refunded_addr_sk bigint,
wr_returning_customer_sk bigint,
wr_returning_cdemo_sk bigint,
wr_returning_hdemo_sk bigint,
wr_returning_addr_sk bigint,
wr_web_page_sk bigint,
wr_reason_sk bigint,
wr_order_number bigint,
wr_return_quantity bigint,
wr_return_amt double,
wr_return_tax double,
wr_return_amt_inc_tax double,
wr_fee double,
wr_return_ship_cost double,
wr_refunded_cash double,
wr_reversed_charge double,
wr_account_credit double,
wr_net_loss double
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/web_returns';
create external table web_sales (
ws_sold_date_sk bigint,
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint,
ws_bill_customer_sk bigint,
ws_bill_cdemo_sk bigint,
ws_bill_hdemo_sk bigint,
ws_bill_addr_sk bigint,
ws_ship_customer_sk bigint,
ws_ship_cdemo_sk bigint,
ws_ship_hdemo_sk bigint,
ws_ship_addr_sk bigint,
ws_web_page_sk bigint,
ws_web_site_sk bigint,
ws_ship_mode_sk bigint,
ws_warehouse_sk bigint,
ws_promo_sk bigint,
ws_order_number bigint,
ws_quantity bigint,
ws_wholesale_cost double,
ws_list_price double,
ws_sales_price double,
ws_ext_discount_amt double,
ws_ext_sales_price double,
ws_ext_wholesale_cost double,
ws_ext_list_price double,
ws_ext_tax double,
ws_coupon_amt double,
ws_ext_ship_cost double,
ws_net_paid double,
ws_net_paid_inc_tax double,
ws_net_paid_inc_ship double,
ws_net_paid_inc_ship_tax double,
ws_net_profit double
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/web_sales';
create external table web_site (
web_site_sk bigint,
web_site_id text,
web_rec_start_date date,
web_rec_end_date date,
web_name text,
web_open_date_sk bigint,
web_close_date_sk bigint,
web_class text,
web_manager text,
web_mkt_id bigint,
web_mkt_class text,
web_mkt_desc text,
web_market_manager text,
web_company_id bigint,
web_company_name text,
web_street_number text,
web_street_name text,
web_street_type text,
web_suite_number text,
web_city text,
web_county text,
web_state text,
web_zip text,
web_country text,
web_gmt_offset double,
web_tax_percentage double
)
using text with ('text.delimiter'='|') location 'hdfs://x/y/web_site';
TPC-DS Queries
TPC-DS provides 99 queries. Since Tajo does not support some features like WITH clause, ROLLUP operation, and STDDEV_SAMP function, I omit queries containing them. The following are 26 queries tested on Tajo.
select d_year, i_brand_id brand_id, i_brand brand, sum(ss_ext_sales_price) sum_agg from date_dim, store_sales, item where d_date_sk = ss_sold_date_sk and ss_item_sk = i_item_sk and i_manufact_id = 436 and d_moy=12 group by d_year, i_brand, i_brand_id order by d_year, sum_agg desc, brand_id limit 100;
select i_item_id, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4
from store_sales, customer_demographics, date_dim, item, promotion
where ss_sold_date_sk = d_date_sk and ss_item_sk = i_item_sk and ss_cdemo_sk = cd_demo_sk and ss_promo_sk = p_promo_sk and
cd_gender = 'F' and cd_marital_status = 'W' and cd_education_status = 'Primary' and
(p_channel_email = 'N' or p_channel_event = 'N') and d_year = 1998
group by i_item_id
order by i_item_id
limit 100;
select ca_zip, sum(cs_sales_price)
from catalog_sales, customer, customer_address, date_dim
where cs_bill_customer_sk = c_customer_sk and c_current_addr_sk = ca_address_sk and
(substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792')
or ca_state in ('CA','WA','GA') or cs_sales_price > 500) and
cs_sold_date_sk = d_date_sk and d_qoy = 2 and d_year = 2000
group by ca_zip
order by ca_zip
limit 100;
select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) ext_price
from date_dim, store_sales, item, customer, customer_address, store
where d_date_sk = ss_sold_date_sk and ss_item_sk = i_item_sk and i_manager_id=7 and d_moy=11 and d_year=1999 and
ss_customer_sk = c_customer_sk and c_current_addr_sk = ca_address_sk and substr(ca_zip,1,5) <> substr(s_zip,1,5) and
ss_store_sk = s_store_sk
group by i_brand, i_brand_id, i_manufact_id, i_manufact
order by ext_price desc, i_brand, i_brand_id, i_manufact_id, i_manufact
limit 100;
select *
from (select w_warehouse_name, i_item_id,
sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) then inv_quantity_on_hand else 0 end) as inv_before,
sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) then inv_quantity_on_hand else 0 end) as inv_after
from inventory, warehouse, item, date_dim
where i_current_price between 0.99 and 1.49 and i_item_sk = inv_item_sk and inv_warehouse_sk = w_warehouse_sk and
inv_date_sk = d_date_sk and d_date between (cast ('1998-04-08' as date) - 30) and (cast ('1998-04-08' as date) + 30)
group by w_warehouse_name, i_item_id) x
where (case when inv_before > 0 then inv_after / inv_before else null end) between 2.0/3.0 and 3.0/2.0
order by w_warehouse_name, i_item_id
limit 100;
select i_item_id, i_item_desc, s_store_id, s_store_name,
sum(ss_net_profit) as store_sales_profit, sum(sr_net_loss) as store_returns_loss, sum(cs_net_profit) as catalog_sales_profit
from store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item
where d1.d_moy = 4 and d1.d_year = 2000 and d1.d_date_sk = ss_sold_date_sk and
i_item_sk = ss_item_sk and s_store_sk = ss_store_sk and
ss_customer_sk = sr_customer_sk and ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number and
sr_returned_date_sk = d2.d_date_sk and d2.d_moy between 4 and 10 and d2.d_year = 2000 and
sr_customer_sk = cs_bill_customer_sk and sr_item_sk = cs_item_sk and
cs_sold_date_sk = d3.d_date_sk and d3.d_moy between 4 and 10 and d3.d_year = 2000
group by i_item_id, i_item_desc, s_store_id, s_store_name
order by i_item_id, i_item_desc, s_store_id, s_store_name
limit 100;
select i_item_id, avg(cs_quantity) agg1, avg(cs_list_price) agg2, avg(cs_coupon_amt) agg3, avg(cs_sales_price) agg4
from catalog_sales, customer_demographics, date_dim, item, promotion
where cs_sold_date_sk = d_date_sk and cs_item_sk = i_item_sk and cs_bill_cdemo_sk = cd_demo_sk and
cs_promo_sk = p_promo_sk and cd_gender = 'F' and cd_marital_status = 'W' and cd_education_status = 'Primary' and
(p_channel_email = 'N' or p_channel_event = 'N') and d_year = 1998
group by i_item_id
order by i_item_id
limit 100;
select i_item_id, i_item_desc, s_store_id, s_store_name,
sum(ss_quantity) as store_sales_quantity,
sum(sr_return_quantity) as store_returns_quantity,
sum(cs_quantity) as catalog_sales_quantity
from store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item
where d1.d_moy = 4 and d1.d_year = 1999 and d1.d_date_sk = ss_sold_date_sk and i_item_sk = ss_item_sk and
s_store_sk = ss_store_sk and ss_customer_sk = sr_customer_sk and ss_item_sk = sr_item_sk and
ss_ticket_number = sr_ticket_number and sr_returned_date_sk = d2.d_date_sk and d2.d_moy between 4 and 4 + 3 and
d2.d_year = 1999 and sr_customer_sk = cs_bill_customer_sk and sr_item_sk = cs_item_sk and
cs_sold_date_sk = d3.d_date_sk and d3.d_year in (1999,1999+1,1999+2)
group by i_item_id, i_item_desc, s_store_id, s_store_name
order by i_item_id, i_item_desc, s_store_id, s_store_name
limit 100;
select c_last_name, c_first_name, c_salutation, c_preferred_cust_flag, ss_ticket_number, cnt
from (select ss_ticket_number, ss_customer_sk, count(*) cnt
from store_sales,date_dim,store,household_demographics
where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and
store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and
(date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) and
(household_demographics.hd_buy_potential = '>10000' or household_demographics.hd_buy_potential = 'unknown') and
household_demographics.hd_vehicle_count > 0 and
(case when household_demographics.hd_vehicle_count > 0
then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count
else null end) > 1.2 and
date_dim.d_year in (1998,1998+1,1998+2) and
store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County',
'Williamson County','Williamson County','Williamson County','Williamson County')
group by ss_ticket_number,ss_customer_sk) dn, customer
where ss_customer_sk = c_customer_sk and cnt between 15 and 20
order by c_last_name, c_first_name, c_salutation, c_preferred_cust_flag desc;
select i_item_id, i_item_desc, i_current_price
from item, inventory, date_dim, catalog_sales
where i_current_price between 22 and 22 + 30 and inv_item_sk = i_item_sk and d_date_sk=inv_date_sk and
d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60) and
i_manufact_id in (678,964,918,849) and inv_quantity_on_hand between 100 and 500 and cs_item_sk = i_item_sk
group by i_item_id, i_item_desc, i_current_price
order by i_item_id
limit 100;
select w_state, i_item_id,
sum(case when (cast(d_date as date) < cast ('1998-04-08' as date))
then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before ,
sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date))
then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after
from catalog_sales left outer join catalog_returns on (cs_order_number = cr_order_number and cs_item_sk = cr_item_sk),
warehouse, item, date_dim
where i_current_price between 0.99 and 1.49 and i_item_sk = cs_item_sk and cs_warehouse_sk = w_warehouse_sk and cs_sold_date_sk = d_date_sk and
d_date between (cast ('1998-04-08' as date) - 30) and (cast ('1998-04-08' as date) + 30)
group by w_state,i_item_id
order by w_state,i_item_id
limit 100;
select d_year, i_category_id, i_category, sum(ss_ext_sales_price) from date_dim, store_sales, item where d_date_sk = ss_sold_date_sk and ss_item_sk = i_item_sk and i_manager_id = 1 and d_moy=12 and d_year=1998 group by d_year, i_category_id, i_category order by sum(ss_ext_sales_price) desc, d_year, i_category_id, i_category limit 100;
select s_store_name, s_store_id,
sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales,
sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales,
sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales,
sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales,
sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales,
sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales,
sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales
from date_dim, store_sales, store
where d_date_sk = ss_sold_date_sk and s_store_sk = ss_store_sk and s_gmt_offset = -5 and d_year = 1998
group by s_store_name, s_store_id
order by s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, thu_sales, fri_sales, sat_sales
limit 100;
select c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number, amt, profit
from (select ss_ticket_number, ss_customer_sk, ca_city bought_city, sum(ss_coupon_amt) amt, sum(ss_net_profit) profit
from store_sales,date_dim,store,household_demographics,customer_address
where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and
store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_addr_sk = customer_address.ca_address_sk and
(household_demographics.hd_dep_count = 5 or household_demographics.hd_vehicle_count= 3) and
date_dim.d_dow in (6,0) and date_dim.d_year in (1999,1999+1,1999+2) and
store.s_city in ('Midway','Fairview','Fairview','Fairview','Fairview')
group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn, customer, customer_address current_addr
where ss_customer_sk = c_customer_sk and customer.c_current_addr_sk = current_addr.ca_address_sk and current_addr.ca_city <> bought_city
order by c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number
limit 100;
select s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, s_suite_number, s_city, s_county, s_state, s_zip,
sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days",
sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and
(sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days",
sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and
(sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days",
sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and
(sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days",
sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as ">120 days"
from store_sales, store_returns, store, date_dim d1, date_dim d2
where d2.d_year = 2000 and d2.d_moy = 9 and ss_ticket_number = sr_ticket_number and ss_item_sk = sr_item_sk and
ss_sold_date_sk = d1.d_date_sk and sr_returned_date_sk = d2.d_date_sk and ss_customer_sk = sr_customer_sk and
ss_store_sk = s_store_sk
group by s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, s_suite_number, s_city, s_county, s_state, s_zip
order by s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, s_suite_number, s_city, s_county, s_state, s_zip
limit 100;
select d_year, i_brand_id brand_id ,i_brand brand, sum(ss_ext_sales_price) ext_price from date_dim dt, store_sales, item where d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = i_item_sk and i_manager_id = 1 and d_moy=12 and d_year=1998 group by d_year, i_brand, i_brand_id order by d_year, ext_price desc, brand_id limit 100;
select i_brand_id brand_id, i_brand brand, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item where d_date_sk = ss_sold_date_sk and ss_item_sk = i_item_sk and i_manager_id=36 and d_moy=12 and d_year=2001 group by i_brand, i_brand_id order by ext_price desc, i_brand_id limit 100;
select substr(w_warehouse_name,1,20), sm_type, web_name,
sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days",
sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and
(ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end) as "31-60 days",
sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and
(ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days",
sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and
(ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days",
sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as ">120 days"
from web_sales, warehouse, ship_mode, web_site, date_dim
where d_month_seq between 1212 and 1212 + 11 and ws_ship_date_sk = d_date_sk and ws_warehouse_sk = w_warehouse_sk and
ws_ship_mode_sk = sm_ship_mode_sk and ws_web_site_sk = web_site_sk
group by substr(w_warehouse_name,1,20), sm_type, web_name
order by substr(w_warehouse_name,1,20), sm_type, web_name
limit 100;
select s_store_name, i_item_desc, sc.revenue, i_current_price, i_wholesale_cost, i_brand
from store, item,
(select ss_store_sk, avg(revenue) as ave
from (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue
from store_sales, date_dim
where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11
group by ss_store_sk, ss_item_sk) sa
group by ss_store_sk) sb,
(select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue
from store_sales, date_dim
where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11
group by ss_store_sk, ss_item_sk) sc
where sb.ss_store_sk = sc.ss_store_sk and sc.revenue <= 0.1 * sb.ave and s_store_sk = sc.ss_store_sk and i_item_sk = sc.ss_item_sk
order by s_store_name, i_item_desc
limit 100;
select c_last_name ,c_first_name, ca_city, bought_city, ss_ticket_number, extended_price, extended_tax, list_price
from (select ss_ticket_number, ss_customer_sk, ca_city bought_city,
sum(ss_ext_sales_price) extended_price, sum(ss_ext_list_price) list_price, sum(ss_ext_tax) extended_tax
from store_sales, date_dim, store, household_demographics, customer_address
where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and
store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_addr_sk = customer_address.ca_address_sk and
date_dim.d_dom between 1 and 2 and
(household_demographics.hd_dep_count = 5 or household_demographics.hd_vehicle_count= 3) and
date_dim.d_year in (1999,1999+1,1999+2) and store.s_city in ('Midway','Fairview')
group by ss_ticket_number, ss_customer_sk, ss_addr_sk,ca_city) dn,
customer, customer_address current_addr
where ss_customer_sk = c_customer_sk and customer.c_current_addr_sk = current_addr.ca_address_sk and current_addr.ca_city <> bought_city
order by c_last_name, ss_ticket_number
limit 100;
select channel, col_name, d_year, d_qoy, i_category, count(*) sales_cnt, SUM(ext_sales_price) sales_amt
from (select 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price
from store_sales, item, date_dim
where ss_addr_sk IS NULL and ss_sold_date_sk=d_date_sk and ss_item_sk=i_item_sk
union all
select 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price
from web_sales, item, date_dim
where ws_web_page_sk IS NULL and ws_sold_date_sk=d_date_sk and ws_item_sk=i_item_sk
union all
select 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price
from catalog_sales, item, date_dim
where cs_warehouse_sk IS NULL and cs_sold_date_sk=d_date_sk and cs_item_sk=i_item_sk) foo
group by channel, col_name, d_year, d_qoy, i_category
order by channel, col_name, d_year, d_qoy, i_category
limit 100;
select c_last_name, c_first_name, substr(s_city,1,30), ss_ticket_number, amt, profit
from (select ss_ticket_number, ss_customer_sk, store.s_city, sum(ss_coupon_amt) amt, sum(ss_net_profit) profit
from store_sales,date_dim,store,household_demographics
where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and
store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and
(household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) and
date_dim.d_dow = 1 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_number_employees between 200 and 295
group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,
customer
where ss_customer_sk = c_customer_sk
order by c_last_name,c_first_name,substr(s_city,1,30), profit
limit 100;
select c_customer_id as customer_id, c_last_name || ', ' || c_first_name as customername
from customer, customer_address, customer_demographics, household_demographics, income_band, store_returns
where ca_city = 'Hopewell' and c_current_addr_sk = ca_address_sk and
ib_lower_bound >= 32287 and ib_upper_bound <= 32287 + 50000 and ib_income_band_sk = hd_income_band_sk and
cd_demo_sk = c_current_cdemo_sk and hd_demo_sk = c_current_hdemo_sk and sr_cdemo_sk = cd_demo_sk
order by c_customer_id
limit 100;
select cc_call_center_id Call_Center, cc_name Call_Center_Name, cc_manager Manager, sum(cr_net_loss) Returns_Loss from call_center, catalog_returns, date_dim, customer, customer_address, customer_demographics, household_demographics where cr_call_center_sk = cc_call_center_sk and cr_returned_date_sk = d_date_sk and cr_returning_customer_sk = c_customer_sk and cd_demo_sk = c_current_cdemo_sk and hd_demo_sk = c_current_hdemo_sk and ca_address_sk = c_current_addr_sk and d_year = 1999 and d_moy = 11 and ((cd_marital_status = 'M' and cd_education_status = 'Unknown') or (cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) and hd_buy_potential like '0-500%' and ca_gmt_offset = -7 group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status order by sum(cr_net_loss) desc;
select ss_customer_sk ,sum(act_sales) sumsales
from (select ss_item_sk, ss_ticket_number, ss_customer_sk,
case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price
else (ss_quantity*ss_sales_price) end act_sales
from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk and sr_ticket_number = ss_ticket_number), reason
where sr_reason_sk = r_reason_sk and r_reason_desc = 'Did not like the warranty') t
group by ss_customer_sk
order by sumsales, ss_customer_sk
limit 100;
select substr(w_warehouse_name,1,20), sm_type, cc_name,
sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days",
sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and
(cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days",
sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and
(cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days",
sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and
(cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days",
sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as ">120 days"
from catalog_sales, warehouse, ship_mode, call_center, date_dim
where d_month_seq between 1212 and 1212 + 11 and cs_ship_date_sk = d_date_sk and
cs_warehouse_sk = w_warehouse_sk and cs_ship_mode_sk = sm_ship_mode_sk and cs_call_center_sk = cc_call_center_sk
group by substr(w_warehouse_name,1,20), sm_type, cc_name
order by substr(w_warehouse_name,1,20), sm_type, cc_name
limit 100;
Reference
- http://www.tpc.org/TPC_Documents_Current_Versions/pdf/tpcds_1.3.1.pdf
- https://github.com/cloudera/impala-tpcds-kit
- https://github.com/hortonworks/hive-testbench