DUE TO SPAM, SIGN-UP IS DISABLED. Goto Selfserve wiki signup and request an account.
TPC-H Benchmark
TPC-H is an ad-hoc and decision support benchmark. Some of queries are available in the current Tajo. You can download the TPC-H data generator from here.
DDL for TPC-H datasets
TPC-H benchmark provides 8 table datasets. The below DDL statements is for them.
create external table supplier (
S_SUPPKEY bigint,
S_NAME text,
S_ADDRESS text,
S_NATIONKEY bigint,
S_PHONE text,
S_ACCTBAL double,
S_COMMENT text)
using text with ('text.delimiter'='|') location 'hdfs://x/y';
create external table lineitem (
L_ORDERKEY bigint,
L_PARTKEY bigint,
L_SUPPKEY bigint,
L_LINENUMBER bigint,
L_QUANTITY double,
L_EXTENDEDPRICE double,
L_DISCOUNT double,
L_TAX double,
L_RETURNFLAG text,
L_LINESTATUS text,
L_SHIPDATE date,
L_COMMITDATE date,
L_RECEIPTDATE date,
L_SHIPINSTRUCT text,
L_SHIPMODE text,
L_COMMENT text)
using text with ('text.delimiter'='|') location 'hdfs://x/y';
create external table part (
P_PARTKEY bigint,
P_NAME text,
P_MFGR text,
P_BRAND text,
P_TYPE text,
P_SIZE integer,
P_CONTAINER text,
P_RETAILPRICE double,
P_COMMENT text)
using text with ('text.delimiter'='|') location 'hdfs://x/y';
create external table partsupp (
PS_PARTKEY bigint,
PS_SUPPKEY bigint,
PS_AVAILQTY int,
PS_SUPPLYCOST double,
PS_COMMENT text)
using text with ('text.delimiter'='|') location 'hdfs://x/y';
create external table customer (
C_CUSTKEY bigint,
C_NAME text,
C_ADDRESS text,
C_NATIONKEY bigint,
C_PHONE text,
C_ACCTBAL double,
C_MKTSEGMENT text,
C_COMMENT text)
using text with ('text.delimiter'='|') location 'hdfs://x/y';
create external table orders (
O_ORDERKEY bigint,
O_CUSTKEY bigint,
O_ORDERSTATUS text,
O_TOTALPRICE double,
O_ORDERDATE date,
O_ORDERPRIORITY text,
O_CLERK text,
O_SHIPPRIORITY int,
O_COMMENT text)
using text with ('text.delimiter'='|') location 'hdfs://x/y';
create external table nation (
N_NATIONKEY bigint,
N_NAME text,
N_REGIONKEY bigint,
N_COMMENT text)
using text with ('text.delimiter'='|') location 'hdfs://x/y';
create external table region (
R_REGIONKEY bigint,
R_NAME text,
R_COMMENT text)
using text with ('text.delimiter'='|') location 'hdfs://x/y';
TPC-H Queries
Q1
select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice*(1-l_discount)) as sum_disc_price, sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count(*) as count_order from lineitem where l_shipdate <= '1998-09-01'::date group by l_returnflag, l_linestatus order by l_returnflag, l_linestatus
Q2
Tajo does not support sclar subquery yet. So, you should use multiple queries as follows:
create table nation_region as select n_regionkey, r_regionkey, n_nationkey, n_name, r_name from region join nation on n_regionkey = r_regionkey where r_name = 'EUROPE'; create table r2_1 as select s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, ps_supplycost from nation_region join supplier on s_nationkey = n_nationkey join partsupp on s_suppkey = ps_suppkey join part on p_partkey = ps_partkey where p_size = 15 and p_type like '%BRASS'; create table r2_2 as select p_partkey, min(ps_supplycost) as min_ps_supplycost from r2_1 group by p_partkey; select s_acctbal, s_name, n_name, r2_1.p_partkey, p_mfgr, s_address, s_phone, s_comment from r2_1 join r2_2 on r2_1.p_partkey = r2_2.p_partkey where ps_supplycost = min_ps_supplycost order by s_acctbal, n_name, s_name, r2_1.p_partkey;
Q3
select l_orderkey, sum(l_extendedprice*(1-l_discount)) as revenue, o_orderdate, o_shippriority from customer as c join orders as o on c.c_mktsegment = 'BUILDING' and c.c_custkey = o.o_custkey join lineitem as l on l.l_orderkey = o.o_orderkey where o_orderdate < '1995-03-15'::date and l_shipdate > '1995-03-15'::date group by l_orderkey, o_orderdate, o_shippriority order by revenue desc, o_orderdate;
Q5
select n_name, sum(l_extendedprice * (1 - l_discount)) as revenue from customer, orders, lineitem, supplier, nation, region where c_custkey = o_custkey and l_orderkey = o_orderkey and l_suppkey = s_suppkey and c_nationkey = s_nationkey and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'ASIA' and o_orderdate >= '1994-01-01'::date and o_orderdate < '1995-01-01'::date group by n_name order by revenue desc;
Q6
select sum(l_extendedprice*l_discount) as revenue from lineitem where l_shipdate >= '1994-01-01'::date and l_shipdate < '1995-01-01'::date and l_discount >= 0.05 and l_discount <= 0.07 and l_quantity < 24;
Q10
select c_custkey, c_name, sum(l_extendedprice * (1 - l_discount)) as revenue, c_acctbal, n_name, c_address, c_phone, c_comment from customer as c join nation as n on c.c_nationkey = n.n_nationkey join orders as o on c.c_custkey = o.o_custkey and o.o_orderdate >= '1993-10-01'::date and o.o_orderdate < '1994-01-01'::date join lineitem as l on l.l_orderkey = o.o_orderkey and l.l_returnflag = 'R' group by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment order by revenue desc
Q12
select l_shipmode, sum(case when o_orderpriority ='1-URGENT' or o_orderpriority ='2-HIGH' then 1 else 0 end) as high_line_count, sum(case when o_orderpriority <> '1-URGENT' and o_orderpriority <> '2-HIGH' then 1 else 0 end) as low_line_count from orders, lineitem where o_orderkey = l_orderkey and (l_shipmode = 'MAIL' or l_shipmode = 'SHIP') and l_commitdate < l_receiptdate and l_shipdate < l_commitdate and l_receiptdate >= '1994-01-01'::date and l_receiptdate < '1995-01-01'::date group by l_shipmode order by l_shipmode
Q14
select 100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice*(1-l_discount) else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue from lineitem, part where l_partkey = p_partkey and l_shipdate >= '1995-09-01'::date and l_shipdate < '1995-10-01'::date
Q19
select
sum(l_extendedprice * (1 - l_discount) ) as revenue
from
lineitem,
part
where
(
p_partkey = l_partkey
and p_brand = 'Brand#12'
and p_container in ( 'SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
and l_quantity >= 1 and l_quantity <= 1 + 10
and p_size between 1 and 5
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_partkey = l_partkey
and p_brand = 'Brand#23'
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
and l_quantity >= 10 and l_quantity <= 10 + 10
and p_size between 1 and 10
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_partkey = l_partkey
and p_brand = 'Brand#34'
and p_container in ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
and l_quantity >= 20 and l_quantity <= 20 + 10
and p_size between 1 and 15
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
);