diff --git a/.gitignore b/.gitignore index 7482630..1a2e520 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ /iceberg /iceberg-test .env +/benchmark/tpch-kit +/benchmark/data/*.tbl diff --git a/Makefile b/Makefile index a95bc34..ae5ea76 100644 --- a/Makefile +++ b/Makefile @@ -24,3 +24,12 @@ lint: outdated: devbox run "cd src && go list -u -m -f '{{if and .Update (not .Indirect)}}{{.}}{{end}}' all" + +pg-init: + devbox run initdb + +pg-up: + devbox services start postgresql + +pg-down: + devbox services stop postgresql diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..183cf98 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,74 @@ +# BemiDB Benchmark + +## Running the TPC-H Benchmark + +Set up a local PostgreSQL database: + +```sh +# Install PostgreSQL +make pg-init +make pg-up + +# Create the database +make sh +createdb tpch + +# Load the tables' structure +cd ./benchmark/data +psql tpch -f ./structure.ddl + +# Download and unzip "TPC-H_generated_data.zip" from the latest release into the "benchmark/data" directory +# And load the data +for i in `ls *.tbl`; do + table=${i/.tbl/} + echo "Loading $table..." 
+ sed 's/|$//' $i > /tmp/$i + psql tpch -q -c "TRUNCATE $table" + psql tpch -c "\\copy $table FROM '/tmp/$i' CSV DELIMITER '|'" +done + +# Run the queries +psql tpch -c "ANALYZE VERBOSE" +time psql tpch < ../queries.sql +``` + +## Generating the TPC-H Data + +Install the TPC-H benchmark kit: + +```sh +make sh +cd benchmark +git clone https://github.com/gregrahn/tpch-kit.git +cd tpch-kit/dbgen +make MACHINE=MACOS DATABASE=POSTGRESQL # Use MACHINE=LINUX for Linux +cd - +``` + +Copy tables' structure DDL file: + +```sh +export DSS_PATH="$(pwd)/data" +export DSS_CONFIG=./tpch-kit/dbgen +cp ./tpch-kit/dbgen/dss.ddl ./data/structure.ddl +``` + +Generate the data: + +```sh +./tpch-kit/dbgen/dbgen -vf -s 0.1 # Generate ~100MB of data +``` + +Generate the queries: + +```sh +mkdir /tmp/query-templates +for i in `ls query-templates/*.sql`; do + tac $i | sed '2s/;//' | tac > /tmp/$i # Remove ";" +done + +export DSS_QUERY=/tmp/query-templates +cd ./tpch-kit/dbgen +./qgen -v -s 0.1 | sed 's/limit -1//' | sed 's/day (3)/day/' > ../../queries.sql +cd - +``` diff --git a/benchmark/data/structure.ddl b/benchmark/data/structure.ddl new file mode 100644 index 0000000..89efe56 --- /dev/null +++ b/benchmark/data/structure.ddl @@ -0,0 +1,70 @@ +-- Sccsid: @(#)dss.ddl 2.1.8.1 +CREATE TABLE NATION ( N_NATIONKEY INTEGER NOT NULL, + N_NAME CHAR(25) NOT NULL, + N_REGIONKEY INTEGER NOT NULL, + N_COMMENT VARCHAR(152)); + +CREATE TABLE REGION ( R_REGIONKEY INTEGER NOT NULL, + R_NAME CHAR(25) NOT NULL, + R_COMMENT VARCHAR(152)); + +CREATE TABLE PART ( P_PARTKEY INTEGER NOT NULL, + P_NAME VARCHAR(55) NOT NULL, + P_MFGR CHAR(25) NOT NULL, + P_BRAND CHAR(10) NOT NULL, + P_TYPE VARCHAR(25) NOT NULL, + P_SIZE INTEGER NOT NULL, + P_CONTAINER CHAR(10) NOT NULL, + P_RETAILPRICE DECIMAL(15,2) NOT NULL, + P_COMMENT VARCHAR(23) NOT NULL ); + +CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER NOT NULL, + S_NAME CHAR(25) NOT NULL, + S_ADDRESS VARCHAR(40) NOT NULL, + S_NATIONKEY INTEGER NOT NULL, + S_PHONE CHAR(15) 
NOT NULL, + S_ACCTBAL DECIMAL(15,2) NOT NULL, + S_COMMENT VARCHAR(101) NOT NULL); + +CREATE TABLE PARTSUPP ( PS_PARTKEY INTEGER NOT NULL, + PS_SUPPKEY INTEGER NOT NULL, + PS_AVAILQTY INTEGER NOT NULL, + PS_SUPPLYCOST DECIMAL(15,2) NOT NULL, + PS_COMMENT VARCHAR(199) NOT NULL ); + +CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER NOT NULL, + C_NAME VARCHAR(25) NOT NULL, + C_ADDRESS VARCHAR(40) NOT NULL, + C_NATIONKEY INTEGER NOT NULL, + C_PHONE CHAR(15) NOT NULL, + C_ACCTBAL DECIMAL(15,2) NOT NULL, + C_MKTSEGMENT CHAR(10) NOT NULL, + C_COMMENT VARCHAR(117) NOT NULL); + +CREATE TABLE ORDERS ( O_ORDERKEY INTEGER NOT NULL, + O_CUSTKEY INTEGER NOT NULL, + O_ORDERSTATUS CHAR(1) NOT NULL, + O_TOTALPRICE DECIMAL(15,2) NOT NULL, + O_ORDERDATE DATE NOT NULL, + O_ORDERPRIORITY CHAR(15) NOT NULL, + O_CLERK CHAR(15) NOT NULL, + O_SHIPPRIORITY INTEGER NOT NULL, + O_COMMENT VARCHAR(79) NOT NULL); + +CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL); + diff --git a/benchmark/queries.sql b/benchmark/queries.sql new file mode 100644 index 0000000..e49837d --- /dev/null +++ b/benchmark/queries.sql @@ -0,0 +1,694 @@ +-- TPC TPC-H Parameter Substitution (Version 2.17.3 build 0) +-- using 1730817251 as a seed to the RNG + + +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + 
avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval '113' day +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus +; + + +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 31 + and p_type like '%TIN' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'AMERICA' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'AMERICA' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey +limit 100; + + +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'FURNITURE' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date '1995-03-09' + and l_shipdate > date '1995-03-09' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate +limit 10; + + +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1994-05-01' + and o_orderdate < date '1994-05-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority +; + + +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and 
s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'MIDDLE EAST' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year +group by + n_name +order by + revenue desc +; + + +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + and l_discount between 0.02 - 0.01 and 0.02 + 0.01 + and l_quantity < 25 +; + + +select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + extract(year from l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'ARGENTINA' and n2.n_name = 'ROMANIA') + or (n1.n_name = 'ROMANIA' and n2.n_name = 'ARGENTINA') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year +; + + +select + o_year, + sum(case + when nation = 'ROMANIA' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'EUROPE' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'ECONOMY ANODIZED BRASS' + ) as all_nations +group by + 
o_year +order by + o_year +; + + +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%peach%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc +; + + +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date '1994-02-01' + and o_orderdate < date '1994-02-01' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc +limit 20; + + +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'ETHIOPIA' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0010000000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'ETHIOPIA' + ) +order by + value desc +; + + +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('REG 
AIR', 'RAIL') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date '1994-01-01' + and l_receiptdate < date '1994-01-01' + interval '1' year +group by + l_shipmode +order by + l_shipmode +; + + +select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%pending%packages%' + group by + c_custkey + ) as c_orders (c_custkey, c_count) +group by + c_count +order by + custdist desc, + c_count desc +; + + +select + 100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' month +; + + with revenue (supplier_no, total_revenue) as ( + select + l_suppkey, + sum(l_extendedprice * (1-l_discount)) + from + lineitem + where + l_shipdate >= date '1995-04-01' + and l_shipdate < date '1995-04-01' + interval '3' month + group by + l_suppkey + ) + + + select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue + from + supplier, + revenue + where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue + ) + order by + s_suppkey + ; + + +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#53' + and p_type not like 'LARGE POLISHED%' + and p_size in (40, 33, 6, 45, 3, 20, 19, 25) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size +; + + +select + sum(l_extendedprice) / 7.0 as avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and 
p_brand = 'Brand#34' + and p_container = 'JUMBO DRUM' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ) +; + + +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 314 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate +limit 100; + + +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#44' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 3 and l_quantity <= 3 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#15' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 14 and l_quantity <= 14 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 27 and l_quantity <= 27 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) +; + + +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'orchid%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1993-01-01' + and l_shipdate < date 
'1993-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'RUSSIA' +order by + s_name +; + + +select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'ARGENTINA' +group by + s_name +order by + numwait desc, + s_name +limit 100; + + +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + ('32', '11', '19', '12', '22', '29', '20') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + ('32', '11', '19', '12', '22', '29', '20') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode +; diff --git a/benchmark/query-templates/1.sql b/benchmark/query-templates/1.sql new file mode 100644 index 0000000..d030959 --- /dev/null +++ b/benchmark/query-templates/1.sql @@ -0,0 +1,28 @@ +-- $ID$ +-- TPC-H/TPC-R Pricing Summary Report Query (Q1) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + 
avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval ':1' day (3) +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; +:n -1 diff --git a/benchmark/query-templates/10.sql b/benchmark/query-templates/10.sql new file mode 100644 index 0000000..aa52f8e --- /dev/null +++ b/benchmark/query-templates/10.sql @@ -0,0 +1,38 @@ +-- $ID$ +-- TPC-H/TPC-R Returned Item Reporting Query (Q10) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date ':1' + and o_orderdate < date ':1' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc; +:n 20 diff --git a/benchmark/query-templates/11.sql b/benchmark/query-templates/11.sql new file mode 100644 index 0000000..8d8b744 --- /dev/null +++ b/benchmark/query-templates/11.sql @@ -0,0 +1,34 @@ +-- $ID$ +-- TPC-H/TPC-R Important Stock Identification Query (Q11) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = ':1' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * :2 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = ':1' + ) +order by + value desc; +:n -1 diff --git a/benchmark/query-templates/12.sql b/benchmark/query-templates/12.sql new file mode 100644 index 0000000..2cc7a0f --- 
/dev/null +++ b/benchmark/query-templates/12.sql @@ -0,0 +1,35 @@ +-- $ID$ +-- TPC-H/TPC-R Shipping Modes and Order Priority Query (Q12) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in (':1', ':2') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date ':3' + and l_receiptdate < date ':3' + interval '1' year +group by + l_shipmode +order by + l_shipmode; +:n -1 diff --git a/benchmark/query-templates/13.sql b/benchmark/query-templates/13.sql new file mode 100644 index 0000000..20dd693 --- /dev/null +++ b/benchmark/query-templates/13.sql @@ -0,0 +1,27 @@ +-- $ID$ +-- TPC-H/TPC-R Customer Distribution Query (Q13) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%:1%:2%' + group by + c_custkey + ) as c_orders (c_custkey, c_count) +group by + c_count +order by + custdist desc, + c_count desc; +:n -1 diff --git a/benchmark/query-templates/14.sql b/benchmark/query-templates/14.sql new file mode 100644 index 0000000..27c5b47 --- /dev/null +++ b/benchmark/query-templates/14.sql @@ -0,0 +1,20 @@ +-- $ID$ +-- TPC-H/TPC-R Promotion Effect Query (Q14) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + 100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= date ':1' + 
and l_shipdate < date ':1' + interval '1' month; +:n -1 diff --git a/benchmark/query-templates/15.sql b/benchmark/query-templates/15.sql new file mode 100644 index 0000000..9d41d3c --- /dev/null +++ b/benchmark/query-templates/15.sql @@ -0,0 +1,39 @@ +-- $ID$ +-- TPC-H/TPC-R Top Supplier Query (Q15) +-- Variant A +-- Approved February 1998 +:x + with revenue (supplier_no, total_revenue) as ( + select + l_suppkey, + sum(l_extendedprice * (1-l_discount)) + from + lineitem + where + l_shipdate >= date ':1' + and l_shipdate < date ':1' + interval '3' month + group by + l_suppkey + ) + + :o + select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue + from + supplier, + revenue + where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue + ) + order by + s_suppkey; + :n -1 diff --git a/benchmark/query-templates/16.sql b/benchmark/query-templates/16.sql new file mode 100644 index 0000000..f07a965 --- /dev/null +++ b/benchmark/query-templates/16.sql @@ -0,0 +1,37 @@ +-- $ID$ +-- TPC-H/TPC-R Parts/Supplier Relationship Query (Q16) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> ':1' + and p_type not like ':2%' + and p_size in (:3, :4, :5, :6, :7, :8, :9, :10) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; +:n -1 diff --git a/benchmark/query-templates/17.sql b/benchmark/query-templates/17.sql new file mode 100644 index 0000000..bca3f1b --- /dev/null +++ b/benchmark/query-templates/17.sql @@ -0,0 +1,24 @@ +-- $ID$ +-- TPC-H/TPC-R Small-Quantity-Order Revenue Query (Q17) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + sum(l_extendedprice) / 7.0 as 
avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = ':1' + and p_container = ':2' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ); +:n -1 diff --git a/benchmark/query-templates/18.sql b/benchmark/query-templates/18.sql new file mode 100644 index 0000000..3f7e125 --- /dev/null +++ b/benchmark/query-templates/18.sql @@ -0,0 +1,39 @@ +-- $ID$ +-- TPC-H/TPC-R Large Volume Customer Query (Q18) +-- Function Query Definition +-- Approved February 1998 +:x +:o +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > :1 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate; +:n 100 diff --git a/benchmark/query-templates/19.sql b/benchmark/query-templates/19.sql new file mode 100644 index 0000000..a9c6e5d --- /dev/null +++ b/benchmark/query-templates/19.sql @@ -0,0 +1,42 @@ +-- $ID$ +-- TPC-H/TPC-R Discounted Revenue Query (Q19) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = ':1' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= :4 and l_quantity <= :4 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = ':2' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= :5 and l_quantity <= :5 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey 
+ and p_brand = ':3' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= :6 and l_quantity <= :6 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ); +:n -1 diff --git a/benchmark/query-templates/2.sql b/benchmark/query-templates/2.sql new file mode 100644 index 0000000..2c941f5 --- /dev/null +++ b/benchmark/query-templates/2.sql @@ -0,0 +1,50 @@ +-- $ID$ +-- TPC-H/TPC-R Minimum Cost Supplier Query (Q2) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = :1 + and p_type like '%:2' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = ':3' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = ':3' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey; +:n 100 diff --git a/benchmark/query-templates/20.sql b/benchmark/query-templates/20.sql new file mode 100644 index 0000000..23cecc7 --- /dev/null +++ b/benchmark/query-templates/20.sql @@ -0,0 +1,44 @@ +-- $ID$ +-- TPC-H/TPC-R Potential Part Promotion Query (Q20) +-- Function Query Definition +-- Approved February 1998 +:x +:o +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like ':1%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date ':2' + and l_shipdate < date ':2' + interval '1' year + ) + ) 
+ and s_nationkey = n_nationkey + and n_name = ':3' +order by + s_name; +:n -1 diff --git a/benchmark/query-templates/21.sql b/benchmark/query-templates/21.sql new file mode 100644 index 0000000..380bcfd --- /dev/null +++ b/benchmark/query-templates/21.sql @@ -0,0 +1,46 @@ +-- $ID$ +-- TPC-H/TPC-R Suppliers Who Kept Orders Waiting Query (Q21) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = ':1' +group by + s_name +order by + numwait desc, + s_name; +:n 100 diff --git a/benchmark/query-templates/22.sql b/benchmark/query-templates/22.sql new file mode 100644 index 0000000..3cf5b61 --- /dev/null +++ b/benchmark/query-templates/22.sql @@ -0,0 +1,44 @@ +-- $ID$ +-- TPC-H/TPC-R Global Sales Opportunity Query (Q22) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + (':1', ':2', ':3', ':4', ':5', ':6', ':7') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + (':1', ':2', ':3', ':4', ':5', ':6', ':7') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode; +:n -1 diff --git 
a/benchmark/query-templates/3.sql b/benchmark/query-templates/3.sql new file mode 100644 index 0000000..7c38828 --- /dev/null +++ b/benchmark/query-templates/3.sql @@ -0,0 +1,29 @@ +-- $ID$ +-- TPC-H/TPC-R Shipping Priority Query (Q3) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = ':1' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date ':2' + and l_shipdate > date ':2' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate; +:n 10 diff --git a/benchmark/query-templates/4.sql b/benchmark/query-templates/4.sql new file mode 100644 index 0000000..8a99c8d --- /dev/null +++ b/benchmark/query-templates/4.sql @@ -0,0 +1,28 @@ +-- $ID$ +-- TPC-H/TPC-R Order Priority Checking Query (Q4) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date ':1' + and o_orderdate < date ':1' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; +:n -1 diff --git a/benchmark/query-templates/5.sql b/benchmark/query-templates/5.sql new file mode 100644 index 0000000..499a735 --- /dev/null +++ b/benchmark/query-templates/5.sql @@ -0,0 +1,31 @@ +-- $ID$ +-- TPC-H/TPC-R Local Supplier Volume Query (Q5) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + 
and r_name = ':1' + and o_orderdate >= date ':2' + and o_orderdate < date ':2' + interval '1' year +group by + n_name +order by + revenue desc; +:n -1 diff --git a/benchmark/query-templates/6.sql b/benchmark/query-templates/6.sql new file mode 100644 index 0000000..8698a28 --- /dev/null +++ b/benchmark/query-templates/6.sql @@ -0,0 +1,16 @@ +-- $ID$ +-- TPC-H/TPC-R Forecasting Revenue Change Query (Q6) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= date ':1' + and l_shipdate < date ':1' + interval '1' year + and l_discount between :2 - 0.01 and :2 + 0.01 + and l_quantity < :3; +:n -1 diff --git a/benchmark/query-templates/7.sql b/benchmark/query-templates/7.sql new file mode 100644 index 0000000..d1a4441 --- /dev/null +++ b/benchmark/query-templates/7.sql @@ -0,0 +1,46 @@ +-- $ID$ +-- TPC-H/TPC-R Volume Shipping Query (Q7) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + extract(year from l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = ':1' and n2.n_name = ':2') + or (n1.n_name = ':2' and n2.n_name = ':1') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year; +:n -1 diff --git a/benchmark/query-templates/8.sql b/benchmark/query-templates/8.sql new file mode 100644 index 0000000..677e06f --- /dev/null +++ b/benchmark/query-templates/8.sql @@ -0,0 +1,44 @@ +-- $ID$ +-- TPC-H/TPC-R 
National Market Share Query (Q8) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + o_year, + sum(case + when nation = ':1' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = ':2' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = ':3' + ) as all_nations +group by + o_year +order by + o_year; +:n -1 diff --git a/benchmark/query-templates/9.sql b/benchmark/query-templates/9.sql new file mode 100644 index 0000000..1d63511 --- /dev/null +++ b/benchmark/query-templates/9.sql @@ -0,0 +1,39 @@ +-- $ID$ +-- TPC-H/TPC-R Product Type Profit Measure Query (Q9) +-- Functional Query Definition +-- Approved February 1998 +:x +:o +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%:1%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc; +:n -1 diff --git a/devbox.json b/devbox.json index c3b030a..05aa359 100644 --- a/devbox.json +++ b/devbox.json @@ -1,7 +1,8 @@ { "$schema": "https://raw.githubusercontent.com/jetify-com/devbox/0.13.1/.schema/devbox.schema.json", "packages": [ - "go@latest" + "go@latest", + 
"postgresql@latest" ], "shell": { "init_hook": [], diff --git a/devbox.lock b/devbox.lock index 845f5a1..5a0fa73 100644 --- a/devbox.lock +++ b/devbox.lock @@ -48,6 +48,131 @@ "store_path": "/nix/store/mi0ybwsm6pmxzv9hsm6bcbqaq1pkf8wh-go-1.23.1" } } + }, + "postgresql@latest": { + "last_modified": "2024-10-24T16:50:28Z", + "plugin_version": "0.0.2", + "resolved": "github:NixOS/nixpkgs/63487b2f26fa065cfeeaa47dddb08e2856ba53e8#postgresql", + "source": "devbox-search", + "version": "16.4", + "systems": { + "aarch64-darwin": { + "outputs": [ + { + "name": "out", + "path": "/nix/store/6dzxj78wph840cpwslh96s4gpm0iwch2-postgresql-16.4", + "default": true + }, + { + "name": "man", + "path": "/nix/store/z1n2vh799a5icpaxbrjfqsasagb276bk-postgresql-16.4-man", + "default": true + }, + { + "name": "dev", + "path": "/nix/store/afjpl8ilq8s6j6zh4qqyy6mxz3v2xbav-postgresql-16.4-dev" + }, + { + "name": "doc", + "path": "/nix/store/ry9d9by692xj92y5b9j6z0aa5y3lh3px-postgresql-16.4-doc" + }, + { + "name": "lib", + "path": "/nix/store/d1im42w02x8gl2y380r4hgj8xgkkkbwc-postgresql-16.4-lib" + } + ], + "store_path": "/nix/store/6dzxj78wph840cpwslh96s4gpm0iwch2-postgresql-16.4" + }, + "aarch64-linux": { + "outputs": [ + { + "name": "out", + "path": "/nix/store/37r0vmsb8xd1kv3wjd99kr59q99ja3g0-postgresql-16.4", + "default": true + }, + { + "name": "man", + "path": "/nix/store/mawnv85hv5y64csbmpgrnz88j7r8cby5-postgresql-16.4-man", + "default": true + }, + { + "name": "debug", + "path": "/nix/store/71hz4hv1n6ivymbzd0jm3a61cyj9fwh5-postgresql-16.4-debug" + }, + { + "name": "dev", + "path": "/nix/store/ibhwvhq4gkdibkfrkqg9vmip9mhhrg2q-postgresql-16.4-dev" + }, + { + "name": "doc", + "path": "/nix/store/rmvkab0pxjjjznk350syr3gzpa13dz1k-postgresql-16.4-doc" + }, + { + "name": "lib", + "path": "/nix/store/39mnmp40qhpq2h6r3cj66s23sb5fkzr6-postgresql-16.4-lib" + } + ], + "store_path": "/nix/store/37r0vmsb8xd1kv3wjd99kr59q99ja3g0-postgresql-16.4" + }, + "x86_64-darwin": { + "outputs": [ + { + "name": 
"out", + "path": "/nix/store/vlgydd1rakmw9j14i8dgrlhzj4pa82vi-postgresql-16.4", + "default": true + }, + { + "name": "man", + "path": "/nix/store/2wm2caki07a557z97228n2zxrd3a8j4b-postgresql-16.4-man", + "default": true + }, + { + "name": "doc", + "path": "/nix/store/r03r96a44grl85sflw6hvwwlrzr32rk9-postgresql-16.4-doc" + }, + { + "name": "lib", + "path": "/nix/store/cy3q9y20jwk1vkd6jxf3mnq6xzbb9dn8-postgresql-16.4-lib" + }, + { + "name": "dev", + "path": "/nix/store/96nxx00m06jl2jmvb16916l2rpwb13hk-postgresql-16.4-dev" + } + ], + "store_path": "/nix/store/vlgydd1rakmw9j14i8dgrlhzj4pa82vi-postgresql-16.4" + }, + "x86_64-linux": { + "outputs": [ + { + "name": "out", + "path": "/nix/store/mjjfx6yyaaba5hmv6bga20m8fxrca93l-postgresql-16.4", + "default": true + }, + { + "name": "man", + "path": "/nix/store/b8cvsw47h2487y4j805zi0645x3ajh1i-postgresql-16.4-man", + "default": true + }, + { + "name": "doc", + "path": "/nix/store/apbxfs52v8im9725mn2f1jhgbdfggrpd-postgresql-16.4-doc" + }, + { + "name": "lib", + "path": "/nix/store/32cprs7xwxvb0rw2imfrgy5vcacc27hc-postgresql-16.4-lib" + }, + { + "name": "debug", + "path": "/nix/store/alcnsd7fkkr3iipvcn9gzsyv16kab6m9-postgresql-16.4-debug" + }, + { + "name": "dev", + "path": "/nix/store/pqya8lq5jyplfmbmafrrwsrsi07d5ssn-postgresql-16.4-dev" + } + ], + "store_path": "/nix/store/mjjfx6yyaaba5hmv6bga20m8fxrca93l-postgresql-16.4" + } + } } } }