This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-benchmarks.git


The following commit(s) were added to refs/heads/main by this push:
     new af27848  feat: add TPC-DS data (#24)
af27848 is described below

commit af27848bd345df60c06f5f9fe9bbb2a547bdf113
Author: Oleks V <[email protected]>
AuthorDate: Wed Nov 26 17:22:53 2025 -0800

    feat: add TPC-DS data (#24)
    
    * chore: support specific query
    
    * Adding TPCDS SF1 example data along with updated `gen.sh`
    
    ---------
    
    Co-authored-by: ovoievodin <[email protected]>
---
 tpcds/Dockerfile                              |  23 ++++++++++++++++-------
 tpcds/data/sf1/call_center.parquet            | Bin 0 -> 20680 bytes
 tpcds/data/sf1/catalog_page.parquet           | Bin 0 -> 451245 bytes
 tpcds/data/sf1/catalog_returns.parquet        | Bin 0 -> 9877124 bytes
 tpcds/data/sf1/catalog_sales.parquet          | Bin 0 -> 85430135 bytes
 tpcds/data/sf1/customer.parquet               | Bin 0 -> 4574173 bytes
 tpcds/data/sf1/customer_address.parquet       | Bin 0 -> 895931 bytes
 tpcds/data/sf1/customer_demographics.parquet  | Bin 0 -> 3747969 bytes
 tpcds/data/sf1/date_dim.parquet               | Bin 0 -> 1411299 bytes
 tpcds/data/sf1/household_demographics.parquet | Bin 0 -> 25497 bytes
 tpcds/data/sf1/income_band.parquet            | Bin 0 -> 2698 bytes
 tpcds/data/sf1/inventory.parquet              | Bin 0 -> 15534351 bytes
 tpcds/data/sf1/item.parquet                   | Bin 0 -> 1190446 bytes
 tpcds/data/sf1/promotion.parquet              | Bin 0 -> 24047 bytes
 tpcds/data/sf1/reason.parquet                 | Bin 0 -> 3370 bytes
 tpcds/data/sf1/ship_mode.parquet              | Bin 0 -> 4921 bytes
 tpcds/data/sf1/store.parquet                  | Bin 0 -> 19512 bytes
 tpcds/data/sf1/store_returns.parquet          | Bin 0 -> 13787027 bytes
 tpcds/data/sf1/store_sales.parquet            | Bin 0 -> 104062329 bytes
 tpcds/data/sf1/time_dim.parquet               | Bin 0 -> 837756 bytes
 tpcds/data/sf1/warehouse.parquet              | Bin 0 -> 9351 bytes
 tpcds/data/sf1/web_page.parquet               | Bin 0 -> 10334 bytes
 tpcds/data/sf1/web_returns.parquet            | Bin 0 -> 5222963 bytes
 tpcds/data/sf1/web_sales.parquet              | Bin 0 -> 40606746 bytes
 tpcds/data/sf1/web_site.parquet               | Bin 0 -> 20192 bytes
 tpcds/gen.sh                                  |  16 +++++++++++-----
 26 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/tpcds/Dockerfile b/tpcds/Dockerfile
index ad1b00e..7b8e862 100644
--- a/tpcds/Dockerfile
+++ b/tpcds/Dockerfile
@@ -1,20 +1,29 @@
-FROM rust:1-slim-buster
+FROM rust:1-slim-bullseye
 
 RUN apt update && apt install -y zip gcc make flex bison byacc git
 
 # TPC-DS generator
 COPY tpc-ds-tool.zip .
 RUN unzip tpc-ds-tool.zip
-WORKDIR /DSGen-software-code-3.2.0rc1/tools
+
+# Find the actual extracted directory and create a consistent symlink
+RUN ls -la && \
+    EXTRACTED_DIR=$(find . -maxdepth 1 -name "DSGen-software-code*" -type d | head -1) && \
+    echo "Found directory: $EXTRACTED_DIR" && \
+    ln -sf "$EXTRACTED_DIR" /dsgen-tools
+
+WORKDIR /dsgen-tools/tools
 
 # Fix bad UTF-8 char
 RUN iconv -f ISO-8859-14 -t UTF-8 tpcds.dst > tpcds.dst2
 RUN mv tpcds.dst2 tpcds.dst
 
-# compile
-RUN make
+# compile with flags to handle multiple definitions
+RUN make CC="gcc -fcommon"
 
-# tpctools crate
-RUN cargo install tpctools
+# Copy and make gen.sh executable
+COPY gen.sh /usr/local/bin/gen.sh
+RUN chmod +x /usr/local/bin/gen.sh
 
-ADD gen.sh .
\ No newline at end of file
+# Set working directory to root for script execution
+WORKDIR /
\ No newline at end of file
diff --git a/tpcds/data/sf1/call_center.parquet b/tpcds/data/sf1/call_center.parquet
new file mode 100644
index 0000000..efeded2
Binary files /dev/null and b/tpcds/data/sf1/call_center.parquet differ
diff --git a/tpcds/data/sf1/catalog_page.parquet b/tpcds/data/sf1/catalog_page.parquet
new file mode 100644
index 0000000..c40f4a3
Binary files /dev/null and b/tpcds/data/sf1/catalog_page.parquet differ
diff --git a/tpcds/data/sf1/catalog_returns.parquet b/tpcds/data/sf1/catalog_returns.parquet
new file mode 100644
index 0000000..6d7f2fb
Binary files /dev/null and b/tpcds/data/sf1/catalog_returns.parquet differ
diff --git a/tpcds/data/sf1/catalog_sales.parquet b/tpcds/data/sf1/catalog_sales.parquet
new file mode 100644
index 0000000..2abdfac
Binary files /dev/null and b/tpcds/data/sf1/catalog_sales.parquet differ
diff --git a/tpcds/data/sf1/customer.parquet b/tpcds/data/sf1/customer.parquet
new file mode 100644
index 0000000..0301da3
Binary files /dev/null and b/tpcds/data/sf1/customer.parquet differ
diff --git a/tpcds/data/sf1/customer_address.parquet b/tpcds/data/sf1/customer_address.parquet
new file mode 100644
index 0000000..f99c948
Binary files /dev/null and b/tpcds/data/sf1/customer_address.parquet differ
diff --git a/tpcds/data/sf1/customer_demographics.parquet b/tpcds/data/sf1/customer_demographics.parquet
new file mode 100644
index 0000000..f21868e
Binary files /dev/null and b/tpcds/data/sf1/customer_demographics.parquet differ
diff --git a/tpcds/data/sf1/date_dim.parquet b/tpcds/data/sf1/date_dim.parquet
new file mode 100644
index 0000000..7ae176f
Binary files /dev/null and b/tpcds/data/sf1/date_dim.parquet differ
diff --git a/tpcds/data/sf1/household_demographics.parquet b/tpcds/data/sf1/household_demographics.parquet
new file mode 100644
index 0000000..d8046c8
Binary files /dev/null and b/tpcds/data/sf1/household_demographics.parquet differ
diff --git a/tpcds/data/sf1/income_band.parquet b/tpcds/data/sf1/income_band.parquet
new file mode 100644
index 0000000..b3b6e9e
Binary files /dev/null and b/tpcds/data/sf1/income_band.parquet differ
diff --git a/tpcds/data/sf1/inventory.parquet b/tpcds/data/sf1/inventory.parquet
new file mode 100644
index 0000000..d898890
Binary files /dev/null and b/tpcds/data/sf1/inventory.parquet differ
diff --git a/tpcds/data/sf1/item.parquet b/tpcds/data/sf1/item.parquet
new file mode 100644
index 0000000..76dbb94
Binary files /dev/null and b/tpcds/data/sf1/item.parquet differ
diff --git a/tpcds/data/sf1/promotion.parquet b/tpcds/data/sf1/promotion.parquet
new file mode 100644
index 0000000..83f8260
Binary files /dev/null and b/tpcds/data/sf1/promotion.parquet differ
diff --git a/tpcds/data/sf1/reason.parquet b/tpcds/data/sf1/reason.parquet
new file mode 100644
index 0000000..4155a60
Binary files /dev/null and b/tpcds/data/sf1/reason.parquet differ
diff --git a/tpcds/data/sf1/ship_mode.parquet b/tpcds/data/sf1/ship_mode.parquet
new file mode 100644
index 0000000..e4e6c94
Binary files /dev/null and b/tpcds/data/sf1/ship_mode.parquet differ
diff --git a/tpcds/data/sf1/store.parquet b/tpcds/data/sf1/store.parquet
new file mode 100644
index 0000000..4e9a7ae
Binary files /dev/null and b/tpcds/data/sf1/store.parquet differ
diff --git a/tpcds/data/sf1/store_returns.parquet b/tpcds/data/sf1/store_returns.parquet
new file mode 100644
index 0000000..e09a8c9
Binary files /dev/null and b/tpcds/data/sf1/store_returns.parquet differ
diff --git a/tpcds/data/sf1/store_sales.parquet b/tpcds/data/sf1/store_sales.parquet
new file mode 100644
index 0000000..036a85b
Binary files /dev/null and b/tpcds/data/sf1/store_sales.parquet differ
diff --git a/tpcds/data/sf1/time_dim.parquet b/tpcds/data/sf1/time_dim.parquet
new file mode 100644
index 0000000..12d2332
Binary files /dev/null and b/tpcds/data/sf1/time_dim.parquet differ
diff --git a/tpcds/data/sf1/warehouse.parquet b/tpcds/data/sf1/warehouse.parquet
new file mode 100644
index 0000000..ad85144
Binary files /dev/null and b/tpcds/data/sf1/warehouse.parquet differ
diff --git a/tpcds/data/sf1/web_page.parquet b/tpcds/data/sf1/web_page.parquet
new file mode 100644
index 0000000..dff04fc
Binary files /dev/null and b/tpcds/data/sf1/web_page.parquet differ
diff --git a/tpcds/data/sf1/web_returns.parquet b/tpcds/data/sf1/web_returns.parquet
new file mode 100644
index 0000000..99eadf1
Binary files /dev/null and b/tpcds/data/sf1/web_returns.parquet differ
diff --git a/tpcds/data/sf1/web_sales.parquet b/tpcds/data/sf1/web_sales.parquet
new file mode 100644
index 0000000..41b642f
Binary files /dev/null and b/tpcds/data/sf1/web_sales.parquet differ
diff --git a/tpcds/data/sf1/web_site.parquet b/tpcds/data/sf1/web_site.parquet
new file mode 100644
index 0000000..8cf6c8a
Binary files /dev/null and b/tpcds/data/sf1/web_site.parquet differ
diff --git a/tpcds/gen.sh b/tpcds/gen.sh
index c18b05b..c928e3b 100755
--- a/tpcds/gen.sh
+++ b/tpcds/gen.sh
@@ -1,6 +1,12 @@
 #!/bin/bash
-tpctools generate --benchmark tpcds \
-  --scale 100 \
-  --partitions 12 \
-  --generator-path /DSGen-software-code-3.2.0rc1/tools \
-  --output /data
\ No newline at end of file
+cd /dsgen-tools/tools
+
+for i in $(seq 1 12); do
+  mkdir -p /data/part_$i
+  ./dsdgen -scale 100 \
+           -dir /data/part_$i \
+           -parallel 12 \
+           -child $i \
+           -terminate n &
+done
+wait
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to