This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 879aae39409 [SPARK-38920][SQL][TEST] Add ORC blockSize tests to BloomFilterBenchmark 879aae39409 is described below commit 879aae39409ae92f434c3bb4101d66334f9833dd Author: Dongjoon Hyun <dongj...@apache.org> AuthorDate: Sat Apr 16 19:05:22 2022 -0700 [SPARK-38920][SQL][TEST] Add ORC blockSize tests to BloomFilterBenchmark ### What changes were proposed in this pull request? This PR aims to improve `BloomFilterBenchmark` by adding more `blockSize` combination tests for ORC. - Java 8: https://github.com/dongjoon-hyun/spark/actions/runs/2178431204 - Java 11: https://github.com/dongjoon-hyun/spark/actions/runs/2178432284 - Java 17: https://github.com/dongjoon-hyun/spark/actions/runs/2178432661 ### Why are the changes needed? For Parquet, we had the benchmark already. This will provide a feature parity of the comparison. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test because this is a benchmark. Closes #36218 from dongjoon-hyun/SPARK-38920. Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../BloomFilterBenchmark-jdk11-results.txt | 112 +++++++++++++---- .../BloomFilterBenchmark-jdk17-results.txt | 132 ++++++++++++++++----- .../benchmarks/BloomFilterBenchmark-results.txt | 112 +++++++++++++---- .../execution/benchmark/BloomFilterBenchmark.scala | 30 +++-- 4 files changed, 304 insertions(+), 82 deletions(-) diff --git a/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt b/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt index fab16b64870..1bd32b0e7a9 100644 --- a/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt @@ -6,8 +6,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 20453 20495 60 4.9 204.5 1.0X -With bloom filter 22539 22694 218 4.4 225.4 0.9X +Without bloom filter 15574 15579 6 6.4 155.7 1.0X +With bloom filter 17915 17972 80 5.6 179.2 0.9X ================================================================================================ @@ -18,8 +18,80 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 1708 1800 129 58.5 17.1 1.0X -With bloom filter 1324 1357 47 75.5 13.2 1.3X +Without bloom filter, blocksize: 2097152 1667 1675 11 60.0 16.7 1.0X +With bloom filter, blocksize: 2097152 1098 1134 50 91.1 11.0 1.5X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Without bloom filter, blocksize: 4194304 1446 1514 97 69.2 14.5 1.0X +With bloom filter, blocksize: 4194304 1069 1145 108 93.6 10.7 1.4X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Without bloom filter, blocksize: 6291456 1436 1468 46 69.6 14.4 1.0X +With bloom filter, blocksize: 6291456 1035 1060 36 96.6 10.3 1.4X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Without bloom filter, blocksize: 8388608 1451 1488 52 68.9 14.5 1.0X +With bloom filter, blocksize: 8388608 1016 1027 15 98.4 10.2 1.4X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------- +Without bloom filter, blocksize: 12582912 1463 1463 1 68.4 14.6 1.0X +With bloom filter, blocksize: 12582912 1023 1041 24 97.7 10.2 1.4X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------- +Without bloom filter, blocksize: 16777216 1473 1505 46 67.9 14.7 1.0X +With bloom filter, blocksize: 16777216 997 1016 26 100.3 10.0 1.5X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------- +Without bloom filter, blocksize: 33554432 1440 1482 59 69.4 14.4 1.0X +With bloom filter, blocksize: 33554432 1037 1065 40 96.4 10.4 1.4X ================================================================================================ @@ -30,8 +102,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 17586 17666 112 5.7 175.9 1.0X -With bloom filter 21429 21507 111 4.7 214.3 0.8X +Without bloom filter 16645 16907 371 6.0 166.4 1.0X +With bloom filter 20968 21145 250 4.8 209.7 0.8X ================================================================================================ @@ -42,8 +114,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 1219 1246 39 82.1 12.2 1.0X -With bloom filter, blocksize: 2097152 379 420 48 264.0 3.8 3.2X +Without bloom filter, blocksize: 2097152 1101 1106 7 90.8 11.0 1.0X +With bloom filter, blocksize: 2097152 308 365 37 325.2 3.1 3.6X ================================================================================================ @@ -54,8 +126,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 1015 1023 11 98.5 10.2 1.0X -With bloom filter, blocksize: 4194304 237 267 25 422.1 2.4 4.3X +Without bloom filter, blocksize: 4194304 933 970 33 107.2 9.3 1.0X +With bloom filter, blocksize: 4194304 269 302 32 371.1 2.7 3.5X ================================================================================================ @@ -66,8 +138,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 997 1008 16 100.3 10.0 1.0X -With bloom filter, blocksize: 6291456 268 290 11 372.6 2.7 3.7X +Without bloom filter, blocksize: 6291456 977 1026 69 102.3 9.8 1.0X +With bloom filter, blocksize: 6291456 358 379 14 279.6 3.6 2.7X ================================================================================================ @@ -78,8 +150,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 957 1014 81 104.5 9.6 1.0X -With bloom filter, blocksize: 8388608 361 401 33 277.0 3.6 2.6X +Without bloom filter, blocksize: 8388608 1009 1026 24 99.2 10.1 1.0X +With bloom filter, blocksize: 8388608 371 395 28 269.3 3.7 2.7X ================================================================================================ @@ -90,8 +162,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 1015 1033 26 98.5 10.1 1.0X -With bloom filter, blocksize: 12582912 606 635 29 165.2 6.1 1.7X +Without bloom filter, blocksize: 12582912 972 977 6 102.9 9.7 1.0X +With bloom filter, blocksize: 12582912 695 725 30 143.9 6.9 1.4X ================================================================================================ @@ -102,8 +174,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 1038 1055 23 96.3 10.4 1.0X -With bloom filter, blocksize: 16777216 901 905 4 111.0 9.0 1.2X +Without bloom filter, blocksize: 16777216 938 946 8 106.6 9.4 1.0X +With bloom filter, blocksize: 16777216 833 870 45 120.0 8.3 1.1X ================================================================================================ @@ -114,7 +186,7 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 941 955 13 106.3 9.4 1.0X -With bloom filter, blocksize: 33554432 888 930 39 112.6 8.9 1.1X +Without bloom filter, blocksize: 33554432 929 955 24 107.6 9.3 1.0X +With bloom filter, blocksize: 33554432 1003 1010 10 99.7 10.0 0.9X diff --git a/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt b/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt index 1288a06ec53..cd3ec1b3660 100644 --- a/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt +++ b/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt @@ -3,11 +3,11 @@ ORC Write ================================================================================================ OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 19097 19523 602 5.2 191.0 1.0X -With bloom filter 22213 22402 267 4.5 222.1 0.9X +Without bloom filter 15778 15792 19 6.3 157.8 1.0X +With bloom filter 17951 18076 178 5.6 179.5 0.9X ================================================================================================ @@ -15,11 +15,83 @@ ORC Read ================================================================================================ OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 1528 1628 142 65.4 15.3 1.0X -With bloom filter 1370 1391 29 73.0 13.7 1.1X +Without bloom filter, blocksize: 2097152 1327 1404 109 75.4 13.3 1.0X +With bloom filter, blocksize: 2097152 929 943 24 107.6 9.3 1.4X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Without bloom filter, blocksize: 4194304 1342 1348 8 74.5 13.4 1.0X +With bloom filter, blocksize: 4194304 1085 1087 2 92.1 10.9 1.2X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Without bloom filter, blocksize: 6291456 1325 1332 10 75.5 13.3 1.0X +With bloom filter, blocksize: 6291456 1115 1117 3 89.7 11.1 1.2X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Without bloom filter, blocksize: 8388608 1203 1213 14 83.1 12.0 1.0X +With bloom filter, blocksize: 8388608 1168 1171 4 85.6 11.7 1.0X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------- +Without bloom filter, blocksize: 12582912 1774 1781 10 56.4 17.7 1.0X +With bloom filter, blocksize: 12582912 1171 1182 15 85.4 11.7 1.5X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------- +Without bloom filter, blocksize: 16777216 1723 1728 7 58.0 17.2 1.0X +With bloom filter, blocksize: 16777216 1329 1344 20 75.2 13.3 1.3X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------- +Without bloom filter, blocksize: 33554432 1847 1858 15 54.1 18.5 1.0X +With bloom filter, blocksize: 33554432 1222 1312 126 81.8 12.2 1.5X ================================================================================================ @@ -27,11 +99,11 @@ Parquet Write ================================================================================================ OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 17846 17981 191 5.6 178.5 1.0X -With bloom filter 22883 22982 140 4.4 228.8 0.8X +Without bloom filter 16902 16907 8 5.9 169.0 1.0X +With bloom filter 28237 28266 41 3.5 282.4 0.6X ================================================================================================ @@ -39,11 +111,11 @@ Parquet Read ================================================================================================ OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 947 1012 92 105.6 9.5 1.0X -With bloom filter, blocksize: 2097152 311 325 12 322.0 3.1 3.0X +Without bloom filter, blocksize: 2097152 763 796 31 131.1 7.6 1.0X +With bloom filter, blocksize: 2097152 248 261 13 403.2 2.5 3.1X ================================================================================================ @@ -51,11 +123,11 @@ Parquet Read ================================================================================================ OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 824 837 15 121.3 8.2 1.0X -With bloom filter, blocksize: 4194304 210 222 10 475.8 2.1 3.9X +Without bloom filter, blocksize: 4194304 1020 1020 0 98.0 10.2 1.0X +With bloom filter, blocksize: 4194304 193 201 11 517.5 1.9 5.3X ================================================================================================ @@ -63,11 +135,11 @@ Parquet Read ================================================================================================ OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 1210 1327 166 82.6 12.1 1.0X -With bloom filter, blocksize: 6291456 302 315 9 331.1 3.0 4.0X +Without bloom filter, blocksize: 6291456 1023 1023 1 97.8 10.2 1.0X +With bloom filter, blocksize: 6291456 298 306 8 336.0 3.0 3.4X ================================================================================================ @@ -75,11 +147,11 @@ Parquet Read ================================================================================================ OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 1244 1251 10 80.4 12.4 1.0X -With bloom filter, blocksize: 8388608 504 513 8 198.4 5.0 2.5X +Without bloom filter, blocksize: 8388608 1033 1038 6 96.8 10.3 1.0X +With bloom filter, blocksize: 8388608 459 467 4 217.7 4.6 2.2X ================================================================================================ @@ -87,11 +159,11 @@ Parquet Read ================================================================================================ OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 1384 1408 34 72.3 13.8 1.0X -With bloom filter, blocksize: 12582912 900 955 49 111.1 9.0 1.5X +Without bloom filter, blocksize: 12582912 1077 1084 11 92.9 10.8 1.0X +With bloom filter, blocksize: 12582912 734 741 9 136.2 7.3 1.5X ================================================================================================ @@ -99,11 +171,11 @@ Parquet Read ================================================================================================ OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 1468 1482 19 68.1 14.7 1.0X -With bloom filter, blocksize: 16777216 1302 1313 15 76.8 13.0 1.1X +Without bloom filter, blocksize: 16777216 1044 1047 4 95.7 10.4 1.0X +With bloom filter, blocksize: 16777216 825 835 11 121.2 8.2 1.3X ================================================================================================ @@ -111,10 +183,10 @@ Parquet Read ================================================================================================ OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 1444 1451 9 69.2 14.4 1.0X -With bloom filter, blocksize: 33554432 1458 1461 3 68.6 14.6 1.0X +Without bloom filter, blocksize: 33554432 1232 1237 6 81.2 12.3 1.0X +With bloom filter, blocksize: 33554432 1152 1199 67 86.8 11.5 1.1X diff --git a/sql/core/benchmarks/BloomFilterBenchmark-results.txt b/sql/core/benchmarks/BloomFilterBenchmark-results.txt index de191252961..745249db930 100644 --- a/sql/core/benchmarks/BloomFilterBenchmark-results.txt +++ b/sql/core/benchmarks/BloomFilterBenchmark-results.txt @@ -6,8 +6,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 17889 18038 211 5.6 178.9 1.0X -With bloom filter 20780 20941 228 4.8 207.8 0.9X +Without bloom filter 18682 18792 156 5.4 186.8 1.0X +With bloom filter 21347 21396 69 4.7 213.5 0.9X ================================================================================================ @@ -18,8 +18,80 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 1690 1694 6 59.2 16.9 1.0X -With bloom filter 1274 1291 24 78.5 12.7 1.3X +Without bloom filter, blocksize: 2097152 1656 1666 14 60.4 16.6 1.0X +With bloom filter, blocksize: 2097152 1203 1230 37 83.1 12.0 1.4X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Without bloom filter, blocksize: 4194304 1625 1632 10 61.6 16.2 1.0X +With bloom filter, blocksize: 4194304 1224 1284 85 81.7 12.2 1.3X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Without bloom filter, blocksize: 6291456 1599 1601 3 62.5 16.0 1.0X +With bloom filter, blocksize: 6291456 1162 1179 24 86.0 11.6 1.4X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Without bloom filter, blocksize: 8388608 1623 1638 21 61.6 16.2 1.0X +With bloom filter, blocksize: 8388608 1181 1197 23 84.7 11.8 1.4X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------- +Without bloom filter, blocksize: 12582912 1631 1643 18 61.3 16.3 1.0X +With bloom filter, blocksize: 12582912 1159 1183 34 86.3 11.6 1.4X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------- +Without bloom filter, blocksize: 16777216 1586 1597 16 63.0 15.9 1.0X +With bloom filter, blocksize: 16777216 1170 1175 7 85.5 11.7 1.4X + + +================================================================================================ +ORC Read +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------- +Without bloom filter, blocksize: 33554432 1646 1649 4 60.8 16.5 1.0X +With bloom filter, blocksize: 33554432 1186 1187 1 84.3 11.9 1.4X ================================================================================================ @@ -30,8 +102,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 15998 16218 311 6.3 160.0 1.0X -With bloom filter 29974 30180 291 3.3 299.7 0.5X +Without bloom filter 16633 16773 197 6.0 166.3 1.0X +With bloom filter 23442 23538 136 4.3 234.4 0.7X ================================================================================================ @@ -42,8 +114,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 1032 1039 10 96.9 10.3 1.0X -With bloom filter, blocksize: 2097152 285 302 22 350.7 2.9 3.6X +Without bloom filter, blocksize: 2097152 955 965 13 104.7 9.6 1.0X +With bloom filter, blocksize: 2097152 271 289 17 368.8 2.7 3.5X ================================================================================================ @@ -54,8 +126,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 927 939 11 107.9 9.3 1.0X -With bloom filter, blocksize: 4194304 239 263 27 418.3 2.4 3.9X +Without bloom filter, blocksize: 4194304 897 907 9 111.4 9.0 1.0X +With bloom filter, blocksize: 4194304 242 255 19 412.6 2.4 3.7X ================================================================================================ @@ -66,8 +138,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 958 968 11 104.3 9.6 1.0X -With bloom filter, blocksize: 6291456 279 302 17 357.9 2.8 3.4X +Without bloom filter, blocksize: 6291456 923 934 11 108.3 9.2 1.0X +With bloom filter, blocksize: 6291456 271 283 11 369.0 2.7 3.4X ================================================================================================ @@ -78,8 +150,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 898 930 31 111.4 9.0 1.0X -With bloom filter, blocksize: 8388608 360 380 13 278.0 3.6 2.5X +Without bloom filter, blocksize: 8388608 916 920 3 109.1 9.2 1.0X +With bloom filter, blocksize: 8388608 442 448 9 226.4 4.4 2.1X ================================================================================================ @@ -90,8 +162,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 937 954 25 106.7 9.4 1.0X -With bloom filter, blocksize: 12582912 650 696 40 153.9 6.5 1.4X +Without bloom filter, blocksize: 12582912 899 917 15 111.2 9.0 1.0X +With bloom filter, blocksize: 12582912 676 682 7 148.0 6.8 1.3X ================================================================================================ @@ -102,8 +174,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 934 949 14 107.1 9.3 1.0X -With bloom filter, blocksize: 16777216 925 935 9 108.2 9.2 1.0X +Without bloom filter, blocksize: 16777216 894 913 17 111.8 8.9 1.0X +With bloom filter, blocksize: 16777216 866 890 26 115.4 8.7 1.0X ================================================================================================ @@ -114,7 +186,7 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 910 931 18 109.8 9.1 1.0X -With bloom filter, blocksize: 33554432 910 927 15 109.9 9.1 1.0X +Without bloom filter, blocksize: 33554432 896 921 22 111.6 9.0 1.0X +With bloom filter, blocksize: 33554432 909 924 20 110.1 9.1 1.0X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala index ccb65c7d3ac..5f193109ca2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala @@ -61,21 +61,27 @@ object BloomFilterBenchmark extends SqlBasedBenchmark { } private def readORCBenchmark(): Unit = { - withTempPath { dir => - val path = dir.getCanonicalPath + val blockSizes = Seq(2 * 1024 * 1024, 4 * 1024 * 1024, 6 * 1024 * 1024, 8 * 1024 * 1024, + 12 * 1024 * 1024, 16 * 1024 * 1024, 32 * 1024 * 1024) + for (blocksize <- blockSizes) { + withTempPath { dir => + val path = dir.getCanonicalPath - df.write.orc(path + "/withoutBF") - df.write.option("orc.bloom.filter.columns", "value").orc(path + "/withBF") + df.write.option("orc.block.size", blocksize).orc(path + "/withoutBF") + df.write + .option("orc.block.size", blocksize) + .option("orc.bloom.filter.columns", "value").orc(path + "/withBF") - runBenchmark(s"ORC Read") { - val benchmark = new Benchmark(s"Read a row from ${scaleFactor}M rows", N, output = output) - benchmark.addCase("Without bloom filter") { _ => - spark.read.orc(path + "/withoutBF").where("value = 0").noop() - } - benchmark.addCase("With bloom filter") { _ => - spark.read.orc(path + "/withBF").where("value = 0").noop() + runBenchmark(s"ORC Read") { + val benchmark = new Benchmark(s"Read a row from ${scaleFactor}M rows", N, output = output) + benchmark.addCase("Without bloom filter, blocksize: " + blocksize) { _ => + spark.read.orc(path + "/withoutBF").where("value = 0").noop() + } + benchmark.addCase("With bloom filter, blocksize: " + blocksize) { _ => + spark.read.orc(path + "/withBF").where("value = 0").noop() + } + benchmark.run() } - benchmark.run() } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org