AntoinePrv commented on PR #46789: URL: https://github.com/apache/arrow/pull/46789#issuecomment-2992138287
Updated benchmarks: Benchmark result on my Macbook Pro M3: `archery benchmark diff --suite-filter=parquet-encoding --benchmark-filter='ByteStreamSplit'` <details> <summary>Show benchmark results</summary> ``` -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Non-regressions: (44) -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- benchmark baseline contender change % counters BM_ByteStreamSplitDecode_FLBA_Generic<2>/65536 6.940 GiB/sec 98.072 GiB/sec 1313.067 {'family_index': 2, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<2>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 40082} BM_ByteStreamSplitDecode_FLBA_Generic<2>/1024 7.017 GiB/sec 82.283 GiB/sec 1072.596 {'family_index': 2, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<2>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2573189} BM_ByteStreamSplitEncode_FLBA_Generic<2>/65536 5.736 GiB/sec 15.241 GiB/sec 165.694 {'family_index': 7, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<2>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 32985} BM_ByteStreamSplitEncode_FLBA_Generic<2>/1024 5.848 GiB/sec 15.384 GiB/sec 163.085 {'family_index': 7, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<2>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2152488} BM_ByteStreamSplitEncode_Double_Generic/1024 10.565 GiB/sec 17.702 GiB/sec 67.552 {'family_index': 6, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 962530} BM_ByteStreamSplitEncode_Double_Neon/4096 11.016 GiB/sec 17.869 GiB/sec 62.210 {'family_index': 17, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Neon/4096', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 252538} BM_ByteStreamSplitEncode_Double_Neon/1024 10.972 GiB/sec 17.720 GiB/sec 61.501 {'family_index': 17, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Neon/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 995648} BM_ByteStreamSplitEncode_Float_Neon/32768 8.057 GiB/sec 12.437 GiB/sec 54.357 {'family_index': 16, 'per_family_instance_index': 2, 'run_name': 'BM_ByteStreamSplitEncode_Float_Neon/32768', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 44834} BM_ByteStreamSplitEncode_Float_Neon/1024 8.527 GiB/sec 12.769 GiB/sec 49.742 {'family_index': 16, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Neon/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1564274} BM_ByteStreamSplitEncode_Float_Neon/4096 8.595 GiB/sec 12.806 GiB/sec 48.991 {'family_index': 16, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Neon/4096', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 393601} BM_ByteStreamSplitEncode_Float_Generic/1024 8.613 GiB/sec 12.825 GiB/sec 48.904 {'family_index': 5, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1600278} BM_ByteStreamSplitEncode_Float_Generic/65536 8.368 GiB/sec 12.420 GiB/sec 48.422 {'family_index': 5, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 24025} BM_ByteStreamSplitEncode_Float_Neon/65536 8.435 GiB/sec 12.403 GiB/sec 47.045 {'family_index': 16, 'per_family_instance_index': 3, 'run_name': 'BM_ByteStreamSplitEncode_Float_Neon/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 22557} BM_ByteStreamSplitDecode_Double_Neon/4096 7.535 GiB/sec 10.042 GiB/sec 33.283 {'family_index': 15, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Neon/4096', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 172085} BM_ByteStreamSplitEncode_Double_Generic/65536 9.480 GiB/sec 12.557 GiB/sec 32.462 {'family_index': 6, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 13207} BM_ByteStreamSplitDecode_Double_Generic/1024 7.542 GiB/sec 9.972 GiB/sec 32.218 {'family_index': 1, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 691788} BM_ByteStreamSplitDecode_Double_Neon/1024 7.522 GiB/sec 9.885 GiB/sec 31.414 {'family_index': 15, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Neon/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 691912} BM_ByteStreamSplitEncode_Double_Neon/65536 9.881 GiB/sec 12.746 GiB/sec 28.994 {'family_index': 17, 'per_family_instance_index': 3, 'run_name': 'BM_ByteStreamSplitEncode_Double_Neon/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 13988} BM_ByteStreamSplitEncode_Double_Neon/32768 10.037 GiB/sec 12.775 GiB/sec 27.279 {'family_index': 17, 'per_family_instance_index': 2, 'run_name': 'BM_ByteStreamSplitEncode_Double_Neon/32768', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 28680} BM_ByteStreamSplitDecode_Double_Neon/32768 7.341 GiB/sec 9.197 GiB/sec 25.290 {'family_index': 15, 'per_family_instance_index': 2, 'run_name': 'BM_ByteStreamSplitDecode_Double_Neon/32768', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 21045} BM_ByteStreamSplitDecode_Double_Generic/65536 7.306 GiB/sec 9.103 GiB/sec 24.596 {'family_index': 1, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10493} BM_ByteStreamSplitDecode_Double_Neon/65536 7.321 GiB/sec 9.112 GiB/sec 24.465 {'family_index': 15, 'per_family_instance_index': 3, 'run_name': 'BM_ByteStreamSplitDecode_Double_Neon/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10489} BM_ByteStreamSplitDecode_Float_Generic/65536 11.291 GiB/sec 11.684 GiB/sec 3.476 {'family_index': 0, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 32382} BM_ByteStreamSplitDecode_Float_Neon/65536 11.289 GiB/sec 11.679 GiB/sec 3.457 {'family_index': 14, 'per_family_instance_index': 3, 'run_name': 'BM_ByteStreamSplitDecode_Float_Neon/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 32372} BM_ByteStreamSplitDecode_Float_Neon/32768 11.374 GiB/sec 11.741 GiB/sec 3.232 {'family_index': 14, 'per_family_instance_index': 2, 'run_name': 'BM_ByteStreamSplitDecode_Float_Neon/32768', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 65275} BM_ByteStreamSplitDecode_Float_Neon/1024 11.613 GiB/sec 11.946 GiB/sec 2.871 {'family_index': 14, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Neon/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2128688} BM_ByteStreamSplitDecode_Float_Neon/4096 11.658 GiB/sec 11.989 GiB/sec 2.838 {'family_index': 14, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Neon/4096', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 534939} BM_ByteStreamSplitDecode_Float_Generic/1024 11.667 GiB/sec 11.849 GiB/sec 1.557 {'family_index': 0, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2143400} BM_ByteStreamSplitEncode_FLBA_Generic<7>/1024 5.816 GiB/sec 5.880 GiB/sec 1.100 {'family_index': 8, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<7>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 611034} BM_ByteStreamSplitDecode_FLBA_Generic<16>/65536 5.420 GiB/sec 5.454 GiB/sec 0.634 {'family_index': 4, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<16>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3891} BM_ByteStreamSplitEncode_Float_Scalar/65536 5.701 GiB/sec 5.732 GiB/sec 0.543 {'family_index': 12, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 16342} BM_ByteStreamSplitEncode_FLBA_Generic<7>/65536 5.767 GiB/sec 5.791 GiB/sec 0.417 {'family_index': 8, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<7>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 9425} BM_ByteStreamSplitEncode_Float_Scalar/1024 5.764 GiB/sec 5.786 GiB/sec 0.386 {'family_index': 12, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1058473} BM_ByteStreamSplitEncode_Double_Scalar/65536 5.702 GiB/sec 5.722 GiB/sec 0.359 {'family_index': 13, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 8188} BM_ByteStreamSplitDecode_Double_Scalar/1024 6.900 GiB/sec 6.920 GiB/sec 0.296 {'family_index': 11, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 633610} BM_ByteStreamSplitEncode_Double_Scalar/1024 5.804 GiB/sec 5.821 GiB/sec 0.277 {'family_index': 13, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 531846} BM_ByteStreamSplitEncode_FLBA_Generic<16>/1024 5.867 GiB/sec 5.880 GiB/sec 0.236 {'family_index': 9, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<16>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 268931} BM_ByteStreamSplitDecode_Float_Scalar/65536 6.894 GiB/sec 6.908 GiB/sec 0.198 {'family_index': 10, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 19890} BM_ByteStreamSplitDecode_Double_Scalar/65536 6.367 GiB/sec 6.373 GiB/sec 0.088 {'family_index': 11, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 9125} BM_ByteStreamSplitDecode_Float_Scalar/1024 6.931 GiB/sec 6.927 GiB/sec -0.051 {'family_index': 10, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1268944} BM_ByteStreamSplitDecode_FLBA_Generic<16>/1024 5.882 GiB/sec 5.878 GiB/sec -0.081 {'family_index': 4, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<16>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 270610} BM_ByteStreamSplitDecode_FLBA_Generic<7>/1024 6.858 GiB/sec 6.851 GiB/sec -0.113 {'family_index': 3, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<7>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 720150} BM_ByteStreamSplitEncode_FLBA_Generic<16>/65536 3.378 GiB/sec 3.355 GiB/sec -0.687 {'family_index': 9, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<16>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2422} BM_ByteStreamSplitDecode_FLBA_Generic<7>/65536 6.658 GiB/sec 6.583 GiB/sec -1.127 {'family_index': 3, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<7>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10875} ``` </details> Benchmark result on a Linux cloud instance: `archery benchmark diff --suite-filter=parquet-encoding --benchmark-filter='ByteStreamSplit' --cmake-extras -DARROW_SIMD_LEVEL=AVX2` <details> <summary>Show benchmark results</summary> ``` -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Non-regressions: (38) -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- benchmark baseline contender change % counters BM_ByteStreamSplitDecode_FLBA_Generic<2>/65536 4.614 GiB/sec 49.412 GiB/sec 970.959 {'family_index': 2, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<2>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 26754} BM_ByteStreamSplitDecode_FLBA_Generic<2>/1024 4.595 GiB/sec 48.644 GiB/sec 958.684 {'family_index': 2, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<2>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1692347} BM_ByteStreamSplitEncode_FLBA_Generic<2>/65536 4.170 GiB/sec 5.562 GiB/sec 33.389 {'family_index': 7, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<2>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 23946} BM_ByteStreamSplitEncode_FLBA_Generic<2>/1024 4.162 GiB/sec 5.492 GiB/sec 31.954 {'family_index': 7, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<2>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1474518} BM_ByteStreamSplitEncode_Double_Generic/1024 6.221 GiB/sec 7.877 GiB/sec 26.620 {'family_index': 6, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 565557} BM_ByteStreamSplitEncode_Double_Sse2/1024 6.559 GiB/sec 7.875 GiB/sec 20.069 {'family_index': 17, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Sse2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 600683} BM_ByteStreamSplitEncode_Double_Generic/65536 5.760 GiB/sec 6.111 GiB/sec 6.083 {'family_index': 6, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7824} BM_ByteStreamSplitDecode_Double_Avx2/65536 10.240 GiB/sec 10.852 GiB/sec 5.978 {'family_index': 19, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Avx2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 15511} BM_ByteStreamSplitEncode_Float_Avx2/65536 9.019 GiB/sec 9.512 GiB/sec 5.457 {'family_index': 20, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Avx2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 27108} BM_ByteStreamSplitEncode_Double_Sse2/65536 5.830 GiB/sec 6.110 GiB/sec 4.793 {'family_index': 17, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Sse2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 8353} BM_ByteStreamSplitDecode_Float_Sse2/65536 4.137 GiB/sec 4.294 GiB/sec 3.779 {'family_index': 14, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Sse2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 11844} BM_ByteStreamSplitDecode_Double_Sse2/1024 7.265 GiB/sec 7.514 GiB/sec 3.420 {'family_index': 15, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Sse2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 682842} BM_ByteStreamSplitEncode_Double_Scalar/65536 3.460 GiB/sec 3.574 GiB/sec 3.291 {'family_index': 13, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4957} BM_ByteStreamSplitDecode_Float_Sse2/1024 4.390 GiB/sec 4.532 GiB/sec 3.240 {'family_index': 14, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Sse2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 795509} BM_ByteStreamSplitDecode_Double_Generic/65536 10.515 GiB/sec 10.836 GiB/sec 3.046 {'family_index': 1, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 15030} BM_ByteStreamSplitDecode_Float_Generic/65536 15.745 GiB/sec 16.186 GiB/sec 2.801 {'family_index': 0, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 45781} BM_ByteStreamSplitEncode_Double_Scalar/1024 3.962 GiB/sec 4.041 GiB/sec 2.000 {'family_index': 13, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 366047} BM_ByteStreamSplitDecode_Float_Scalar/1024 5.508 GiB/sec 5.589 GiB/sec 1.464 {'family_index': 10, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1016018} BM_ByteStreamSplitDecode_FLBA_Generic<16>/1024 3.758 GiB/sec 3.800 GiB/sec 1.129 {'family_index': 4, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<16>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 172890} BM_ByteStreamSplitEncode_Float_Scalar/1024 4.347 GiB/sec 4.396 GiB/sec 1.128 {'family_index': 12, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 808562} BM_ByteStreamSplitDecode_FLBA_Generic<16>/65536 3.621 GiB/sec 3.656 GiB/sec 0.981 {'family_index': 4, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<16>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2595} BM_ByteStreamSplitEncode_FLBA_Generic<16>/1024 4.126 GiB/sec 4.162 GiB/sec 0.873 {'family_index': 9, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<16>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 185392} BM_ByteStreamSplitEncode_Float_Generic/65536 9.530 GiB/sec 9.587 GiB/sec 0.600 {'family_index': 5, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 27050} BM_ByteStreamSplitEncode_FLBA_Generic<7>/65536 4.011 GiB/sec 4.033 GiB/sec 0.539 {'family_index': 8, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<7>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6769} BM_ByteStreamSplitDecode_FLBA_Generic<7>/1024 4.256 GiB/sec 4.270 GiB/sec 0.342 {'family_index': 3, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<7>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 448606} BM_ByteStreamSplitDecode_Double_Sse2/65536 7.236 GiB/sec 7.253 GiB/sec 0.241 {'family_index': 15, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Sse2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10389} BM_ByteStreamSplitDecode_FLBA_Generic<7>/65536 4.232 GiB/sec 4.218 GiB/sec -0.316 {'family_index': 3, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<7>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6943} BM_ByteStreamSplitDecode_Float_Scalar/65536 5.566 GiB/sec 5.548 GiB/sec -0.324 {'family_index': 10, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 15955} BM_ByteStreamSplitDecode_Double_Scalar/1024 5.027 GiB/sec 4.996 GiB/sec -0.623 {'family_index': 11, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 461421} BM_ByteStreamSplitDecode_Double_Scalar/65536 4.685 GiB/sec 4.652 GiB/sec -0.700 {'family_index': 11, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6719} BM_ByteStreamSplitDecode_Float_Avx2/1024 17.004 GiB/sec 16.880 GiB/sec -0.731 {'family_index': 18, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Avx2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3126341} BM_ByteStreamSplitDecode_Float_Avx2/65536 16.708 GiB/sec 16.531 GiB/sec -1.059 {'family_index': 18, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Avx2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 48032} BM_ByteStreamSplitDecode_Double_Generic/1024 11.109 GiB/sec 10.981 GiB/sec -1.149 {'family_index': 1, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1020809} BM_ByteStreamSplitEncode_Float_Scalar/65536 3.723 GiB/sec 3.677 GiB/sec -1.232 {'family_index': 12, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10428} BM_ByteStreamSplitEncode_FLBA_Generic<7>/1024 4.266 GiB/sec 4.211 GiB/sec -1.287 {'family_index': 8, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<7>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 445795} BM_ByteStreamSplitDecode_Double_Avx2/1024 11.214 GiB/sec 11.010 GiB/sec -1.817 {'family_index': 19, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Avx2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1028473} BM_ByteStreamSplitDecode_Float_Generic/1024 17.161 GiB/sec 16.750 GiB/sec -2.397 {'family_index': 0, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2992305} BM_ByteStreamSplitEncode_FLBA_Generic<16>/65536 3.933 GiB/sec 3.821 GiB/sec -2.846 {'family_index': 9, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<16>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2739} ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Regressions: (4) ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ benchmark baseline contender change % counters BM_ByteStreamSplitEncode_Float_Generic/1024 9.709 GiB/sec 9.085 GiB/sec -6.433 {'family_index': 5, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1734205} BM_ByteStreamSplitEncode_Float_Avx2/1024 9.765 GiB/sec 9.135 GiB/sec -6.450 {'family_index': 20, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Avx2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1793045} BM_ByteStreamSplitEncode_Float_Sse2/65536 9.150 GiB/sec 6.217 GiB/sec -32.060 {'family_index': 16, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Sse2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 26799} BM_ByteStreamSplitEncode_Float_Sse2/1024 9.493 GiB/sec 6.056 GiB/sec -36.207 {'family_index': 16, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Sse2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1745048} ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org