pitrou commented on PR #46789: URL: https://github.com/apache/arrow/pull/46789#issuecomment-2965978829
Here are the benchmark numbers on my local machine (AMD Ryzen 9 3900X, gcc 14.3): ``` -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Non-regressions: (42) -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- benchmark baseline contender change % counters BM_ByteStreamSplitDecode_FLBA_Generic<2>/1024 3.828 GiB/sec 60.992 GiB/sec 1493.306 {'family_index': 2, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<2>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1391248} BM_ByteStreamSplitDecode_FLBA_Generic<2>/65536 3.833 GiB/sec 54.779 GiB/sec 1329.148 {'family_index': 2, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<2>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 22088} BM_ByteStreamSplitEncode_FLBA_Generic<2>/1024 4.412 GiB/sec 7.365 GiB/sec 66.918 {'family_index': 7, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<2>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1693608} BM_ByteStreamSplitEncode_FLBA_Generic<2>/65536 4.584 GiB/sec 7.482 GiB/sec 63.227 {'family_index': 7, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<2>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 26266} BM_ByteStreamSplitEncode_Double_Generic/1024 7.398 GiB/sec 8.593 GiB/sec 16.151 {'family_index': 6, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Generic/1024', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 703188} BM_ByteStreamSplitEncode_Double_Sse2/1024 7.449 GiB/sec 8.622 GiB/sec 15.743 {'family_index': 17, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Sse2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 730024} BM_ByteStreamSplitEncode_Double_Avx2/1024 7.522 GiB/sec 8.611 GiB/sec 14.487 {'family_index': 21, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Avx2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 683492} BM_ByteStreamSplitEncode_Double_Generic/65536 7.098 GiB/sec 7.898 GiB/sec 11.280 {'family_index': 6, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10152} BM_ByteStreamSplitEncode_Double_Avx2/65536 7.343 GiB/sec 7.867 GiB/sec 7.130 {'family_index': 21, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Avx2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10406} BM_ByteStreamSplitEncode_Double_Sse2/65536 7.451 GiB/sec 7.867 GiB/sec 5.583 {'family_index': 17, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Sse2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10614} BM_ByteStreamSplitDecode_FLBA_Generic<16>/65536 3.661 GiB/sec 3.858 GiB/sec 5.388 {'family_index': 4, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<16>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2655} BM_ByteStreamSplitEncode_Double_Scalar/65536 4.831 GiB/sec 5.087 GiB/sec 5.282 {'family_index': 13, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7018} BM_ByteStreamSplitDecode_Float_Sse2/1024 7.210 GiB/sec 
7.584 GiB/sec 5.176 {'family_index': 14, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Sse2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1313805} BM_ByteStreamSplitDecode_Double_Scalar/1024 3.866 GiB/sec 4.065 GiB/sec 5.160 {'family_index': 11, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 356025} BM_ByteStreamSplitDecode_Double_Scalar/65536 3.806 GiB/sec 3.995 GiB/sec 4.977 {'family_index': 11, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5438} BM_ByteStreamSplitDecode_Float_Avx2/65536 18.926 GiB/sec 19.850 GiB/sec 4.884 {'family_index': 18, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Avx2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 54121} BM_ByteStreamSplitDecode_Float_Scalar/65536 3.882 GiB/sec 4.065 GiB/sec 4.724 {'family_index': 10, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 11103} BM_ByteStreamSplitEncode_Double_Scalar/1024 4.989 GiB/sec 5.217 GiB/sec 4.565 {'family_index': 13, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 455047} BM_ByteStreamSplitDecode_FLBA_Generic<16>/1024 3.847 GiB/sec 4.018 GiB/sec 4.443 {'family_index': 4, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<16>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 175342} BM_ByteStreamSplitDecode_Float_Scalar/1024 3.888 GiB/sec 4.059 GiB/sec 4.406 {'family_index': 10, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Scalar/1024', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 710875} BM_ByteStreamSplitEncode_FLBA_Generic<7>/65536 4.733 GiB/sec 4.930 GiB/sec 4.161 {'family_index': 8, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<7>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7786} BM_ByteStreamSplitDecode_Double_Generic/65536 12.680 GiB/sec 13.207 GiB/sec 4.155 {'family_index': 1, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 18332} BM_ByteStreamSplitDecode_Double_Avx2/65536 12.736 GiB/sec 13.263 GiB/sec 4.134 {'family_index': 19, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Avx2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 18314} BM_ByteStreamSplitDecode_Float_Avx2/1024 18.630 GiB/sec 19.393 GiB/sec 4.094 {'family_index': 18, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Avx2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3463316} BM_ByteStreamSplitEncode_Float_Avx2/65536 12.804 GiB/sec 13.321 GiB/sec 4.036 {'family_index': 20, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Avx2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 36806} BM_ByteStreamSplitDecode_Float_Generic/1024 18.800 GiB/sec 19.559 GiB/sec 4.035 {'family_index': 0, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3427881} BM_ByteStreamSplitDecode_FLBA_Generic<7>/1024 3.809 GiB/sec 3.958 GiB/sec 3.906 {'family_index': 3, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<7>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 400054} BM_ByteStreamSplitEncode_FLBA_Generic<7>/1024 
4.822 GiB/sec 5.010 GiB/sec 3.900 {'family_index': 8, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<7>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 506548} BM_ByteStreamSplitDecode_Float_Generic/65536 19.346 GiB/sec 20.089 GiB/sec 3.838 {'family_index': 0, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 55111} BM_ByteStreamSplitDecode_Double_Avx2/1024 13.357 GiB/sec 13.862 GiB/sec 3.777 {'family_index': 19, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Avx2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1204226} BM_ByteStreamSplitEncode_Float_Scalar/1024 4.955 GiB/sec 5.137 GiB/sec 3.678 {'family_index': 12, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 906261} BM_ByteStreamSplitEncode_Float_Generic/65536 12.745 GiB/sec 13.189 GiB/sec 3.480 {'family_index': 5, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 36559} BM_ByteStreamSplitDecode_FLBA_Generic<7>/65536 3.807 GiB/sec 3.938 GiB/sec 3.429 {'family_index': 3, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<7>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6135} BM_ByteStreamSplitEncode_FLBA_Generic<16>/1024 4.889 GiB/sec 5.056 GiB/sec 3.411 {'family_index': 9, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<16>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 224832} BM_ByteStreamSplitDecode_Float_Sse2/65536 7.650 GiB/sec 7.907 GiB/sec 3.355 {'family_index': 14, 'per_family_instance_index': 1, 'run_name': 
'BM_ByteStreamSplitDecode_Float_Sse2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 21896} BM_ByteStreamSplitEncode_Float_Avx2/1024 12.565 GiB/sec 12.949 GiB/sec 3.052 {'family_index': 20, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Avx2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2277598} BM_ByteStreamSplitEncode_Float_Scalar/65536 4.927 GiB/sec 5.073 GiB/sec 2.976 {'family_index': 12, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 14124} BM_ByteStreamSplitEncode_FLBA_Generic<16>/65536 4.739 GiB/sec 4.866 GiB/sec 2.672 {'family_index': 9, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<16>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3369} BM_ByteStreamSplitEncode_Float_Generic/1024 12.523 GiB/sec 12.857 GiB/sec 2.667 {'family_index': 5, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2296580} BM_ByteStreamSplitDecode_Double_Generic/1024 13.364 GiB/sec 13.672 GiB/sec 2.301 {'family_index': 1, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1222754} BM_ByteStreamSplitDecode_Double_Sse2/65536 8.820 GiB/sec 8.815 GiB/sec -0.055 {'family_index': 15, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Sse2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 12677} BM_ByteStreamSplitDecode_Double_Sse2/1024 8.979 GiB/sec 8.958 GiB/sec -0.234 {'family_index': 15, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Sse2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 814584} 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Regressions: (2)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
benchmark baseline contender change % counters
BM_ByteStreamSplitEncode_Float_Sse2/65536 11.041 GiB/sec 8.209 GiB/sec -25.647 {'family_index': 16, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Sse2/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 30890}
BM_ByteStreamSplitEncode_Float_Sse2/1024 11.055 GiB/sec 8.168 GiB/sec -26.114 {'family_index': 16, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Sse2/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2024308}
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org