AntoinePrv commented on PR #46789: URL: https://github.com/apache/arrow/pull/46789#issuecomment-2965645069
`archery benchmark diff --suite-filter=parquet-encoding --benchmark-filter='ByteStreamSplit'` <details> <summary>Show benchmark results</summary> ``` -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Non-regressions: (44) -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- benchmark baseline contender change % counters BM_ByteStreamSplitDecode_FLBA_Generic<2>/65536 6.967 GiB/sec 96.028 GiB/sec 1278.358 {'family_index': 2, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<2>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 40039} BM_ByteStreamSplitDecode_FLBA_Generic<2>/1024 7.008 GiB/sec 80.835 GiB/sec 1053.458 {'family_index': 2, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<2>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2561954} BM_ByteStreamSplitEncode_FLBA_Generic<2>/65536 5.514 GiB/sec 15.146 GiB/sec 174.667 {'family_index': 7, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<2>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 31055} BM_ByteStreamSplitEncode_FLBA_Generic<2>/1024 5.527 GiB/sec 14.929 GiB/sec 170.123 {'family_index': 7, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<2>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2072177} BM_ByteStreamSplitEncode_Double_Generic/1024 10.424 GiB/sec 17.349 GiB/sec 66.429 {'family_index': 6, 
'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 961842} BM_ByteStreamSplitEncode_Double_Neon/4096 10.955 GiB/sec 17.807 GiB/sec 62.539 {'family_index': 17, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Neon/4096', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 252562} BM_ByteStreamSplitEncode_Double_Neon/1024 10.984 GiB/sec 17.760 GiB/sec 61.688 {'family_index': 17, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Neon/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 996966} BM_ByteStreamSplitEncode_Float_Neon/32768 8.316 GiB/sec 12.530 GiB/sec 50.674 {'family_index': 16, 'per_family_instance_index': 2, 'run_name': 'BM_ByteStreamSplitEncode_Float_Neon/32768', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 48757} BM_ByteStreamSplitEncode_Float_Neon/1024 8.517 GiB/sec 12.788 GiB/sec 50.146 {'family_index': 16, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Neon/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1559934} BM_ByteStreamSplitEncode_Float_Neon/4096 8.585 GiB/sec 12.839 GiB/sec 49.545 {'family_index': 16, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Neon/4096', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 393889} BM_ByteStreamSplitEncode_Float_Neon/65536 8.379 GiB/sec 12.405 GiB/sec 48.038 {'family_index': 16, 'per_family_instance_index': 3, 'run_name': 'BM_ByteStreamSplitEncode_Float_Neon/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 24227} BM_ByteStreamSplitEncode_Float_Generic/65536 8.378 GiB/sec 12.370 GiB/sec 47.660 {'family_index': 5, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 
'threads': 1, 'iterations': 24709} BM_ByteStreamSplitEncode_Float_Generic/1024 8.725 GiB/sec 12.785 GiB/sec 46.533 {'family_index': 5, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1601790} BM_ByteStreamSplitEncode_Double_Generic/65536 8.978 GiB/sec 12.445 GiB/sec 38.616 {'family_index': 6, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 13637} BM_ByteStreamSplitEncode_Double_Neon/32768 9.748 GiB/sec 13.113 GiB/sec 34.513 {'family_index': 17, 'per_family_instance_index': 2, 'run_name': 'BM_ByteStreamSplitEncode_Double_Neon/32768', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 27479} BM_ByteStreamSplitEncode_Double_Neon/65536 9.869 GiB/sec 12.707 GiB/sec 28.754 {'family_index': 17, 'per_family_instance_index': 3, 'run_name': 'BM_ByteStreamSplitEncode_Double_Neon/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 13956} BM_ByteStreamSplitDecode_Float_Neon/1024 11.112 GiB/sec 11.741 GiB/sec 5.661 {'family_index': 14, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Neon/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2107120} BM_ByteStreamSplitDecode_Float_Neon/65536 11.203 GiB/sec 11.668 GiB/sec 4.150 {'family_index': 14, 'per_family_instance_index': 3, 'run_name': 'BM_ByteStreamSplitDecode_Float_Neon/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 32359} BM_ByteStreamSplitDecode_Float_Neon/32768 11.282 GiB/sec 11.697 GiB/sec 3.683 {'family_index': 14, 'per_family_instance_index': 2, 'run_name': 'BM_ByteStreamSplitDecode_Float_Neon/32768', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 64967} BM_ByteStreamSplitDecode_Float_Generic/65536 11.235 GiB/sec 11.541 GiB/sec 2.724 {'family_index': 0, 
'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 32346} BM_ByteStreamSplitDecode_Float_Neon/4096 11.636 GiB/sec 11.951 GiB/sec 2.703 {'family_index': 14, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Neon/4096', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 529601} BM_ByteStreamSplitEncode_Double_Scalar/65536 5.630 GiB/sec 5.708 GiB/sec 1.396 {'family_index': 13, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Double_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7926} BM_ByteStreamSplitEncode_FLBA_Generic<7>/1024 5.782 GiB/sec 5.855 GiB/sec 1.257 {'family_index': 8, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<7>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 610900} BM_ByteStreamSplitDecode_Float_Generic/1024 11.655 GiB/sec 11.797 GiB/sec 1.222 {'family_index': 0, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2142232} BM_ByteStreamSplitDecode_Float_Scalar/65536 6.898 GiB/sec 6.930 GiB/sec 0.466 {'family_index': 10, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Float_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 19864} BM_ByteStreamSplitDecode_Double_Neon/4096 7.516 GiB/sec 7.543 GiB/sec 0.362 {'family_index': 15, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Neon/4096', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 171879} BM_ByteStreamSplitDecode_FLBA_Generic<16>/1024 5.877 GiB/sec 5.889 GiB/sec 0.212 {'family_index': 4, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<16>/1024', 'repetitions': 1, 'repetition_index': 
0, 'threads': 1, 'iterations': 270124} BM_ByteStreamSplitEncode_Double_Scalar/1024 5.799 GiB/sec 5.808 GiB/sec 0.159 {'family_index': 13, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Double_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 532964} BM_ByteStreamSplitDecode_Double_Generic/1024 7.526 GiB/sec 7.521 GiB/sec -0.056 {'family_index': 1, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Generic/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 691256} BM_ByteStreamSplitDecode_Double_Neon/32768 7.300 GiB/sec 7.296 GiB/sec -0.058 {'family_index': 15, 'per_family_instance_index': 2, 'run_name': 'BM_ByteStreamSplitDecode_Double_Neon/32768', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 20832} BM_ByteStreamSplitDecode_Float_Scalar/1024 6.897 GiB/sec 6.891 GiB/sec -0.079 {'family_index': 10, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Float_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1272218} BM_ByteStreamSplitEncode_FLBA_Generic<7>/65536 5.760 GiB/sec 5.755 GiB/sec -0.091 {'family_index': 8, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<7>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 9473} BM_ByteStreamSplitEncode_FLBA_Generic<16>/65536 3.345 GiB/sec 3.340 GiB/sec -0.154 {'family_index': 9, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<16>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2353} BM_ByteStreamSplitDecode_Double_Neon/1024 7.551 GiB/sec 7.529 GiB/sec -0.289 {'family_index': 15, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Neon/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 671811} BM_ByteStreamSplitEncode_FLBA_Generic<16>/1024 5.874 GiB/sec 5.855 GiB/sec -0.322 
{'family_index': 9, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_FLBA_Generic<16>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 267179} BM_ByteStreamSplitEncode_Float_Scalar/65536 5.694 GiB/sec 5.673 GiB/sec -0.357 {'family_index': 12, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitEncode_Float_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 16373} BM_ByteStreamSplitDecode_Double_Scalar/65536 6.361 GiB/sec 6.338 GiB/sec -0.373 {'family_index': 11, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Scalar/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 9129} BM_ByteStreamSplitDecode_Double_Generic/65536 7.312 GiB/sec 7.280 GiB/sec -0.448 {'family_index': 1, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_Double_Generic/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10472} BM_ByteStreamSplitDecode_Double_Scalar/1024 6.890 GiB/sec 6.856 GiB/sec -0.484 {'family_index': 11, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_Double_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 629468} BM_ByteStreamSplitEncode_Float_Scalar/1024 5.791 GiB/sec 5.757 GiB/sec -0.581 {'family_index': 12, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitEncode_Float_Scalar/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1062070} BM_ByteStreamSplitDecode_FLBA_Generic<7>/1024 6.877 GiB/sec 6.825 GiB/sec -0.751 {'family_index': 3, 'per_family_instance_index': 0, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<7>/1024', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 720046} BM_ByteStreamSplitDecode_Double_Neon/65536 7.319 GiB/sec 7.253 GiB/sec -0.906 {'family_index': 15, 'per_family_instance_index': 3, 'run_name': 'BM_ByteStreamSplitDecode_Double_Neon/65536', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10472} BM_ByteStreamSplitDecode_FLBA_Generic<7>/65536 6.619 GiB/sec 6.547 GiB/sec -1.080 {'family_index': 3, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<7>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10868} BM_ByteStreamSplitDecode_FLBA_Generic<16>/65536 5.438 GiB/sec 5.358 GiB/sec -1.470 {'family_index': 4, 'per_family_instance_index': 1, 'run_name': 'BM_ByteStreamSplitDecode_FLBA_Generic<16>/65536', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3899} ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org