gf2121 commented on pull request #2113: URL: https://github.com/apache/lucene-solr/pull/2113#issuecomment-741520801
> You need to either return a value from the benchmark methods or call blackhole.consume, otherwise the JVM will detect that everything is unused outside of the scope and optimize it away. That should get you some different results. Thank you for being thorough! Thank you for the clue! Based on your guidance, I tried some more benchmarks, but found that array val is always faster... Here are the code and results (the code is only shown to illustrate how I tried to prevent JVM optimization, so only one method is enough). 1. return an array result ``` public long[] decode0() { for (int iter = 0, tmpIdx = 0, longsIdx = 30; iter < 2; ++iter, tmpIdx += 15, longsIdx += 1) { long l0 = (TMP[tmpIdx+0] & MASKS16_1[0]) << 14; l0 |= (TMP[tmpIdx+1] & MASKS16_1[0]) << 13; l0 |= (TMP[tmpIdx+2] & MASKS16_1[0]) << 12; l0 |= (TMP[tmpIdx+3] & MASKS16_1[0]) << 11; l0 |= (TMP[tmpIdx+4] & MASKS16_1[0]) << 10; l0 |= (TMP[tmpIdx+5] & MASKS16_1[0]) << 9; l0 |= (TMP[tmpIdx+6] & MASKS16_1[0]) << 8; l0 |= (TMP[tmpIdx+7] & MASKS16_1[0]) << 7; l0 |= (TMP[tmpIdx+8] & MASKS16_1[0]) << 6; l0 |= (TMP[tmpIdx+9] & MASKS16_1[0]) << 5; l0 |= (TMP[tmpIdx+10] & MASKS16_1[0]) << 4; l0 |= (TMP[tmpIdx+11] & MASKS16_1[0]) << 3; l0 |= (TMP[tmpIdx+12] & MASKS16_1[0]) << 2; l0 |= (TMP[tmpIdx+13] & MASKS16_1[0]) << 1; l0 |= (TMP[tmpIdx+14] & MASKS16_1[0]) << 0; ARR[longsIdx+0] = l0; } return ARR; } ``` method | speed (ops/s) ------------ | ------------- MyBenchmark.decode0 | 92215691.271 ± 1149229.830 MyBenchmark.decode1 | 62019521.428 ± 4268837.164 MyBenchmark.decode2 | 62595196.347 ± 1434012.058 2. 
return a long result ``` public long decode0() { for (int iter = 0, tmpIdx = 0, longsIdx = 30; iter < 2; ++iter, tmpIdx += 15, longsIdx += 1) { long l0 = (TMP[tmpIdx+0] & MASKS16_1[0]) << 14; l0 |= (TMP[tmpIdx+1] & MASKS16_1[0]) << 13; l0 |= (TMP[tmpIdx+2] & MASKS16_1[0]) << 12; l0 |= (TMP[tmpIdx+3] & MASKS16_1[0]) << 11; l0 |= (TMP[tmpIdx+4] & MASKS16_1[0]) << 10; l0 |= (TMP[tmpIdx+5] & MASKS16_1[0]) << 9; l0 |= (TMP[tmpIdx+6] & MASKS16_1[0]) << 8; l0 |= (TMP[tmpIdx+7] & MASKS16_1[0]) << 7; l0 |= (TMP[tmpIdx+8] & MASKS16_1[0]) << 6; l0 |= (TMP[tmpIdx+9] & MASKS16_1[0]) << 5; l0 |= (TMP[tmpIdx+10] & MASKS16_1[0]) << 4; l0 |= (TMP[tmpIdx+11] & MASKS16_1[0]) << 3; l0 |= (TMP[tmpIdx+12] & MASKS16_1[0]) << 2; l0 |= (TMP[tmpIdx+13] & MASKS16_1[0]) << 1; l0 |= (TMP[tmpIdx+14] & MASKS16_1[0]) << 0; ARR[longsIdx+0] = l0; } return ARR[31]; } ``` method | speed (ops/s) ------------ | ------------- MyBenchmark.decode0 | 92470935.234 ± 3525240.576 MyBenchmark.decode1 | 62389057.277 ± 567747.489 MyBenchmark.decode2 | 62141559.925 ± 1012364.417 3. 
blackhole consume last ``` public void decode0(Blackhole blackhole) { for (int iter = 0, tmpIdx = 0, longsIdx = 30; iter < 2; ++iter, tmpIdx += 15, longsIdx += 1) { long l0 = (TMP[tmpIdx+0] & MASKS16_1[0]) << 14; l0 |= (TMP[tmpIdx+1] & MASKS16_1[0]) << 13; l0 |= (TMP[tmpIdx+2] & MASKS16_1[0]) << 12; l0 |= (TMP[tmpIdx+3] & MASKS16_1[0]) << 11; l0 |= (TMP[tmpIdx+4] & MASKS16_1[0]) << 10; l0 |= (TMP[tmpIdx+5] & MASKS16_1[0]) << 9; l0 |= (TMP[tmpIdx+6] & MASKS16_1[0]) << 8; l0 |= (TMP[tmpIdx+7] & MASKS16_1[0]) << 7; l0 |= (TMP[tmpIdx+8] & MASKS16_1[0]) << 6; l0 |= (TMP[tmpIdx+9] & MASKS16_1[0]) << 5; l0 |= (TMP[tmpIdx+10] & MASKS16_1[0]) << 4; l0 |= (TMP[tmpIdx+11] & MASKS16_1[0]) << 3; l0 |= (TMP[tmpIdx+12] & MASKS16_1[0]) << 2; l0 |= (TMP[tmpIdx+13] & MASKS16_1[0]) << 1; l0 |= (TMP[tmpIdx+14] & MASKS16_1[0]) << 0; ARR[longsIdx+0] = l0; } blackhole.consume(ARR[30]); blackhole.consume(ARR[31]); } ``` method | speed (ops/s) ------------ | ------------- MyBenchmark.decode0 | 79570016.826 ± 1210338.335 MyBenchmark.decode1 | 58225242.201 ± 905039.184 MyBenchmark.decode2 | 58524381.688 ± 585220.494 4. 
blackhole consume in loop ``` public void decode0(Blackhole blackhole) { for (int iter = 0, tmpIdx = 0, longsIdx = 30; iter < 2; ++iter, tmpIdx += 15, longsIdx += 1) { long l0 = (TMP[tmpIdx+0] & MASKS16_1[0]) << 14; l0 |= (TMP[tmpIdx+1] & MASKS16_1[0]) << 13; l0 |= (TMP[tmpIdx+2] & MASKS16_1[0]) << 12; l0 |= (TMP[tmpIdx+3] & MASKS16_1[0]) << 11; l0 |= (TMP[tmpIdx+4] & MASKS16_1[0]) << 10; l0 |= (TMP[tmpIdx+5] & MASKS16_1[0]) << 9; l0 |= (TMP[tmpIdx+6] & MASKS16_1[0]) << 8; l0 |= (TMP[tmpIdx+7] & MASKS16_1[0]) << 7; l0 |= (TMP[tmpIdx+8] & MASKS16_1[0]) << 6; l0 |= (TMP[tmpIdx+9] & MASKS16_1[0]) << 5; l0 |= (TMP[tmpIdx+10] & MASKS16_1[0]) << 4; l0 |= (TMP[tmpIdx+11] & MASKS16_1[0]) << 3; l0 |= (TMP[tmpIdx+12] & MASKS16_1[0]) << 2; l0 |= (TMP[tmpIdx+13] & MASKS16_1[0]) << 1; l0 |= (TMP[tmpIdx+14] & MASKS16_1[0]) << 0; blackhole.consume(l0); ARR[longsIdx+0] = l0; } } ``` method | speed (ops/s) ------------ | ------------- MyBenchmark.decode0 | 62292723.905 ± 185021.358 MyBenchmark.decode1 | 43453980.399 ± 645575.911 MyBenchmark.decode2 | 43369008.884 ± 751782.611 ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org