guiyanakuang commented on a change in pull request #958:
URL: https://github.com/apache/orc/pull/958#discussion_r742507137
##########
File path: c++/src/LzoDecompressor.cc
##########
@@ -312,13 +312,11 @@ namespace orc {
output += SIZE_OF_INT;
matchAddress += increment32;
- *reinterpret_cast<int32_t*>(output) =
- *reinterpret_cast<int32_t*>(matchAddress);
+ memcpy(output, matchAddress, SIZE_OF_INT);
Review comment:
@wgtmac you are right, the compiler does optimize memcpy, the
performance of both ways is similar in different compilers, in older versions
of the compiler expand the assignment will be faster.
I agree to wrap a bit_cast function for binary copy between different types.
@dongjoon-hyun, So I don't think there's any performance loss here compared
to the original.



```c++
#include <string.h>
static void use_memcpy(benchmark::State& state) {
auto size = state.range(0);
char buf[size];
for (int i = 0; i < 8; ++i) {
buf[i] = 'a';
}
for (auto _ : state) {
char *output = buf + 8;
char *matchAddress = buf;
char *matchOutputLimit = buf + size;
while (output < matchOutputLimit) {
memcpy(output, matchAddress, 8);
matchAddress += 8;
output += 8;
}
}
}
static void use_expanded_assignment(benchmark::State& state) {
auto size = state.range(0);
char buf[size];
for (int i = 0; i < 8; ++i) {
buf[i] = 'a';
}
for (auto _ : state) {
char *output = buf + 8;
char *matchAddress = buf;
char *matchOutputLimit = buf + size;
while (output < matchOutputLimit) {
output[0] = *matchOutputLimit;
output[1] = *(matchOutputLimit + 1);
output[2] = *(matchOutputLimit + 2);
output[3] = *(matchOutputLimit + 3);
output[4] = *(matchOutputLimit + 4);
output[5] = *(matchOutputLimit + 5);
output[6] = *(matchOutputLimit + 6);
output[7] = *(matchOutputLimit + 7);
matchAddress += 8;
output += 8;
}
}
}
static void use_reinterpret_assignment(benchmark::State& state) {
auto size = state.range(0);
char buf[size];
for (int i = 0; i < 8; ++i) {
buf[i] = 'a';
}
for (auto _ : state) {
char *output = buf + 8;
char *matchAddress = buf;
char *matchOutputLimit = buf + size;
while (output < matchOutputLimit) {
*reinterpret_cast<int64_t*>(output) =
*reinterpret_cast<int64_t*>(matchAddress);
matchAddress += 8;
output += 8;
}
}
}
BENCHMARK(use_memcpy)->Arg(100000);
BENCHMARK(use_expanded_assignment)->Arg(100000);
BENCHMARK(use_reinterpret_assignment)->Arg(100000);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]