guiyanakuang commented on a change in pull request #958:
URL: https://github.com/apache/orc/pull/958#discussion_r742507137



##########
File path: c++/src/LzoDecompressor.cc
##########
@@ -312,13 +312,11 @@ namespace orc {
               output += SIZE_OF_INT;
               matchAddress += increment32;
 
-              *reinterpret_cast<int32_t*>(output) =
-                *reinterpret_cast<int32_t*>(matchAddress);
+              memcpy(output, matchAddress, SIZE_OF_INT);

Review comment:
       @wgtmac  you are right, the compiler does optimize memcpy, the 
performance of both ways is similar in different compilers, in older versions 
of the compiler expand the assignment will be faster.
   I agree to wrap a bit_cast function for binary copy between different types. 
   
   @dongjoon-hyun, So I don't think there's any performance loss here compared 
to the original.
   
   
![WX20211104-104627](https://user-images.githubusercontent.com/4069905/140250827-6282739b-c060-43fa-b348-87ede15129fc.png)
   
![WX20211104-105010](https://user-images.githubusercontent.com/4069905/140250854-cf6da388-18d8-42f0-8cd6-18468633acc3.png)
   
![WX20211104-105348](https://user-images.githubusercontent.com/4069905/140250863-6c99cfcb-0b72-4ee0-a6b0-ac31344ac771.png)
   
   ```c++
   #include <string.h>
   
   static void use_memcpy(benchmark::State& state) {
     auto size = state.range(0);
     char buf[size];
     for (int i = 0; i < 8; ++i) {
       buf[i] = 'a';
     }
     for (auto _ : state) {
       char *output = buf + 8;
       char *matchAddress = buf;
       char *matchOutputLimit = buf + size;
       while (output < matchOutputLimit) {
         memcpy(output, matchAddress, 8);
         matchAddress += 8;
         output += 8;
       }
     }
   }
   
   static void use_expanded_assignment(benchmark::State& state) {
     auto size = state.range(0);
     char buf[size];
     for (int i = 0; i < 8; ++i) {
       buf[i] = 'a';
     }
     for (auto _ : state) {
       char *output = buf + 8;
       char *matchAddress = buf;
       char *matchOutputLimit = buf + size;
       while (output < matchOutputLimit) {
         output[0] = *matchOutputLimit;
         output[1] = *(matchOutputLimit + 1);
         output[2] = *(matchOutputLimit + 2);
         output[3] = *(matchOutputLimit + 3);
         output[4] = *(matchOutputLimit + 4);
         output[5] = *(matchOutputLimit + 5);
         output[6] = *(matchOutputLimit + 6);
         output[7] = *(matchOutputLimit + 7);
         matchAddress += 8;
         output += 8;
       }
     }
   }
   
   static void use_reinterpret_assignment(benchmark::State& state) {
     auto size = state.range(0);
     char buf[size];
     for (int i = 0; i < 8; ++i) {
       buf[i] = 'a';
     }
     for (auto _ : state) {
       char *output = buf + 8;
       char *matchAddress = buf;
       char *matchOutputLimit = buf + size;
       while (output < matchOutputLimit) {
         *reinterpret_cast<int64_t*>(output) =
                   *reinterpret_cast<int64_t*>(matchAddress);
         matchAddress += 8;
         output += 8;
       }
     }
   }
   
   BENCHMARK(use_memcpy)->Arg(100000);
   
   BENCHMARK(use_expanded_assignment)->Arg(100000);
   
   BENCHMARK(use_reinterpret_assignment)->Arg(100000);
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@orc.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to