[GitHub] [orc] stiga-huang commented on a change in pull request #1056: ORC-1122: [C++] Add buffer to decode the whole run in RleDecoderV2

2022-03-09 Thread GitBox


stiga-huang commented on a change in pull request #1056:
URL: https://github.com/apache/orc/pull/1056#discussion_r822592473



##
File path: c++/src/RleDecoderV2.cc
##
@@ -703,80 +680,61 @@ uint64_t RleDecoderV2::nextDelta(int64_t* const data,
 runLength = static_cast(firstByte & 0x01) << 8;
 runLength |= readByte();
 ++runLength; // account for first value
-runRead = deltaBase = 0;
+runRead = 0;
 
+int64_t prevValue;
 // read the first value stored as vint
 if (isSigned) {
-  firstValue = static_cast(readVslong());
+  prevValue = readVslong();
 } else {
-  firstValue = static_cast(readVulong());
+  prevValue = static_cast(readVulong());
 }
 
-prevValue = firstValue;
+literals[0] = prevValue;
 
 // read the fixed delta value stored as vint (deltas can be negative even
 // if all number are positive)
-deltaBase = static_cast(readVslong());
-  }
-
-  uint64_t nRead = std::min(runLength - runRead, numValues);
-
-  uint64_t pos = offset;
-  for ( ; pos < offset + nRead; ++pos) {
-// skip null positions
-if (!notNull || notNull[pos]) break;
-  }
-  if (runRead == 0 && pos < offset + nRead) {
-data[pos++] = firstValue;
-++runRead;
-  }
-
-  if (bitSize == 0) {
-// add fixed deltas to adjacent values
-for ( ; pos < offset + nRead; ++pos) {
-  // skip null positions
-  if (notNull && !notNull[pos]) {
-continue;
-  }
-  prevValue = data[pos] = prevValue + deltaBase;
-  ++runRead;
-}
-  } else {
-for ( ; pos < offset + nRead; ++pos) {
-  // skip null positions
-  if (!notNull || notNull[pos]) break;
-}
-if (runRead < 2 && pos < offset + nRead) {
-  // add delta base and first value
-  prevValue = data[pos++] = firstValue + deltaBase;
-  ++runRead;
-}
+int64_t deltaBase = readVslong();
 
-// write the unpacked values, add it to previous value and store final
-// value to result buffer. if the delta base value is negative then it
-// is a decreasing sequence else an increasing sequence
-uint64_t remaining = (offset + nRead) - pos;
-runRead += readLongs(data, pos, remaining, bitSize, notNull);
-
-if (deltaBase < 0) {
-  for ( ; pos < offset + nRead; ++pos) {
-// skip null positions
-if (notNull && !notNull[pos]) {
-  continue;
-}
-prevValue = data[pos] = prevValue - data[pos];
+if (bitSize == 0) {
+  // add fixed deltas to adjacent values
+  for (uint64_t i = 1; i < runLength; ++i) {
+prevValue = literals[i] = prevValue + deltaBase;
   }
 } else {
-  for ( ; pos < offset + nRead; ++pos) {
-// skip null positions
-if (notNull && !notNull[pos]) {
-  continue;
+  prevValue = literals[1] = prevValue + deltaBase;
+  // write the unpacked values, add it to previous value and store final
+  // value to result buffer. if the delta base value is negative then it
+  // is a decreasing sequence else an increasing sequence
+  readLongs(literals.data(), 2, runLength - 2, bitSize);
+  if (deltaBase < 0) {
+for (uint64_t i = 2; i < runLength; ++i) {
+  prevValue = literals[i] = prevValue - literals[i];
+}
+  } else {
+for (uint64_t i = 2; i < runLength; ++i) {
+  prevValue = literals[i] = prevValue + literals[i];
 }
-prevValue = data[pos] = prevValue + data[pos];
   }
 }
   }
-  return nRead;
+
+  return copyDataFromBuffer(data, offset, numValues, notNull);
 }
 
+uint64_t RleDecoderV2::copyDataFromBuffer(int64_t* data, uint64_t offset,
+  uint64_t numValues, const char* 
notNull) {
+  uint64_t nRead = std::min(runLength - runRead, numValues);
+  if (notNull) {
+for (uint64_t i = offset; i < (offset + nRead); ++i) {
+  if (notNull[i]) {
+data[i] = literals[runRead++];
+  }
+}
+  } else {
+memcpy(data + offset, literals.data() + runRead, nRead * sizeof(int64_t));
+runRead += nRead;
+  }
+  return nRead;
+}

Review comment:
   Done.

##
File path: c++/src/RleDecoderV2.cc
##
@@ -703,80 +680,61 @@ uint64_t RleDecoderV2::nextDelta(int64_t* const data,
 runLength = static_cast(firstByte & 0x01) << 8;
 runLength |= readByte();
 ++runLength; // account for first value
-runRead = deltaBase = 0;
+runRead = 0;
 
+int64_t prevValue;
 // read the first value stored as vint
 if (isSigned) {
-  firstValue = static_cast(readVslong());
+  prevValue = readVslong();
 } else {
-  firstValue = static_cast(readVulong());
+  prevValue = static_cast(readVulong());
 }
 
-prevValue = firstValue;
+literals[0] = prevValue;
 
 // read the fixed delta value stored as vint (deltas can be negative even
 // if all number are positive)
-deltaBase = static_cast(readVslong());
-  }
-
-  uint64_t nRead = std

[GitHub] [orc] stiga-huang commented on a change in pull request #1056: ORC-1122: [C++] Add buffer to decode the whole run in RleDecoderV2

2022-03-04 Thread GitBox


stiga-huang commented on a change in pull request #1056:
URL: https://github.com/apache/orc/pull/1056#discussion_r820005368



##
File path: c++/src/RLEv2.hh
##
@@ -25,6 +25,7 @@
 
 #include 
 
+#define MAX_LITERAL_SIZE 512

Review comment:
   Yeah, so the encoder and the decoder use the same limit.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@orc.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org