wgtmac commented on a change in pull request #1056: URL: https://github.com/apache/orc/pull/1056#discussion_r821429524
########## File path: c++/src/RleDecoderV2.cc ########## @@ -703,80 +680,61 @@ uint64_t RleDecoderV2::nextDelta(int64_t* const data, runLength = static_cast<uint64_t>(firstByte & 0x01) << 8; runLength |= readByte(); ++runLength; // account for first value - runRead = deltaBase = 0; + runRead = 0; + int64_t prevValue; // read the first value stored as vint if (isSigned) { - firstValue = static_cast<int64_t>(readVslong()); + prevValue = readVslong(); } else { - firstValue = static_cast<int64_t>(readVulong()); + prevValue = static_cast<int64_t>(readVulong()); } - prevValue = firstValue; + literals[0] = prevValue; // read the fixed delta value stored as vint (deltas can be negative even // if all number are positive) - deltaBase = static_cast<int64_t>(readVslong()); - } - - uint64_t nRead = std::min(runLength - runRead, numValues); - - uint64_t pos = offset; - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (!notNull || notNull[pos]) break; - } - if (runRead == 0 && pos < offset + nRead) { - data[pos++] = firstValue; - ++runRead; - } - - if (bitSize == 0) { - // add fixed deltas to adjacent values - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (notNull && !notNull[pos]) { - continue; - } - prevValue = data[pos] = prevValue + deltaBase; - ++runRead; - } - } else { - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (!notNull || notNull[pos]) break; - } - if (runRead < 2 && pos < offset + nRead) { - // add delta base and first value - prevValue = data[pos++] = firstValue + deltaBase; - ++runRead; - } + int64_t deltaBase = readVslong(); - // write the unpacked values, add it to previous value and store final - // value to result buffer. if the delta base value is negative then it - // is a decreasing sequence else an increasing sequence - uint64_t remaining = (offset + nRead) - pos; - runRead += readLongs(data, pos, remaining, bitSize, notNull); - - if (deltaBase < 0) { - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (notNull && !notNull[pos]) { - continue; - } - prevValue = data[pos] = prevValue - data[pos]; + if (bitSize == 0) { + // add fixed deltas to adjacent values + for (uint64_t i = 1; i < runLength; ++i) { + prevValue = literals[i] = prevValue + deltaBase; } } else { - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (notNull && !notNull[pos]) { - continue; + prevValue = literals[1] = prevValue + deltaBase; + // write the unpacked values, add it to previous value and store final + // value to result buffer. if the delta base value is negative then it + // is a decreasing sequence else an increasing sequence + readLongs(literals.data(), 2, runLength - 2, bitSize); + if (deltaBase < 0) { + for (uint64_t i = 2; i < runLength; ++i) { + prevValue = literals[i] = prevValue - literals[i]; + } + } else { + for (uint64_t i = 2; i < runLength; ++i) { + prevValue = literals[i] = prevValue + literals[i]; } - prevValue = data[pos] = prevValue + data[pos]; } } } - return nRead; + + return copyDataFromBuffer(data, offset, numValues, notNull); } +uint64_t RleDecoderV2::copyDataFromBuffer(int64_t* data, uint64_t offset, + uint64_t numValues, const char* notNull) { + uint64_t nRead = std::min(runLength - runRead, numValues); + if (notNull) { + for (uint64_t i = offset; i < (offset + nRead); ++i) { + if (notNull[i]) { + data[i] = literals[runRead++]; + } + } + } else { + memcpy(data + offset, literals.data() + runRead, nRead * sizeof(int64_t)); + runRead += nRead; + } + return nRead; +} Review comment: Please add a blank line after this. ########## File path: c++/src/RleDecoderV2.cc ########## @@ -703,80 +680,61 @@ uint64_t RleDecoderV2::nextDelta(int64_t* const data, runLength = static_cast<uint64_t>(firstByte & 0x01) << 8; runLength |= readByte(); ++runLength; // account for first value - runRead = deltaBase = 0; + runRead = 0; + int64_t prevValue; // read the first value stored as vint if (isSigned) { - firstValue = static_cast<int64_t>(readVslong()); + prevValue = readVslong(); } else { - firstValue = static_cast<int64_t>(readVulong()); + prevValue = static_cast<int64_t>(readVulong()); } - prevValue = firstValue; + literals[0] = prevValue; // read the fixed delta value stored as vint (deltas can be negative even // if all number are positive) - deltaBase = static_cast<int64_t>(readVslong()); - } - - uint64_t nRead = std::min(runLength - runRead, numValues); - - uint64_t pos = offset; - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (!notNull || notNull[pos]) break; - } - if (runRead == 0 && pos < offset + nRead) { - data[pos++] = firstValue; - ++runRead; - } - - if (bitSize == 0) { - // add fixed deltas to adjacent values - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (notNull && !notNull[pos]) { - continue; - } - prevValue = data[pos] = prevValue + deltaBase; - ++runRead; - } - } else { - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (!notNull || notNull[pos]) break; - } - if (runRead < 2 && pos < offset + nRead) { - // add delta base and first value - prevValue = data[pos++] = firstValue + deltaBase; - ++runRead; - } + int64_t deltaBase = readVslong(); - // write the unpacked values, add it to previous value and store final - // value to result buffer. if the delta base value is negative then it - // is a decreasing sequence else an increasing sequence - uint64_t remaining = (offset + nRead) - pos; - runRead += readLongs(data, pos, remaining, bitSize, notNull); - - if (deltaBase < 0) { - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (notNull && !notNull[pos]) { - continue; - } - prevValue = data[pos] = prevValue - data[pos]; + if (bitSize == 0) { + // add fixed deltas to adjacent values + for (uint64_t i = 1; i < runLength; ++i) { + prevValue = literals[i] = prevValue + deltaBase; } } else { - for ( ; pos < offset + nRead; ++pos) { - // skip null positions - if (notNull && !notNull[pos]) { - continue; + prevValue = literals[1] = prevValue + deltaBase; + // write the unpacked values, add it to previous value and store final + // value to result buffer. if the delta base value is negative then it + // is a decreasing sequence else an increasing sequence + readLongs(literals.data(), 2, runLength - 2, bitSize); Review comment: Is runLength here guaranteed to be >= 2? ########## File path: c++/src/RleDecoderV2.cc ########## @@ -96,7 +95,7 @@ void RleDecoderV2::readLongsWithoutNulls(int64_t *data, uint64_t offset, uint64_ return; default: // Fallback to the default implementation for deprecated bit size. - readLongs(data, offset, len, fbs); + readLongsSlow(data, offset, len, fbs); Review comment: I'd prefer calling it rolledUnpackLongs. WDYT? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@orc.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org