This is an automated email from the ASF dual-hosted git repository. caiconghui pushed a commit to branch orc-2.1 in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
commit 8df403f5e396a45aeb6cdc0b2f8f2d6a9b8d4f62 Author: Qi Chen <[email protected]> AuthorDate: Thu May 22 10:28:28 2025 +0800 [Optimize] Optimize stripe footer multiple reads. (#317) --- c++/src/Reader.cc | 9 ++------- c++/src/StripeStream.cc | 5 +++-- c++/src/StripeStream.hh | 8 +++++--- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index 619bea0385d..8ec57dc5b1d 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -723,7 +723,7 @@ namespace orc { return std::unique_ptr<StripeInformation>(new StripeInformationImpl( stripeInfo.offset(), stripeInfo.indexlength(), stripeInfo.datalength(), stripeInfo.footerlength(), stripeInfo.numberofrows(), contents->stream.get(), - *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics)); + *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics, nullptr)); } FileVersion ReaderImpl::getFormatVersion() const { @@ -1229,7 +1229,7 @@ namespace orc { currentStripeInfo.offset(), currentStripeInfo.indexlength(), currentStripeInfo.datalength(), currentStripeInfo.footerlength(), currentStripeInfo.numberofrows(), contents->stream.get(), *contents->pool, - contents->compression, contents->blockSize, contents->readerMetrics)); + contents->compression, contents->blockSize, contents->readerMetrics, &currentStripeFooter)); contents->stream->beforeReadStripe(std::move(currentStripeInformation), selectedColumns); if (sargsApplier) { @@ -1260,11 +1260,6 @@ namespace orc { if (stringDictFilter != nullptr) { std::list<std::string> dictFilterColumnNames; - std::unique_ptr<StripeInformation> currentStripeInformation(new StripeInformationImpl( - currentStripeInfo.offset(), currentStripeInfo.indexlength(), - currentStripeInfo.datalength(), currentStripeInfo.footerlength(), - currentStripeInfo.numberofrows(), contents->stream.get(), *contents->pool, - contents->compression, contents->blockSize, contents->readerMetrics)); 
stringDictFilter->fillDictFilterColumnNames(std::move(currentStripeInformation), dictFilterColumnNames); std::unordered_map<uint64_t, std::string> columnIdToNameMap; diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc index 1f43da4f243..91acb3cddf0 100644 --- a/c++/src/StripeStream.cc +++ b/c++/src/StripeStream.cc @@ -130,13 +130,14 @@ namespace orc { } void StripeInformationImpl::ensureStripeFooterLoaded() const { - if (stripeFooter.get() == nullptr) { + if (stripeFooter == nullptr && managedStripeFooter.get() == nullptr) { std::unique_ptr<SeekableInputStream> pbStream = createDecompressor(compression, std::make_unique<SeekableFileInputStream>( stream, offset + indexLength + dataLength, footerLength, memory), blockSize, memory, metrics); - stripeFooter = std::make_unique<proto::StripeFooter>(); + managedStripeFooter = std::make_unique<proto::StripeFooter>(); + stripeFooter = managedStripeFooter.get(); if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) { throw ParseError("Failed to parse the stripe footer"); } diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh index 74bebda6f25..74fabb06e26 100644 --- a/c++/src/StripeStream.hh +++ b/c++/src/StripeStream.hh @@ -132,15 +132,16 @@ namespace orc { MemoryPool& memory; CompressionKind compression; uint64_t blockSize; - mutable std::unique_ptr<proto::StripeFooter> stripeFooter; ReaderMetrics* metrics; + mutable proto::StripeFooter* stripeFooter; + mutable std::unique_ptr<proto::StripeFooter> managedStripeFooter; void ensureStripeFooterLoaded() const; public: StripeInformationImpl(uint64_t _offset, uint64_t _indexLength, uint64_t _dataLength, uint64_t _footerLength, uint64_t _numRows, InputStream* _stream, MemoryPool& _memory, CompressionKind _compression, uint64_t _blockSize, - ReaderMetrics* _metrics) + ReaderMetrics* _metrics, proto::StripeFooter* _stripeFooter) : offset(_offset), indexLength(_indexLength), dataLength(_dataLength), @@ -150,7 +151,8 @@ namespace orc { memory(_memory), 
compression(_compression), blockSize(_blockSize), - metrics(_metrics) { + metrics(_metrics), + stripeFooter(_stripeFooter) { // PASS } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
