This is an automated email from the ASF dual-hosted git repository.

caiconghui pushed a commit to branch orc-2.1
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git

commit 8df403f5e396a45aeb6cdc0b2f8f2d6a9b8d4f62
Author: Qi Chen <[email protected]>
AuthorDate: Thu May 22 10:28:28 2025 +0800

    [Optimize] Optimize stripe footer multiple reads. (#317)
---
 c++/src/Reader.cc       | 9 ++-------
 c++/src/StripeStream.cc | 5 +++--
 c++/src/StripeStream.hh | 8 +++++---
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 619bea0385d..8ec57dc5b1d 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -723,7 +723,7 @@ namespace orc {
     return std::unique_ptr<StripeInformation>(new StripeInformationImpl(
         stripeInfo.offset(), stripeInfo.indexlength(), stripeInfo.datalength(),
         stripeInfo.footerlength(), stripeInfo.numberofrows(), contents->stream.get(),
-        *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics));
+        *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics, nullptr));
   }
 
   FileVersion ReaderImpl::getFormatVersion() const {
@@ -1229,7 +1229,7 @@ namespace orc {
             currentStripeInfo.offset(), currentStripeInfo.indexlength(),
             currentStripeInfo.datalength(), currentStripeInfo.footerlength(),
             currentStripeInfo.numberofrows(), contents->stream.get(), *contents->pool,
-            contents->compression, contents->blockSize, contents->readerMetrics));
+            contents->compression, contents->blockSize, contents->readerMetrics, &currentStripeFooter));
       contents->stream->beforeReadStripe(std::move(currentStripeInformation), selectedColumns);
 
       if (sargsApplier) {
@@ -1260,11 +1260,6 @@ namespace orc {
 
       if (stringDictFilter != nullptr) {
         std::list<std::string> dictFilterColumnNames;
-        std::unique_ptr<StripeInformation> currentStripeInformation(new StripeInformationImpl(
-            currentStripeInfo.offset(), currentStripeInfo.indexlength(),
-            currentStripeInfo.datalength(), currentStripeInfo.footerlength(),
-            currentStripeInfo.numberofrows(), contents->stream.get(), *contents->pool,
-            contents->compression, contents->blockSize, contents->readerMetrics));
         stringDictFilter->fillDictFilterColumnNames(std::move(currentStripeInformation),
                                                     dictFilterColumnNames);
         std::unordered_map<uint64_t, std::string> columnIdToNameMap;
diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc
index 1f43da4f243..91acb3cddf0 100644
--- a/c++/src/StripeStream.cc
+++ b/c++/src/StripeStream.cc
@@ -130,13 +130,14 @@ namespace orc {
   }
 
   void StripeInformationImpl::ensureStripeFooterLoaded() const {
-    if (stripeFooter.get() == nullptr) {
+    if (stripeFooter == nullptr && managedStripeFooter.get() == nullptr) {
       std::unique_ptr<SeekableInputStream> pbStream =
           createDecompressor(compression,
                              std::make_unique<SeekableFileInputStream>(
                                  stream, offset + indexLength + dataLength, footerLength, memory),
                              blockSize, memory, metrics);
-      stripeFooter = std::make_unique<proto::StripeFooter>();
+      managedStripeFooter = std::make_unique<proto::StripeFooter>();
+      stripeFooter = managedStripeFooter.get();
       if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) {
         throw ParseError("Failed to parse the stripe footer");
       }
diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh
index 74bebda6f25..74fabb06e26 100644
--- a/c++/src/StripeStream.hh
+++ b/c++/src/StripeStream.hh
@@ -132,15 +132,16 @@ namespace orc {
     MemoryPool& memory;
     CompressionKind compression;
     uint64_t blockSize;
-    mutable std::unique_ptr<proto::StripeFooter> stripeFooter;
     ReaderMetrics* metrics;
+    mutable proto::StripeFooter* stripeFooter;
+    mutable std::unique_ptr<proto::StripeFooter> managedStripeFooter;
     void ensureStripeFooterLoaded() const;
 
    public:
     StripeInformationImpl(uint64_t _offset, uint64_t _indexLength, uint64_t _dataLength,
                           uint64_t _footerLength, uint64_t _numRows, InputStream* _stream,
                           MemoryPool& _memory, CompressionKind _compression, uint64_t _blockSize,
-                          ReaderMetrics* _metrics)
+                          ReaderMetrics* _metrics, proto::StripeFooter* _stripeFooter)
         : offset(_offset),
           indexLength(_indexLength),
           dataLength(_dataLength),
@@ -150,7 +151,8 @@ namespace orc {
           memory(_memory),
           compression(_compression),
           blockSize(_blockSize),
-          metrics(_metrics) {
+          metrics(_metrics),
+          stripeFooter(_stripeFooter) {
       // PASS
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to