This is an automated email from the ASF dual-hosted git repository.

gongxun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


The following commit(s) were added to refs/heads/main by this push:
     new f1d6691df1e Optimize OrcWriter: precompute varlena indices to skip invalid loops
f1d6691df1e is described below

commit f1d6691df1e32cb424dd6d49648b852adf89ac9f
Author: GongXun <[email protected]>
AuthorDate: Thu Oct 9 16:57:32 2025 +0800

    Optimize OrcWriter: precompute varlena indices to skip invalid loops
    
    Reduces per-tuple branching and cache touches, especially on schemas with many fixed-length/byval columns.
    
    1. Add OrcWriter member: std::vector<int> varlena_slowpath_indices_
    2. Precompute the indices of non-byval, typlen == -1 (varlena) columns in the constructor from tuple_desc
    3. Update PrepareWriteTuple to iterate only over the precomputed indices, skipping fixed-length and byval columns
---
 contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc | 18 ++++++++++++------
 contrib/pax_storage/src/cpp/storage/orc/porc.h        |  3 +++
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc b/contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc
index 6c8d49502e5..5c8b52272d0 100644
--- a/contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc
+++ b/contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc
@@ -249,6 +249,16 @@ OrcWriter::OrcWriter(
 
   group_stats_.Initialize(writer_options.enable_min_max_col_idxs,
                           writer_options.enable_bf_col_idxs);
+
+  // Precompute slowpath indices for varlena columns (non-byval and typlen == -1)
+  varlena_slowpath_indices_.clear();
+  varlena_slowpath_indices_.reserve(writer_options.rel_tuple_desc->natts);
+  for (int i = 0; i < writer_options.rel_tuple_desc->natts; ++i) {
+    auto attrs = TupleDescAttr(writer_options.rel_tuple_desc, i);
+    if (!attrs->attbyval && attrs->attlen == -1) {
+      varlena_slowpath_indices_.push_back(i);
+    }
+  }
 }
 
 OrcWriter::~OrcWriter() {}
@@ -311,8 +321,6 @@ void OrcWriter::Flush() {
 std::vector<std::pair<int, Datum>> OrcWriter::PrepareWriteTuple(
     TupleTableSlot *table_slot) {
   TupleDesc tuple_desc;
-  int16 type_len;
-  bool type_by_val;
   bool is_null;
   Datum tts_value;
   char type_storage;
@@ -323,18 +331,16 @@ std::vector<std::pair<int, Datum>> OrcWriter::PrepareWriteTuple(
   Assert(tuple_desc);
   const auto &required_stats_cols = group_stats_.GetRequiredStatsColsMask();
 
-  for (int i = 0; i < tuple_desc->natts; i++) {
+  for (int i : varlena_slowpath_indices_) {
     bool save_origin_datum;
     auto attrs = TupleDescAttr(tuple_desc, i);
-    type_len = attrs->attlen;
-    type_by_val = attrs->attbyval;
     is_null = table_slot->tts_isnull[i];
     tts_value = table_slot->tts_values[i];
     type_storage = attrs->attstorage;
 
     AssertImply(attrs->attisdropped, is_null);
 
-    if (is_null || type_by_val || type_len != -1) {
+    if (is_null) {
       continue;
     }
 
diff --git a/contrib/pax_storage/src/cpp/storage/orc/porc.h b/contrib/pax_storage/src/cpp/storage/orc/porc.h
index 4bfccb6dec3..69a10ffaad7 100644
--- a/contrib/pax_storage/src/cpp/storage/orc/porc.h
+++ b/contrib/pax_storage/src/cpp/storage/orc/porc.h
@@ -138,6 +138,9 @@ class OrcWriter : public MicroPartitionWriter {
   ::pax::porc::proto::Footer file_footer_;
   ::pax::porc::proto::PostScript post_script_;
   ::pax::MicroPartitionStats group_stats_;
+
+  // indices of columns that are non-byval and have typlen == -1 (varlena)
+  std::vector<int> varlena_slowpath_indices_;
 };
 
 class OrcReader : public MicroPartitionReader {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to