zanmato1984 commented on code in PR #43931:
URL: https://github.com/apache/arrow/pull/43931#discussion_r1741989272


##########
cpp/src/arrow/compute/row/row_encoder_internal.h:
##########
@@ -132,6 +147,8 @@ struct ARROW_EXPORT DictionaryKeyEncoder : 
FixedWidthKeyEncoder {
   Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes, int32_t 
length,
                                             MemoryPool* pool) override;
 
+  // Uses `GetEncoderInfo` in `FixedWidthKeyEncoder`

Review Comment:
   What's this line for?



##########
cpp/src/arrow/compute/row/row_encoder_internal.h:
##########
@@ -348,23 +373,49 @@ class ARROW_EXPORT RowEncoder {
       return std::string(reinterpret_cast<const char*>(encoded_nulls_.data()),
                          encoded_nulls_.size());
     }
-    int32_t row_length = offsets_[i + 1] - offsets_[i];
-    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
offsets_[i]),
+    int32_t row_length = 0;
+    int32_t row_offset = 0;
+    if (IsFixedWidth()) {
+      row_length = fixed_width_length_;
+      row_offset = i * fixed_width_length_;
+    } else {
+      row_length = offsets_[i + 1] - offsets_[i];
+      row_offset = offsets_[i];
+    }
+    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
row_offset),
                        row_length);
   }
 
   int32_t num_rows() const {
+    if (IsFixedWidth()) {
+      return fixed_with_row_count_;
+    }
     return offsets_.empty() ? 0 : static_cast<int32_t>(offsets_.size() - 1);
   }
 
  private:
+  Status EncodeAndAppendForFixedWidth(const ExecSpan& batch);
+  bool IsFixedWidth() const noexcept {
+    return fixed_width_length_ != kInvalidFixedWidthOffset;
+  }
+
+ private:
+  static constexpr int32_t kInvalidFixedWidthOffset = 1;
   ExecContext* ctx_{nullptr};
   std::vector<std::shared_ptr<KeyEncoder>> encoders_;
+  // When all columns in a row are Fixed-width or NA, the encoded row
+  // doesn't need to maintain the column offsets. In this case, the
+  // offsets_.size() would be also be empty.
+  int32_t fixed_width_length_{kInvalidFixedWidthOffset};
+  int32_t fixed_with_row_count_{0};
   // offsets_ vector stores the starting position (offset) of each encoded row
   // within the bytes_ vector. This allows for quick access to individual rows.
   //
   // The size would be num_rows + 1 if not empty, the last element is the total
   // length of the bytes_ vector.
+  //
+  // When all columns in a row are Fixed-width or NA, the offsets_ can be

Review Comment:
   ```suggestion
     // When all columns in a row are fixed-width or NA, the offsets_ can be
   ```



##########
cpp/src/arrow/compute/row/row_encoder_internal.h:
##########
@@ -348,23 +373,49 @@ class ARROW_EXPORT RowEncoder {
       return std::string(reinterpret_cast<const char*>(encoded_nulls_.data()),
                          encoded_nulls_.size());
     }
-    int32_t row_length = offsets_[i + 1] - offsets_[i];
-    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
offsets_[i]),
+    int32_t row_length = 0;
+    int32_t row_offset = 0;
+    if (IsFixedWidth()) {
+      row_length = fixed_width_length_;
+      row_offset = i * fixed_width_length_;
+    } else {
+      row_length = offsets_[i + 1] - offsets_[i];
+      row_offset = offsets_[i];
+    }
+    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
row_offset),
                        row_length);
   }
 
   int32_t num_rows() const {
+    if (IsFixedWidth()) {
+      return fixed_with_row_count_;
+    }
     return offsets_.empty() ? 0 : static_cast<int32_t>(offsets_.size() - 1);
   }
 
  private:
+  Status EncodeAndAppendForFixedWidth(const ExecSpan& batch);
+  bool IsFixedWidth() const noexcept {
+    return fixed_width_length_ != kInvalidFixedWidthOffset;
+  }
+
+ private:
+  static constexpr int32_t kInvalidFixedWidthOffset = 1;
   ExecContext* ctx_{nullptr};
   std::vector<std::shared_ptr<KeyEncoder>> encoders_;
+  // When all columns in a row are Fixed-width or NA, the encoded row
+  // doesn't need to maintain the column offsets. In this case, the
+  // offsets_.size() would be also be empty.
+  int32_t fixed_width_length_{kInvalidFixedWidthOffset};
+  int32_t fixed_with_row_count_{0};

Review Comment:
   Typo?



##########
cpp/src/arrow/compute/row/row_encoder_internal.h:
##########
@@ -348,23 +373,49 @@ class ARROW_EXPORT RowEncoder {
       return std::string(reinterpret_cast<const char*>(encoded_nulls_.data()),
                          encoded_nulls_.size());
     }
-    int32_t row_length = offsets_[i + 1] - offsets_[i];
-    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
offsets_[i]),
+    int32_t row_length = 0;
+    int32_t row_offset = 0;
+    if (IsFixedWidth()) {
+      row_length = fixed_width_length_;
+      row_offset = i * fixed_width_length_;
+    } else {
+      row_length = offsets_[i + 1] - offsets_[i];
+      row_offset = offsets_[i];
+    }
+    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
row_offset),
                        row_length);
   }
 
   int32_t num_rows() const {
+    if (IsFixedWidth()) {
+      return fixed_with_row_count_;
+    }
     return offsets_.empty() ? 0 : static_cast<int32_t>(offsets_.size() - 1);
   }
 
  private:
+  Status EncodeAndAppendForFixedWidth(const ExecSpan& batch);
+  bool IsFixedWidth() const noexcept {
+    return fixed_width_length_ != kInvalidFixedWidthOffset;
+  }
+
+ private:
+  static constexpr int32_t kInvalidFixedWidthOffset = 1;
   ExecContext* ctx_{nullptr};
   std::vector<std::shared_ptr<KeyEncoder>> encoders_;
+  // When all columns in a row are Fixed-width or NA, the encoded row
+  // doesn't need to maintain the column offsets. In this case, the
+  // offsets_.size() would be also be empty.
+  int32_t fixed_width_length_{kInvalidFixedWidthOffset};
+  int32_t fixed_with_row_count_{0};

Review Comment:
   ```suggestion
     int32_t fixed_width_row_count_{0};
   ```



##########
cpp/src/arrow/compute/row/row_encoder_internal.h:
##########
@@ -348,23 +373,49 @@ class ARROW_EXPORT RowEncoder {
       return std::string(reinterpret_cast<const char*>(encoded_nulls_.data()),
                          encoded_nulls_.size());
     }
-    int32_t row_length = offsets_[i + 1] - offsets_[i];
-    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
offsets_[i]),
+    int32_t row_length = 0;
+    int32_t row_offset = 0;
+    if (IsFixedWidth()) {
+      row_length = fixed_width_length_;
+      row_offset = i * fixed_width_length_;
+    } else {
+      row_length = offsets_[i + 1] - offsets_[i];
+      row_offset = offsets_[i];
+    }
+    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
row_offset),
                        row_length);
   }
 
   int32_t num_rows() const {
+    if (IsFixedWidth()) {
+      return fixed_with_row_count_;
+    }
     return offsets_.empty() ? 0 : static_cast<int32_t>(offsets_.size() - 1);
   }
 
  private:
+  Status EncodeAndAppendForFixedWidth(const ExecSpan& batch);
+  bool IsFixedWidth() const noexcept {
+    return fixed_width_length_ != kInvalidFixedWidthOffset;
+  }
+
+ private:
+  static constexpr int32_t kInvalidFixedWidthOffset = 1;
   ExecContext* ctx_{nullptr};
   std::vector<std::shared_ptr<KeyEncoder>> encoders_;
+  // When all columns in a row are Fixed-width or NA, the encoded row

Review Comment:
   ```suggestion
     // When all columns in a row are fixed-width or NA, the encoded row
   ```



##########
cpp/src/arrow/compute/row/row_encoder_internal.h:
##########
@@ -348,23 +373,49 @@ class ARROW_EXPORT RowEncoder {
       return std::string(reinterpret_cast<const char*>(encoded_nulls_.data()),
                          encoded_nulls_.size());
     }
-    int32_t row_length = offsets_[i + 1] - offsets_[i];
-    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
offsets_[i]),
+    int32_t row_length = 0;
+    int32_t row_offset = 0;
+    if (IsFixedWidth()) {
+      row_length = fixed_width_length_;
+      row_offset = i * fixed_width_length_;
+    } else {
+      row_length = offsets_[i + 1] - offsets_[i];
+      row_offset = offsets_[i];
+    }
+    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
row_offset),
                        row_length);
   }
 
   int32_t num_rows() const {
+    if (IsFixedWidth()) {
+      return fixed_with_row_count_;
+    }
     return offsets_.empty() ? 0 : static_cast<int32_t>(offsets_.size() - 1);
   }
 
  private:
+  Status EncodeAndAppendForFixedWidth(const ExecSpan& batch);
+  bool IsFixedWidth() const noexcept {
+    return fixed_width_length_ != kInvalidFixedWidthOffset;
+  }
+
+ private:
+  static constexpr int32_t kInvalidFixedWidthOffset = 1;
   ExecContext* ctx_{nullptr};
   std::vector<std::shared_ptr<KeyEncoder>> encoders_;
+  // When all columns in a row are Fixed-width or NA, the encoded row
+  // doesn't need to maintain the column offsets. In this case, the

Review Comment:
   ```suggestion
     // doesn't need to maintain the row offsets. In this case, the
   ```



##########
cpp/src/arrow/compute/row/row_encoder_internal.h:
##########
@@ -348,23 +373,49 @@ class ARROW_EXPORT RowEncoder {
       return std::string(reinterpret_cast<const char*>(encoded_nulls_.data()),
                          encoded_nulls_.size());
     }
-    int32_t row_length = offsets_[i + 1] - offsets_[i];
-    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
offsets_[i]),
+    int32_t row_length = 0;
+    int32_t row_offset = 0;
+    if (IsFixedWidth()) {
+      row_length = fixed_width_length_;
+      row_offset = i * fixed_width_length_;
+    } else {
+      row_length = offsets_[i + 1] - offsets_[i];
+      row_offset = offsets_[i];
+    }
+    return std::string(reinterpret_cast<const char*>(bytes_.data() + 
row_offset),
                        row_length);
   }
 
   int32_t num_rows() const {
+    if (IsFixedWidth()) {
+      return fixed_with_row_count_;
+    }
     return offsets_.empty() ? 0 : static_cast<int32_t>(offsets_.size() - 1);
   }
 
  private:
+  Status EncodeAndAppendForFixedWidth(const ExecSpan& batch);
+  bool IsFixedWidth() const noexcept {
+    return fixed_width_length_ != kInvalidFixedWidthOffset;
+  }
+
+ private:
+  static constexpr int32_t kInvalidFixedWidthOffset = 1;
   ExecContext* ctx_{nullptr};
   std::vector<std::shared_ptr<KeyEncoder>> encoders_;
+  // When all columns in a row are Fixed-width or NA, the encoded row
+  // doesn't need to maintain the column offsets. In this case, the
+  // offsets_.size() would be also be empty.

Review Comment:
   ```suggestion
     // offsets_.size() would be also empty.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to