This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new fcf9dd6aa5 GH-49385: Clarify empty schema contract on stream_reader 
(#49386)
fcf9dd6aa5 is described below

commit fcf9dd6aa50731c2b7d83e495ecbc04b002968d6
Author: emkornfield <[email protected]>
AuthorDate: Wed Mar 4 11:14:40 2026 -0800

    GH-49385: Clarify empty schema contract on stream_reader (#49386)
    
    ### Rationale for this change
    
    StreamReader inherently does not support empty schemas. Guard this case
    with an exception.
    
    ### What changes are included in this PR?
    
    Added validation around the parquet reader passed in.
    
    ### Are these changes tested?
    
    Yes, added unit tests.
    
    ### Are there any user-facing changes?
    
    One potentially debatable change is that the constructor for this class
    can now throw; however, it was never marked noexcept.
    
    **This PR contains a "Critical Fix".**
    
    * GitHub Issue: #49385
---
 cpp/src/parquet/stream_reader.cc      |  3 +++
 cpp/src/parquet/stream_reader.h       |  1 +
 cpp/src/parquet/stream_reader_test.cc | 18 ++++++++++++++++++
 3 files changed, 22 insertions(+)

diff --git a/cpp/src/parquet/stream_reader.cc b/cpp/src/parquet/stream_reader.cc
index d3353aa334..6a8dfa8f63 100644
--- a/cpp/src/parquet/stream_reader.cc
+++ b/cpp/src/parquet/stream_reader.cc
@@ -50,6 +50,9 @@ StreamReader::StreamReader(std::unique_ptr<ParquetFileReader> 
reader)
   auto schema = file_metadata_->schema();
   auto group_node = schema->group_node();
 
+  if (schema->num_columns() == 0) {
+    throw ParquetException("StreamReader does not support empty schemas.");
+  }
   nodes_.resize(schema->num_columns());
 
   for (auto i = 0; i < schema->num_columns(); ++i) {
diff --git a/cpp/src/parquet/stream_reader.h b/cpp/src/parquet/stream_reader.h
index a7dadac92c..a5f6e534d8 100644
--- a/cpp/src/parquet/stream_reader.h
+++ b/cpp/src/parquet/stream_reader.h
@@ -65,6 +65,7 @@ class PARQUET_EXPORT StreamReader {
   //      assigned afterwards.
   StreamReader() = default;
 
+  /// Reader must have at least one field defined in its schema.
   explicit StreamReader(std::unique_ptr<ParquetFileReader> reader);
 
   ~StreamReader() = default;
diff --git a/cpp/src/parquet/stream_reader_test.cc 
b/cpp/src/parquet/stream_reader_test.cc
index 04140f6ad0..8db21fb9e8 100644
--- a/cpp/src/parquet/stream_reader_test.cc
+++ b/cpp/src/parquet/stream_reader_test.cc
@@ -24,8 +24,10 @@
 #include <memory>
 
 #include "arrow/io/file.h"
+#include "arrow/io/memory.h"
 #include "arrow/util/decimal.h"
 #include "parquet/exception.h"
+#include "parquet/file_writer.h"
 #include "parquet/test_util.h"
 
 namespace parquet {
@@ -251,6 +253,22 @@ TEST_F(TestStreamReader, DefaultConstructed) {
   EXPECT_EQ(0, os.SkipRows(100));
 }
 
+TEST(StreamReaderEmptySchema, ThrowsOnConstruction) {
+  PARQUET_ASSIGN_OR_THROW(auto buffer_os, 
::arrow::io::BufferOutputStream::Create());
+
+  auto empty_schema = std::static_pointer_cast<schema::GroupNode>(
+      schema::GroupNode::Make("schema", Repetition::REQUIRED, 
schema::NodeVector{}));
+
+  auto file_writer = ParquetFileWriter::Open(buffer_os, empty_schema);
+  file_writer->Close();
+
+  PARQUET_ASSIGN_OR_THROW(auto buffer, buffer_os->Finish());
+  auto buffer_reader = std::make_shared<::arrow::io::BufferReader>(buffer);
+  auto file_reader = ParquetFileReader::Open(buffer_reader);
+
+  EXPECT_THROW(StreamReader{std::move(file_reader)}, ParquetException);
+}
+
 TEST_F(TestStreamReader, TypeChecking) {
   bool b;
   std::string s;

Reply via email to