This is an automated email from the ASF dual-hosted git repository.
raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d7dc586c11 GH-45048: [C++][Parquet] Deprecate unused `chunk_size` parameter in `parquet::arrow::FileWriter::NewRowGroup()` (#45088)
d7dc586c11 is described below
commit d7dc586c11b5ce903cadc946c55dcacadcc85668
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Wed Jan 15 14:30:10 2025 +0100
GH-45048: [C++][Parquet] Deprecate unused `chunk_size` parameter in `parquet::arrow::FileWriter::NewRowGroup()` (#45088)
### Rationale for this change
The implementation never uses the `chunk_size` parameter passed to `parquet::arrow::FileWriter::NewRowGroup()`, so accepting it is misleading.
### What changes are included in this PR?
Add a parameterless `NewRowGroup()` overload, deprecate the existing `NewRowGroup(int64_t chunk_size)` overload, and update the GLib bindings, C++ tests, and Cython declaration accordingly.
### Are these changes tested?
Yes, the existing tests are updated to call `NewRowGroup()` without the argument.
### Are there any user-facing changes?
The `chunk_size` parameter is now deprecated; code that still passes it compiles and behaves as before, but triggers a deprecation warning. A migration sketch follows below.
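For C++ callers the migration is mechanical: drop the argument. A minimal sketch (the helper and its names are illustrative, not part of this PR; it assumes an already constructed `parquet::arrow::FileWriter`):

```cpp
#include "arrow/array.h"
#include "arrow/status.h"
#include "parquet/arrow/writer.h"

// Illustrative helper: write one array as its own row group. The old call
// passed chunk.length() to NewRowGroup(), but that value was always ignored,
// so dropping it does not change the file that gets written.
arrow::Status WriteOneRowGroup(parquet::arrow::FileWriter* writer,
                               const arrow::Array& chunk) {
  // Before: ARROW_RETURN_NOT_OK(writer->NewRowGroup(chunk.length()));
  ARROW_RETURN_NOT_OK(writer->NewRowGroup());
  return writer->WriteColumnChunk(chunk);
}
```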
* GitHub Issue: #45048
Lead-authored-by: Krisztian Szucs <[email protected]>
Co-authored-by: Raúl Cumplido <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
---
c_glib/parquet-glib/arrow-file-writer.cpp | 7 ++-----
c_glib/parquet-glib/arrow-file-writer.h | 4 +---
c_glib/test/parquet/test-arrow-file-writer.rb | 4 ++--
cpp/src/arrow/dataset/file_parquet_test.cc | 3 +--
cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 10 +++++-----
cpp/src/parquet/arrow/writer.cc | 4 ++--
cpp/src/parquet/arrow/writer.h | 9 +++++++--
python/pyarrow/_parquet.pxd | 2 +-
8 files changed, 21 insertions(+), 22 deletions(-)
diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp b/c_glib/parquet-glib/arrow-file-writer.cpp
index 2b8e2bdeac..738fb4fd82 100644
--- a/c_glib/parquet-glib/arrow-file-writer.cpp
+++ b/c_glib/parquet-glib/arrow-file-writer.cpp
@@ -574,7 +574,6 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
/**
* gparquet_arrow_file_writer_new_row_group:
* @writer: A #GParquetArrowFileWriter.
- * @chunk_size: The max number of rows in a row group.
* @error: (nullable): Return location for a #GError or %NULL.
*
* Start a new row group.
@@ -584,13 +583,11 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
* Since: 18.0.0
*/
gboolean
-gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
- gsize chunk_size,
- GError **error)
+gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, GError **error)
{
auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
return garrow::check(error,
- parquet_arrow_file_writer->NewRowGroup(chunk_size),
+ parquet_arrow_file_writer->NewRowGroup(),
"[parquet][arrow][file-writer][new-row-group]");
}
diff --git a/c_glib/parquet-glib/arrow-file-writer.h b/c_glib/parquet-glib/arrow-file-writer.h
index 2c82f7c1f8..4986430c95 100644
--- a/c_glib/parquet-glib/arrow-file-writer.h
+++ b/c_glib/parquet-glib/arrow-file-writer.h
@@ -135,9 +135,7 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
GPARQUET_AVAILABLE_IN_18_0
gboolean
-gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
- gsize chunk_size,
- GError **error);
+gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, GError **error);
GPARQUET_AVAILABLE_IN_18_0
gboolean
diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb b/c_glib/test/parquet/test-arrow-file-writer.rb
index d8344bf1c5..418de4782d 100644
--- a/c_glib/test/parquet/test-arrow-file-writer.rb
+++ b/c_glib/test/parquet/test-arrow-file-writer.rb
@@ -89,10 +89,10 @@ class TestParquetArrowFileWriter < Test::Unit::TestCase
def test_write_chunked_array
schema = build_schema("enabled" => :boolean)
writer = Parquet::ArrowFileWriter.new(schema, @file.path)
- writer.new_row_group(2)
+ writer.new_row_group
chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true, nil])])
writer.write_chunked_array(chunked_array)
- writer.new_row_group(1)
+ writer.new_row_group
chunked_array = Arrow::ChunkedArray.new([build_boolean_array([false])])
writer.write_chunked_array(chunked_array)
writer.close
diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index 2c05dcd9be..536fcdb21c 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -85,7 +85,6 @@ class ParquetFormatHelper {
static Status WriteRecordBatch(const RecordBatch& batch,
parquet::arrow::FileWriter* writer) {
auto schema = batch.schema();
- auto size = batch.num_rows();
if (!schema->Equals(*writer->schema(), false)) {
return Status::Invalid("RecordBatch schema does not match this writer's. batch:'",
@@ -93,7 +92,7 @@ class ParquetFormatHelper {
"'");
}
- RETURN_NOT_OK(writer->NewRowGroup(size));
+ RETURN_NOT_OK(writer->NewRowGroup());
for (int i = 0; i < batch.num_columns(); i++) {
RETURN_NOT_OK(writer->WriteColumnChunk(*batch.column(i)));
}
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 856c032c35..cedcebbfb6 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -739,7 +739,7 @@ class ParquetIOTestBase : public ::testing::Test {
ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(),
MakeWriter(schema), arrow_schema,
default_arrow_writer_properties(),
&writer));
- ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
+ ASSERT_OK_NO_THROW(writer->NewRowGroup());
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
ASSERT_OK_NO_THROW(writer->Close());
// writer->Close() should be idempotent
@@ -1053,7 +1053,7 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) {
this->MakeWriter(schema), arrow_schema,
default_arrow_writer_properties(),
&writer));
for (int i = 0; i < 4; i++) {
- ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
+ ASSERT_OK_NO_THROW(writer->NewRowGroup());
std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
}
@@ -1126,7 +1126,7 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) {
this->MakeWriter(schema), arrow_schema,
default_arrow_writer_properties(),
&writer));
for (int i = 0; i < 4; i++) {
- ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
+ ASSERT_OK_NO_THROW(writer->NewRowGroup());
std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
}
@@ -5149,7 +5149,7 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO<TestType> {
::arrow::default_memory_pool(),
ParquetFileWriter::Open(this->sink_, schema_node, writer_properties),
arrow_schema, default_arrow_writer_properties(), &writer));
- ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
+ ASSERT_OK_NO_THROW(writer->NewRowGroup());
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
ASSERT_OK_NO_THROW(writer->Close());
}
@@ -5481,7 +5481,7 @@ TEST(TestArrowReadWrite, OperationsOnClosedWriter) {
// Operations on closed writer are invalid
ASSERT_OK(writer->Close());
- ASSERT_RAISES(Invalid, writer->NewRowGroup(1));
+ ASSERT_RAISES(Invalid, writer->NewRowGroup());
ASSERT_RAISES(Invalid, writer->WriteColumnChunk(table->column(0), 0, 1));
ASSERT_RAISES(Invalid, writer->NewBufferedRowGroup());
ASSERT_OK_AND_ASSIGN(auto record_batch, table->CombineChunksToBatch());
diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc
index 463713df1b..c6d86648c1 100644
--- a/cpp/src/parquet/arrow/writer.cc
+++ b/cpp/src/parquet/arrow/writer.cc
@@ -305,7 +305,7 @@ class FileWriterImpl : public FileWriter {
default_arrow_reader_properties(),
&schema_manifest_);
}
- Status NewRowGroup(int64_t chunk_size) override {
+ Status NewRowGroup() override {
RETURN_NOT_OK(CheckClosed());
if (row_group_writer_ != nullptr) {
PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
@@ -379,7 +379,7 @@ class FileWriterImpl : public FileWriter {
}
auto WriteRowGroup = [&](int64_t offset, int64_t size) {
- RETURN_NOT_OK(NewRowGroup(size));
+ RETURN_NOT_OK(NewRowGroup());
for (int i = 0; i < table.num_columns(); i++) {
RETURN_NOT_OK(WriteColumnChunk(table.column(i), offset, size));
}
diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h
index 4e1ddafd9a..e36b8f252c 100644
--- a/cpp/src/parquet/arrow/writer.h
+++ b/cpp/src/parquet/arrow/writer.h
@@ -87,9 +87,14 @@ class PARQUET_EXPORT FileWriter {
/// \brief Start a new row group.
///
/// Returns an error if not all columns have been written.
+ virtual ::arrow::Status NewRowGroup() = 0;
+
+ /// \brief Start a new row group.
///
- /// \param chunk_size the number of rows in the next row group.
- virtual ::arrow::Status NewRowGroup(int64_t chunk_size) = 0;
+ /// \deprecated Deprecated in 19.0.0.
+ ARROW_DEPRECATED(
+ "Deprecated in 19.0.0. Use NewRowGroup() without the `chunk_size`
argument.")
+ virtual ::arrow::Status NewRowGroup(int64_t chunk_size) { return NewRowGroup(); }
/// \brief Write ColumnChunk in row group using an array.
virtual ::arrow::Status WriteColumnChunk(const ::arrow::Array& data) = 0;
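The `writer.h` hunk above uses the usual soft-deprecation pattern: the old overload stops being pure virtual and simply forwards to the new one, so existing call sites keep compiling and behaving the same, while `ARROW_DEPRECATED` (which expands to `[[deprecated]]`) makes the compiler flag them. A stripped-down sketch of the same pattern, with illustrative names rather than Arrow API:

```cpp
#include <cstdint>

class RowGroupWriter {
 public:
  virtual ~RowGroupWriter() = default;

  // New entry point: the only overload implementations must provide.
  virtual bool NewRowGroup() = 0;

  // Old entry point: no longer pure, ignores its argument, and warns at
  // compile time wherever it is still called.
  [[deprecated("chunk_size is ignored; call NewRowGroup() instead")]]
  virtual bool NewRowGroup(int64_t /*chunk_size*/) { return NewRowGroup(); }
};
```

Note that because the parameterless overload is now the pure virtual one, subclasses of `FileWriter` must override it; overriding only the deprecated `int64_t` overload no longer satisfies the interface.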
diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index d6aebd8284..71e93ce0a4 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -556,7 +556,7 @@ cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil:
const shared_ptr[ArrowWriterProperties]& arrow_properties)
CStatus WriteTable(const CTable& table, int64_t chunk_size)
- CStatus NewRowGroup(int64_t chunk_size)
+ CStatus NewRowGroup()
CStatus Close()
CStatus AddKeyValueMetadata(const shared_ptr[const CKeyValueMetadata]&
key_value_metadata)