This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

The following commit(s) were added to refs/heads/master by this push:
     new 58c8902  ARROW-5702: [C++] parquet::arrow::FileReader::GetSchema()
58c8902 is described below

commit 58c89028e5fb31507d973e80a155e903309a8ebe
Author: Wes McKinney <wesm+...@apache.org>
AuthorDate: Tue Jun 25 20:18:46 2019 -0500

    ARROW-5702: [C++] parquet::arrow::FileReader::GetSchema()

    This adds this method to `parquet::arrow::FileReader` :

    ```cpp
    /// \brief Return arrow schema for all the columns.
    ::arrow::Status GetSchema(std::shared_ptr<::arrow::Schema>* out);
    ```

    might be useful for e.g. #4627

    Author: Wes McKinney <wesm+...@apache.org>
    Author: Romain Francois <rom...@rstudio.com>

    Closes #4668 from romainfrancois/ARROW-5702/FileReader_GetSchema and squashes the following commits:

    66dac6fe5 <Wes McKinney> Fix compilation, use GetSchema in a unit test
    c0f0655f7 <Romain Francois> + parquet::arrow::FileReader::GetSchema() method
---
 cpp/src/parquet/arrow/arrow-reader-writer-test.cc |  5 +++++
 cpp/src/parquet/arrow/reader.cc                   | 14 +++++++++++---
 cpp/src/parquet/arrow/reader.h                    |  3 +++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
index 5781ad5..2c5c5df 100644
--- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -2686,6 +2686,11 @@ TEST_P(TestArrowReaderAdHocSparkAndHvr, ReadDecimals) {
   std::shared_ptr<::arrow::Table> table;
   ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
 
+  std::shared_ptr<::arrow::Schema> schema;
+  ASSERT_OK_NO_THROW(arrow_reader->GetSchema(&schema));
+  ASSERT_EQ(1, schema->num_fields());
+  ASSERT_TRUE(schema->field(0)->type()->Equals(*decimal_type));
+
   ASSERT_EQ(1, table->num_columns());
 
   constexpr int32_t expected_length = 24;
diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index 484719e..3fe37b0 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -485,9 +485,13 @@ Status FileReader::Impl::ReadColumn(int i, std::shared_ptr<ChunkedArray>* out) {
 
 Status FileReader::Impl::GetSchema(const std::vector<int>& indices,
                                    std::shared_ptr<::arrow::Schema>* out) {
-  auto descr = reader_->metadata()->schema();
-  auto parquet_key_value_metadata = reader_->metadata()->key_value_metadata();
-  return FromParquetSchema(descr, indices, parquet_key_value_metadata, out);
+  return FromParquetSchema(reader_->metadata()->schema(), indices,
+                           reader_->metadata()->key_value_metadata(), out);
+}
+
+Status FileReader::Impl::GetSchema(std::shared_ptr<::arrow::Schema>* out) {
+  return FromParquetSchema(reader_->metadata()->schema(),
+                           reader_->metadata()->key_value_metadata(), out);
 }
 
 Status FileReader::Impl::ReadColumnChunk(int column_index, int row_group_index,
@@ -739,6 +743,10 @@ Status FileReader::GetColumn(int i, std::unique_ptr<ColumnReader>* out) {
   return impl_->GetColumn(i, iterator_factory, out);
 }
 
+Status FileReader::GetSchema(std::shared_ptr<::arrow::Schema>* out) {
+  return impl_->GetSchema(out);
+}
+
 Status FileReader::GetSchema(const std::vector<int>& indices,
                              std::shared_ptr<::arrow::Schema>* out) {
   return impl_->GetSchema(indices, out);
diff --git a/cpp/src/parquet/arrow/reader.h b/cpp/src/parquet/arrow/reader.h
index 48c9237..97e93b9 100644
--- a/cpp/src/parquet/arrow/reader.h
+++ b/cpp/src/parquet/arrow/reader.h
@@ -168,6 +168,9 @@ class PARQUET_EXPORT FileReader {
   // Returns error status if the column of interest is not flat.
   ::arrow::Status GetColumn(int i, std::unique_ptr<ColumnReader>* out);
 
+  /// \brief Return arrow schema for all the columns.
+  ::arrow::Status GetSchema(std::shared_ptr<::arrow::Schema>* out);
+
   /// \brief Return arrow schema by apply selection of column indices.
   /// \returns error status if passed wrong indices.
   ::arrow::Status GetSchema(const std::vector<int>& indices,