This is an automated email from the ASF dual-hosted git repository.
xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 17b5b2d feat: add Metadata method to Reader and its implementations (#235)
17b5b2d is described below
commit 17b5b2d30e67e314af31bbec5cc70f9798199e1c
Author: chao liu <[email protected]>
AuthorDate: Tue Sep 23 17:54:37 2025 +0800
feat: add Metadata method to Reader and its implementations (#235)
- Introduced a new virtual method `Metadata()` in the `Reader` class to
retrieve file metadata.
- Implemented `Metadata()` in `AvroReader` to return key-value pairs
from the Avro file's metadata.
- Implemented `Metadata()` in `ParquetReader` to extract and return
key-value pairs from the Parquet file's metadata.
---------
Co-authored-by: nullccxsy <[email protected]>
---
src/iceberg/avro/avro_reader.cc | 22 ++++++++++++++++++++++
src/iceberg/avro/avro_reader.h | 2 ++
src/iceberg/file_reader.h | 3 +++
src/iceberg/parquet/parquet_reader.cc | 26 ++++++++++++++++++++++++++
src/iceberg/parquet/parquet_reader.h | 2 ++
5 files changed, 55 insertions(+)
diff --git a/src/iceberg/avro/avro_reader.cc b/src/iceberg/avro/avro_reader.cc
index 048cd49..6452612 100644
--- a/src/iceberg/avro/avro_reader.cc
+++ b/src/iceberg/avro/avro_reader.cc
@@ -173,6 +173,24 @@ class AvroReader::Impl {
return arrow_schema;
}
+ Result<std::unordered_map<std::string, std::string>> Metadata() {  // file metadata as string key/value pairs
+ if (reader_ == nullptr) {  // reader_ is unset: report an error instead of dereferencing null
+ return Invalid("Reader is not opened");
+ }
+
+ const auto& metadata = reader_->metadata();  // bound by reference; entries are copied below
+
+ std::unordered_map<std::string, std::string> metadata_map;
+ metadata_map.reserve(metadata.size());  // one entry per source metadata entry
+
+ for (const auto& pair : metadata) {
+ metadata_map.insert_or_assign(pair.first,
+ std::string(pair.second.begin(), pair.second.end()));  // value range copied into a std::string (presumably raw bytes - confirm against the Avro reader API)
+ }
+
+ return metadata_map;
+ }
+
private:
Status InitReadContext() {
context_ = std::make_unique<ReadContext>();
@@ -241,6 +259,10 @@ Result<std::optional<ArrowArray>> AvroReader::Next() { return impl_->Next(); }
Result<ArrowSchema> AvroReader::Schema() { return impl_->Schema(); }
+Result<std::unordered_map<std::string, std::string>> AvroReader::Metadata() {  // public entry point; the work is done by Impl::Metadata()
+ return impl_->Metadata();  // NOTE(review): impl_ is assigned in Open(); presumably this must not be called before Open() - confirm
+}
+
Status AvroReader::Open(const ReaderOptions& options) {
impl_ = std::make_unique<Impl>();
return impl_->Open(options);
diff --git a/src/iceberg/avro/avro_reader.h b/src/iceberg/avro/avro_reader.h
index 07737bb..24f95f5 100644
--- a/src/iceberg/avro/avro_reader.h
+++ b/src/iceberg/avro/avro_reader.h
@@ -39,6 +39,8 @@ class ICEBERG_BUNDLE_EXPORT AvroReader : public Reader {
Result<ArrowSchema> Schema() final;
+ Result<std::unordered_map<std::string, std::string>> Metadata() final;  // key-value pairs from the Avro file's metadata
+
private:
class Impl;
std::unique_ptr<Impl> impl_;
diff --git a/src/iceberg/file_reader.h b/src/iceberg/file_reader.h
index 8a59e33..d25a5e4 100644
--- a/src/iceberg/file_reader.h
+++ b/src/iceberg/file_reader.h
@@ -54,6 +54,9 @@ class ICEBERG_EXPORT Reader {
/// \brief Get the schema of the data.
virtual Result<ArrowSchema> Schema() = 0;
+
+ /// \brief Get the metadata of the file as string key-value pairs.
+ virtual Result<std::unordered_map<std::string, std::string>> Metadata() = 0;
};
/// \brief A split of the file to read.
diff --git a/src/iceberg/parquet/parquet_reader.cc b/src/iceberg/parquet/parquet_reader.cc
index 4c86802..e57b98e 100644
--- a/src/iceberg/parquet/parquet_reader.cc
+++ b/src/iceberg/parquet/parquet_reader.cc
@@ -26,6 +26,7 @@
#include <arrow/record_batch.h>
#include <arrow/result.h>
#include <arrow/type.h>
+#include <arrow/util/key_value_metadata.h>
#include <parquet/arrow/reader.h>
#include <parquet/arrow/schema.h>
#include <parquet/file_reader.h>
@@ -185,6 +186,27 @@ class ParquetReader::Impl {
return arrow_schema;
}
+ Result<std::unordered_map<std::string, std::string>> Metadata() {  // file key/value metadata as a string map
+ if (reader_ == nullptr) {  // reader_ is unset: report an error instead of dereferencing null
+ return Invalid("Reader is not opened");
+ }
+
+ auto metadata = reader_->parquet_reader()->metadata();
+ if (!metadata) {  // no file metadata object was returned
+ return Invalid("Failed to get Parquet file metadata");
+ }
+
+ const auto& kv_metadata = metadata->key_value_metadata();
+ if (!kv_metadata) {  // no key/value section: this yields an empty map, not an error
+ return std::unordered_map<std::string, std::string>{};
+ }
+
+ std::unordered_map<std::string, std::string> metadata_map;
+ kv_metadata->ToUnorderedMap(&metadata_map);  // fills metadata_map through the out-parameter
+
+ return metadata_map;
+ }
+
private:
Status InitReadContext() {
context_ = std::make_unique<ReadContext>();
@@ -251,6 +273,10 @@ Result<std::optional<ArrowArray>> ParquetReader::Next() { return impl_->Next();
Result<ArrowSchema> ParquetReader::Schema() { return impl_->Schema(); }
+Result<std::unordered_map<std::string, std::string>> ParquetReader::Metadata() {  // public entry point; the work is done by Impl::Metadata()
+ return impl_->Metadata();  // NOTE(review): impl_ is assigned in Open(); presumably this must not be called before Open() - confirm
+}
+
Status ParquetReader::Open(const ReaderOptions& options) {
impl_ = std::make_unique<Impl>();
return impl_->Open(options);
diff --git a/src/iceberg/parquet/parquet_reader.h b/src/iceberg/parquet/parquet_reader.h
index 23d34df..0604230 100644
--- a/src/iceberg/parquet/parquet_reader.h
+++ b/src/iceberg/parquet/parquet_reader.h
@@ -39,6 +39,8 @@ class ICEBERG_BUNDLE_EXPORT ParquetReader : public Reader {
Result<ArrowSchema> Schema() final;
+ Result<std::unordered_map<std::string, std::string>> Metadata() final;  // key-value pairs from the Parquet file's metadata
+
private:
class Impl;
std::unique_ptr<Impl> impl_;