This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 9805fae0 feat: implement struct like for partition values (#354)
9805fae0 is described below
commit 9805fae08eb18698bf7a8682739a5524e0f5974c
Author: Gang Wu <[email protected]>
AuthorDate: Fri Nov 28 22:43:03 2025 +0800
feat: implement struct like for partition values (#354)
- Added PartitionValues to extend StructLike
- Implemented PartitionMap
- Implemented PartitionSet
---
src/iceberg/CMakeLists.txt | 1 +
src/iceberg/manifest_adapter.cc | 6 +-
src/iceberg/manifest_adapter.h | 2 +-
src/iceberg/manifest_entry.h | 4 +-
src/iceberg/manifest_reader_internal.cc | 15 +-
src/iceberg/meson.build | 1 +
src/iceberg/partition_summary.cc | 17 +-
src/iceberg/partition_summary_internal.h | 2 +-
src/iceberg/row/meson.build | 7 +-
src/iceberg/row/partition_values.cc | 91 +++
src/iceberg/row/partition_values.h | 75 +++
src/iceberg/test/CMakeLists.txt | 1 +
src/iceberg/test/manifest_reader_writer_test.cc | 2 +-
src/iceberg/test/manifest_writer_versions_test.cc | 6 +-
src/iceberg/test/meson.build | 1 +
src/iceberg/test/partition_value_test.cc | 670 ++++++++++++++++++++++
src/iceberg/type_fwd.h | 1 +
src/iceberg/util/meson.build | 1 +
src/iceberg/util/partition_value_util.h | 265 +++++++++
19 files changed, 1140 insertions(+), 28 deletions(-)
diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index 9c3e9c2a..fb0c6ad7 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -46,6 +46,7 @@ set(ICEBERG_SOURCES
partition_summary.cc
row/arrow_array_wrapper.cc
row/manifest_wrapper.cc
+ row/partition_values.cc
row/struct_like.cc
schema.cc
schema_field.cc
diff --git a/src/iceberg/manifest_adapter.cc b/src/iceberg/manifest_adapter.cc
index fa2efd43..94bedced 100644
--- a/src/iceberg/manifest_adapter.cc
+++ b/src/iceberg/manifest_adapter.cc
@@ -165,17 +165,17 @@ ManifestEntryAdapter::~ManifestEntryAdapter() {
Status ManifestEntryAdapter::AppendPartitionValues(
ArrowArray* array, const std::shared_ptr<StructType>& partition_type,
- const std::vector<Literal>& partition_values) {
+ const PartitionValues& partition_values) {
if (array->n_children != partition_type->fields().size()) [[unlikely]] {
return InvalidArrowData("Arrow array of partition does not match partition
type.");
}
- if (partition_values.size() != partition_type->fields().size()) [[unlikely]]
{
+ if (partition_values.num_fields() != partition_type->fields().size())
[[unlikely]] {
return InvalidArrowData("Literal list of partition does not match
partition type.");
}
auto fields = partition_type->fields();
for (size_t i = 0; i < fields.size(); i++) {
- const auto& partition_value = partition_values[i];
+ const auto& partition_value = partition_values.ValueAt(i)->get();
const auto& partition_field = fields[i];
auto child_array = array->children[i];
if (partition_value.IsNull()) {
diff --git a/src/iceberg/manifest_adapter.h b/src/iceberg/manifest_adapter.h
index 979d81a3..be76c284 100644
--- a/src/iceberg/manifest_adapter.h
+++ b/src/iceberg/manifest_adapter.h
@@ -85,7 +85,7 @@ class ICEBERG_EXPORT ManifestEntryAdapter : public ManifestAdapter {
const DataFile& file);
static Status AppendPartitionValues(ArrowArray* array,
const std::shared_ptr<StructType>&
partition_type,
- const std::vector<Literal>&
partition_values);
+ const PartitionValues& partition_values);
virtual Result<std::optional<int64_t>> GetSequenceNumber(
const ManifestEntry& entry) const;
diff --git a/src/iceberg/manifest_entry.h b/src/iceberg/manifest_entry.h
index 7225f4a7..9702a487 100644
--- a/src/iceberg/manifest_entry.h
+++ b/src/iceberg/manifest_entry.h
@@ -26,11 +26,11 @@
#include <string>
#include <vector>
-#include "iceberg/expression/literal.h"
#include "iceberg/file_format.h"
#include "iceberg/iceberg_export.h"
#include "iceberg/partition_spec.h"
#include "iceberg/result.h"
+#include "iceberg/row/partition_values.h"
#include "iceberg/schema_field.h"
#include "iceberg/type.h"
@@ -79,7 +79,7 @@ struct ICEBERG_EXPORT DataFile {
/// Field id: 102
/// Partition data tuple, schema based on the partition spec output using partition
/// field ids
- std::vector<Literal> partition;
+ PartitionValues partition;
/// Field id: 103
/// Number of records in this file, or the cardinality of a deletion vector
int64_t record_count = 0;
diff --git a/src/iceberg/manifest_reader_internal.cc b/src/iceberg/manifest_reader_internal.cc
index b6007d1c..9898e6c9 100644
--- a/src/iceberg/manifest_reader_internal.cc
+++ b/src/iceberg/manifest_reader_internal.cc
@@ -297,27 +297,26 @@ Status ParseLiteral(ArrowArrayView* view_of_partition,
int64_t row_idx,
std::vector<ManifestEntry>& manifest_entries) {
if (view_of_partition->storage_type == ArrowType::NANOARROW_TYPE_BOOL) {
auto value = ArrowArrayViewGetUIntUnsafe(view_of_partition, row_idx);
- manifest_entries[row_idx].data_file->partition.emplace_back(
- Literal::Boolean(value != 0));
+
manifest_entries[row_idx].data_file->partition.AddValue(Literal::Boolean(value
!= 0));
} else if (view_of_partition->storage_type ==
ArrowType::NANOARROW_TYPE_INT32) {
auto value = ArrowArrayViewGetIntUnsafe(view_of_partition, row_idx);
-
manifest_entries[row_idx].data_file->partition.emplace_back(Literal::Int(value));
+
manifest_entries[row_idx].data_file->partition.AddValue(Literal::Int(value));
} else if (view_of_partition->storage_type ==
ArrowType::NANOARROW_TYPE_INT64) {
auto value = ArrowArrayViewGetIntUnsafe(view_of_partition, row_idx);
-
manifest_entries[row_idx].data_file->partition.emplace_back(Literal::Long(value));
+
manifest_entries[row_idx].data_file->partition.AddValue(Literal::Long(value));
} else if (view_of_partition->storage_type ==
ArrowType::NANOARROW_TYPE_FLOAT) {
auto value = ArrowArrayViewGetDoubleUnsafe(view_of_partition, row_idx);
-
manifest_entries[row_idx].data_file->partition.emplace_back(Literal::Float(value));
+
manifest_entries[row_idx].data_file->partition.AddValue(Literal::Float(value));
} else if (view_of_partition->storage_type ==
ArrowType::NANOARROW_TYPE_DOUBLE) {
auto value = ArrowArrayViewGetDoubleUnsafe(view_of_partition, row_idx);
-
manifest_entries[row_idx].data_file->partition.emplace_back(Literal::Double(value));
+
manifest_entries[row_idx].data_file->partition.AddValue(Literal::Double(value));
} else if (view_of_partition->storage_type ==
ArrowType::NANOARROW_TYPE_STRING) {
auto value = ArrowArrayViewGetStringUnsafe(view_of_partition, row_idx);
- manifest_entries[row_idx].data_file->partition.emplace_back(
+ manifest_entries[row_idx].data_file->partition.AddValue(
Literal::String(std::string(value.data, value.size_bytes)));
} else if (view_of_partition->storage_type ==
ArrowType::NANOARROW_TYPE_BINARY) {
auto buffer = ArrowArrayViewGetBytesUnsafe(view_of_partition, row_idx);
- manifest_entries[row_idx].data_file->partition.emplace_back(
+ manifest_entries[row_idx].data_file->partition.AddValue(
Literal::Binary(std::vector<uint8_t>(buffer.data.as_char,
buffer.data.as_char +
buffer.size_bytes)));
} else {
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index 15905107..bdb5dbf3 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -68,6 +68,7 @@ iceberg_sources = files(
'partition_summary.cc',
'row/arrow_array_wrapper.cc',
'row/manifest_wrapper.cc',
+ 'row/partition_values.cc',
'row/struct_like.cc',
'schema.cc',
'schema_field.cc',
diff --git a/src/iceberg/partition_summary.cc b/src/iceberg/partition_summary.cc
index cf279fd1..d0ed4a95 100644
--- a/src/iceberg/partition_summary.cc
+++ b/src/iceberg/partition_summary.cc
@@ -23,6 +23,7 @@
#include "iceberg/manifest_list.h"
#include "iceberg/partition_summary_internal.h"
#include "iceberg/result.h"
+#include "iceberg/row/partition_values.h"
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/formatter.h" // IWYU pragma: keep
#include "iceberg/util/macros.h"
@@ -74,18 +75,18 @@ PartitionSummary::PartitionSummary(const StructType& partition_type) {
}
}
-Status PartitionSummary::Update(const std::vector<Literal>& partition_values) {
- if (partition_values.size() != field_stats_.size()) [[unlikely]] {
+Status PartitionSummary::Update(const PartitionValues& partition_values) {
+ if (partition_values.num_fields() != field_stats_.size()) [[unlikely]] {
return InvalidArgument("partition values size {} does not match field
stats size {}",
- partition_values.size(), field_stats_.size());
+ partition_values.num_fields(), field_stats_.size());
}
- for (size_t i = 0; i < partition_values.size(); i++) {
+ for (size_t i = 0; i < partition_values.num_fields(); i++) {
+ ICEBERG_ASSIGN_OR_RAISE(auto val, partition_values.ValueAt(i));
ICEBERG_ASSIGN_OR_RAISE(
- auto literal,
- partition_values[i].CastTo(
-
internal::checked_pointer_cast<PrimitiveType>(field_stats_[i].type())));
- ICEBERG_RETURN_UNEXPECTED(field_stats_[i].Update(literal));
+ auto lit,
val.get().CastTo(internal::checked_pointer_cast<PrimitiveType>(
+ field_stats_[i].type())));
+ ICEBERG_RETURN_UNEXPECTED(field_stats_[i].Update(lit));
}
return {};
}
diff --git a/src/iceberg/partition_summary_internal.h b/src/iceberg/partition_summary_internal.h
index 167d1f01..de86954b 100644
--- a/src/iceberg/partition_summary_internal.h
+++ b/src/iceberg/partition_summary_internal.h
@@ -58,7 +58,7 @@ class PartitionSummary {
explicit PartitionSummary(const StructType& partition_type);
/// \brief Update the partition summary with partition values.
- Status Update(const std::vector<Literal>& partition_values);
+ Status Update(const PartitionValues& partition_values);
/// \brief Get the list of partition field summaries.
Result<std::vector<PartitionFieldSummary>> Summaries() const;
diff --git a/src/iceberg/row/meson.build b/src/iceberg/row/meson.build
index c7248d14..bcfb6dcd 100644
--- a/src/iceberg/row/meson.build
+++ b/src/iceberg/row/meson.build
@@ -16,6 +16,11 @@
# under the License.
install_headers(
- ['arrow_array_wrapper.h', 'manifest_wrapper.h', 'struct_like.h'],
+ [
+ 'arrow_array_wrapper.h',
+ 'manifest_wrapper.h',
+ 'partition_values.h',
+ 'struct_like.h',
+ ],
subdir: 'iceberg/row',
)
diff --git a/src/iceberg/row/partition_values.cc b/src/iceberg/row/partition_values.cc
new file mode 100644
index 00000000..712c801a
--- /dev/null
+++ b/src/iceberg/row/partition_values.cc
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/row/partition_values.h"
+
+namespace iceberg {
+
+PartitionValues& PartitionValues::operator=(const PartitionValues& other) {
+ if (this != &other) {
+ values_ = other.values_;
+ }
+ return *this;
+}
+
+bool PartitionValues::operator==(const PartitionValues& other) const {
+ return values_ == other.values_;
+}
+
+Result<Scalar> PartitionValues::GetField(size_t pos) const {
+ if (pos >= values_.size()) {
+ return InvalidArgument(
+ "Position {} is out of bounds for PartitionValues with {} fields", pos,
+ values_.size());
+ }
+
+ const auto& literal = values_[pos];
+
+ // Handle null values
+ if (literal.IsNull()) {
+ return Scalar{std::monostate{}};
+ }
+
+ // Convert Literal to Scalar based on type
+ switch (literal.type()->type_id()) {
+ case TypeId::kBoolean:
+ return Scalar{std::get<bool>(literal.value())};
+ case TypeId::kInt:
+ case TypeId::kDate:
+ return Scalar{std::get<int32_t>(literal.value())};
+ case TypeId::kLong:
+ case TypeId::kTime:
+ case TypeId::kTimestamp:
+ case TypeId::kTimestampTz:
+ return Scalar{std::get<int64_t>(literal.value())};
+ case TypeId::kFloat:
+ return Scalar{std::get<float>(literal.value())};
+ case TypeId::kDouble:
+ return Scalar{std::get<double>(literal.value())};
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(literal.value());
+ return Scalar{std::string_view(str)};
+ }
+ case TypeId::kBinary:
+ case TypeId::kFixed: {
+ const auto& bytes = std::get<std::vector<uint8_t>>(literal.value());
+ return Scalar{
+ std::string_view(reinterpret_cast<const char*>(bytes.data()),
bytes.size())};
+ }
+ case TypeId::kDecimal:
+ return Scalar{std::get<Decimal>(literal.value())};
+ default:
+ return NotSupported("Cannot convert literal of type {} to Scalar",
+ literal.type()->ToString());
+ }
+}
+
+Result<std::reference_wrapper<const Literal>> PartitionValues::ValueAt(size_t
pos) const {
+ if (pos >= values_.size()) {
+ return InvalidArgument("Cannot get partition value at {} from {} fields",
pos,
+ values_.size());
+ }
+ return std::cref(values_[pos]);
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/row/partition_values.h b/src/iceberg/row/partition_values.h
new file mode 100644
index 00000000..9a0653ff
--- /dev/null
+++ b/src/iceberg/row/partition_values.h
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/row/partition_values.h
+/// Wrapper classes for partition value related data structures.
+
+#include <functional>
+#include <span>
+#include <utility>
+
+#include "iceberg/expression/literal.h"
+#include "iceberg/iceberg_export.h"
+#include "iceberg/row/struct_like.h"
+
+namespace iceberg {
+
+/// \brief StructLike wrapper for a vector of literals that represent partition values.
+class ICEBERG_EXPORT PartitionValues : public StructLike {
+ public:
+ PartitionValues() = default;
+ explicit PartitionValues(std::vector<Literal> values) :
values_(std::move(values)) {}
+ explicit PartitionValues(Literal value) : values_({std::move(value)}) {}
+
+ PartitionValues(const PartitionValues& other) : values_(other.values_) {}
+ PartitionValues& operator=(const PartitionValues& other);
+
+ PartitionValues(PartitionValues&&) noexcept = default;
+ PartitionValues& operator=(PartitionValues&&) noexcept = default;
+
+ ~PartitionValues() override = default;
+
+ Result<Scalar> GetField(size_t pos) const override;
+
+ size_t num_fields() const override { return values_.size(); }
+
+ /// \brief Get the partition field value at the given position.
+ /// \param pos The position of the field in the struct.
+ /// \return A reference to the partition field value.
+ Result<std::reference_wrapper<const Literal>> ValueAt(size_t pos) const;
+
+ /// \brief Add a value to the partition values.
+ /// \param value The value to add.
+ void AddValue(Literal value) { values_.emplace_back(std::move(value)); }
+
+ /// \brief Reset the partition values.
+ /// \param values The values to reset to.
+ void Reset(std::vector<Literal> values) { values_ = std::move(values); }
+
+ std::span<const Literal> values() const { return values_; }
+
+ bool operator==(const PartitionValues& other) const;
+
+ private:
+ std::vector<Literal> values_;
+};
+
+} // namespace iceberg
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index 41b22507..21ccd4d6 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -72,6 +72,7 @@ add_iceberg_test(schema_test
transform_test.cc
partition_field_test.cc
partition_spec_test.cc
+ partition_value_test.cc
sort_field_test.cc
sort_order_test.cc
snapshot_test.cc
diff --git a/src/iceberg/test/manifest_reader_writer_test.cc b/src/iceberg/test/manifest_reader_writer_test.cc
index c05b64ee..82057696 100644
--- a/src/iceberg/test/manifest_reader_writer_test.cc
+++ b/src/iceberg/test/manifest_reader_writer_test.cc
@@ -139,7 +139,7 @@ class ManifestV1Test : public ManifestReaderWriterTestBase {
entry.data_file = std::make_shared<DataFile>();
entry.data_file->file_path = test_dir_prefix + paths[i];
entry.data_file->file_format = FileFormatType::kParquet;
- entry.data_file->partition.emplace_back(Literal::Int(partitions[i]));
+ entry.data_file->partition.AddValue(Literal::Int(partitions[i]));
entry.data_file->record_count = 1;
entry.data_file->file_size_in_bytes = 1375;
entry.data_file->column_sizes = {{1, 49}, {2, 49}, {3, 49}, {4, 49}};
diff --git a/src/iceberg/test/manifest_writer_versions_test.cc b/src/iceberg/test/manifest_writer_versions_test.cc
index b792c86d..6e3dfde1 100644
--- a/src/iceberg/test/manifest_writer_versions_test.cc
+++ b/src/iceberg/test/manifest_writer_versions_test.cc
@@ -27,7 +27,6 @@
#include "iceberg/arrow/arrow_file_io.h"
#include "iceberg/avro/avro_register.h"
-#include "iceberg/expression/literal.h"
#include "iceberg/file_format.h"
#include "iceberg/manifest_entry.h"
#include "iceberg/manifest_list.h"
@@ -35,6 +34,7 @@
#include "iceberg/manifest_writer.h"
#include "iceberg/metrics.h"
#include "iceberg/partition_spec.h"
+#include "iceberg/row/partition_values.h"
#include "iceberg/schema.h"
#include "iceberg/schema_field.h"
#include "iceberg/table_metadata.h"
@@ -54,8 +54,8 @@ constexpr FileFormatType kFormat = FileFormatType::kAvro;
constexpr int32_t kSortOrderId = 2;
constexpr int64_t kFirstRowId = 100L;
-const std::vector<Literal> kPartition = {Literal::String("cheesy"),
Literal::Int(10),
- Literal::Int(3)};
+const PartitionValues kPartition =
+ PartitionValues({Literal::String("cheesy"), Literal::Int(10),
Literal::Int(3)});
const std::vector<int32_t> kEqualityIds = {1};
const auto kMetrics = Metrics{
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index 00cd649e..c3a401b5 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -33,6 +33,7 @@ iceberg_tests = {
'name_mapping_test.cc',
'partition_field_test.cc',
'partition_spec_test.cc',
+ 'partition_value_test.cc',
'schema_field_test.cc',
'schema_test.cc',
'schema_util_test.cc',
diff --git a/src/iceberg/test/partition_value_test.cc b/src/iceberg/test/partition_value_test.cc
new file mode 100644
index 00000000..c72a8b6b
--- /dev/null
+++ b/src/iceberg/test/partition_value_test.cc
@@ -0,0 +1,670 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "iceberg/expression/literal.h"
+#include "iceberg/row/partition_values.h"
+#include "iceberg/util/partition_value_util.h"
+
+namespace iceberg {
+
+// PartitionValues Tests
+
+TEST(PartitionValuesTest, DefaultConstruction) {
+ PartitionValues partition;
+ EXPECT_EQ(partition.num_fields(), 0);
+}
+
+TEST(PartitionValuesTest, ConstructionFromVector) {
+ std::vector<Literal> values = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition(std::move(values));
+ EXPECT_EQ(partition.num_fields(), 2);
+}
+
+TEST(PartitionValuesTest, ConstructionFromSingleLiteral) {
+ PartitionValues partition(Literal::Int(42));
+ EXPECT_EQ(partition.num_fields(), 1);
+}
+
+TEST(PartitionValuesTest, CopyConstructor) {
+ std::vector<Literal> values = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition1(std::move(values));
+
+ PartitionValues partition2(partition1);
+ EXPECT_EQ(partition2.num_fields(), 2);
+ EXPECT_EQ(partition1, partition2);
+}
+
+TEST(PartitionValuesTest, MoveConstructor) {
+ std::vector<Literal> values = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition1(std::move(values));
+
+ PartitionValues partition2(std::move(partition1));
+ EXPECT_EQ(partition2.num_fields(), 2);
+}
+
+TEST(PartitionValuesTest, CopyAssignment) {
+ std::vector<Literal> values1 = {Literal::Int(1)};
+ std::vector<Literal> values2 = {Literal::Int(2), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ partition1 = partition2;
+ EXPECT_EQ(partition1.num_fields(), 2);
+ EXPECT_EQ(partition1, partition2);
+}
+
+TEST(PartitionValuesTest, MoveAssignment) {
+ std::vector<Literal> values1 = {Literal::Int(1)};
+ std::vector<Literal> values2 = {Literal::Int(2), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ partition1 = std::move(partition2);
+ EXPECT_EQ(partition1.num_fields(), 2);
+}
+
+TEST(PartitionValuesTest, GetFieldInt) {
+ std::vector<Literal> values = {Literal::Int(42)};
+ PartitionValues partition(std::move(values));
+
+ auto result = partition.GetField(0);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_TRUE(std::holds_alternative<int32_t>(*result));
+ EXPECT_EQ(std::get<int32_t>(*result), 42);
+}
+
+TEST(PartitionValuesTest, GetFieldString) {
+ std::vector<Literal> values = {Literal::String("hello")};
+ PartitionValues partition(std::move(values));
+
+ auto result = partition.GetField(0);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_TRUE(std::holds_alternative<std::string_view>(*result));
+ EXPECT_EQ(std::get<std::string_view>(*result), "hello");
+}
+
+TEST(PartitionValuesTest, GetFieldMultipleTypes) {
+ std::vector<Literal> values = {Literal::Int(1), Literal::Long(2L),
+ Literal::String("test"),
Literal::Boolean(true),
+ Literal::Float(3.14f),
Literal::Double(2.82e-04)};
+ PartitionValues partition(std::move(values));
+
+ auto result0 = partition.GetField(0);
+ ASSERT_TRUE(result0.has_value());
+ EXPECT_EQ(std::get<int32_t>(*result0), 1);
+
+ auto result1 = partition.GetField(1);
+ ASSERT_TRUE(result1.has_value());
+ EXPECT_EQ(std::get<int64_t>(*result1), 2L);
+
+ auto result2 = partition.GetField(2);
+ ASSERT_TRUE(result2.has_value());
+ EXPECT_EQ(std::get<std::string_view>(*result2), "test");
+
+ auto result3 = partition.GetField(3);
+ ASSERT_TRUE(result3.has_value());
+ EXPECT_EQ(std::get<bool>(*result3), true);
+
+ auto result4 = partition.GetField(4);
+ ASSERT_TRUE(result4.has_value());
+ EXPECT_FLOAT_EQ(std::get<float>(*result4), 3.14f);
+
+ auto result5 = partition.GetField(5);
+ ASSERT_TRUE(result5.has_value());
+ EXPECT_DOUBLE_EQ(std::get<double>(*result5), 2.82e-04);
+}
+
+TEST(PartitionValuesTest, GetFieldOutOfBounds) {
+ std::vector<Literal> values = {Literal::Int(1)};
+ PartitionValues partition(std::move(values));
+
+ auto result = partition.GetField(1);
+ EXPECT_FALSE(result.has_value());
+}
+
+TEST(PartitionValuesTest, ValueAt) {
+ std::vector<Literal> values = {Literal::Int(42)};
+ PartitionValues partition(std::move(values));
+
+ auto result = partition.ValueAt(0);
+ ASSERT_TRUE(result.has_value());
+ const Literal& literal_ref = result->get();
+ EXPECT_EQ(literal_ref, Literal::Int(42));
+}
+
+TEST(PartitionValuesTest, ValueOutOfBounds) {
+ std::vector<Literal> values = {Literal::Int(1)};
+ PartitionValues partition(std::move(values));
+
+ auto result = partition.ValueAt(1);
+ EXPECT_FALSE(result.has_value());
+}
+
+TEST(PartitionValuesTest, AddValue) {
+ PartitionValues partition;
+ EXPECT_EQ(partition.num_fields(), 0);
+
+ partition.AddValue(Literal::Int(1));
+ EXPECT_EQ(partition.num_fields(), 1);
+
+ partition.AddValue(Literal::String("test"));
+ EXPECT_EQ(partition.num_fields(), 2);
+}
+
+TEST(PartitionValuesTest, Reset) {
+ std::vector<Literal> values1 = {Literal::Int(1)};
+ PartitionValues partition(std::move(values1));
+ EXPECT_EQ(partition.num_fields(), 1);
+
+ std::vector<Literal> values2 = {Literal::Int(2), Literal::String("test")};
+ partition.Reset(std::move(values2));
+ EXPECT_EQ(partition.num_fields(), 2);
+}
+
+TEST(PartitionValuesTest, EqualityEqual) {
+ std::vector<Literal> values1 = {Literal::Int(1), Literal::String("test")};
+ std::vector<Literal> values2 = {Literal::Int(1), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ EXPECT_EQ(partition1, partition2);
+}
+
+TEST(PartitionValuesTest, EqualityNotEqual) {
+ std::vector<Literal> values1 = {Literal::Int(1), Literal::String("test")};
+ std::vector<Literal> values2 = {Literal::Int(2), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ EXPECT_NE(partition1, partition2);
+}
+
+TEST(PartitionValuesTest, EqualityDifferentSize) {
+ std::vector<Literal> values1 = {Literal::Int(1)};
+ std::vector<Literal> values2 = {Literal::Int(1), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ EXPECT_NE(partition1, partition2);
+}
+
+// Hash and Equality Functors Tests
+
+TEST(PartitionValuesHashTest, ConsistentHash) {
+ std::vector<Literal> values = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition(std::move(values));
+
+ PartitionValuesHash hasher;
+ size_t hash1 = hasher(partition);
+ size_t hash2 = hasher(partition);
+
+ EXPECT_EQ(hash1, hash2);
+}
+
+TEST(PartitionValuesHashTest, DifferentPartitionsDifferentHashes) {
+ std::vector<Literal> values1 = {Literal::Int(1), Literal::String("test")};
+ std::vector<Literal> values2 = {Literal::Int(2), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ PartitionValuesHash hasher;
+ size_t hash1 = hasher(partition1);
+ size_t hash2 = hasher(partition2);
+
+ EXPECT_NE(hash1, hash2);
+}
+
+TEST(PartitionValuesHashTest, EqualPartitionsSameHash) {
+ std::vector<Literal> values1 = {Literal::Int(1), Literal::String("test")};
+ std::vector<Literal> values2 = {Literal::Int(1), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ PartitionValuesHash hasher;
+ size_t hash1 = hasher(partition1);
+ size_t hash2 = hasher(partition2);
+
+ EXPECT_EQ(hash1, hash2);
+}
+
+TEST(PartitionValuesEqualTest, EqualPartitions) {
+ std::vector<Literal> values1 = {Literal::Int(1), Literal::String("test")};
+ std::vector<Literal> values2 = {Literal::Int(1), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ PartitionValuesEqual equal;
+ EXPECT_TRUE(equal(partition1, partition2));
+}
+
+TEST(PartitionValuesEqualTest, NotEqualPartitions) {
+ std::vector<Literal> values1 = {Literal::Int(1), Literal::String("test")};
+ std::vector<Literal> values2 = {Literal::Int(2), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ PartitionValuesEqual equal;
+ EXPECT_FALSE(equal(partition1, partition2));
+}
+
+TEST(PartitionKeyHashTest, ConsistentHash) {
+ std::vector<Literal> values = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition(std::move(values));
+ PartitionKey key{1, std::move(partition)};
+
+ PartitionKeyHash hasher;
+ size_t hash1 = hasher(key);
+ size_t hash2 = hasher(key);
+
+ EXPECT_EQ(hash1, hash2);
+}
+
+TEST(PartitionKeyHashTest, DifferentSpecIdsDifferentHashes) {
+ std::vector<Literal> values1 = {Literal::Int(1), Literal::String("test")};
+ std::vector<Literal> values2 = {Literal::Int(1), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ PartitionKey key1{1, std::move(partition1)};
+ PartitionKey key2{2, std::move(partition2)};
+
+ PartitionKeyHash hasher;
+ size_t hash1 = hasher(key1);
+ size_t hash2 = hasher(key2);
+
+ EXPECT_NE(hash1, hash2);
+}
+
+TEST(PartitionKeyEqualTest, EqualKeys) {
+ std::vector<Literal> values1 = {Literal::Int(1), Literal::String("test")};
+ std::vector<Literal> values2 = {Literal::Int(1), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ PartitionKey key1{1, std::move(partition1)};
+ PartitionKey key2{1, std::move(partition2)};
+
+ PartitionKeyEqual equal;
+ EXPECT_TRUE(equal(key1, key2));
+}
+
+TEST(PartitionKeyEqualTest, NotEqualKeys) {
+ std::vector<Literal> values1 = {Literal::Int(1), Literal::String("test")};
+ std::vector<Literal> values2 = {Literal::Int(2), Literal::String("test")};
+
+ PartitionValues partition1(std::move(values1));
+ PartitionValues partition2(std::move(values2));
+
+ PartitionKey key1{1, std::move(partition1)};
+ PartitionKey key2{1, std::move(partition2)};
+
+ PartitionKeyEqual equal;
+ EXPECT_FALSE(equal(key1, key2));
+}
+
+// PartitionMap Tests
+
+TEST(PartitionMapTest, DefaultConstruction) {
+ PartitionMap<std::string> map;
+ EXPECT_EQ(map.size(), 0);
+ EXPECT_TRUE(map.empty());
+}
+
+TEST(PartitionMapTest, PutAndGet) {
+ PartitionMap<std::string> map;
+
+ std::vector<Literal> values = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition(std::move(values));
+
+ bool updated = map.put(1, std::move(partition), "value1");
+ EXPECT_FALSE(updated);
+ EXPECT_EQ(map.size(), 1);
+
+ std::vector<Literal> values2 = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition2(std::move(values2));
+
+ auto result = map.get(1, partition2);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result.value().get(), "value1");
+}
+
+TEST(PartitionMapTest, PutUpdate) {
+ PartitionMap<std::string> map;
+
+ std::vector<Literal> values = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition(std::move(values));
+
+ map.put(1, std::move(partition), "value1");
+ EXPECT_EQ(map.size(), 1);
+
+ std::vector<Literal> values2 = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition2(std::move(values2));
+
+ bool updated = map.put(1, std::move(partition2), "value2");
+ EXPECT_TRUE(updated);
+ EXPECT_EQ(map.size(), 1);
+
+ std::vector<Literal> values3 = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition3(std::move(values3));
+
+ auto result = map.get(1, partition3);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result.value().get(), "value2");
+}
+
+TEST(PartitionMapTest, GetNonExistent) {
+ PartitionMap<std::string> map;
+
+ std::vector<Literal> values = {Literal::Int(1), Literal::String("test")};
+ PartitionValues partition(std::move(values));
+
+ auto result = map.get(1, partition);
+ EXPECT_FALSE(result.has_value());
+}
+
+TEST(PartitionMapTest, Contains) {
+  PartitionMap<std::string> map;
+  map.put(1,
+          PartitionValues(
+              std::vector<Literal>{Literal::Int(1), Literal::String("test")}),
+          "value1");
+
+  // An equal key built separately is reported as present.
+  const PartitionValues same_key(
+      std::vector<Literal>{Literal::Int(1), Literal::String("test")});
+  EXPECT_TRUE(map.contains(1, same_key));
+
+  // A key that differs in its first literal is not.
+  const PartitionValues different_key(
+      std::vector<Literal>{Literal::Int(2), Literal::String("test")});
+  EXPECT_FALSE(map.contains(1, different_key));
+}
+
+TEST(PartitionMapTest, Remove) {
+  PartitionMap<std::string> map;
+
+  std::vector<Literal> stored_literals = {Literal::Int(1),
+                                          Literal::String("test")};
+  map.put(1, PartitionValues(std::move(stored_literals)), "value1");
+  EXPECT_EQ(map.size(), 1);
+
+  // Removing through an equal key erases the entry and reports success.
+  const PartitionValues removal_key(
+      std::vector<Literal>{Literal::Int(1), Literal::String("test")});
+  const bool was_removed = map.remove(1, removal_key);
+  EXPECT_TRUE(was_removed);
+  EXPECT_EQ(map.size(), 0);
+}
+
+TEST(PartitionMapTest, RemoveNonExistent) {
+  PartitionMap<std::string> map;
+
+  // Removing from an empty map must report that nothing was erased.
+  const PartitionValues absent_key(
+      std::vector<Literal>{Literal::Int(1), Literal::String("test")});
+  EXPECT_FALSE(map.remove(1, absent_key));
+}
+
+TEST(PartitionMapTest, Clear) {
+  PartitionMap<std::string> map;
+
+  // Two distinct keys under the same spec ID.
+  map.put(1,
+          PartitionValues(
+              std::vector<Literal>{Literal::Int(1), Literal::String("test1")}),
+          "value1");
+  map.put(1,
+          PartitionValues(
+              std::vector<Literal>{Literal::Int(2), Literal::String("test2")}),
+          "value2");
+  EXPECT_EQ(map.size(), 2);
+
+  map.clear();
+  EXPECT_TRUE(map.empty());
+  EXPECT_EQ(map.size(), 0);
+}
+
+TEST(PartitionMapTest, MultipleSpecIds) {
+  PartitionMap<std::string> map;
+
+  // The partition values are identical everywhere; only the spec ID differs.
+  auto make_key = [] {
+    return PartitionValues(
+        std::vector<Literal>{Literal::Int(1), Literal::String("test")});
+  };
+
+  map.put(1, make_key(), "spec1_value");
+  map.put(2, make_key(), "spec2_value");
+  EXPECT_EQ(map.size(), 2);
+
+  auto for_spec1 = map.get(1, make_key());
+  auto for_spec2 = map.get(2, make_key());
+
+  ASSERT_TRUE(for_spec1.has_value());
+  ASSERT_TRUE(for_spec2.has_value());
+  EXPECT_EQ(for_spec1.value().get(), "spec1_value");
+  EXPECT_EQ(for_spec2.value().get(), "spec2_value");
+}
+
+TEST(PartitionMapTest, Iteration) {
+  PartitionMap<int> map;
+
+  // Keys Int(1), Int(2), Int(3) under spec 1 map to 10, 20, 30.
+  for (int i = 1; i <= 3; ++i) {
+    map.put(1, PartitionValues(std::vector<Literal>{Literal::Int(i)}), i * 10);
+  }
+
+  // Iteration order is unspecified, so only the total is checked.
+  int total = 0;
+  for (const auto& entry : map) {
+    total += entry.second;
+  }
+
+  EXPECT_EQ(total, 60);
+}
+
+TEST(PartitionMapTest, ConstIteration) {
+  PartitionMap<int> map;
+  map.put(1, PartitionValues(std::vector<Literal>{Literal::Int(1)}), 10);
+  map.put(1, PartitionValues(std::vector<Literal>{Literal::Int(2)}), 20);
+
+  // Walk the map through a const reference to exercise the const iterators.
+  const PartitionMap<int>& read_only = map;
+  int total = 0;
+  for (auto it = read_only.begin(); it != read_only.end(); ++it) {
+    total += it->second;
+  }
+
+  EXPECT_EQ(total, 30);
+}
+
+// PartitionSet Tests
+
+TEST(PartitionSetTest, DefaultConstruction) {
+  // A default-constructed set must report itself as empty.
+  PartitionSet fresh_set;
+  EXPECT_TRUE(fresh_set.empty());
+  EXPECT_EQ(fresh_set.size(), 0);
+}
+
+TEST(PartitionSetTest, AddElement) {
+  PartitionSet set;
+
+  // Adding to an empty set must report that a new element was inserted.
+  PartitionValues element(
+      std::vector<Literal>{Literal::Int(1), Literal::String("test")});
+  const bool was_added = set.add(1, std::move(element));
+
+  EXPECT_TRUE(was_added);
+  EXPECT_EQ(set.size(), 1);
+}
+
+TEST(PartitionSetTest, AddDuplicate) {
+  PartitionSet set;
+
+  auto make_element = [] {
+    return PartitionValues(
+        std::vector<Literal>{Literal::Int(1), Literal::String("test")});
+  };
+
+  set.add(1, make_element());
+
+  // A second, equal element is rejected and the size stays unchanged.
+  const bool was_added = set.add(1, make_element());
+  EXPECT_FALSE(was_added);
+  EXPECT_EQ(set.size(), 1);
+}
+
+TEST(PartitionSetTest, Contains) {
+  PartitionSet set;
+  set.add(1, PartitionValues(std::vector<Literal>{Literal::Int(1),
+                                                  Literal::String("test")}));
+
+  // An equal element built separately is reported as present.
+  const PartitionValues same_element(
+      std::vector<Literal>{Literal::Int(1), Literal::String("test")});
+  EXPECT_TRUE(set.contains(1, same_element));
+
+  // An element that differs in its first literal is not.
+  const PartitionValues different_element(
+      std::vector<Literal>{Literal::Int(2), Literal::String("test")});
+  EXPECT_FALSE(set.contains(1, different_element));
+}
+
+TEST(PartitionSetTest, Remove) {
+  PartitionSet set;
+
+  std::vector<Literal> stored_literals = {Literal::Int(1),
+                                          Literal::String("test")};
+  set.add(1, PartitionValues(std::move(stored_literals)));
+  EXPECT_EQ(set.size(), 1);
+
+  // Removing through an equal element erases it and reports success.
+  const PartitionValues removal_key(
+      std::vector<Literal>{Literal::Int(1), Literal::String("test")});
+  const bool was_removed = set.remove(1, removal_key);
+  EXPECT_TRUE(was_removed);
+  EXPECT_EQ(set.size(), 0);
+}
+
+TEST(PartitionSetTest, RemoveNonExistent) {
+  PartitionSet set;
+
+  // Removing from an empty set must report that nothing was erased.
+  const PartitionValues absent_element(
+      std::vector<Literal>{Literal::Int(1), Literal::String("test")});
+  EXPECT_FALSE(set.remove(1, absent_element));
+}
+
+TEST(PartitionSetTest, Clear) {
+  PartitionSet set;
+
+  // Two distinct elements under the same spec ID.
+  set.add(1, PartitionValues(std::vector<Literal>{Literal::Int(1),
+                                                  Literal::String("test1")}));
+  set.add(1, PartitionValues(std::vector<Literal>{Literal::Int(2),
+                                                  Literal::String("test2")}));
+  EXPECT_EQ(set.size(), 2);
+
+  set.clear();
+  EXPECT_TRUE(set.empty());
+  EXPECT_EQ(set.size(), 0);
+}
+
+TEST(PartitionSetTest, MultipleSpecIds) {
+  PartitionSet set;
+
+  // The partition values are identical everywhere; only the spec ID differs.
+  auto make_element = [] {
+    return PartitionValues(
+        std::vector<Literal>{Literal::Int(1), Literal::String("test")});
+  };
+
+  set.add(1, make_element());
+  set.add(2, make_element());
+  EXPECT_EQ(set.size(), 2);
+
+  EXPECT_TRUE(set.contains(1, make_element()));
+  EXPECT_TRUE(set.contains(2, make_element()));
+}
+
+TEST(PartitionSetTest, Iteration) {
+  PartitionSet set;
+
+  // Elements Int(1), Int(2), Int(3), all under spec 1.
+  for (int i = 1; i <= 3; ++i) {
+    set.add(1, PartitionValues(std::vector<Literal>{Literal::Int(i)}));
+  }
+
+  // Each element is a (spec ID, partition values) pair.
+  int visited = 0;
+  for (const auto& element : set) {
+    EXPECT_EQ(element.first, 1);
+    ++visited;
+  }
+
+  EXPECT_EQ(visited, 3);
+}
+
+TEST(PartitionSetTest, ConstIteration) {
+  PartitionSet set;
+  set.add(1, PartitionValues(std::vector<Literal>{Literal::Int(1)}));
+  set.add(1, PartitionValues(std::vector<Literal>{Literal::Int(2)}));
+
+  // Walk the set through a const reference to exercise the const iterators.
+  const PartitionSet& read_only = set;
+  int visited = 0;
+  for (auto it = read_only.begin(); it != read_only.end(); ++it) {
+    EXPECT_EQ(it->first, 1);
+    ++visited;
+  }
+
+  EXPECT_EQ(visited, 2);
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h
index 81681ebc..68a4543f 100644
--- a/src/iceberg/type_fwd.h
+++ b/src/iceberg/type_fwd.h
@@ -71,6 +71,7 @@ class MapType;
class NestedType;
class PartitionField;
class PartitionSpec;
+class PartitionValues;
class PrimitiveType;
class Schema;
class SchemaField;
diff --git a/src/iceberg/util/meson.build b/src/iceberg/util/meson.build
index 1f755be6..19c5ed1d 100644
--- a/src/iceberg/util/meson.build
+++ b/src/iceberg/util/meson.build
@@ -25,6 +25,7 @@ install_headers(
'formatter.h',
'int128.h',
'macros.h',
+ 'partition_value_util.h',
'string_util.h',
'timepoint.h',
'truncate_util.h',
diff --git a/src/iceberg/util/partition_value_util.h
b/src/iceberg/util/partition_value_util.h
new file mode 100644
index 00000000..0113623b
--- /dev/null
+++ b/src/iceberg/util/partition_value_util.h
@@ -0,0 +1,265 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/util/partition_value_util.h
+/// Wrapper classes for partition value related data structures.
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <optional>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+#include "iceberg/row/partition_values.h"
+
+namespace iceberg {
+
+/// \brief Constant added in every hash-combine step performed in this header.
+///
+/// Declared `inline` so that all translation units including this header
+/// share a single definition.
+inline constexpr std::size_t kHashPrime = 0x9e3779b9;
+
+/// \brief Owning key type: a partition spec ID paired with partition values.
+using PartitionKey = std::pair<int32_t, PartitionValues>;
+
+/// \brief Non-owning view of a (spec ID, partition values) key.
+///
+/// Serves as the probe type for heterogeneous lookup so that no
+/// PartitionValues copy is made. Only a reference is stored, so the referenced
+/// PartitionValues must stay alive for as long as this object is used.
+struct PartitionKeyRef {
+  int32_t spec_id;
+  const PartitionValues& values;
+
+  /// \brief View over a spec ID and separately held partition values.
+  PartitionKeyRef(int32_t id, const PartitionValues& vals)
+      : spec_id(id), values(vals) {}
+
+  /// \brief View over the two halves of an existing owning key.
+  explicit PartitionKeyRef(const PartitionKey& key)
+      : PartitionKeyRef(key.first, key.second) {}
+};
+
+/// \brief Hash functor for PartitionValues.
+///
+/// Folds the LiteralHash of each literal, in iteration order, into a running
+/// seed that starts at 0; with no literals the result is 0.
+struct PartitionValuesHash {
+  std::size_t operator()(const PartitionValues& partition) const noexcept {
+    LiteralHash hash_literal;
+    std::size_t seed = 0;
+    for (const auto& value : partition.values()) {
+      // The whole right-hand side is computed from the previous seed.
+      const std::size_t mixed =
+          hash_literal(value) + kHashPrime + (seed << 6) + (seed >> 2);
+      seed ^= mixed;
+    }
+    return seed;
+  }
+};
+
+/// \brief Equality functor for PartitionValues; forwards to its operator==.
+struct PartitionValuesEqual {
+  bool operator()(const PartitionValues& lhs,
+                  const PartitionValues& rhs) const {
+    const bool are_equal = (lhs == rhs);
+    return are_equal;
+  }
+};
+
+/// \brief Transparent hash functor for PartitionKey with heterogeneous lookup
+/// support.
+///
+/// Both overloads run the same computation, so an owning PartitionKey and a
+/// PartitionKeyRef with the same spec ID and values produce the same hash.
+struct PartitionKeyHash {
+  using is_transparent = void;
+
+  std::size_t operator()(const PartitionKey& key) const noexcept {
+    return Combine(key.first, key.second);
+  }
+
+  std::size_t operator()(const PartitionKeyRef& key) const noexcept {
+    return Combine(key.spec_id, key.values);
+  }
+
+ private:
+  /// Seeds with the spec ID hash, then mixes in the partition values hash.
+  static std::size_t Combine(int32_t spec_id,
+                             const PartitionValues& values) noexcept {
+    const std::size_t seed = std::hash<int32_t>{}(spec_id);
+    const std::size_t mixed =
+        PartitionValuesHash{}(values) + kHashPrime + (seed << 6) + (seed >> 2);
+    return seed ^ mixed;
+  }
+};
+
+/// \brief Transparent equality functor for PartitionKey with heterogeneous
+/// lookup support.
+///
+/// Accepts every combination of owning PartitionKey and non-owning
+/// PartitionKeyRef; each overload compares spec IDs first, then the values.
+struct PartitionKeyEqual {
+  using is_transparent = void;
+
+  // Owning key vs. owning key.
+  bool operator()(const PartitionKey& lhs, const PartitionKey& rhs) const {
+    return Same(lhs.first, lhs.second, rhs.first, rhs.second);
+  }
+
+  // Owning key vs. key reference (heterogeneous lookup).
+  bool operator()(const PartitionKey& lhs, const PartitionKeyRef& rhs) const {
+    return Same(lhs.first, lhs.second, rhs.spec_id, rhs.values);
+  }
+
+  // Key reference vs. owning key (heterogeneous lookup).
+  bool operator()(const PartitionKeyRef& lhs, const PartitionKey& rhs) const {
+    return Same(lhs.spec_id, lhs.values, rhs.first, rhs.second);
+  }
+
+  // Key reference vs. key reference (heterogeneous lookup).
+  bool operator()(const PartitionKeyRef& lhs,
+                  const PartitionKeyRef& rhs) const {
+    return Same(lhs.spec_id, lhs.values, rhs.spec_id, rhs.values);
+  }
+
+ private:
+  /// Compares the spec IDs and, only when they match, the partition values.
+  static bool Same(int32_t lhs_spec_id, const PartitionValues& lhs_values,
+                   int32_t rhs_spec_id, const PartitionValues& rhs_values) {
+    if (lhs_spec_id != rhs_spec_id) {
+      return false;
+    }
+    return lhs_values == rhs_values;
+  }
+};
+
+/// \brief A map that uses a pair of spec ID and partition tuple as keys.
+///
+/// Lookups probe the underlying std::unordered_map with a PartitionKeyRef via
+/// the transparent hash/equality functors, so they do not copy the
+/// PartitionValues passed in.
+///
+/// \tparam V the type of values
+template <typename V>
+class PartitionMap {
+ public:
+  using map_type =
+      std::unordered_map<PartitionKey, V, PartitionKeyHash, PartitionKeyEqual>;
+  using iterator = typename map_type::iterator;
+  using const_iterator = typename map_type::const_iterator;
+
+  PartitionMap() = default;
+
+  /// \brief Get the number of entries in the map.
+  size_t size() const { return map_.size(); }
+
+  /// \brief Check if the map is empty.
+  bool empty() const { return map_.empty(); }
+
+  /// \brief Clear all entries from the map.
+  void clear() { map_.clear(); }
+
+  /// \brief Check if the map contains a key.
+  /// \param spec_id The partition spec ID.
+  /// \param values The partition values.
+  /// \return true if the key exists, false otherwise.
+  bool contains(int32_t spec_id, const PartitionValues& values) const {
+    return map_.contains(PartitionKeyRef{spec_id, values});
+  }
+
+  /// \brief Get the value associated with a key.
+  /// \param spec_id The partition spec ID.
+  /// \param values The partition values.
+  /// \return Reference to the value if found, std::nullopt otherwise.
+  std::optional<std::reference_wrapper<V>> get(int32_t spec_id,
+                                               const PartitionValues& values) {
+    const auto it = map_.find(PartitionKeyRef{spec_id, values});
+    if (it == map_.end()) {
+      return std::nullopt;
+    }
+    return std::ref(it->second);
+  }
+
+  /// \brief Get the value associated with a key (const version).
+  /// \param spec_id The partition spec ID.
+  /// \param values The partition values.
+  /// \return Reference to the value if found, std::nullopt otherwise.
+  std::optional<std::reference_wrapper<const V>> get(
+      int32_t spec_id, const PartitionValues& values) const {
+    const auto it = map_.find(PartitionKeyRef{spec_id, values});
+    if (it == map_.end()) {
+      return std::nullopt;
+    }
+    return std::cref(it->second);
+  }
+
+  /// \brief Insert or update a value in the map.
+  /// \param spec_id The partition spec ID.
+  /// \param values The partition values.
+  /// \param value The value to insert.
+  /// \return true if the entry was updated, false if it was inserted.
+  bool put(int32_t spec_id, PartitionValues values, V value) {
+    // insert_or_assign resolves the key with a single lookup and reports
+    // whether a new entry was inserted; "updated" is the opposite of that.
+    const bool inserted =
+        map_.insert_or_assign(PartitionKey{spec_id, std::move(values)},
+                              std::move(value))
+            .second;
+    return !inserted;
+  }
+
+  /// \brief Remove an entry from the map.
+  /// \param spec_id The partition spec ID.
+  /// \param values The partition values.
+  /// \return true if the entry was removed, false if it didn't exist.
+  bool remove(int32_t spec_id, const PartitionValues& values) {
+    const auto it = map_.find(PartitionKeyRef{spec_id, values});
+    if (it == map_.end()) {
+      return false;
+    }
+    map_.erase(it);
+    return true;
+  }
+
+  /// \brief Get iterator to the beginning.
+  iterator begin() { return map_.begin(); }
+  const_iterator begin() const { return map_.begin(); }
+  const_iterator cbegin() const { return map_.cbegin(); }
+
+  /// \brief Get iterator to the end.
+  iterator end() { return map_.end(); }
+  const_iterator end() const { return map_.end(); }
+  const_iterator cend() const { return map_.cend(); }
+
+ private:
+  map_type map_;
+};
+
+/// \brief A set that uses a pair of spec ID and partition tuple as elements.
+///
+/// Membership checks and removals probe the underlying std::unordered_set
+/// with a PartitionKeyRef, so they do not copy the PartitionValues passed in.
+class PartitionSet {
+ public:
+  using set_type =
+      std::unordered_set<PartitionKey, PartitionKeyHash, PartitionKeyEqual>;
+  using iterator = typename set_type::iterator;
+  using const_iterator = typename set_type::const_iterator;
+
+  PartitionSet() = default;
+
+  /// \brief Number of elements currently stored.
+  size_t size() const { return set_.size(); }
+
+  /// \brief Whether the set holds no elements.
+  bool empty() const { return set_.empty(); }
+
+  /// \brief Drop every element.
+  void clear() { set_.clear(); }
+
+  /// \brief Test for membership.
+  /// \param spec_id The partition spec ID.
+  /// \param values The partition values.
+  /// \return true if the element exists, false otherwise.
+  bool contains(int32_t spec_id, const PartitionValues& values) const {
+    return set_.contains(PartitionKeyRef{spec_id, values});
+  }
+
+  /// \brief Insert an element.
+  /// \param spec_id The partition spec ID.
+  /// \param values The partition values.
+  /// \return true if the element was added, false if it already existed.
+  bool add(int32_t spec_id, PartitionValues values) {
+    // emplace() yields {iterator, inserted}; only the flag is of interest.
+    return set_.emplace(spec_id, std::move(values)).second;
+  }
+
+  /// \brief Erase an element.
+  /// \param spec_id The partition spec ID.
+  /// \param values The partition values.
+  /// \return true if the element was removed, false if it didn't exist.
+  bool remove(int32_t spec_id, const PartitionValues& values) {
+    const auto pos = set_.find(PartitionKeyRef{spec_id, values});
+    if (pos == set_.end()) {
+      return false;
+    }
+    set_.erase(pos);
+    return true;
+  }
+
+  /// \brief Iterators to the first element.
+  iterator begin() { return set_.begin(); }
+  const_iterator begin() const { return set_.begin(); }
+  const_iterator cbegin() const { return set_.cbegin(); }
+
+  /// \brief Iterators one past the last element.
+  iterator end() { return set_.end(); }
+  const_iterator end() const { return set_.end(); }
+  const_iterator cend() const { return set_.cend(); }
+
+ private:
+  set_type set_;
+};
+
+} // namespace iceberg