wgtmac commented on code in PR #185:
URL: https://github.com/apache/iceberg-cpp/pull/185#discussion_r2343018109
##########
test/literal_test.cc:
##########
@@ -383,4 +383,118 @@ TEST(LiteralTest, DoubleZeroComparison) {
EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less);
}
+// Type promotion tests
+TEST(LiteralSerializationTest, TypePromotion) {
+ // 4-byte int data can be deserialized as long
+ std::vector<uint8_t> int_data = {32, 0, 0, 0};
+ auto long_result = Literal::Deserialize(int_data, int64());
+ ASSERT_TRUE(long_result.has_value());
+ EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong);
+ EXPECT_EQ(long_result->ToString(), "32");
+
+ auto long_bytes = long_result->Serialize();
+ ASSERT_TRUE(long_bytes.has_value());
+ EXPECT_EQ(long_bytes->size(), 8);
+
+ // 4-byte float data can be deserialized as double
+ std::vector<uint8_t> float_data = {0, 0, 128, 63};
+ auto double_result = Literal::Deserialize(float_data, float64());
+ ASSERT_TRUE(double_result.has_value());
+ EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble);
+ EXPECT_EQ(double_result->ToString(), "1.000000");
+
+ auto double_bytes = double_result->Serialize();
+ ASSERT_TRUE(double_bytes.has_value());
+ EXPECT_EQ(double_bytes->size(), 8);
+}
+
+struct LiteralRoundTripParam {
+ std::string test_name;
+ std::vector<uint8_t> input_bytes;
+ Literal expected_literal;
+ std::shared_ptr<PrimitiveType> type;
+};
+
+class LiteralSerializationParamTest
+ : public ::testing::TestWithParam<LiteralRoundTripParam> {};
+
+TEST_P(LiteralSerializationParamTest, RoundTrip) {
+ const auto& param = GetParam();
+
+ // Deserialize from bytes
+ Result<Literal> literal_result = Literal::Deserialize(param.input_bytes,
param.type);
+ ASSERT_TRUE(literal_result.has_value())
+ << "Deserialization failed: " << literal_result.error().message;
+
+ // Check type and value
+ EXPECT_EQ(literal_result->type()->type_id(),
param.expected_literal.type()->type_id());
+ EXPECT_EQ(literal_result->ToString(), param.expected_literal.ToString());
+
+ // Serialize back to bytes
+ Result<std::vector<uint8_t>> bytes_result = literal_result->Serialize();
+ ASSERT_TRUE(bytes_result.has_value())
+ << "Serialization failed: " << bytes_result.error().message;
+ EXPECT_EQ(*bytes_result, param.input_bytes);
+
+ // Deserialize again to verify idempotency
+ Result<Literal> final_literal = Literal::Deserialize(*bytes_result,
param.type);
+ ASSERT_TRUE(final_literal.has_value())
+ << "Final deserialization failed: " << final_literal.error().message;
+ EXPECT_EQ(final_literal->type()->type_id(),
param.expected_literal.type()->type_id());
+ EXPECT_EQ(final_literal->ToString(), param.expected_literal.ToString());
Review Comment:
```suggestion
EXPECT_EQ(*final_literal, param.expected_literal);
```
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
Review Comment:
```suggestion
if (data.size() != sizeof(T)) [[unlikely]] {
```
Shouldn't the two sizes be strictly equal, i.e. any input whose size differs from `sizeof(T)` is rejected?
##########
test/literal_test.cc:
##########
@@ -383,4 +383,118 @@ TEST(LiteralTest, DoubleZeroComparison) {
EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less);
}
+// Type promotion tests
+TEST(LiteralSerializationTest, TypePromotion) {
+ // 4-byte int data can be deserialized as long
+ std::vector<uint8_t> int_data = {32, 0, 0, 0};
+ auto long_result = Literal::Deserialize(int_data, int64());
+ ASSERT_TRUE(long_result.has_value());
+ EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong);
+ EXPECT_EQ(long_result->ToString(), "32");
+
+ auto long_bytes = long_result->Serialize();
+ ASSERT_TRUE(long_bytes.has_value());
+ EXPECT_EQ(long_bytes->size(), 8);
+
+ // 4-byte float data can be deserialized as double
+ std::vector<uint8_t> float_data = {0, 0, 128, 63};
+ auto double_result = Literal::Deserialize(float_data, float64());
+ ASSERT_TRUE(double_result.has_value());
+ EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble);
+ EXPECT_EQ(double_result->ToString(), "1.000000");
+
+ auto double_bytes = double_result->Serialize();
+ ASSERT_TRUE(double_bytes.has_value());
+ EXPECT_EQ(double_bytes->size(), 8);
+}
+
+struct LiteralRoundTripParam {
+ std::string test_name;
+ std::vector<uint8_t> input_bytes;
+ Literal expected_literal;
+ std::shared_ptr<PrimitiveType> type;
+};
+
+class LiteralSerializationParamTest
+ : public ::testing::TestWithParam<LiteralRoundTripParam> {};
+
+TEST_P(LiteralSerializationParamTest, RoundTrip) {
+ const auto& param = GetParam();
+
+ // Deserialize from bytes
+ Result<Literal> literal_result = Literal::Deserialize(param.input_bytes,
param.type);
+ ASSERT_TRUE(literal_result.has_value())
+ << "Deserialization failed: " << literal_result.error().message;
+
+ // Check type and value
+ EXPECT_EQ(literal_result->type()->type_id(),
param.expected_literal.type()->type_id());
+ EXPECT_EQ(literal_result->ToString(), param.expected_literal.ToString());
+
+ // Serialize back to bytes
+ Result<std::vector<uint8_t>> bytes_result = literal_result->Serialize();
+ ASSERT_TRUE(bytes_result.has_value())
+ << "Serialization failed: " << bytes_result.error().message;
+ EXPECT_EQ(*bytes_result, param.input_bytes);
+
+ // Deserialize again to verify idempotency
+ Result<Literal> final_literal = Literal::Deserialize(*bytes_result,
param.type);
+ ASSERT_TRUE(final_literal.has_value())
+ << "Final deserialization failed: " << final_literal.error().message;
+ EXPECT_EQ(final_literal->type()->type_id(),
param.expected_literal.type()->type_id());
+ EXPECT_EQ(final_literal->ToString(), param.expected_literal.ToString());
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ BinarySerializationTests, LiteralSerializationParamTest,
Review Comment:
Can we drop `Tests` and `Test` from these names to make them shorter?
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
+ :
static_cast<uint8_t>(0x00)};
+ }
+
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(value);
+ return std::vector<uint8_t>(str.begin(), str.end());
+ }
+
+ case TypeId::kBinary: {
+ return std::get<std::vector<uint8_t>>(value);
+ }
+
+ case TypeId::kFixed: {
+ if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
Review Comment:
I think we can unconditionally use `std::vector<uint8_t>` for the fixed type.
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
+ :
static_cast<uint8_t>(0x00)};
+ }
+
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(value);
+ return std::vector<uint8_t>(str.begin(), str.end());
+ }
+
+ case TypeId::kBinary: {
+ return std::get<std::vector<uint8_t>>(value);
+ }
+
+ case TypeId::kFixed: {
+ if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
+ const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value);
+ return std::vector<uint8_t>(fixed_bytes.begin(), fixed_bytes.end());
+ } else if (std::holds_alternative<std::vector<uint8_t>>(value)) {
+ return std::get<std::vector<uint8_t>>(value);
+ } else {
+ std::string actual_type = std::visit(
+ [](auto&& arg) -> std::string { return typeid(arg).name(); },
value);
+ return InvalidArgument("Invalid value type for Fixed literal, got
type: {}",
+ actual_type);
+ }
+ }
+ // TODO(Li Feiyang): Add support for UUID and Decimal
+
+ default:
+ return NotSupported("Serialization for type {} is not supported",
type.ToString());
+ }
+}
+
+#undef DISPATCH_LITERAL_TO_BYTES
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const Literal& literal) {
+ // Cannot serialize special values
+ if (literal.IsAboveMax()) {
+ return NotSupported("Cannot serialize AboveMax");
+ }
+ if (literal.IsBelowMin()) {
+ return NotSupported("Cannot serialize BelowMin");
+ }
+ if (literal.IsNull()) {
+ return NotSupported("Cannot serialize null");
+ }
+
+ return ToBytes(*literal.type(), literal.value());
+}
+
+Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
+ std::span<const uint8_t> data) {
+ if (data.empty()) {
+ return InvalidArgument("Data cannot be empty");
+ }
+
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ case TypeId::kBoolean: {
+ if (data.size() != 1) {
+ return InvalidArgument("Boolean requires 1 byte, got {}", data.size());
+ }
Review Comment:
```suggestion
```
The size is already checked in `ReadLittleEndian`, so there is no need to check it again here.
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
+ :
static_cast<uint8_t>(0x00)};
+ }
+
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(value);
+ return std::vector<uint8_t>(str.begin(), str.end());
+ }
+
+ case TypeId::kBinary: {
+ return std::get<std::vector<uint8_t>>(value);
+ }
+
+ case TypeId::kFixed: {
+ if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
+ const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value);
+ return std::vector<uint8_t>(fixed_bytes.begin(), fixed_bytes.end());
+ } else if (std::holds_alternative<std::vector<uint8_t>>(value)) {
+ return std::get<std::vector<uint8_t>>(value);
+ } else {
+ std::string actual_type = std::visit(
+ [](auto&& arg) -> std::string { return typeid(arg).name(); },
value);
+ return InvalidArgument("Invalid value type for Fixed literal, got
type: {}",
+ actual_type);
+ }
+ }
+ // TODO(Li Feiyang): Add support for UUID and Decimal
+
+ default:
+ return NotSupported("Serialization for type {} is not supported",
type.ToString());
+ }
+}
+
+#undef DISPATCH_LITERAL_TO_BYTES
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const Literal& literal) {
+ // Cannot serialize special values
+ if (literal.IsAboveMax()) {
+ return NotSupported("Cannot serialize AboveMax");
+ }
+ if (literal.IsBelowMin()) {
+ return NotSupported("Cannot serialize BelowMin");
+ }
+ if (literal.IsNull()) {
+ return NotSupported("Cannot serialize null");
+ }
+
+ return ToBytes(*literal.type(), literal.value());
+}
+
+Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
+ std::span<const uint8_t> data) {
+ if (data.empty()) {
+ return InvalidArgument("Data cannot be empty");
+ }
+
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ case TypeId::kBoolean: {
+ if (data.size() != 1) {
+ return InvalidArgument("Boolean requires 1 byte, got {}", data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<uint8_t>(data));
+ return Literal::Value{static_cast<bool>(value != 0x00)};
+ }
+
+ case TypeId::kInt: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Int requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kDate: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Date requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kLong:
+ case TypeId::kTime:
+ case TypeId::kTimestamp:
+ case TypeId::kTimestampTz: {
+ int64_t value;
+ if (data.size() == 8) {
+ ICEBERG_ASSIGN_OR_RAISE(auto long_value,
ReadLittleEndian<int64_t>(data));
+ value = long_value;
+ } else if (data.size() == 4) {
+ // Type was promoted from int to long
+ ICEBERG_ASSIGN_OR_RAISE(auto int_value,
ReadLittleEndian<int32_t>(data));
+ value = static_cast<int64_t>(int_value);
+ } else {
+ return InvalidArgument("{} requires 4 or 8 bytes, got {}",
ToString(type_id),
+ data.size());
+ }
+
+ return Literal::Value{value};
Review Comment:
```suggestion
int64_t value;
if (data.size() < 8) {
// Type was promoted from int to long
ICEBERG_ASSIGN_OR_RAISE(auto int_value,
ReadLittleEndian<int32_t>(data));
value = static_cast<int64_t>(int_value);
} else {
ICEBERG_ASSIGN_OR_RAISE(auto long_value,
ReadLittleEndian<int64_t>(data));
value = long_value;
}
return Literal::Value{value};
```
##########
test/literal_test.cc:
##########
@@ -383,4 +383,118 @@ TEST(LiteralTest, DoubleZeroComparison) {
EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less);
}
+// Type promotion tests
+TEST(LiteralSerializationTest, TypePromotion) {
+ // 4-byte int data can be deserialized as long
+ std::vector<uint8_t> int_data = {32, 0, 0, 0};
+ auto long_result = Literal::Deserialize(int_data, int64());
+ ASSERT_TRUE(long_result.has_value());
+ EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong);
+ EXPECT_EQ(long_result->ToString(), "32");
+
+ auto long_bytes = long_result->Serialize();
+ ASSERT_TRUE(long_bytes.has_value());
+ EXPECT_EQ(long_bytes->size(), 8);
+
+ // 4-byte float data can be deserialized as double
+ std::vector<uint8_t> float_data = {0, 0, 128, 63};
+ auto double_result = Literal::Deserialize(float_data, float64());
+ ASSERT_TRUE(double_result.has_value());
+ EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble);
+ EXPECT_EQ(double_result->ToString(), "1.000000");
+
+ auto double_bytes = double_result->Serialize();
+ ASSERT_TRUE(double_bytes.has_value());
+ EXPECT_EQ(double_bytes->size(), 8);
+}
+
+struct LiteralRoundTripParam {
+ std::string test_name;
+ std::vector<uint8_t> input_bytes;
+ Literal expected_literal;
+ std::shared_ptr<PrimitiveType> type;
+};
+
+class LiteralSerializationParamTest
+ : public ::testing::TestWithParam<LiteralRoundTripParam> {};
+
+TEST_P(LiteralSerializationParamTest, RoundTrip) {
+ const auto& param = GetParam();
+
+ // Deserialize from bytes
+ Result<Literal> literal_result = Literal::Deserialize(param.input_bytes,
param.type);
+ ASSERT_TRUE(literal_result.has_value())
+ << "Deserialization failed: " << literal_result.error().message;
+
+ // Check type and value
+ EXPECT_EQ(literal_result->type()->type_id(),
param.expected_literal.type()->type_id());
+ EXPECT_EQ(literal_result->ToString(), param.expected_literal.ToString());
+
+ // Serialize back to bytes
+ Result<std::vector<uint8_t>> bytes_result = literal_result->Serialize();
+ ASSERT_TRUE(bytes_result.has_value())
+ << "Serialization failed: " << bytes_result.error().message;
+ EXPECT_EQ(*bytes_result, param.input_bytes);
+
+ // Deserialize again to verify idempotency
+ Result<Literal> final_literal = Literal::Deserialize(*bytes_result,
param.type);
+ ASSERT_TRUE(final_literal.has_value())
+ << "Final deserialization failed: " << final_literal.error().message;
+ EXPECT_EQ(final_literal->type()->type_id(),
param.expected_literal.type()->type_id());
+ EXPECT_EQ(final_literal->ToString(), param.expected_literal.ToString());
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ BinarySerializationTests, LiteralSerializationParamTest,
+ ::testing::Values(
+ // Basic types
+ LiteralRoundTripParam{"BooleanTrue", {1}, Literal::Boolean(true),
boolean()},
+ LiteralRoundTripParam{"BooleanFalse", {0}, Literal::Boolean(false),
boolean()},
+ LiteralRoundTripParam{"Int", {32, 0, 0, 0}, Literal::Int(32), int32()},
+ LiteralRoundTripParam{
+ "Long", {32, 0, 0, 0, 0, 0, 0, 0}, Literal::Long(32), int64()},
+ LiteralRoundTripParam{"Float", {0, 0, 128, 63}, Literal::Float(1.0f),
float32()},
+ LiteralRoundTripParam{
+ "Double", {0, 0, 0, 0, 0, 0, 240, 63}, Literal::Double(1.0),
float64()},
+ LiteralRoundTripParam{"String",
+ {105, 99, 101, 98, 101, 114, 103},
+ Literal::String("iceberg"),
+ string()},
+ LiteralRoundTripParam{"BinaryData",
+ {0x01, 0x02, 0x03, 0xFF},
+ Literal::Binary({0x01, 0x02, 0x03, 0xFF}),
+ binary()},
+ // Edge cases that fit the round-trip pattern
+ LiteralRoundTripParam{
+ "NegativeInt", {224, 255, 255, 255}, Literal::Int(-32), int32()},
+ LiteralRoundTripParam{"NegativeLong",
+ {224, 255, 255, 255, 255, 255, 255, 255},
+ Literal::Long(-32),
+ int64()},
+ // IEEE 754 representation for NaN and Infinity (in little-endian)
+ LiteralRoundTripParam{"FloatInfinity",
+ {0, 0, 128, 127},
+
Literal::Float(std::numeric_limits<float>::infinity()),
+ float32()},
+ LiteralRoundTripParam{"FloatNaN",
+ {0, 0, 192, 127},
+
Literal::Float(std::numeric_limits<float>::quiet_NaN()),
+ float32()}
+ // TODO(Li Feiyang): Add tests for Date, Time, Timestamp, TimestampTz
+ ),
+
+ [](const testing::TestParamInfo<LiteralSerializationParamTest::ParamType>&
info) {
+ return info.param.test_name;
+ });
+
+TEST(LiteralSerializationEdgeCaseTest, EmptyStringSerialization) {
Review Comment:
```suggestion
TEST(LiteralSerializationTest, EmptyString) {
```
##########
src/iceberg/util/conversions.h:
##########
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <span>
+#include <vector>
+
+#include "iceberg/expression/literal.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+
+/// \file iceberg/util/conversions.h
+/// \brief Conversion utilities for primitive types
+
+namespace iceberg {
+
+/// \brief Conversion utilities for primitive types
+class ICEBERG_EXPORT Conversions {
+ public:
+ /// \brief Convert a literal value to bytes
+ static Result<std::vector<uint8_t>> ToBytes(const PrimitiveType& type,
+ const Literal::Value& value);
+
+ static Result<std::vector<uint8_t>> ToBytes(const Literal& literal);
Review Comment:
Please add a docstring.
##########
test/literal_test.cc:
##########
@@ -383,4 +383,118 @@ TEST(LiteralTest, DoubleZeroComparison) {
EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less);
}
+// Type promotion tests
+TEST(LiteralSerializationTest, TypePromotion) {
+ // 4-byte int data can be deserialized as long
+ std::vector<uint8_t> int_data = {32, 0, 0, 0};
+ auto long_result = Literal::Deserialize(int_data, int64());
+ ASSERT_TRUE(long_result.has_value());
+ EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong);
+ EXPECT_EQ(long_result->ToString(), "32");
Review Comment:
I don't think it is good practice to check the value through an indirection like this.
```suggestion
EXPECT_EQ(std::get<int64_t>(long_result->value()), 32L);
```
##########
test/literal_test.cc:
##########
@@ -383,4 +383,118 @@ TEST(LiteralTest, DoubleZeroComparison) {
EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less);
}
+// Type promotion tests
+TEST(LiteralSerializationTest, TypePromotion) {
Review Comment:
It would be better to place this case after the round-trip test.
##########
src/iceberg/util/endian.h:
##########
@@ -19,6 +19,8 @@
#pragma once
+#include <algorithm>
+#include <array>
Review Comment:
```suggestion
```
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
Review Comment:
Can we add a template specialization of ToBytesImpl for TypeId::kBoolean?
Then you can simply write `DISPATCH_LITERAL_TO_BYTES(TypeId::kBoolean)`
##########
src/iceberg/util/conversions.h:
##########
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <span>
+#include <vector>
+
+#include "iceberg/expression/literal.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+
+/// \file iceberg/util/conversions.h
+/// \brief Conversion utilities for primitive types
+
+namespace iceberg {
+
+/// \brief Conversion utilities for primitive types
+class ICEBERG_EXPORT Conversions {
+ public:
+ /// \brief Convert a literal value to bytes
+ static Result<std::vector<uint8_t>> ToBytes(const PrimitiveType& type,
+ const Literal::Value& value);
+
+ static Result<std::vector<uint8_t>> ToBytes(const Literal& literal);
+
+ /// \brief Convert bytes to a literal value
+ static Result<Literal::Value> FromBytes(const PrimitiveType& type,
+ std::span<const uint8_t> data);
+
+ static Result<Literal> FromBytes(std::shared_ptr<PrimitiveType> type,
Review Comment:
ditto
##########
test/literal_test.cc:
##########
@@ -383,4 +383,118 @@ TEST(LiteralTest, DoubleZeroComparison) {
EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less);
}
+// Type promotion tests
+TEST(LiteralSerializationTest, TypePromotion) {
+ // 4-byte int data can be deserialized as long
+ std::vector<uint8_t> int_data = {32, 0, 0, 0};
+ auto long_result = Literal::Deserialize(int_data, int64());
+ ASSERT_TRUE(long_result.has_value());
+ EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong);
+ EXPECT_EQ(long_result->ToString(), "32");
+
+ auto long_bytes = long_result->Serialize();
+ ASSERT_TRUE(long_bytes.has_value());
+ EXPECT_EQ(long_bytes->size(), 8);
+
+ // 4-byte float data can be deserialized as double
+ std::vector<uint8_t> float_data = {0, 0, 128, 63};
+ auto double_result = Literal::Deserialize(float_data, float64());
+ ASSERT_TRUE(double_result.has_value());
+ EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble);
+ EXPECT_EQ(double_result->ToString(), "1.000000");
+
+ auto double_bytes = double_result->Serialize();
+ ASSERT_TRUE(double_bytes.has_value());
+ EXPECT_EQ(double_bytes->size(), 8);
+}
+
+struct LiteralRoundTripParam {
+ std::string test_name;
+ std::vector<uint8_t> input_bytes;
+ Literal expected_literal;
+ std::shared_ptr<PrimitiveType> type;
+};
+
+class LiteralSerializationParamTest
+ : public ::testing::TestWithParam<LiteralRoundTripParam> {};
+
+TEST_P(LiteralSerializationParamTest, RoundTrip) {
+ const auto& param = GetParam();
+
+ // Deserialize from bytes
+ Result<Literal> literal_result = Literal::Deserialize(param.input_bytes,
param.type);
+ ASSERT_TRUE(literal_result.has_value())
+ << "Deserialization failed: " << literal_result.error().message;
+
+ // Check type and value
+ EXPECT_EQ(literal_result->type()->type_id(),
param.expected_literal.type()->type_id());
+ EXPECT_EQ(literal_result->ToString(), param.expected_literal.ToString());
Review Comment:
```suggestion
EXPECT_EQ(*literal_result, param.expected_literal);
```
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
+ :
static_cast<uint8_t>(0x00)};
+ }
+
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(value);
+ return std::vector<uint8_t>(str.begin(), str.end());
+ }
+
+ case TypeId::kBinary: {
+ return std::get<std::vector<uint8_t>>(value);
+ }
+
+ case TypeId::kFixed: {
+ if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
+ const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value);
+ return std::vector<uint8_t>(fixed_bytes.begin(), fixed_bytes.end());
+ } else if (std::holds_alternative<std::vector<uint8_t>>(value)) {
+ return std::get<std::vector<uint8_t>>(value);
+ } else {
+ std::string actual_type = std::visit(
+ [](auto&& arg) -> std::string { return typeid(arg).name(); },
value);
+ return InvalidArgument("Invalid value type for Fixed literal, got
type: {}",
+ actual_type);
+ }
+ }
+ // TODO(Li Feiyang): Add support for UUID and Decimal
+
+ default:
+ return NotSupported("Serialization for type {} is not supported",
type.ToString());
+ }
+}
+
+#undef DISPATCH_LITERAL_TO_BYTES
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const Literal& literal) {
+ // Cannot serialize special values
+ if (literal.IsAboveMax()) {
+ return NotSupported("Cannot serialize AboveMax");
+ }
+ if (literal.IsBelowMin()) {
+ return NotSupported("Cannot serialize BelowMin");
+ }
+ if (literal.IsNull()) {
+ return NotSupported("Cannot serialize null");
+ }
+
+ return ToBytes(*literal.type(), literal.value());
+}
+
+Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
+ std::span<const uint8_t> data) {
+ if (data.empty()) {
+ return InvalidArgument("Data cannot be empty");
+ }
+
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ case TypeId::kBoolean: {
+ if (data.size() != 1) {
+ return InvalidArgument("Boolean requires 1 byte, got {}", data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<uint8_t>(data));
+ return Literal::Value{static_cast<bool>(value != 0x00)};
+ }
+
+ case TypeId::kInt: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Int requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kDate: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Date requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kLong:
+ case TypeId::kTime:
+ case TypeId::kTimestamp:
+ case TypeId::kTimestampTz: {
+ int64_t value;
+ if (data.size() == 8) {
+ ICEBERG_ASSIGN_OR_RAISE(auto long_value,
ReadLittleEndian<int64_t>(data));
+ value = long_value;
+ } else if (data.size() == 4) {
+ // Type was promoted from int to long
+ ICEBERG_ASSIGN_OR_RAISE(auto int_value,
ReadLittleEndian<int32_t>(data));
+ value = static_cast<int64_t>(int_value);
+ } else {
+ return InvalidArgument("{} requires 4 or 8 bytes, got {}",
ToString(type_id),
+ data.size());
+ }
+
+ return Literal::Value{value};
+ }
+
+ case TypeId::kFloat: {
+ if (data.size() != sizeof(float)) {
+ return InvalidArgument("Float requires {} bytes, got {}",
sizeof(float),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<float>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kDouble: {
+ if (data.size() == 8) {
+ ICEBERG_ASSIGN_OR_RAISE(auto double_value,
ReadLittleEndian<double>(data));
+ return Literal::Value{double_value};
+ } else if (data.size() == 4) {
+ // Type was promoted from float to double
+ ICEBERG_ASSIGN_OR_RAISE(auto float_value,
ReadLittleEndian<float>(data));
+ return Literal::Value{static_cast<double>(float_value)};
+ } else {
+ return InvalidArgument("Double requires 4 or 8 bytes, got {}",
data.size());
+ }
+ }
+
+ case TypeId::kString: {
+ return Literal::Value{
+ std::string(reinterpret_cast<const char*>(data.data()),
data.size())};
+ }
+
+ case TypeId::kBinary: {
+ return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
+ }
+
+ case TypeId::kFixed: {
+ if (data.size() == 16) {
+ std::array<uint8_t, 16> fixed_bytes;
+ std::ranges::copy(data, fixed_bytes.begin());
+ return Literal::Value{fixed_bytes};
+ } else {
+ return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
+ }
+ }
+ // TODO(Li Feiyang): Add support for UUID and Decimal
+
+ default:
+ return NotSupported("Deserialization for type {} is not supported",
+ type.ToString());
+ }
+}
+
+Result<Literal> Conversions::FromBytes(std::shared_ptr<PrimitiveType> type,
+ std::span<const uint8_t> data) {
+ if (!type) {
+ return InvalidArgument("Type cannot be null");
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(auto value, FromBytes(*type, data));
+
+ // If we got a null value (monostate), create a null Literal
+ if (std::holds_alternative<std::monostate>(value)) {
Review Comment:
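Will this ever happen? None of the successful paths in `FromBytes(const PrimitiveType&, std::span<const uint8_t>)` return `std::monostate`, so this branch looks unreachable.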
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
+ :
static_cast<uint8_t>(0x00)};
+ }
+
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(value);
+ return std::vector<uint8_t>(str.begin(), str.end());
+ }
+
+ case TypeId::kBinary: {
+ return std::get<std::vector<uint8_t>>(value);
+ }
+
+ case TypeId::kFixed: {
+ if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
+ const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value);
+ return std::vector<uint8_t>(fixed_bytes.begin(), fixed_bytes.end());
+ } else if (std::holds_alternative<std::vector<uint8_t>>(value)) {
+ return std::get<std::vector<uint8_t>>(value);
+ } else {
+ std::string actual_type = std::visit(
+ [](auto&& arg) -> std::string { return typeid(arg).name(); },
value);
+ return InvalidArgument("Invalid value type for Fixed literal, got
type: {}",
+ actual_type);
+ }
+ }
+ // TODO(Li Feiyang): Add support for UUID and Decimal
+
+ default:
+ return NotSupported("Serialization for type {} is not supported",
type.ToString());
+ }
+}
+
+#undef DISPATCH_LITERAL_TO_BYTES
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const Literal& literal) {
+ // Cannot serialize special values
+ if (literal.IsAboveMax()) {
+ return NotSupported("Cannot serialize AboveMax");
+ }
+ if (literal.IsBelowMin()) {
+ return NotSupported("Cannot serialize BelowMin");
+ }
+ if (literal.IsNull()) {
+ return NotSupported("Cannot serialize null");
+ }
+
+ return ToBytes(*literal.type(), literal.value());
+}
+
+Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
+ std::span<const uint8_t> data) {
+ if (data.empty()) {
+ return InvalidArgument("Data cannot be empty");
+ }
+
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ case TypeId::kBoolean: {
+ if (data.size() != 1) {
+ return InvalidArgument("Boolean requires 1 byte, got {}", data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<uint8_t>(data));
+ return Literal::Value{static_cast<bool>(value != 0x00)};
+ }
+
+ case TypeId::kInt: {
+ if (data.size() != sizeof(int32_t)) {
Review Comment:
ditto
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
+ :
static_cast<uint8_t>(0x00)};
+ }
+
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(value);
+ return std::vector<uint8_t>(str.begin(), str.end());
+ }
+
+ case TypeId::kBinary: {
+ return std::get<std::vector<uint8_t>>(value);
+ }
+
+ case TypeId::kFixed: {
+ if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
+ const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value);
+ return std::vector<uint8_t>(fixed_bytes.begin(), fixed_bytes.end());
+ } else if (std::holds_alternative<std::vector<uint8_t>>(value)) {
+ return std::get<std::vector<uint8_t>>(value);
+ } else {
+ std::string actual_type = std::visit(
+ [](auto&& arg) -> std::string { return typeid(arg).name(); },
value);
+ return InvalidArgument("Invalid value type for Fixed literal, got
type: {}",
+ actual_type);
+ }
+ }
+ // TODO(Li Feiyang): Add support for UUID and Decimal
+
+ default:
+ return NotSupported("Serialization for type {} is not supported",
type.ToString());
+ }
+}
+
+#undef DISPATCH_LITERAL_TO_BYTES
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const Literal& literal) {
+ // Cannot serialize special values
+ if (literal.IsAboveMax()) {
+ return NotSupported("Cannot serialize AboveMax");
+ }
+ if (literal.IsBelowMin()) {
+ return NotSupported("Cannot serialize BelowMin");
+ }
+ if (literal.IsNull()) {
+ return NotSupported("Cannot serialize null");
+ }
+
+ return ToBytes(*literal.type(), literal.value());
+}
+
+Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
+ std::span<const uint8_t> data) {
+ if (data.empty()) {
+ return InvalidArgument("Data cannot be empty");
Review Comment:
```suggestion
return InvalidArgument("Cannot deserialize empty value");
```
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
Review Comment:
I believe we can also specialize `ToBytesImpl` for all of the types below (string, binary, and fixed).
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
+ :
static_cast<uint8_t>(0x00)};
+ }
+
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(value);
+ return std::vector<uint8_t>(str.begin(), str.end());
+ }
+
+ case TypeId::kBinary: {
+ return std::get<std::vector<uint8_t>>(value);
+ }
+
+ case TypeId::kFixed: {
+ if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
+ const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value);
+ return std::vector<uint8_t>(fixed_bytes.begin(), fixed_bytes.end());
+ } else if (std::holds_alternative<std::vector<uint8_t>>(value)) {
+ return std::get<std::vector<uint8_t>>(value);
+ } else {
+ std::string actual_type = std::visit(
+ [](auto&& arg) -> std::string { return typeid(arg).name(); },
value);
+ return InvalidArgument("Invalid value type for Fixed literal, got
type: {}",
+ actual_type);
+ }
+ }
+ // TODO(Li Feiyang): Add support for UUID and Decimal
+
+ default:
+ return NotSupported("Serialization for type {} is not supported",
type.ToString());
+ }
+}
+
+#undef DISPATCH_LITERAL_TO_BYTES
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const Literal& literal) {
+ // Cannot serialize special values
+ if (literal.IsAboveMax()) {
+ return NotSupported("Cannot serialize AboveMax");
+ }
+ if (literal.IsBelowMin()) {
+ return NotSupported("Cannot serialize BelowMin");
+ }
+ if (literal.IsNull()) {
+ return NotSupported("Cannot serialize null");
+ }
+
+ return ToBytes(*literal.type(), literal.value());
+}
+
+Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
+ std::span<const uint8_t> data) {
+ if (data.empty()) {
+ return InvalidArgument("Data cannot be empty");
+ }
+
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ case TypeId::kBoolean: {
+ if (data.size() != 1) {
+ return InvalidArgument("Boolean requires 1 byte, got {}", data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<uint8_t>(data));
+ return Literal::Value{static_cast<bool>(value != 0x00)};
+ }
+
+ case TypeId::kInt: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Int requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kDate: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Date requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kLong:
+ case TypeId::kTime:
+ case TypeId::kTimestamp:
+ case TypeId::kTimestampTz: {
+ int64_t value;
+ if (data.size() == 8) {
+ ICEBERG_ASSIGN_OR_RAISE(auto long_value,
ReadLittleEndian<int64_t>(data));
+ value = long_value;
+ } else if (data.size() == 4) {
+ // Type was promoted from int to long
+ ICEBERG_ASSIGN_OR_RAISE(auto int_value,
ReadLittleEndian<int32_t>(data));
+ value = static_cast<int64_t>(int_value);
+ } else {
+ return InvalidArgument("{} requires 4 or 8 bytes, got {}",
ToString(type_id),
+ data.size());
+ }
+
+ return Literal::Value{value};
+ }
+
+ case TypeId::kFloat: {
+ if (data.size() != sizeof(float)) {
+ return InvalidArgument("Float requires {} bytes, got {}",
sizeof(float),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<float>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kDouble: {
+ if (data.size() == 8) {
+ ICEBERG_ASSIGN_OR_RAISE(auto double_value,
ReadLittleEndian<double>(data));
+ return Literal::Value{double_value};
+ } else if (data.size() == 4) {
+ // Type was promoted from float to double
+ ICEBERG_ASSIGN_OR_RAISE(auto float_value,
ReadLittleEndian<float>(data));
+ return Literal::Value{static_cast<double>(float_value)};
+ } else {
+ return InvalidArgument("Double requires 4 or 8 bytes, got {}",
data.size());
+ }
+ }
+
+ case TypeId::kString: {
+ return Literal::Value{
+ std::string(reinterpret_cast<const char*>(data.data()),
data.size())};
+ }
+
+ case TypeId::kBinary: {
+ return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
+ }
+
+ case TypeId::kFixed: {
+ if (data.size() == 16) {
+ std::array<uint8_t, 16> fixed_bytes;
+ std::ranges::copy(data, fixed_bytes.begin());
+ return Literal::Value{fixed_bytes};
+ } else {
+ return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
Review Comment:
Please check `data.size()` against the length expected by `type`.
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
+ :
static_cast<uint8_t>(0x00)};
+ }
+
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(value);
+ return std::vector<uint8_t>(str.begin(), str.end());
+ }
+
+ case TypeId::kBinary: {
+ return std::get<std::vector<uint8_t>>(value);
+ }
+
+ case TypeId::kFixed: {
+ if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
+ const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value);
+ return std::vector<uint8_t>(fixed_bytes.begin(), fixed_bytes.end());
+ } else if (std::holds_alternative<std::vector<uint8_t>>(value)) {
+ return std::get<std::vector<uint8_t>>(value);
+ } else {
+ std::string actual_type = std::visit(
+ [](auto&& arg) -> std::string { return typeid(arg).name(); },
value);
+ return InvalidArgument("Invalid value type for Fixed literal, got
type: {}",
+ actual_type);
+ }
+ }
+ // TODO(Li Feiyang): Add support for UUID and Decimal
+
+ default:
+ return NotSupported("Serialization for type {} is not supported",
type.ToString());
+ }
+}
+
+#undef DISPATCH_LITERAL_TO_BYTES
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const Literal& literal) {
+ // Cannot serialize special values
+ if (literal.IsAboveMax()) {
+ return NotSupported("Cannot serialize AboveMax");
+ }
+ if (literal.IsBelowMin()) {
+ return NotSupported("Cannot serialize BelowMin");
+ }
+ if (literal.IsNull()) {
+ return NotSupported("Cannot serialize null");
+ }
+
+ return ToBytes(*literal.type(), literal.value());
+}
+
+Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
+ std::span<const uint8_t> data) {
+ if (data.empty()) {
+ return InvalidArgument("Data cannot be empty");
+ }
+
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ case TypeId::kBoolean: {
+ if (data.size() != 1) {
+ return InvalidArgument("Boolean requires 1 byte, got {}", data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<uint8_t>(data));
+ return Literal::Value{static_cast<bool>(value != 0x00)};
+ }
+
+ case TypeId::kInt: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Int requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kDate: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Date requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kLong:
+ case TypeId::kTime:
+ case TypeId::kTimestamp:
+ case TypeId::kTimestampTz: {
+ int64_t value;
+ if (data.size() == 8) {
+ ICEBERG_ASSIGN_OR_RAISE(auto long_value,
ReadLittleEndian<int64_t>(data));
+ value = long_value;
+ } else if (data.size() == 4) {
+ // Type was promoted from int to long
+ ICEBERG_ASSIGN_OR_RAISE(auto int_value,
ReadLittleEndian<int32_t>(data));
+ value = static_cast<int64_t>(int_value);
+ } else {
+ return InvalidArgument("{} requires 4 or 8 bytes, got {}",
ToString(type_id),
+ data.size());
+ }
+
+ return Literal::Value{value};
+ }
+
+ case TypeId::kFloat: {
+ if (data.size() != sizeof(float)) {
+ return InvalidArgument("Float requires {} bytes, got {}",
sizeof(float),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<float>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kDouble: {
+ if (data.size() == 8) {
+ ICEBERG_ASSIGN_OR_RAISE(auto double_value,
ReadLittleEndian<double>(data));
+ return Literal::Value{double_value};
+ } else if (data.size() == 4) {
+ // Type was promoted from float to double
+ ICEBERG_ASSIGN_OR_RAISE(auto float_value,
ReadLittleEndian<float>(data));
+ return Literal::Value{static_cast<double>(float_value)};
+ } else {
+ return InvalidArgument("Double requires 4 or 8 bytes, got {}",
data.size());
+ }
+ }
+
+ case TypeId::kString: {
+ return Literal::Value{
+ std::string(reinterpret_cast<const char*>(data.data()),
data.size())};
+ }
+
+ case TypeId::kBinary: {
+ return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
+ }
+
+ case TypeId::kFixed: {
+ if (data.size() == 16) {
+ std::array<uint8_t, 16> fixed_bytes;
Review Comment:
Please do not use std::array<uint8_t, 16> for the fixed type; it makes the
cases complex.
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
+ :
static_cast<uint8_t>(0x00)};
+ }
+
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(value);
+ return std::vector<uint8_t>(str.begin(), str.end());
+ }
+
+ case TypeId::kBinary: {
+ return std::get<std::vector<uint8_t>>(value);
+ }
+
+ case TypeId::kFixed: {
+ if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
+ const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value);
+ return std::vector<uint8_t>(fixed_bytes.begin(), fixed_bytes.end());
+ } else if (std::holds_alternative<std::vector<uint8_t>>(value)) {
+ return std::get<std::vector<uint8_t>>(value);
+ } else {
+ std::string actual_type = std::visit(
+ [](auto&& arg) -> std::string { return typeid(arg).name(); },
value);
+ return InvalidArgument("Invalid value type for Fixed literal, got
type: {}",
+ actual_type);
+ }
+ }
+ // TODO(Li Feiyang): Add support for UUID and Decimal
+
+ default:
+ return NotSupported("Serialization for type {} is not supported",
type.ToString());
+ }
+}
+
+#undef DISPATCH_LITERAL_TO_BYTES
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const Literal& literal) {
+ // Cannot serialize special values
+ if (literal.IsAboveMax()) {
+ return NotSupported("Cannot serialize AboveMax");
+ }
+ if (literal.IsBelowMin()) {
+ return NotSupported("Cannot serialize BelowMin");
+ }
+ if (literal.IsNull()) {
+ return NotSupported("Cannot serialize null");
+ }
+
+ return ToBytes(*literal.type(), literal.value());
+}
+
+Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
+ std::span<const uint8_t> data) {
+ if (data.empty()) {
+ return InvalidArgument("Data cannot be empty");
+ }
+
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ case TypeId::kBoolean: {
+ if (data.size() != 1) {
+ return InvalidArgument("Boolean requires 1 byte, got {}", data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<uint8_t>(data));
+ return Literal::Value{static_cast<bool>(value != 0x00)};
+ }
+
+ case TypeId::kInt: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Int requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kDate: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Date requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kLong:
+ case TypeId::kTime:
+ case TypeId::kTimestamp:
+ case TypeId::kTimestampTz: {
+ int64_t value;
+ if (data.size() == 8) {
+ ICEBERG_ASSIGN_OR_RAISE(auto long_value,
ReadLittleEndian<int64_t>(data));
+ value = long_value;
+ } else if (data.size() == 4) {
+ // Type was promoted from int to long
+ ICEBERG_ASSIGN_OR_RAISE(auto int_value,
ReadLittleEndian<int32_t>(data));
+ value = static_cast<int64_t>(int_value);
+ } else {
+ return InvalidArgument("{} requires 4 or 8 bytes, got {}",
ToString(type_id),
+ data.size());
+ }
+
+ return Literal::Value{value};
+ }
+
+ case TypeId::kFloat: {
+ if (data.size() != sizeof(float)) {
+ return InvalidArgument("Float requires {} bytes, got {}",
sizeof(float),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<float>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kDouble: {
+ if (data.size() == 8) {
+ ICEBERG_ASSIGN_OR_RAISE(auto double_value,
ReadLittleEndian<double>(data));
+ return Literal::Value{double_value};
+ } else if (data.size() == 4) {
+ // Type was promoted from float to double
+ ICEBERG_ASSIGN_OR_RAISE(auto float_value,
ReadLittleEndian<float>(data));
+ return Literal::Value{static_cast<double>(float_value)};
+ } else {
+ return InvalidArgument("Double requires 4 or 8 bytes, got {}",
data.size());
+ }
Review Comment:
```suggestion
if (data.size() < 8) {
// Type was promoted from float to double
ICEBERG_ASSIGN_OR_RAISE(auto float_value,
ReadLittleEndian<float>(data));
return Literal::Value{static_cast<double>(float_value)};
} else {
ICEBERG_ASSIGN_OR_RAISE(auto double_value,
ReadLittleEndian<double>(data));
return Literal::Value{double_value};
}
```
Let's simplify this
##########
src/iceberg/util/conversions.cc:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/conversions.h"
+
+#include <array>
+#include <cstring>
+#include <span>
+#include <string>
+
+#include "iceberg/util/endian.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// \brief Write a value in little-endian format and return as vector.
+template <EndianConvertible T>
+std::vector<uint8_t> WriteLittleEndian(T value) {
+ value = ToLittleEndian(value);
+ const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
+ std::vector<uint8_t> result;
+ result.insert(result.end(), bytes, bytes + sizeof(T));
+ return result;
+}
+
+/// \brief Read a value in little-endian format from the data.
+template <EndianConvertible T>
+Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
+ if (data.size() < sizeof(T)) [[unlikely]] {
+ return InvalidArgument("Insufficient data to read {} bytes, got {}",
sizeof(T),
+ data.size());
+ }
+
+ T value;
+ std::memcpy(&value, data.data(), sizeof(T));
+ return FromLittleEndian(value);
+}
+
+template <TypeId type_id>
+Result<std::vector<uint8_t>> ToBytesImpl(const Literal::Value& value) {
+ using CppType = typename LiteralTraits<type_id>::ValueType;
+ return WriteLittleEndian(std::get<CppType>(value));
+}
+
+#define DISPATCH_LITERAL_TO_BYTES(type_id) \
+ case type_id: \
+ return ToBytesImpl<type_id>(value);
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
+ const Literal::Value& value)
{
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
+ case TypeId::kBoolean: {
+ return std::vector<uint8_t>{std::get<bool>(value) ?
static_cast<uint8_t>(0x01)
+ :
static_cast<uint8_t>(0x00)};
+ }
+
+ case TypeId::kString: {
+ const auto& str = std::get<std::string>(value);
+ return std::vector<uint8_t>(str.begin(), str.end());
+ }
+
+ case TypeId::kBinary: {
+ return std::get<std::vector<uint8_t>>(value);
+ }
+
+ case TypeId::kFixed: {
+ if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
+ const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value);
+ return std::vector<uint8_t>(fixed_bytes.begin(), fixed_bytes.end());
+ } else if (std::holds_alternative<std::vector<uint8_t>>(value)) {
+ return std::get<std::vector<uint8_t>>(value);
+ } else {
+ std::string actual_type = std::visit(
+ [](auto&& arg) -> std::string { return typeid(arg).name(); },
value);
+ return InvalidArgument("Invalid value type for Fixed literal, got
type: {}",
+ actual_type);
+ }
+ }
+ // TODO(Li Feiyang): Add support for UUID and Decimal
+
+ default:
+ return NotSupported("Serialization for type {} is not supported",
type.ToString());
+ }
+}
+
+#undef DISPATCH_LITERAL_TO_BYTES
+
+Result<std::vector<uint8_t>> Conversions::ToBytes(const Literal& literal) {
+ // Cannot serialize special values
+ if (literal.IsAboveMax()) {
+ return NotSupported("Cannot serialize AboveMax");
+ }
+ if (literal.IsBelowMin()) {
+ return NotSupported("Cannot serialize BelowMin");
+ }
+ if (literal.IsNull()) {
+ return NotSupported("Cannot serialize null");
+ }
+
+ return ToBytes(*literal.type(), literal.value());
+}
+
+Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
+ std::span<const uint8_t> data) {
+ if (data.empty()) {
+ return InvalidArgument("Data cannot be empty");
+ }
+
+ const auto type_id = type.type_id();
+
+ switch (type_id) {
+ case TypeId::kBoolean: {
+ if (data.size() != 1) {
+ return InvalidArgument("Boolean requires 1 byte, got {}", data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<uint8_t>(data));
+ return Literal::Value{static_cast<bool>(value != 0x00)};
+ }
+
+ case TypeId::kInt: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Int requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kDate: {
+ if (data.size() != sizeof(int32_t)) {
+ return InvalidArgument("Date requires {} bytes, got {}",
sizeof(int32_t),
+ data.size());
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
+ return Literal::Value{value};
+ }
+
+ case TypeId::kLong:
+ case TypeId::kTime:
+ case TypeId::kTimestamp:
+ case TypeId::kTimestampTz: {
+ int64_t value;
+ if (data.size() == 8) {
+ ICEBERG_ASSIGN_OR_RAISE(auto long_value,
ReadLittleEndian<int64_t>(data));
+ value = long_value;
+ } else if (data.size() == 4) {
+ // Type was promoted from int to long
+ ICEBERG_ASSIGN_OR_RAISE(auto int_value,
ReadLittleEndian<int32_t>(data));
+ value = static_cast<int64_t>(int_value);
+ } else {
+ return InvalidArgument("{} requires 4 or 8 bytes, got {}",
ToString(type_id),
+ data.size());
+ }
+
+ return Literal::Value{value};
+ }
+
+ case TypeId::kFloat: {
+ if (data.size() != sizeof(float)) {
Review Comment:
ditto
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]