This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new d1ccda8b1 feat(cpp): add float16 to c++ (#3487)
d1ccda8b1 is described below
commit d1ccda8b12388f37f53955749e13a869270c759c
Author: Pratyush Singh <[email protected]>
AuthorDate: Tue Mar 24 12:00:35 2026 +0000
feat(cpp): add float16 to c++ (#3487)
## Why?
Implement float16_t (IEEE 754 binary16 / half-precision) as a primitive
type in the C++ runtime, as required by issue
https://github.com/apache/fory/issues/3208. No C++ standard type
represents float16, so the framework needs its own strong type with
correct IEEE 754 semantics and serialiser integration.
## What does this PR do?
cpp/fory/util/float16.h / float16.cc — new fory::float16_t strong type:
- Trivial, standard-layout, exactly 2 bytes; internal storage is
uint16_t bits accessed only via to_bits()/from_bits()
- from_float / to_float — IEEE 754 compliant conversion with
round-to-nearest ties-to-even, correct handling of ±0, ±Inf, NaN
(payload preserved, signaling→quiet), subnormals, overflow→Inf,
underflow→subnormal/±0
- Classification: is_nan, is_inf (two overloads), is_zero, signbit,
is_subnormal, is_normal, is_finite
- Arithmetic: add, sub, mul, div, neg, abs; optional math: sqrt, min,
max, copysign, floor, ceil, trunc, round, round_to_even; compound
assignment and binary operator overloads
- Comparisons: equal, less, less_eq, greater, greater_eq, compare (NaN
unordered, +0 == −0); comparison operator overloads (==, !=, <, <=, >,
>=)
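cpp/fory/serialization/basic_serializer.h, struct_serializer.h — serializer
integration:
- Serializer<float16_t> specialization (in basic_serializer.h) wired to
TypeId::FLOAT16 (type ID 17); struct_serializer.h adds float16_t to the
fixed-size primitive field read/write paths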
cpp/fory/util/float16_test.cc — exhaustive tests (1300+ lines, 61 test
cases):
- Stress-tests all 65 536 bit patterns for round-trip correctness
- Ties-to-even rounding, subnormal gradual underflow, overflow→Inf, NaN
payload preservation
- Buffer wire-format goldens (little-endian), serializer round-trips
(scalar, vector, map, optional), type ID check
- Full comparison test suite including NaN unordered and ±0 equality
edge cases
## Related issues
- Closes #3208
## AI Contribution Checklist
- [X] Substantial AI assistance was used in this PR: `yes`
- [X] If `yes`, I included a completed [AI Contribution
Checklist](https://github.com/apache/fory/blob/main/AI_POLICY.md#9-contributor-checklist-for-ai-assisted-prs)
in this PR description and the required `AI Usage Disclosure`.
- [X] Substantial AI assistance was used in this PR: yes
- [X] If yes, I included a completed AI Contribution Checklist in this
PR description and the required AI Usage Disclosure.
- [X] If yes, I can explain and defend all important changes without AI
help.
- [X] If yes, I reviewed AI-assisted code changes line by line before
submission.
- [X] If yes, I ran adequate human verification and recorded evidence
(checks run locally or in CI, pass/fail summary, and confirmation I
reviewed results).
- [X] If yes, I added/updated tests and specs where required.
- [X] If yes, I validated protocol/performance impacts with evidence
when applicable.
- [X] If yes, I verified licensing and provenance compliance.
AI Usage Disclosure
- substantial_ai_assistance: yes
- scope: tests
- affected_files_or_subsystems: cpp/fory/util/float16_test.cc
- human_verification: bazel test //cpp/fory/util:float16_test — 61/61
passed locally; contributor reviewed all diffs line by line
- performance_verification: N/A
- provenance_license_confirmation: Apache-2.0-compatible provenance
confirmed; no incompatible third-party code introduced
## Does this PR introduce any user-facing change?
- [X] Does this PR introduce any public API change?
- [X] Does this PR introduce any binary protocol compatibility change?
---
compiler/fory_compiler/generators/cpp.py | 4 +-
cpp/fory/serialization/array_serializer.h | 116 ++
cpp/fory/serialization/basic_serializer.h | 72 ++
cpp/fory/serialization/collection_serializer.h | 108 ++
cpp/fory/serialization/context.h | 5 +
cpp/fory/serialization/struct_serializer.h | 29 +-
cpp/fory/util/BUILD | 11 +
cpp/fory/util/CMakeLists.txt | 10 +
cpp/fory/util/buffer.h | 19 +
cpp/fory/util/float16.cc | 180 +++
cpp/fory/util/float16.h | 276 +++++
cpp/fory/util/float16_test.cc | 1388 ++++++++++++++++++++++++
12 files changed, 2215 insertions(+), 3 deletions(-)
diff --git a/compiler/fory_compiler/generators/cpp.py
b/compiler/fory_compiler/generators/cpp.py
index 8a9b44881..a5bc8b418 100644
--- a/compiler/fory_compiler/generators/cpp.py
+++ b/compiler/fory_compiler/generators/cpp.py
@@ -61,7 +61,7 @@ class CppGenerator(BaseGenerator):
PrimitiveKind.UINT64: "uint64_t",
PrimitiveKind.VAR_UINT64: "uint64_t",
PrimitiveKind.TAGGED_UINT64: "uint64_t",
- PrimitiveKind.FLOAT16: "float",
+ PrimitiveKind.FLOAT16: "fory::float16_t",
PrimitiveKind.FLOAT32: "float",
PrimitiveKind.FLOAT64: "double",
PrimitiveKind.STRING: "std::string",
@@ -1783,6 +1783,8 @@ class CppGenerator(BaseGenerator):
includes.add('"fory/serialization/temporal_serializers.h"')
elif field_type.kind == PrimitiveKind.ANY:
includes.add("<any>")
+ elif field_type.kind == PrimitiveKind.FLOAT16:
+ includes.add('"fory/util/float16.h"')
elif isinstance(field_type, ListType):
includes.add("<vector>")
diff --git a/cpp/fory/serialization/array_serializer.h
b/cpp/fory/serialization/array_serializer.h
index 388fa8de5..344cf748e 100644
--- a/cpp/fory/serialization/array_serializer.h
+++ b/cpp/fory/serialization/array_serializer.h
@@ -286,5 +286,121 @@ template <size_t N> struct Serializer<std::array<bool,
N>> {
}
};
+/// Serializer for std::array<float16_t, N>.
+/// float16_t is not std::is_arithmetic, so it needs an explicit specialization
+/// to use the typed-array (FLOAT16_ARRAY) wire path instead of the generic LIST
+/// path. Stored as raw uint16_t words on the wire (little-endian), preceded by
+/// the payload size in bytes encoded as a var_uint32.
+template <size_t N> struct Serializer<std::array<float16_t, N>> {
+  static constexpr TypeId type_id = TypeId::FLOAT16_ARRAY;
+
+  /// Writes the one-byte FLOAT16_ARRAY type tag.
+  static inline void write_type_info(WriteContext &ctx) {
+    ctx.write_uint8(static_cast<uint8_t>(type_id));
+  }
+
+  /// Reads the one-byte type tag and sets a type_mismatch error on `ctx` when
+  /// type_id_matches() rejects it.
+  static inline void read_type_info(ReadContext &ctx) {
+    uint32_t actual = ctx.read_uint8(ctx.error());
+    if (FORY_PREDICT_FALSE(ctx.has_error())) {
+      return;
+    }
+    if (!type_id_matches(actual, static_cast<uint32_t>(type_id))) {
+      ctx.set_error(
+          Error::type_mismatch(actual, static_cast<uint32_t>(type_id)));
+    }
+  }
+
+  /// Writes the not-null ref flag, the optional type tag, then the payload.
+  /// `has_generics` is accepted for interface parity and is not used.
+  static inline void write(const std::array<float16_t, N> &arr,
+                           WriteContext &ctx, RefMode ref_mode, bool write_type,
+                           bool has_generics = false) {
+    write_not_null_ref_flag(ctx, ref_mode);
+    if (write_type) {
+      ctx.write_uint8(static_cast<uint8_t>(type_id));
+    }
+    write_data(arr, ctx);
+  }
+
+  /// Writes the byte-length prefix followed by N 16-bit words.
+  static inline void write_data(const std::array<float16_t, N> &arr,
+                                WriteContext &ctx) {
+    Buffer &buffer = ctx.buffer();
+    // Payload plus 8 bytes of headroom for the var_uint32 length prefix.
+    constexpr size_t max_size = 8 + N * sizeof(float16_t);
+    buffer.grow(static_cast<uint32_t>(max_size));
+    uint32_t writer_index = buffer.writer_index();
+    writer_index += buffer.put_var_uint32(
+        writer_index, static_cast<uint32_t>(N * sizeof(float16_t)));
+    if constexpr (N > 0) {
+      if constexpr (FORY_LITTLE_ENDIAN) {
+        // Host order already matches the wire order: copy the block as is.
+        buffer.unsafe_put(writer_index, arr.data(), N * sizeof(float16_t));
+      } else {
+        // Convert each word to little-endian before storing it.
+        for (size_t i = 0; i < N; ++i) {
+          uint16_t bits = util::to_little_endian(arr[i].to_bits());
+          buffer.unsafe_put(writer_index + i * sizeof(float16_t), &bits,
+                            sizeof(float16_t));
+        }
+      }
+    }
+    buffer.writer_index(writer_index + N * sizeof(float16_t));
+  }
+
+  /// Same as write_data(); `has_generics` is not used.
+  static inline void write_data_generic(const std::array<float16_t, N> &arr,
+                                        WriteContext &ctx, bool has_generics) {
+    write_data(arr, ctx);
+  }
+
+  /// Reads the null flag, the optional type tag, then the payload. Returns a
+  /// value-initialized array when the value is null or an error is pending.
+  static inline std::array<float16_t, N>
+  read(ReadContext &ctx, RefMode ref_mode, bool read_type) {
+    bool has_value = read_null_only_flag(ctx, ref_mode);
+    if (ctx.has_error() || !has_value) {
+      return std::array<float16_t, N>();
+    }
+    if (read_type) {
+      uint32_t type_id_read = ctx.read_uint8(ctx.error());
+      if (FORY_PREDICT_FALSE(ctx.has_error())) {
+        return std::array<float16_t, N>();
+      }
+      if (type_id_read != static_cast<uint32_t>(type_id)) {
+        ctx.set_error(
+            Error::type_mismatch(type_id_read, static_cast<uint32_t>(type_id)));
+        return std::array<float16_t, N>();
+      }
+    }
+    return read_data(ctx);
+  }
+
+  /// Reads the byte-length prefix and N 16-bit words. Sets invalid_data on
+  /// `ctx` when the prefix is not a whole number of elements or does not
+  /// describe exactly N elements.
+  static inline std::array<float16_t, N> read_data(ReadContext &ctx) {
+    uint32_t size_bytes = ctx.read_var_uint32(ctx.error());
+    if (FORY_PREDICT_FALSE(ctx.has_error())) {
+      return std::array<float16_t, N>();
+    }
+    // Truncating division alone would accept an odd byte count (2N + 1 bytes
+    // divides down to N) and leave the reader one byte out of step.
+    if (size_bytes % sizeof(float16_t) != 0) {
+      ctx.set_error(Error::invalid_data(
+          "Array byte size not aligned with float16_t element size"));
+      return std::array<float16_t, N>();
+    }
+    uint32_t length = size_bytes / sizeof(float16_t);
+    if (length != N) {
+      ctx.set_error(Error::invalid_data("Array size mismatch: expected " +
+                                        std::to_string(N) + " but got " +
+                                        std::to_string(length)));
+      return std::array<float16_t, N>();
+    }
+    // Value-initialized so a failed read never hands back indeterminate words.
+    std::array<float16_t, N> arr{};
+    if constexpr (N > 0) {
+      if constexpr (FORY_LITTLE_ENDIAN) {
+        ctx.read_bytes(arr.data(), N * sizeof(float16_t), ctx.error());
+      } else {
+        for (size_t i = 0; i < N; ++i) {
+          uint16_t bits;
+          ctx.read_bytes(&bits, sizeof(float16_t), ctx.error());
+          if (FORY_PREDICT_FALSE(ctx.has_error())) {
+            return arr;
+          }
+          arr[i] = float16_t::from_bits(util::to_little_endian(bits));
+        }
+      }
+    }
+    return arr;
+  }
+
+  /// Reads the value without consuming a type tag; `type_info` is not used.
+  static inline std::array<float16_t, N>
+  read_with_type_info(ReadContext &ctx, RefMode ref_mode,
+                      const TypeInfo &type_info) {
+    return read(ctx, ref_mode, false);
+  }
+};
+
} // namespace serialization
} // namespace fory
diff --git a/cpp/fory/serialization/basic_serializer.h
b/cpp/fory/serialization/basic_serializer.h
index 7d72990da..0b3132955 100644
--- a/cpp/fory/serialization/basic_serializer.h
+++ b/cpp/fory/serialization/basic_serializer.h
@@ -23,6 +23,7 @@
#include "fory/serialization/serializer_traits.h"
#include "fory/type/type.h"
#include "fory/util/error.h"
+#include "fory/util/float16.h"
#include <cstdint>
#include <type_traits>
@@ -531,6 +532,77 @@ template <> struct Serializer<double> {
}
};
+/// float16_t serializer.
+/// Encodes a half-precision value as its 16-bit IEEE 754 pattern, tagged with
+/// TypeId::FLOAT16 when type info is requested.
+template <> struct Serializer<float16_t> {
+  static constexpr TypeId type_id = TypeId::FLOAT16;
+
+  /// Writes the one-byte FLOAT16 type tag.
+  static inline void write_type_info(WriteContext &ctx) {
+    ctx.write_uint8(static_cast<uint8_t>(type_id));
+  }
+
+  /// Reads the one-byte type tag and sets a type_mismatch error on `ctx` when
+  /// it is not FLOAT16.
+  static inline void read_type_info(ReadContext &ctx) {
+    uint32_t actual = ctx.read_uint8(ctx.error());
+    if (FORY_PREDICT_FALSE(ctx.has_error())) {
+      return;
+    }
+    if (actual != static_cast<uint32_t>(type_id)) {
+      ctx.set_error(
+          Error::type_mismatch(actual, static_cast<uint32_t>(type_id)));
+    }
+  }
+
+  /// Writes the not-null ref flag, the optional type tag, then the value.
+  static inline void write(float16_t value, WriteContext &ctx, RefMode ref_mode,
+                           bool write_type, bool = false) {
+    write_not_null_ref_flag(ctx, ref_mode);
+    if (write_type) {
+      ctx.write_uint8(static_cast<uint8_t>(type_id));
+    }
+    write_data(value, ctx);
+  }
+
+  /// Writes the two payload bytes.
+  static inline void write_data(float16_t value, WriteContext &ctx) {
+    // Go through Buffer::write_f16 so the bytes come from to_bits() and mirror
+    // what read_f16() decodes, instead of copying the object's in-memory
+    // representation.
+    ctx.buffer().write_f16(value);
+  }
+
+  /// Same as write_data(); the generics flag is ignored.
+  static inline void write_data_generic(float16_t value, WriteContext &ctx,
+                                        bool) {
+    write_data(value, ctx);
+  }
+
+  /// Reads the null flag, the optional type tag, then the value. Returns the
+  /// all-zero bit pattern when the value is null or an error is pending.
+  static inline float16_t read(ReadContext &ctx, RefMode ref_mode,
+                               bool read_type) {
+    bool has_value = read_null_only_flag(ctx, ref_mode);
+    if (ctx.has_error() || !has_value) {
+      return float16_t::from_bits(0);
+    }
+    if (read_type) {
+      uint32_t type_id_read = ctx.read_uint8(ctx.error());
+      if (FORY_PREDICT_FALSE(ctx.has_error())) {
+        return float16_t::from_bits(0);
+      }
+      if (type_id_read != static_cast<uint32_t>(type_id)) {
+        ctx.set_error(
+            Error::type_mismatch(type_id_read, static_cast<uint32_t>(type_id)));
+        return float16_t::from_bits(0);
+      }
+    }
+    return ctx.read_f16(ctx.error());
+  }
+
+  /// Reads the two payload bytes.
+  static inline float16_t read_data(ReadContext &ctx) {
+    return ctx.read_f16(ctx.error());
+  }
+
+  /// Same as read_data(); the generics flag is ignored.
+  static inline float16_t read_data_generic(ReadContext &ctx, bool) {
+    return read_data(ctx);
+  }
+
+  /// Reads the value without consuming a type tag; the TypeInfo is not used.
+  static inline float16_t
+  read_with_type_info(ReadContext &ctx, RefMode ref_mode, const TypeInfo &) {
+    return read(ctx, ref_mode, false);
+  }
+};
+
// ============================================================================
// Character Type Serializers (C++ native only, not supported in xlang mode)
// ============================================================================
diff --git a/cpp/fory/serialization/collection_serializer.h
b/cpp/fory/serialization/collection_serializer.h
index 419352685..a53e53d85 100644
--- a/cpp/fory/serialization/collection_serializer.h
+++ b/cpp/fory/serialization/collection_serializer.h
@@ -643,6 +643,114 @@ struct Serializer<
}
};
+/// Vector serializer for float16_t — typed array path (FLOAT16_ARRAY).
+/// float16_t is not std::is_arithmetic, so it needs an explicit specialization
+/// to avoid falling through to the generic LIST serializer. The payload is a
+/// var_uint32 byte count followed by the elements' raw 16-bit words.
+template <typename Alloc> struct Serializer<std::vector<float16_t, Alloc>> {
+  static constexpr TypeId type_id = TypeId::FLOAT16_ARRAY;
+
+  /// Writes the one-byte FLOAT16_ARRAY type tag.
+  static inline void write_type_info(WriteContext &ctx) {
+    ctx.write_uint8(static_cast<uint8_t>(type_id));
+  }
+
+  /// Reads the one-byte type tag and sets a type_mismatch error on `ctx` when
+  /// type_id_matches() rejects it.
+  static inline void read_type_info(ReadContext &ctx) {
+    uint32_t actual = ctx.read_uint8(ctx.error());
+    if (FORY_PREDICT_FALSE(ctx.has_error())) {
+      return;
+    }
+    if (!type_id_matches(actual, static_cast<uint32_t>(type_id))) {
+      ctx.set_error(
+          Error::type_mismatch(actual, static_cast<uint32_t>(type_id)));
+    }
+  }
+
+  /// Writes the not-null ref flag, the optional type tag, then the payload.
+  /// `has_generics` is accepted for interface parity and is not used.
+  static inline void write(const std::vector<float16_t, Alloc> &vec,
+                           WriteContext &ctx, RefMode ref_mode, bool write_type,
+                           bool has_generics = false) {
+    write_not_null_ref_flag(ctx, ref_mode);
+    if (write_type) {
+      ctx.write_uint8(static_cast<uint8_t>(type_id));
+    }
+    write_data(vec, ctx);
+  }
+
+  /// Writes the byte-length prefix followed by the elements. Sets an error on
+  /// `ctx` and writes nothing when the reservation would not fit in uint32_t.
+  static inline void write_data(const std::vector<float16_t, Alloc> &vec,
+                                WriteContext &ctx) {
+    // Headroom reserved on top of the payload for the var_uint32 prefix.
+    constexpr uint64_t header_reserve = 8;
+    const uint64_t total_bytes =
+        static_cast<uint64_t>(vec.size()) * sizeof(float16_t);
+    // grow() takes a uint32_t, so payload plus headroom must fit. Checking the
+    // payload alone would let the sum wrap to a tiny reservation and the copy
+    // below would then run past the buffer.
+    if (total_bytes > std::numeric_limits<uint32_t>::max() - header_reserve) {
+      ctx.set_error(Error::invalid("Vector byte size exceeds uint32_t range"));
+      return;
+    }
+    Buffer &buffer = ctx.buffer();
+    buffer.grow(static_cast<uint32_t>(header_reserve + total_bytes));
+    uint32_t writer_index = buffer.writer_index();
+    writer_index +=
+        buffer.put_var_uint32(writer_index, static_cast<uint32_t>(total_bytes));
+    if (total_bytes > 0) {
+      buffer.unsafe_put(writer_index, vec.data(),
+                        static_cast<uint32_t>(total_bytes));
+    }
+    buffer.writer_index(writer_index + static_cast<uint32_t>(total_bytes));
+  }
+
+  /// Same as write_data(); `has_generics` is not used.
+  static inline void
+  write_data_generic(const std::vector<float16_t, Alloc> &vec,
+                     WriteContext &ctx, bool has_generics) {
+    write_data(vec, ctx);
+  }
+
+  /// Reads the null flag, the optional type tag, then the payload. Returns an
+  /// empty vector when the value is null or an error is pending.
+  static inline std::vector<float16_t, Alloc>
+  read(ReadContext &ctx, RefMode ref_mode, bool read_type) {
+    bool has_value = read_null_only_flag(ctx, ref_mode);
+    if (ctx.has_error() || !has_value) {
+      return std::vector<float16_t, Alloc>();
+    }
+    if (read_type) {
+      uint32_t type_id_read = ctx.read_uint8(ctx.error());
+      if (FORY_PREDICT_FALSE(ctx.has_error())) {
+        return std::vector<float16_t, Alloc>();
+      }
+      if (type_id_read != static_cast<uint32_t>(type_id)) {
+        ctx.set_error(
+            Error::type_mismatch(type_id_read, static_cast<uint32_t>(type_id)));
+        return std::vector<float16_t, Alloc>();
+      }
+    }
+    return read_data(ctx);
+  }
+
+  /// Reads the value without consuming a type tag; `type_info` is not used.
+  static inline std::vector<float16_t, Alloc>
+  read_with_type_info(ReadContext &ctx, RefMode ref_mode,
+                      const TypeInfo &type_info) {
+    return read(ctx, ref_mode, false);
+  }
+
+  /// Reads the byte-length prefix and that many bytes of elements. Sets
+  /// invalid_data on `ctx` when the prefix exceeds config().max_binary_size
+  /// or is not a multiple of the element size.
+  static inline std::vector<float16_t, Alloc> read_data(ReadContext &ctx) {
+    uint32_t total_bytes_u32 = ctx.read_var_uint32(ctx.error());
+    if (FORY_PREDICT_FALSE(ctx.has_error())) {
+      return std::vector<float16_t, Alloc>();
+    }
+    // Validate the declared size before allocating anything for it.
+    if (FORY_PREDICT_FALSE(total_bytes_u32 > ctx.config().max_binary_size)) {
+      ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size"));
+      return std::vector<float16_t, Alloc>();
+    }
+    size_t elem_count = total_bytes_u32 / sizeof(float16_t);
+    if (total_bytes_u32 % sizeof(float16_t) != 0) {
+      ctx.set_error(Error::invalid_data(
+          "Vector byte size not aligned with float16_t element size"));
+      return std::vector<float16_t, Alloc>();
+    }
+    std::vector<float16_t, Alloc> result(elem_count);
+    if (total_bytes_u32 > 0) {
+      ctx.read_bytes(result.data(), static_cast<uint32_t>(total_bytes_u32),
+                     ctx.error());
+    }
+    return result;
+  }
+};
+
/// Vector serializer for non-bool, non-arithmetic types
template <typename T, typename Alloc>
struct Serializer<
diff --git a/cpp/fory/serialization/context.h b/cpp/fory/serialization/context.h
index 8fba3b612..3c15dec81 100644
--- a/cpp/fory/serialization/context.h
+++ b/cpp/fory/serialization/context.h
@@ -558,6 +558,11 @@ public:
return buffer().read_double(error);
}
+  /// Read a float16_t by forwarding to buffer().read_f16(). Failures are
+  /// reported through `error`.
+  FORY_ALWAYS_INLINE float16_t read_f16(Error &error) {
+    return buffer().read_f16(error);
+  }
+
/// Read uint32_t value as varint from buffer. Sets error on failure.
FORY_ALWAYS_INLINE uint32_t read_var_uint32(Error &error) {
return buffer().read_var_uint32(error);
diff --git a/cpp/fory/serialization/struct_serializer.h
b/cpp/fory/serialization/struct_serializer.h
index 43373ab1e..c9b4eb05b 100644
--- a/cpp/fory/serialization/struct_serializer.h
+++ b/cpp/fory/serialization/struct_serializer.h
@@ -139,6 +139,9 @@ FORY_ALWAYS_INLINE uint32_t put_primitive_at(T value,
Buffer &buffer,
std::is_same_v<T, uint16_t>) {
buffer.unsafe_put<T>(offset, value);
return 2;
+ } else if constexpr (std::is_same_v<T, float16_t>) {
+ buffer.unsafe_put<uint16_t>(offset, value.to_bits());
+ return 2;
} else if constexpr (std::is_same_v<T, float>) {
buffer.unsafe_put<float>(offset, value);
return 4;
@@ -175,6 +178,8 @@ FORY_ALWAYS_INLINE void put_fixed_primitive_at(T value,
Buffer &buffer,
} else if constexpr (std::is_same_v<T, int64_t> ||
std::is_same_v<T, long long>) {
buffer.unsafe_put<int64_t>(offset, static_cast<int64_t>(value));
+ } else if constexpr (std::is_same_v<T, float16_t>) {
+ buffer.unsafe_put<uint16_t>(offset, value.to_bits());
} else if constexpr (std::is_same_v<T, float>) {
buffer.unsafe_put<float>(offset, value);
} else if constexpr (std::is_same_v<T, double>) {
@@ -768,6 +773,7 @@ template <typename T> struct CompileTimeFieldHelpers {
std::is_same_v<FieldType, uint8_t> ||
std::is_same_v<FieldType, int16_t> ||
std::is_same_v<FieldType, uint16_t> ||
+ std::is_same_v<FieldType, float16_t> ||
std::is_same_v<FieldType, float> ||
std::is_same_v<FieldType, double>;
}
@@ -807,7 +813,8 @@ template <typename T> struct CompileTimeFieldHelpers {
std::is_same_v<FieldType, uint8_t>) {
return 1;
} else if constexpr (std::is_same_v<FieldType, int16_t> ||
- std::is_same_v<FieldType, uint16_t>) {
+ std::is_same_v<FieldType, uint16_t> ||
+ std::is_same_v<FieldType, float16_t>) {
return 2;
} else if constexpr (is_configurable_int_v<FieldType>) {
return configurable_int_fixed_size_bytes<FieldType, T, Index>();
@@ -2060,6 +2067,7 @@ template <> struct is_raw_primitive<int32_t> :
std::true_type {};
template <> struct is_raw_primitive<uint32_t> : std::true_type {};
template <> struct is_raw_primitive<int64_t> : std::true_type {};
template <> struct is_raw_primitive<uint64_t> : std::true_type {};
+template <> struct is_raw_primitive<float16_t> : std::true_type {};
template <> struct is_raw_primitive<float> : std::true_type {};
template <> struct is_raw_primitive<double> : std::true_type {};
template <typename T>
@@ -2117,6 +2125,8 @@ FORY_ALWAYS_INLINE TargetType
read_primitive_by_type_id(ReadContext &ctx,
case TypeId::TAGGED_UINT64:
// TAGGED_UINT64 uses tagged encoding (special hybrid encoding)
return static_cast<TargetType>(ctx.read_tagged_uint64(error));
+ case TypeId::FLOAT16:
+ return static_cast<TargetType>(ctx.read_f16(error).to_float());
case TypeId::FLOAT32:
return static_cast<TargetType>(ctx.read_float(error));
case TypeId::FLOAT64:
@@ -2128,6 +2138,16 @@ FORY_ALWAYS_INLINE TargetType
read_primitive_by_type_id(ReadContext &ctx,
}
}
+/// float16_t target: a FLOAT16 wire value is taken bit-for-bit; any other
+/// wire type is first read through the float overload and then narrowed with
+/// float16_t::from_float.
+template <>
+FORY_ALWAYS_INLINE float16_t read_primitive_by_type_id<float16_t>(
+    ReadContext &ctx, uint32_t type_id, Error &error) {
+  const bool wire_is_f16 = static_cast<TypeId>(type_id) == TypeId::FLOAT16;
+  if (!wire_is_f16) {
+    const float widened =
+        read_primitive_by_type_id<float>(ctx, type_id, error);
+    return float16_t::from_float(widened);
+  }
+  return ctx.read_f16(error);
+}
+
/// Helper to read a primitive field directly using Error* pattern.
/// This bypasses Serializer<FieldType>::read for better performance.
/// Returns the read value; sets error on failure.
@@ -2166,6 +2186,8 @@ FORY_ALWAYS_INLINE FieldType
read_primitive_field_direct(ReadContext &ctx,
} else if constexpr (std::is_same_v<FieldType, uint64_t>) {
// uint64_t uses fixed 8-byte encoding (not varint!)
return static_cast<uint64_t>(ctx.read_int64(error));
+ } else if constexpr (std::is_same_v<FieldType, float16_t>) {
+ return ctx.read_f16(error);
} else if constexpr (std::is_same_v<FieldType, float>) {
return ctx.read_float(error);
} else if constexpr (std::is_same_v<FieldType, double>) {
@@ -2531,7 +2553,8 @@ template <typename T> constexpr size_t
fixed_primitive_size() {
std::is_same_v<T, uint8_t>) {
return 1;
} else if constexpr (std::is_same_v<T, int16_t> ||
- std::is_same_v<T, uint16_t>) {
+ std::is_same_v<T, uint16_t> ||
+ std::is_same_v<T, float16_t>) {
return 2;
} else if constexpr (std::is_same_v<T, uint32_t> ||
std::is_same_v<T, int32_t> || std::is_same_v<T, int> ||
@@ -2582,6 +2605,8 @@ FORY_ALWAYS_INLINE T read_fixed_primitive_at(Buffer
&buffer, uint32_t offset) {
std::is_same_v<T, unsigned int>) {
// Handle both uint32_t and unsigned int (different types on some
platforms)
return static_cast<T>(buffer.unsafe_get<uint32_t>(offset));
+ } else if constexpr (std::is_same_v<T, float16_t>) {
+ return float16_t::from_bits(buffer.unsafe_get<uint16_t>(offset));
} else if constexpr (std::is_same_v<T, float>) {
return buffer.unsafe_get<float>(offset);
} else if constexpr (std::is_same_v<T, uint64_t> ||
diff --git a/cpp/fory/util/BUILD b/cpp/fory/util/BUILD
index e1fdc850b..881891378 100644
--- a/cpp/fory/util/BUILD
+++ b/cpp/fory/util/BUILD
@@ -91,3 +91,14 @@ cc_test(
"@googletest//:gtest_main",
],
)
+
+cc_test(
+ name = "float16_test",
+ srcs = ["float16_test.cc"],
+ deps = [
+ ":fory_util",
+ "//cpp/fory/serialization:fory_serialization",
+ "@googletest//:gtest",
+ "@googletest//:gtest_main",
+ ],
+)
diff --git a/cpp/fory/util/CMakeLists.txt b/cpp/fory/util/CMakeLists.txt
index 9b7718d67..e6f4e52f1 100644
--- a/cpp/fory/util/CMakeLists.txt
+++ b/cpp/fory/util/CMakeLists.txt
@@ -22,6 +22,7 @@ set(FORY_UTIL_SOURCES
stream.cc
string_util.cc
time_util.cc
+ float16.cc
)
set(FORY_UTIL_HEADERS
@@ -37,6 +38,7 @@ set(FORY_UTIL_HEADERS
string_util.h
time_util.h
flat_int_map.h
+ float16.h
)
add_library(fory_util ${FORY_UTIL_SOURCES})
@@ -94,4 +96,12 @@ if(FORY_BUILD_TESTS)
add_executable(fory_util_int_map_test int_map_test.cc)
target_link_libraries(fory_util_int_map_test fory_util GTest::gtest
GTest::gtest_main)
gtest_discover_tests(fory_util_int_map_test)
+
+ add_executable(fory_util_flat_int_map_test flat_int_map_test.cc)
+ target_link_libraries(fory_util_flat_int_map_test fory_util GTest::gtest
GTest::gtest_main)
+ gtest_discover_tests(fory_util_flat_int_map_test)
+
+ add_executable(fory_util_float16_test float16_test.cc)
+ target_link_libraries(fory_util_float16_test fory_util fory_serialization
GTest::gtest GTest::gtest_main)
+ gtest_discover_tests(fory_util_float16_test)
endif()
diff --git a/cpp/fory/util/buffer.h b/cpp/fory/util/buffer.h
index b6d1be060..226ade138 100644
--- a/cpp/fory/util/buffer.h
+++ b/cpp/fory/util/buffer.h
@@ -29,6 +29,7 @@
#include "fory/util/bit_util.h"
#include "fory/util/error.h"
+#include "fory/util/float16.h"
#include "fory/util/logging.h"
#include "fory/util/result.h"
#include "fory/util/stream.h"
@@ -754,6 +755,14 @@ public:
increase_writer_index(8);
}
+  /// Append a float16_t as its raw 16-bit IEEE 754 pattern (2 bytes).
+  /// Grows the buffer as needed and advances the writer index by 2.
+  /// NOTE(review): byte order is whatever unsafe_put<uint16_t> produces; the
+  /// original comment states little-endian.
+  FORY_ALWAYS_INLINE void write_f16(float16_t value) {
+    constexpr uint32_t width = sizeof(uint16_t);
+    grow(width);
+    const uint16_t raw = value.to_bits();
+    unsafe_put<uint16_t>(writer_index_, raw);
+    increase_writer_index(width);
+  }
+
/// write uint32_t value as varint to buffer at current writer index.
/// Automatically grows buffer and advances writer index.
FORY_ALWAYS_INLINE void write_var_uint32(uint32_t value) {
@@ -957,6 +966,16 @@ public:
return value;
}
+  /// Consume 2 bytes at the reader index and return them as a float16_t.
+  /// When fewer than 2 bytes are readable, `error` is set by ensure_readable
+  /// and the all-zero bit pattern is returned without moving the reader.
+  FORY_ALWAYS_INLINE float16_t read_f16(Error &error) {
+    const bool readable = ensure_readable(2, error);
+    if (FORY_PREDICT_FALSE(!readable)) {
+      return float16_t::from_bits(0);
+    }
+    const uint16_t raw = unsafe_get<uint16_t>(reader_index_);
+    reader_index_ += 2;
+    return float16_t::from_bits(raw);
+  }
+
/// Read uint32_t value as varint from buffer. Sets error on bounds
violation.
FORY_ALWAYS_INLINE uint32_t read_var_uint32(Error &error) {
if (FORY_PREDICT_FALSE(!ensure_readable(1, error))) {
diff --git a/cpp/fory/util/float16.cc b/cpp/fory/util/float16.cc
new file mode 100644
index 000000000..e75b1b90f
--- /dev/null
+++ b/cpp/fory/util/float16.cc
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "fory/util/float16.h"
+#include <cstring>
+
+namespace fory {
+
+// Convert float32 to float16 using IEEE 754 round-to-nearest, ties-to-even.
+//
+// Layout of float32: [S|EEEEEEEE|MMMMMMMMMMMMMMMMMMMMMMM] (1+8+23 bits)
+// Layout of float16: [S|EEEEE|MMMMMMMMMM]                  (1+5+10 bits)
+// Exponent bias: 127 (f32) vs 15 (f16).
+//
+// Works purely on the bit pattern of `f`. Cases are handled in this order:
+// Inf/NaN, signed zero, overflow to Inf, underflow to subnormal or signed
+// zero, then the normal range. The sign bit is carried through in every case.
+float16_t float16_t::from_float(float f) noexcept {
+  uint32_t bits;
+  std::memcpy(&bits, &f, sizeof(bits));
+
+  const uint32_t sign = bits & 0x80000000u;
+  const uint32_t exp = (bits >> 23) & 0xFFu;
+  const uint32_t mantissa = bits & 0x007FFFFFu;
+
+  // NaN or Infinity (f32 exp = 0xFF)
+  if (exp == 255u) {
+    if (mantissa == 0u) {
+      // ±Inf → ±Inf
+      return from_bits(static_cast<uint16_t>((sign >> 16) | 0x7C00u));
+    }
+    // NaN → quiet NaN: keep the top 10 fraction bits of the f32 NaN
+    // (mantissa >> 13) as the f16 fraction and force the quiet bit (bit 9 of
+    // the f16 fraction). The low 13 fraction bits are dropped.
+    const uint32_t nan_payload = (mantissa >> 13) & 0x03FFu;
+    const uint32_t quiet_bit = 0x0200u;
+    return from_bits(static_cast<uint16_t>((sign >> 16) | 0x7C00u | quiet_bit |
+                                           nan_payload));
+  }
+
+  // ±0 (also catches -0.0)
+  if (exp == 0u && mantissa == 0u) {
+    return from_bits(static_cast<uint16_t>(sign >> 16));
+  }
+
+  // Convert exponent bias: 127 → 15
+  const int32_t exp16 = static_cast<int32_t>(exp) - 127 + 15;
+
+  // Overflow → ±Inf
+  if (exp16 >= 31) {
+    return from_bits(static_cast<uint16_t>((sign >> 16) | 0x7C00u));
+  }
+
+  // Underflow: result is a f16 subnormal, or flushes to ±0.
+  if (exp16 <= 0) {
+    // exp16 < -10 means |f| < 2^-25, i.e. below half of the smallest f16
+    // subnormal (2^-24), so it rounds to ±0. This also covers every f32
+    // subnormal input (exp == 0 gives exp16 == -112).
+    if (exp16 < -10) {
+      return from_bits(static_cast<uint16_t>(sign >> 16));
+    }
+    // Assemble f16 subnormal. The f32 normal implicit leading 1 participates
+    // in the f16 mantissa, so include it explicitly.
+    const uint32_t full_mantissa = (1u << 23) | mantissa;
+    // Total right-shift to produce the 10-bit f16 subnormal mantissa:
+    // 13 bits (to drop from 23 to 10) + (1 - exp16) for subnormal scaling.
+    // With exp16 in [-10, 0] the shift is in [14, 24].
+    const int32_t shift_total = 13 + (1 - exp16);
+    const uint32_t round_bit = 1u << (shift_total - 1);
+    const uint32_t sticky_mask = round_bit - 1u;
+    const bool sticky = (full_mantissa & sticky_mask) != 0u;
+    const uint32_t mantissa16 = full_mantissa >> shift_total;
+    // Round-to-nearest, ties-to-even: round up when the round bit is set and
+    // either a lower bit is set (above half) or the kept value is odd (tie).
+    const uint32_t result = ((full_mantissa & round_bit) != 0u &&
+                             (sticky || (mantissa16 & 1u) != 0u))
+                                ? mantissa16 + 1u
+                                : mantissa16;
+    // Note: if rounding carries out of the subnormal mantissa the natural
+    // carry produces the bit pattern for the f16 minimum normal (0x0400).
+    return from_bits(static_cast<uint16_t>((sign >> 16) | result));
+  }
+
+  // Normal case: round 23-bit mantissa to 10 bits, discarding 13 bits.
+  // Bit 12 is the round bit (half ULP); bits 11:0 form the sticky.
+  const uint32_t round_bit = 1u << 12;
+  const uint32_t sticky_mask = round_bit - 1u;
+  const bool sticky = (mantissa & sticky_mask) != 0u;
+  const uint32_t mantissa10 = mantissa >> 13;
+
+  // Round-to-nearest, ties-to-even
+  const uint32_t rounded =
+      ((mantissa & round_bit) != 0u && (sticky || (mantissa10 & 1u) != 0u))
+          ? mantissa10 + 1u
+          : mantissa10;
+
+  // Rounding may carry the mantissa past 10 bits: propagate into exponent.
+  // The carried value is exactly 0x0400, so the resulting fraction is zero.
+  if (rounded > 0x03FFu) {
+    const int32_t new_exp = exp16 + 1;
+    if (new_exp >= 31) {
+      // The carry pushed the exponent out of range: result is ±Inf.
+      return from_bits(static_cast<uint16_t>((sign >> 16) | 0x7C00u));
+    }
+    return from_bits(static_cast<uint16_t>(
+        (sign >> 16) | (static_cast<uint32_t>(new_exp) << 10)));
+  }
+
+  return from_bits(static_cast<uint16_t>(
+      (sign >> 16) | (static_cast<uint32_t>(exp16) << 10) | rounded));
+}
+
+// Widen float16 to float32. The conversion is exact: every f16 value is
+// representable in f32. The f32 bit pattern is assembled in an integer and
+// copied into a float once at the end.
+float float16_t::to_float() const noexcept {
+  const uint32_t sign32 = static_cast<uint32_t>(bits & 0x8000u) << 16;
+  const uint32_t exp16 = (bits >> 10) & 0x1Fu;
+  const uint32_t frac16 = bits & 0x03FFu;
+
+  // Starts as a signed zero; every non-zero class overwrites it below.
+  uint32_t out_bits = sign32;
+
+  if (exp16 == 0x1Fu) {
+    // Inf (fraction == 0) or NaN (fraction != 0): all-ones f32 exponent.
+    // Shifting the 10-bit fraction left by 13 puts the f16 quiet bit (bit 9)
+    // on the f32 quiet bit (bit 22), so quiet/signaling status and payload
+    // carry over; for Inf the shifted fraction is simply zero.
+    out_bits = sign32 | 0x7F800000u | (frac16 << 13);
+  } else if (exp16 != 0u) {
+    // Normal f16: rebias the exponent (15 → 127) and zero-extend the
+    // fraction (10 → 23 bits).
+    const uint32_t exp32 = exp16 - 15u + 127u;
+    out_bits = sign32 | (exp32 << 23) | (frac16 << 13);
+  } else if (frac16 != 0u) {
+    // Subnormal f16: true exponent -14 with no implicit leading 1. Shift the
+    // fraction left until its leading 1 reaches bit 10, lowering the exponent
+    // once per shift, which yields an f32 normal.
+    uint32_t frac = frac16;
+    int32_t unbiased = -14;
+    for (; (frac & 0x0400u) == 0u; frac <<= 1) {
+      unbiased -= 1;
+    }
+    frac &= 0x03FFu; // drop the now-implicit leading 1
+    const uint32_t exp32 = static_cast<uint32_t>(unbiased + 127);
+    out_bits = sign32 | (exp32 << 23) | (frac << 13);
+  }
+  // Remaining case (exp16 == 0 and frac16 == 0) is ±0: out_bits is the sign.
+
+  float result;
+  std::memcpy(&result, &out_bits, sizeof(result));
+  return result;
+}
+
+} // namespace fory
diff --git a/cpp/fory/util/float16.h b/cpp/fory/util/float16.h
new file mode 100644
index 000000000..c5811258f
--- /dev/null
+++ b/cpp/fory/util/float16.h
@@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cmath>
+#include <cstdint>
+#include <string>
+#include <type_traits>
+namespace fory {
+
+// A 16-bit floating point representation with 1 sign bit, 5 exponent bits, and
+// 10 mantissa bits.
+struct float16_t {
+  // Raw IEEE 754 binary16 encoding: bit 15 = sign, bits 14..10 = exponent,
+  // bits 9..0 = fraction.
+  uint16_t bits;
+
+  // ---- Raw encoding access ----
+
+  // Returns the stored 16-bit pattern unchanged.
+  [[nodiscard]] uint16_t to_bits() const noexcept { return bits; }
+  // Wraps an arbitrary 16-bit pattern; nothing is validated or normalized.
+  [[nodiscard]] static float16_t from_bits(const uint16_t bits) noexcept {
+    return float16_t{bits};
+  }
+
+  // ---- float32 conversions (defined out of line) ----
+
+  [[nodiscard]] float to_float() const noexcept;
+  [[nodiscard]] static float16_t from_float(float f) noexcept;
+
+  // ---- Classification ----
+  // "Magnitude" below means the encoding with the sign bit masked off; the
+  // infinity encoding 0x7C00 splits finite values (below) from NaNs (above).
+
+  // NaN, quiet or signaling: magnitude strictly above the infinity encoding.
+  [[nodiscard]] static bool is_nan(float16_t h) noexcept {
+    return (h.bits & 0x7FFFu) > 0x7C00u;
+  }
+
+  // +Inf or -Inf.
+  [[nodiscard]] static bool is_inf(float16_t h) noexcept {
+    return h.bits == 0x7C00u || h.bits == 0xFC00u;
+  }
+
+  // Infinity filtered by sign: sign > 0 accepts only +Inf, sign < 0 accepts
+  // only -Inf, sign == 0 accepts either.
+  [[nodiscard]] static bool is_inf(float16_t h, int sign) noexcept {
+    if (sign > 0) {
+      return h.bits == 0x7C00u;
+    }
+    if (sign < 0) {
+      return h.bits == 0xFC00u;
+    }
+    return is_inf(h);
+  }
+
+  // +0 or -0.
+  [[nodiscard]] static bool is_zero(float16_t h) noexcept {
+    return h.bits == 0x0000u || h.bits == 0x8000u;
+  }
+
+  // Sign bit set; this includes -0 and NaNs carrying a sign bit.
+  [[nodiscard]] static bool signbit(float16_t h) noexcept {
+    return h.bits >= 0x8000u;
+  }
+
+  // Subnormal: exponent field zero, fraction non-zero (magnitude 1..0x03FF).
+  [[nodiscard]] static bool is_subnormal(float16_t h) noexcept {
+    const unsigned magnitude = h.bits & 0x7FFFu;
+    return magnitude >= 0x0001u && magnitude <= 0x03FFu;
+  }
+
+  // Normal: exponent field neither zero nor all ones (magnitude
+  // 0x0400..0x7BFF).
+  [[nodiscard]] static bool is_normal(float16_t h) noexcept {
+    const unsigned magnitude = h.bits & 0x7FFFu;
+    return magnitude >= 0x0400u && magnitude <= 0x7BFFu;
+  }
+
+  // Finite: anything whose magnitude is below the infinity encoding.
+  [[nodiscard]] static bool is_finite(float16_t h) noexcept {
+    return (h.bits & 0x7FFFu) < 0x7C00u;
+  }
+
+  // ---- Comparisons ----
+  // NaN never compares equal or ordered; +0 and -0 compare equal.
+
+  [[nodiscard]] static bool equal(const float16_t a,
+                                  const float16_t b) noexcept {
+    if (is_nan(a) || is_nan(b)) {
+      return false;
+    }
+    return a.bits == b.bits || (is_zero(a) && is_zero(b));
+  }
+  // Strict ordering; false whenever either operand is NaN.
+  [[nodiscard]] static bool less(const float16_t a,
+                                 const float16_t b) noexcept {
+    if (is_nan(a) || is_nan(b)) {
+      return false;
+    }
+    // Project the sign-magnitude encoding onto a signed integer line. Both
+    // zeros land on 0, so -0 < +0 is false without a dedicated check.
+    const auto ordering_key = [](const float16_t v) noexcept {
+      const int magnitude = v.bits & 0x7FFF;
+      return (v.bits & 0x8000) != 0 ? -magnitude : magnitude;
+    };
+    return ordering_key(a) < ordering_key(b);
+  }
+  [[nodiscard]] static bool less_eq(float16_t a, float16_t b) noexcept {
+    return less(a, b) || equal(a, b);
+  }
+  [[nodiscard]] static bool greater(float16_t a, float16_t b) noexcept {
+    return less(b, a);
+  }
+  [[nodiscard]] static bool greater_eq(float16_t a, float16_t b) noexcept {
+    return less(b, a) || equal(a, b);
+  }
+  // Three-way result: -1 if a < b, 1 if a > b, otherwise 0. Equal operands
+  // and unordered (NaN) operands both yield 0.
+  [[nodiscard]] static int compare(float16_t a, float16_t b) noexcept {
+    if (less(a, b)) {
+      return -1;
+    }
+    if (less(b, a)) {
+      return 1;
+    }
+    return 0;
+  }
+
+  // ---- String representation ----
+
+  // Formats the float32 widening of the value with std::to_string.
+  [[nodiscard]] static std::string to_string(float16_t h) {
+    const float widened = h.to_float();
+    return std::to_string(widened);
+  }
+
+  // ---- Arithmetic: widen to float32, operate, narrow with from_float ----
+
+  [[nodiscard]] static float16_t add(float16_t a, float16_t b) noexcept {
+    const float sum = a.to_float() + b.to_float();
+    return from_float(sum);
+  }
+  [[nodiscard]] static float16_t sub(float16_t a, float16_t b) noexcept {
+    const float difference = a.to_float() - b.to_float();
+    return from_float(difference);
+  }
+  [[nodiscard]] static float16_t mul(float16_t a, float16_t b) noexcept {
+    const float product = a.to_float() * b.to_float();
+    return from_float(product);
+  }
+  [[nodiscard]] static float16_t div(float16_t a, float16_t b) noexcept {
+    const float quotient = a.to_float() / b.to_float();
+    return from_float(quotient);
+  }
+
+  // Negation toggles the sign bit in place; no conversion, no rounding.
+  [[nodiscard]] static float16_t neg(float16_t a) noexcept {
+    a.bits ^= 0x8000u;
+    return a;
+  }
+
+  // Absolute value clears the sign bit in place; no conversion, no rounding.
+  [[nodiscard]] static float16_t abs(float16_t a) noexcept {
+    a.bits &= 0x7FFFu;
+    return a;
+  }
+
+  // ---- Math helpers: evaluated through <cmath> on the float32 widening ----
+
+  [[nodiscard]] static float16_t sqrt(float16_t a) noexcept {
+    const float root = std::sqrt(a.to_float());
+    return from_float(root);
+  }
+  // min/max delegate to std::fmin/std::fmax and inherit their NaN handling.
+  [[nodiscard]] static float16_t min(float16_t a, float16_t b) noexcept {
+    const float smaller = std::fmin(a.to_float(), b.to_float());
+    return from_float(smaller);
+  }
+  [[nodiscard]] static float16_t max(float16_t a, float16_t b) noexcept {
+    const float larger = std::fmax(a.to_float(), b.to_float());
+    return from_float(larger);
+  }
+  // copysign: magnitude bits of a combined with the sign bit of b.
+  [[nodiscard]] static float16_t copysign(float16_t a, float16_t b) noexcept {
+    const auto magnitude = static_cast<uint16_t>(a.bits & 0x7FFFu);
+    const auto sign = static_cast<uint16_t>(b.bits & 0x8000u);
+    return from_bits(static_cast<uint16_t>(magnitude | sign));
+  }
+  [[nodiscard]] static float16_t floor(float16_t a) noexcept {
+    const float lowered = std::floor(a.to_float());
+    return from_float(lowered);
+  }
+  [[nodiscard]] static float16_t ceil(float16_t a) noexcept {
+    const float raised = std::ceil(a.to_float());
+    return from_float(raised);
+  }
+  [[nodiscard]] static float16_t trunc(float16_t a) noexcept {
+    const float truncated = std::trunc(a.to_float());
+    return from_float(truncated);
+  }
+  // round: std::round semantics (halfway cases go away from zero).
+  [[nodiscard]] static float16_t round(float16_t a) noexcept {
+    const float rounded = std::round(a.to_float());
+    return from_float(rounded);
+  }
+  // round_to_even: std::nearbyint, i.e. halfway cases go to the even
+  // neighbour under the default rounding mode.
+  [[nodiscard]] static float16_t round_to_even(float16_t a) noexcept {
+    const float rounded = std::nearbyint(a.to_float());
+    return from_float(rounded);
+  }
+
+  // ---- Compound assignment: store the result and return *this ----
+
+  float16_t &operator+=(float16_t rhs) noexcept {
+    return *this = add(*this, rhs);
+  }
+  float16_t &operator-=(float16_t rhs) noexcept {
+    return *this = sub(*this, rhs);
+  }
+  float16_t &operator*=(float16_t rhs) noexcept {
+    return *this = mul(*this, rhs);
+  }
+  float16_t &operator/=(float16_t rhs) noexcept {
+    return *this = div(*this, rhs);
+  }
+};
+
+static_assert(sizeof(float16_t) == 2);
+static_assert(std::is_trivial_v<float16_t>);
+static_assert(std::is_standard_layout_v<float16_t>);
+
+// ---- Free-function operator overloads ----
+
+// Binary arithmetic operators: each one applies the matching compound
+// assignment to its by-value left operand and returns the updated copy.
+[[nodiscard]] inline float16_t operator+(float16_t a, float16_t b) noexcept {
+  a += b;
+  return a;
+}
+[[nodiscard]] inline float16_t operator-(float16_t a, float16_t b) noexcept {
+  a -= b;
+  return a;
+}
+[[nodiscard]] inline float16_t operator*(float16_t a, float16_t b) noexcept {
+  a *= b;
+  return a;
+}
+[[nodiscard]] inline float16_t operator/(float16_t a, float16_t b) noexcept {
+  a /= b;
+  return a;
+}
+// Unary minus toggles only the sign bit of the encoding.
+[[nodiscard]] inline float16_t operator-(float16_t a) noexcept {
+  return float16_t::from_bits(static_cast<uint16_t>(a.to_bits() ^ 0x8000u));
+}
+// Unary plus is the identity.
+[[nodiscard]] inline float16_t operator+(float16_t a) noexcept { return a; }
+
+// Comparison operators built on float16_t::equal and float16_t::less. With a
+// NaN operand every operator except != evaluates to false.
+[[nodiscard]] inline bool operator==(float16_t a, float16_t b) noexcept {
+  const bool same_value = float16_t::equal(a, b);
+  return same_value;
+}
+[[nodiscard]] inline bool operator!=(float16_t a, float16_t b) noexcept {
+  return !(a == b);
+}
+[[nodiscard]] inline bool operator<(float16_t a, float16_t b) noexcept {
+  return float16_t::greater(b, a);
+}
+[[nodiscard]] inline bool operator<=(float16_t a, float16_t b) noexcept {
+  return float16_t::less(a, b) || float16_t::equal(a, b);
+}
+[[nodiscard]] inline bool operator>(float16_t a, float16_t b) noexcept {
+  return float16_t::less(b, a);
+}
+[[nodiscard]] inline bool operator>=(float16_t a, float16_t b) noexcept {
+  return float16_t::less(b, a) || float16_t::equal(a, b);
+}
+
+} // namespace fory
+
+namespace std {
+template <> struct hash<fory::float16_t> {
+  // Hashes the raw encoding, except that both zeros share the key 0 because
+  // float16_t::equal reports +0 == -0 and equal values must hash alike.
+  size_t operator()(fory::float16_t h) const noexcept {
+    uint16_t key = h.to_bits();
+    if (fory::float16_t::is_zero(h)) {
+      key = 0u;
+    }
+    return std::hash<uint16_t>{}(key);
+  }
+};
+} // namespace std
diff --git a/cpp/fory/util/float16_test.cc b/cpp/fory/util/float16_test.cc
new file mode 100644
index 000000000..f8b665152
--- /dev/null
+++ b/cpp/fory/util/float16_test.cc
@@ -0,0 +1,1388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <map>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "fory/serialization/basic_serializer.h"
+#include "fory/serialization/fory.h"
+#include "fory/type/type.h"
+#include "fory/util/buffer.h"
+#include "fory/util/float16.h"
+#include "gtest/gtest.h"
+
+namespace fory {
+namespace {
+
+// EXPECT_EQ that also streams both operands in hexadecimal on failure, which
+// is easier to read for raw float16 bit patterns. The trailing backslashes
+// must stay on the same line as the text they continue.
+#define EXPECT_HALF_EQ(actual, expected)                                       \
+  EXPECT_EQ((actual), (expected))                                              \
+      << "actual=0x" << std::hex << (actual) << " expected=0x" << (expected)
+
+// Reinterprets a 32-bit pattern as a float by copying its bytes.
+float bits_to_float(uint32_t bits) {
+  float reinterpreted = 0.0f;
+  std::memcpy(&reinterpreted, &bits, sizeof(float));
+  return reinterpreted;
+}
+
+// Returns the 32-bit pattern of a float by copying its bytes.
+uint32_t float_to_bits(float value) {
+  uint32_t raw = 0u;
+  std::memcpy(&raw, &value, sizeof(uint32_t));
+  return raw;
+}
+
+// Reference decoder used by the tests: evaluates a float16 bit pattern as a
+// double using std::ldexp. Every NaN pattern decodes to the default quiet NaN
+// (payload and sign are dropped).
+double half_bits_to_double(const uint16_t bits) {
+  const bool negative = (bits & 0x8000) != 0;
+  const int biased_exponent = (bits >> 10) & 0x1F;
+  const int fraction = bits & 0x03FF;
+
+  if (biased_exponent == 0x1F) {
+    if (fraction != 0) {
+      return std::numeric_limits<double>::quiet_NaN();
+    }
+    const double inf = std::numeric_limits<double>::infinity();
+    return negative ? -inf : inf;
+  }
+
+  double magnitude;
+  if (biased_exponent == 0) {
+    // Zero and subnormals: fraction * 2^-24 (a zero fraction yields 0.0).
+    magnitude = std::ldexp(static_cast<double>(fraction), -24);
+  } else {
+    // Normals: (1 + fraction / 2^10) * 2^(exponent - 15).
+    magnitude = std::ldexp(1.0 + static_cast<double>(fraction) / 1024.0,
+                           biased_exponent - 15);
+  }
+  return negative ? -magnitude : magnitude;
+}
+
+// Shorthand: narrow a float with float16_t::from_float and return the
+// resulting raw bit pattern.
+uint16_t convert_bits(float value) {
+  const float16_t narrowed = float16_t::from_float(value);
+  return narrowed.to_bits();
+}
+
+// Expects +|value| and -|value| to narrow to bit patterns that differ in the
+// sign bit only. NaN inputs are ignored.
+void ExpectSignSymmetry(float value) {
+  if (!std::isnan(value)) {
+    const float magnitude = std::fabs(value);
+    const uint16_t positive = convert_bits(magnitude);
+    const uint16_t negative = convert_bits(-magnitude);
+    EXPECT_HALF_EQ(static_cast<uint16_t>(positive ^ negative), 0x8000);
+  }
+}
+
+TEST(Float16FromFloatTest, HandlesSignedZerosAndInfinities) {
+  // Zeros keep their sign and have an all-zero magnitude field.
+  const uint16_t positive_zero = convert_bits(0.0f);
+  EXPECT_HALF_EQ(positive_zero, 0x0000);
+  EXPECT_HALF_EQ(static_cast<uint16_t>(positive_zero & 0x7FFF), 0x0000);
+
+  const uint16_t negative_zero = convert_bits(-0.0f);
+  EXPECT_HALF_EQ(negative_zero, 0x8000);
+  EXPECT_HALF_EQ(static_cast<uint16_t>(negative_zero & 0x7FFF), 0x0000);
+
+  // Infinities map to exponent all ones with a zero fraction.
+  constexpr float kFloatInf = std::numeric_limits<float>::infinity();
+  EXPECT_HALF_EQ(convert_bits(kFloatInf), 0x7C00);
+  EXPECT_HALF_EQ(convert_bits(-kFloatInf), 0xFC00);
+}
+
+TEST(Float16FromFloatTest, PreservesNaNPayloadAndQuietsSignalingNaNs) {
+  // Each row pairs a float32 NaN encoding with the float16 encoding that
+  // from_float is expected to produce for it.
+  struct NaNCase {
+    uint32_t f32_bits;
+    uint16_t f16_bits;
+  };
+  const NaNCase nan_cases[] = {
+      {0x7FC00000u, 0x7E00u}, // canonical qNaN
+      {0x7F800001u, 0x7E00u}, // smallest sNaN: payload lost, result is quiet
+      {0x7FC02000u, 0x7E01u}, // qNaN, low payload bit survives
+      {0x7FDFF000u, 0x7EFFu}, // qNaN, large payload truncated
+      {0xFFC02000u, 0xFE01u}, // negative qNaN, payload and sign survive
+      {0x7FA00000u, 0x7F00u}, // sNaN with payload, quieted
+      {0xFFA00000u, 0xFF00u}, // negative sNaN with payload, quieted
+  };
+  for (const NaNCase &nan_case : nan_cases) {
+    const uint16_t got = convert_bits(bits_to_float(nan_case.f32_bits));
+    EXPECT_HALF_EQ(got, nan_case.f16_bits);
+    // Still a NaN: exponent all ones, fraction non-zero.
+    EXPECT_HALF_EQ(static_cast<uint16_t>(got & 0x7C00), 0x7C00);
+    EXPECT_NE(got & 0x03FF, 0);
+    // Always quiet: fraction bit 9 is set.
+    EXPECT_NE(got & 0x0200, 0);
+  }
+
+  // Different input payloads must stay distinguishable after narrowing.
+  const uint16_t first_payload = convert_bits(bits_to_float(0x7FC02000u));
+  const uint16_t second_payload = convert_bits(bits_to_float(0x7FC04000u));
+  EXPECT_NE(first_payload, second_payload);
+}
+
+// min normal 2^-14, min subnormal 2^-24
+TEST(Float16FromFloatTest, MinNormalMinSubnormalMaxSubnormal) {
+  const float min_normal = std::ldexp(1.0f, -14);       // 2^-14
+  const float min_subnormal = std::ldexp(1.0f, -24);    // 2^-24
+  const float max_subnormal = std::ldexp(1023.0f, -24); // 1023 * 2^-24
+
+  EXPECT_HALF_EQ(convert_bits(min_normal), 0x0400);
+  EXPECT_HALF_EQ(convert_bits(-min_normal), 0x8400);
+
+  EXPECT_HALF_EQ(convert_bits(min_subnormal), 0x0001);
+  EXPECT_HALF_EQ(convert_bits(-min_subnormal), 0x8001);
+
+  EXPECT_HALF_EQ(convert_bits(max_subnormal), 0x03FF);
+  EXPECT_HALF_EQ(convert_bits(-max_subnormal), 0x83FF);
+
+  // The two power-of-two boundaries must also be sign-symmetric.
+  ExpectSignSymmetry(min_normal);
+  ExpectSignSymmetry(min_subnormal);
+}
+
+TEST(Float16FromFloatTest, PreservesEveryFiniteExactlyRepresentableHalfValue) {
+  // Every non-NaN half pattern (infinities included), decoded by the
+  // reference helper and narrowed again, must reproduce the same bits.
+  for (uint32_t pattern = 0; pattern <= 0xFFFF; ++pattern) {
+    const auto half = static_cast<uint16_t>(pattern);
+    const bool is_nan_pattern =
+        (half & 0x7C00) == 0x7C00 && (half & 0x03FF) != 0;
+    if (is_nan_pattern) {
+      continue;
+    }
+    const auto value = static_cast<float>(half_bits_to_double(half));
+    EXPECT_HALF_EQ(convert_bits(value), half);
+    if (value != 0.0f) {
+      ExpectSignSymmetry(value);
+    }
+  }
+}
+
+TEST(Float16FromFloatTest, PreservesAllSubnormalHalfValues) {
+  // Round-trips one subnormal pattern through the reference decoder.
+  const auto round_trip = [](const uint16_t half) {
+    return convert_bits(static_cast<float>(half_bits_to_double(half)));
+  };
+  for (uint16_t fraction = 1; fraction <= 0x03FF; ++fraction) {
+    const uint16_t positive = fraction;
+    const auto negative = static_cast<uint16_t>(fraction | 0x8000);
+    EXPECT_HALF_EQ(round_trip(positive), positive);
+    EXPECT_HALF_EQ(round_trip(negative), negative);
+  }
+}
+
+TEST(Float16FromFloatTest, SubnormalGradualUnderflowAndTieToZero) {
+  // 2^-25 is halfway between 0 and the smallest subnormal; the tie goes to
+  // the even neighbour, which is zero.
+  const float tie_to_zero = std::ldexp(1.0f, -25);
+  EXPECT_HALF_EQ(convert_bits(tie_to_zero), 0x0000);
+  EXPECT_HALF_EQ(convert_bits(-tie_to_zero), 0x8000);
+
+  // One float32 step above the tie already rounds up.
+  const float above_tie = std::nextafter(tie_to_zero, 1.0f);
+  EXPECT_HALF_EQ(convert_bits(above_tie), 0x0001);
+  EXPECT_HALF_EQ(convert_bits(-above_tie), 0x8001);
+
+  constexpr float kFloatInf = std::numeric_limits<float>::infinity();
+  for (uint16_t lower = 0x0001; lower < 0x03FF; ++lower) {
+    const auto upper = static_cast<uint16_t>(lower + 1);
+    const auto neg_lower = static_cast<uint16_t>(lower | 0x8000);
+    const auto neg_upper = static_cast<uint16_t>(upper | 0x8000);
+
+    const double exact_mid_d =
+        (half_bits_to_double(lower) + half_bits_to_double(upper)) * 0.5;
+    const auto exact_mid_f = static_cast<float>(exact_mid_d);
+
+    // Immediate float32 neighbours of the midpoint round to the nearer half.
+    const float just_below = std::nextafter(exact_mid_f, 0.0f);
+    const float just_above = std::nextafter(exact_mid_f, kFloatInf);
+    EXPECT_HALF_EQ(convert_bits(just_below), lower);
+    EXPECT_HALF_EQ(convert_bits(just_above), upper);
+    EXPECT_HALF_EQ(convert_bits(-just_below), neg_lower);
+    EXPECT_HALF_EQ(convert_bits(-just_above), neg_upper);
+
+    // The tie itself is only checked when float32 holds it exactly.
+    if (static_cast<double>(exact_mid_f) != exact_mid_d) {
+      continue;
+    }
+    const uint16_t even = (lower & 1u) == 0 ? lower : upper;
+    EXPECT_HALF_EQ(convert_bits(exact_mid_f), even);
+    EXPECT_HALF_EQ(convert_bits(-exact_mid_f),
+                   static_cast<uint16_t>(even | 0x8000));
+  }
+}
+
+TEST(Float16FromFloatTest, RoundsToNearestTiesToEvenAcrossMidpoints) {
+  // Tie between +0 and the smallest subnormal resolves to the even side (0).
+  const double zero_subnormal_midpoint = half_bits_to_double(0x0001) * 0.5;
+  const auto zero_subnormal_midpoint_float =
+      static_cast<float>(zero_subnormal_midpoint);
+  ASSERT_EQ(static_cast<double>(zero_subnormal_midpoint_float),
+            zero_subnormal_midpoint);
+  EXPECT_HALF_EQ(convert_bits(zero_subnormal_midpoint_float), 0x0000);
+  EXPECT_HALF_EQ(convert_bits(-zero_subnormal_midpoint_float), 0x8000);
+
+  // Walk every pair of adjacent positive finite halves. `upper` never exceeds
+  // 0x7BFF (the largest finite value), so no Inf/NaN filtering is needed.
+  for (uint16_t lower = 0x0001; lower < 0x7BFF; ++lower) {
+    const auto upper = static_cast<uint16_t>(lower + 1);
+
+    const double midpoint =
+        (half_bits_to_double(lower) + half_bits_to_double(upper)) * 0.5;
+    const auto midpoint_as_float = static_cast<float>(midpoint);
+    if (static_cast<double>(midpoint_as_float) != midpoint) {
+      // The cast moved the value off the exact tie, so it would no longer
+      // exercise the tie-breaking rule.
+      continue;
+    }
+
+    // A tie must resolve to whichever neighbour has an even low bit.
+    const uint16_t expected = (lower & 1u) == 0 ? lower : upper;
+    EXPECT_HALF_EQ(convert_bits(midpoint_as_float), expected);
+    EXPECT_HALF_EQ(convert_bits(-midpoint_as_float),
+                   static_cast<uint16_t>(expected | 0x8000));
+  }
+}
+
+TEST(Float16FromFloatTest, OverflowBoundariesAndLargeFloat32Inputs) {
+  constexpr float kMaxHalf = 65504.0f;     // largest finite half, 0x7BFF
+  constexpr float kOverflowTie = 65520.0f; // first input that becomes Inf
+  constexpr float kFloatInf = std::numeric_limits<float>::infinity();
+  constexpr float kFloatMax = std::numeric_limits<float>::max();
+
+  EXPECT_HALF_EQ(convert_bits(kMaxHalf), 0x7BFF);
+  EXPECT_HALF_EQ(convert_bits(-kMaxHalf), 0xFBFF);
+
+  // Just under the tie still rounds down to the largest finite value.
+  EXPECT_HALF_EQ(convert_bits(65519.0f), 0x7BFF);
+  EXPECT_HALF_EQ(convert_bits(-65519.0f), 0xFBFF);
+
+  // The tie itself overflows to infinity.
+  EXPECT_HALF_EQ(convert_bits(kOverflowTie), 0x7C00);
+  EXPECT_HALF_EQ(convert_bits(-kOverflowTie), 0xFC00);
+
+  // Immediate float32 neighbours of the tie fall on either side of it.
+  EXPECT_HALF_EQ(convert_bits(std::nextafter(kOverflowTie, 0.0f)), 0x7BFF);
+  EXPECT_HALF_EQ(convert_bits(std::nextafter(kOverflowTie, kFloatInf)),
+                 0x7C00);
+
+  EXPECT_HALF_EQ(convert_bits(65510.0f), 0x7BFF);
+  EXPECT_HALF_EQ(convert_bits(65512.0f), 0x7BFF);
+  EXPECT_HALF_EQ(convert_bits(65518.0f), 0x7BFF);
+  EXPECT_HALF_EQ(convert_bits(65530.0f), 0x7C00);
+
+  // The largest float32 values overflow as well.
+  EXPECT_HALF_EQ(convert_bits(kFloatMax), 0x7C00);
+  EXPECT_HALF_EQ(convert_bits(-kFloatMax), 0xFC00);
+}
+
+TEST(Float16FromFloatTest, Float32SubnormalsAndMinNormalUnderflowToZero) {
+  // These inputs are all expected to narrow to a zero of the same sign.
+  const float denorm_min = std::numeric_limits<float>::denorm_min();
+  EXPECT_HALF_EQ(convert_bits(denorm_min), 0x0000);
+  EXPECT_HALF_EQ(convert_bits(-denorm_min), 0x8000);
+
+  // A few float32 subnormal encodings, up to the largest one.
+  EXPECT_HALF_EQ(convert_bits(bits_to_float(0x00000100u)), 0x0000);
+  EXPECT_HALF_EQ(convert_bits(bits_to_float(0x00400000u)), 0x0000);
+  EXPECT_HALF_EQ(convert_bits(bits_to_float(0x007FFFFFu)), 0x0000);
+  EXPECT_HALF_EQ(convert_bits(bits_to_float(0x807FFFFFu)), 0x8000);
+
+  // The smallest float32 normal also underflows.
+  const float min_normal = std::numeric_limits<float>::min();
+  EXPECT_HALF_EQ(convert_bits(min_normal), 0x0000);
+  EXPECT_HALF_EQ(convert_bits(-min_normal), 0x8000);
+}
+
+TEST(Float16FromFloatTest, IntegerAndUlpRegressionCases) {
+  // Integers 1..2048 must narrow without any rounding error.
+  for (int value = 1; value <= 2048; ++value) {
+    const auto as_float = static_cast<float>(value);
+    const uint16_t half = convert_bits(as_float);
+    EXPECT_EQ(half_bits_to_double(half), static_cast<double>(value));
+    ExpectSignSymmetry(as_float);
+  }
+  // 2049 sits between 2048 and 2050; the tie goes to the even pattern.
+  EXPECT_HALF_EQ(convert_bits(2049.0f), 0x6800);
+
+  constexpr float one = 1.0f;
+  const float one_full_ulp = std::ldexp(1.0f, -10);
+  const float one_half_ulp = std::ldexp(1.0f, -11);
+  const float one_quarter_ulp = std::ldexp(1.0f, -12);
+
+  // 1 + ULP is the next representable half.
+  EXPECT_HALF_EQ(convert_bits(one + one_full_ulp), 0x3C01);
+  // 1 + ULP/2 is the tie between 0x3C00 and 0x3C01: even side is 0x3C00.
+  EXPECT_HALF_EQ(convert_bits(one + one_half_ulp), 0x3C00);
+  // 1 - 2^-11 is exactly 0x3BFF, so nothing is rounded.
+  EXPECT_HALF_EQ(convert_bits(one - one_half_ulp), 0x3BFF);
+  // 1 - 2^-12 is the tie between 0x3BFF and 0x3C00: even side is 0x3C00.
+  EXPECT_HALF_EQ(convert_bits(one - one_quarter_ulp), 0x3C00);
+  // One float32 step past the upper tie rounds up.
+  const float past_tie = std::nextafter(
+      one + one_half_ulp, std::numeric_limits<float>::infinity());
+  EXPECT_HALF_EQ(convert_bits(past_tie), 0x3C01);
+}
+
+TEST(Float16FromFloatTest, SignSymmetryForNonNaNBitPatterns) {
+  // Sample the float32 bit space with a prime stride (about 41k samples).
+  // The cursor is deliberately 64-bit: a uint32_t cursor wraps around instead
+  // of passing the upper bound, so the loop would keep cycling through the
+  // whole range until it happened to land exactly on 0xFFFFFFFF.
+  constexpr uint64_t kStride = 104729u;
+  constexpr uint64_t kLastPattern = std::numeric_limits<uint32_t>::max();
+  for (uint64_t cursor = 0; cursor <= kLastPattern; cursor += kStride) {
+    const float value = bits_to_float(static_cast<uint32_t>(cursor));
+    if (std::isnan(value)) {
+      continue;
+    }
+    ExpectSignSymmetry(value);
+  }
+}
+
+TEST(Float16Test, FromBitsRoundTrip) {
+  // from_bits followed by to_bits must be the identity for all 65536 patterns.
+  for (uint32_t pattern = 0; pattern <= 0xFFFF; ++pattern) {
+    const auto half_bits = static_cast<uint16_t>(pattern);
+    const float16_t wrapped = float16_t::from_bits(half_bits);
+    EXPECT_HALF_EQ(wrapped.to_bits(), half_bits);
+  }
+}
+
+// ============================================================
+// to_float() tests — testing both directions
+// ============================================================
+
+TEST(Float16ToFloatTest, SignedZerosAndInfinities) {
+  // Zeros: the value compares equal to 0 and the sign bit distinguishes them.
+  const float pos_zero = float16_t::from_bits(0x0000).to_float();
+  const float neg_zero = float16_t::from_bits(0x8000).to_float();
+  EXPECT_EQ(pos_zero, 0.0f);
+  EXPECT_FALSE(std::signbit(pos_zero));
+  EXPECT_EQ(neg_zero, -0.0f);
+  EXPECT_TRUE(std::signbit(neg_zero));
+
+  // Infinities: still infinite after widening, on the correct side of zero.
+  const float pos_inf = float16_t::from_bits(0x7C00).to_float();
+  const float neg_inf = float16_t::from_bits(0xFC00).to_float();
+  EXPECT_TRUE(std::isinf(pos_inf));
+  EXPECT_GT(pos_inf, 0.0f);
+  EXPECT_TRUE(std::isinf(neg_inf));
+  EXPECT_LT(neg_inf, 0.0f);
+}
+
+TEST(Float16ToFloatTest, NaNPayloadAndSignPreservation) {
+  // Widening shifts the 10-bit f16 fraction left by 13 bits, so the f16 quiet
+  // bit (bit 9) lands on the f32 quiet bit (bit 22) and the payload keeps its
+  // relative position. The sign bit is carried over unchanged.
+  struct Case {
+    uint16_t half_bits;
+    uint32_t expected_f32_bits;
+  };
+  const Case cases[] = {
+      {0x7E00, 0x7FC00000u}, // canonical +qNaN: 0x200 << 13 = 0x400000
+      {0xFE00, 0xFFC00000u}, // canonical -qNaN
+      {0x7E01, 0x7FC02000u}, // payload bit 0 set: 0x201 << 13 = 0x402000
+      {0x7EFF, 0x7FDFE000u}, // fraction 0x2FF: 0x2FF << 13 = 0x5FE000
+      {0xFE01, 0xFFC02000u}, // negative qNaN with payload
+  };
+  for (const Case &c : cases) {
+    const float widened = float16_t::from_bits(c.half_bits).to_float();
+    EXPECT_TRUE(std::isnan(widened)) << "bits=0x" << std::hex << c.half_bits;
+    EXPECT_EQ(std::signbit(widened), (c.half_bits & 0x8000u) != 0u)
+        << "sign not preserved for bits=0x" << std::hex << c.half_bits;
+    EXPECT_EQ(float_to_bits(widened), c.expected_f32_bits);
+  }
+
+  // Exhaustive pass over all 1023 non-zero fractions, for both signs: the
+  // result is a NaN, the quiet bit is carried over, and the sign survives.
+  for (uint16_t frac = 1; frac <= 0x03FF; ++frac) {
+    const auto nan_bits = static_cast<uint16_t>(0x7C00 | frac);
+    const float f = float16_t::from_bits(nan_bits).to_float();
+    EXPECT_TRUE(std::isnan(f)) << "bits=0x" << std::hex << nan_bits;
+
+    const bool f16_quiet = (frac & 0x0200u) != 0;
+    const bool f32_quiet = (float_to_bits(f) & 0x00400000u) != 0u;
+    EXPECT_EQ(f16_quiet, f32_quiet)
+        << "quiet bit not preserved for bits=0x" << std::hex << nan_bits;
+
+    const auto neg_nan_bits = static_cast<uint16_t>(nan_bits | 0x8000);
+    const float fn = float16_t::from_bits(neg_nan_bits).to_float();
+    EXPECT_TRUE(std::isnan(fn));
+    EXPECT_TRUE(std::signbit(fn))
+        << "sign not preserved for bits=0x" << std::hex << neg_nan_bits;
+  }
+}
+
+// min normal 2^-14 and min subnormal 2^-24
+TEST(Float16ToFloatTest, BoundaryValues) {
+  // Positive boundary encodings with their exact float32 values; the
+  // negative counterpart of each is checked alongside.
+  struct Boundary {
+    uint16_t half_bits;
+    float magnitude;
+  };
+  const Boundary boundaries[] = {
+      {0x7BFF, 65504.0f},                 // max finite
+      {0x0400, std::ldexp(1.0f, -14)},    // min normal: 2^-14
+      {0x0001, std::ldexp(1.0f, -24)},    // min positive subnormal: 2^-24
+      {0x03FF, std::ldexp(1023.0f, -24)}, // max subnormal: 1023 * 2^-24
+  };
+  for (const Boundary &boundary : boundaries) {
+    const auto negative_bits =
+        static_cast<uint16_t>(boundary.half_bits | 0x8000);
+    EXPECT_EQ(float16_t::from_bits(boundary.half_bits).to_float(),
+              boundary.magnitude);
+    EXPECT_EQ(float16_t::from_bits(negative_bits).to_float(),
+              -boundary.magnitude);
+  }
+}
+
+TEST(Float16ToFloatTest, NormalValueSpotChecks) {
+  // Hand-picked normal encodings and the exact float32 value of each.
+  struct Sample {
+    uint16_t half_bits;
+    float value;
+  };
+  const Sample samples[] = {
+      {0x3C00, 1.0f},
+      {0xBC00, -1.0f},
+      {0x4000, 2.0f},
+      {0xC000, -2.0f},
+      {0x3800, 0.5f},
+      {0xB800, -0.5f},
+      {0x3E00, 1.5f},
+      {0x4200, 3.0f},
+      {0x7800, std::ldexp(1.0f, 15)},  // exponent field 30: 2^15
+      {0x0400, std::ldexp(1.0f, -14)}, // exponent field 1: 2^-14
+  };
+  for (const Sample &sample : samples) {
+    EXPECT_EQ(float16_t::from_bits(sample.half_bits).to_float(), sample.value);
+  }
+}
+
+TEST(Float16ToFloatTest, AllSubnormalsMatchReference) {
+  // Compares to_float against the reference decoder for one pattern.
+  const auto expect_matches_reference = [](const uint16_t half_bits,
+                                           const char *message) {
+    const auto reference = static_cast<float>(half_bits_to_double(half_bits));
+    EXPECT_EQ(float16_t::from_bits(half_bits).to_float(), reference)
+        << message << std::hex << half_bits;
+  };
+  for (uint16_t frac = 1; frac <= 0x03FF; ++frac) {
+    expect_matches_reference(frac, "to_float mismatch for subnormal 0x");
+    expect_matches_reference(static_cast<uint16_t>(frac | 0x8000),
+                             "to_float mismatch for negative subnormal 0x");
+  }
+}
+
+TEST(Float16ToFloatTest, AllNormalsMatchReference) {
+  // Compares to_float against the reference decoder for one pattern.
+  const auto expect_matches_reference = [](const uint16_t half_bits,
+                                           const char *message) {
+    const auto reference = static_cast<float>(half_bits_to_double(half_bits));
+    EXPECT_EQ(float16_t::from_bits(half_bits).to_float(), reference)
+        << message << std::hex << half_bits;
+  };
+  // Exponent fields 1..30 are the normal range; every fraction is covered.
+  for (uint16_t exp = 1; exp <= 30; ++exp) {
+    for (uint16_t frac = 0; frac <= 0x03FF; ++frac) {
+      const auto bits = static_cast<uint16_t>((exp << 10) | frac);
+      expect_matches_reference(bits, "to_float mismatch for normal 0x");
+      expect_matches_reference(static_cast<uint16_t>(bits | 0x8000),
+                               "to_float mismatch for negative normal 0x");
+    }
+  }
+}
+
+// verify bit preservation for all non-NaN; for NaN validate chosen policy.
+
+TEST(Float16Test, StressAllBitPatternsViaToFloat) {
+  for (uint32_t pattern = 0; pattern <= 0xFFFF; ++pattern) {
+    const auto half_bits = static_cast<uint16_t>(pattern);
+    const float widened = float16_t::from_bits(half_bits).to_float();
+    const uint16_t round_tripped = float16_t::from_float(widened).to_bits();
+
+    const bool is_nan_pattern =
+        (half_bits & 0x7C00) == 0x7C00 && (half_bits & 0x03FF) != 0;
+    if (!is_nan_pattern) {
+      // Everything that is not a NaN must survive the round trip bit for bit.
+      EXPECT_EQ(round_tripped, half_bits)
+          << "Round-trip failed for bits=0x" << std::hex << half_bits;
+      continue;
+    }
+
+    // NaN policy: the result is still a NaN, it is quiet, and the sign is
+    // kept. The remaining payload bits are not checked here.
+    EXPECT_TRUE(std::isnan(widened)) << "to_float of NaN bits=0x" << std::hex
+                                     << half_bits << " must be NaN";
+    EXPECT_EQ(round_tripped & 0x7C00u, 0x7C00u)
+        << "NaN round-trip exp must be all-ones for bits=0x" << std::hex
+        << half_bits;
+    EXPECT_NE(round_tripped & 0x03FFu, 0u)
+        << "NaN round-trip frac must be non-zero for bits=0x" << std::hex
+        << half_bits;
+    EXPECT_NE(round_tripped & 0x0200u, 0u)
+        << "NaN round-trip quiet bit must be set for bits=0x" << std::hex
+        << half_bits;
+    EXPECT_EQ(round_tripped & 0x8000u,
+              static_cast<uint16_t>(half_bits & 0x8000u))
+        << "NaN sign must be preserved for bits=0x" << std::hex << half_bits;
+  }
+}
+
+// ============================================================
+// 3.3 Classification tests
+// ============================================================
+
+// Representative bit patterns used across classification tests.
+// Named constants avoid magic numbers and make intent clear.
+constexpr uint16_t kPosZero = 0x0000; // +0: all bits clear
+constexpr uint16_t kNegZero = 0x8000; // -0: only the sign bit set
+constexpr uint16_t kPosInf = 0x7C00; // +Inf: exponent all ones, fraction zero
+constexpr uint16_t kNegInf = 0xFC00; // -Inf: +Inf with the sign bit set
+constexpr uint16_t kQNaN = 0x7E00; // canonical quiet NaN (fraction bit 9 set)
+constexpr uint16_t kNegQNaN = 0xFE00; // quiet NaN with the sign bit set
+constexpr uint16_t kSNaN = 0x7C01; // smallest signaling NaN (bit 9 clear)
+constexpr uint16_t kNegSNaN = 0xFC01; // signaling NaN with the sign bit set
+constexpr uint16_t kMinSubnorm = 0x0001; // smallest positive subnormal (2^-24)
+constexpr uint16_t kMaxSubnorm = 0x03FF; // largest positive subnormal (1023 * 2^-24)
+constexpr uint16_t kMinNormal = 0x0400; // smallest positive normal (2^-14)
+constexpr uint16_t kOne = 0x3C00; // 1.0
+constexpr uint16_t kNegOne = 0xBC00; // -1.0
+constexpr uint16_t kMaxFinite = 0x7BFF; // largest finite value (65504)
+
+// Terse helper for the classification tests: wraps a raw bit pattern.
+static float16_t H(uint16_t b) {
+  const float16_t wrapped = float16_t::from_bits(b);
+  return wrapped;
+}
+
+TEST(Float16ClassificationTest, IsNaN) {
+  const auto is_nan = [](const uint16_t bits) {
+    return float16_t::is_nan(H(bits));
+  };
+
+  // Quiet, signaling and all-ones NaNs of either sign.
+  EXPECT_TRUE(is_nan(kQNaN));
+  EXPECT_TRUE(is_nan(kNegQNaN));
+  EXPECT_TRUE(is_nan(kSNaN));
+  EXPECT_TRUE(is_nan(kNegSNaN));
+  EXPECT_TRUE(is_nan(0x7FFFu));
+  EXPECT_TRUE(is_nan(0xFFFFu));
+
+  // Exhaustive: each of the 1023 non-zero fractions under an all-ones
+  // exponent is a NaN, for both signs.
+  for (uint16_t frac = 1; frac <= 0x03FF; ++frac) {
+    EXPECT_TRUE(is_nan(static_cast<uint16_t>(0x7C00u | frac)));
+    EXPECT_TRUE(is_nan(static_cast<uint16_t>(0xFC00u | frac)));
+  }
+
+  // Infinities, zeros and finite values are never NaN.
+  EXPECT_FALSE(is_nan(kPosInf));
+  EXPECT_FALSE(is_nan(kNegInf));
+  EXPECT_FALSE(is_nan(kPosZero));
+  EXPECT_FALSE(is_nan(kNegZero));
+  EXPECT_FALSE(is_nan(kOne));
+  EXPECT_FALSE(is_nan(kNegOne));
+  EXPECT_FALSE(is_nan(kMinSubnorm));
+  EXPECT_FALSE(is_nan(kMaxFinite));
+}
+
+TEST(Float16ClassificationTest, IsInf) {
+  const auto is_inf = [](const uint16_t bits) {
+    return float16_t::is_inf(H(bits));
+  };
+
+  // Only the two infinity encodings qualify.
+  EXPECT_TRUE(is_inf(kPosInf));
+  EXPECT_TRUE(is_inf(kNegInf));
+
+  // NaNs share the exponent field but are not infinities.
+  EXPECT_FALSE(is_inf(kQNaN));
+  EXPECT_FALSE(is_inf(kSNaN));
+
+  // Zeros and finite values.
+  EXPECT_FALSE(is_inf(kPosZero));
+  EXPECT_FALSE(is_inf(kNegZero));
+  EXPECT_FALSE(is_inf(kOne));
+  EXPECT_FALSE(is_inf(kMaxFinite));
+  EXPECT_FALSE(is_inf(kMinSubnorm));
+}
+
+TEST(Float16ClassificationTest, IsInfWithSign) {
+  // Each row: bit pattern, sign selector handed to is_inf, expected answer.
+  // Selector 0 accepts either infinity, a positive selector only +Inf and a
+  // negative selector only -Inf.
+  struct Case {
+    uint16_t bits;
+    int sign;
+    bool expected;
+  };
+  const Case cases[] = {
+      {kPosInf, 0, true},    {kNegInf, 0, true},   {kQNaN, 0, false},
+      {kOne, 0, false},      {kPosInf, +1, true},  {kNegInf, +1, false},
+      {kOne, +1, false},     {kNegInf, -1, true},  {kPosInf, -1, false},
+      {kNegOne, -1, false},
+  };
+  for (const Case &c : cases) {
+    const bool actual = float16_t::is_inf(H(c.bits), c.sign);
+    EXPECT_EQ(actual, c.expected)
+        << "bits=0x" << std::hex << c.bits << " sign=" << std::dec << c.sign;
+  }
+}
+
+TEST(Float16ClassificationTest, IsZero) {
+  // Both signed zeros count as zero.
+  const uint16_t zeros[] = {kPosZero, kNegZero};
+  for (const uint16_t bits : zeros) {
+    EXPECT_TRUE(float16_t::is_zero(H(bits))) << "bits=0x" << std::hex << bits;
+  }
+
+  // The tiniest subnormal, a normal value, infinity and NaN do not.
+  const uint16_t non_zeros[] = {kMinSubnorm, kOne, kPosInf, kQNaN};
+  for (const uint16_t bits : non_zeros) {
+    EXPECT_FALSE(float16_t::is_zero(H(bits))) << "bits=0x" << std::hex << bits;
+  }
+}
+
+TEST(Float16ClassificationTest, Signbit) {
+  // signbit must mirror the top bit for every class of value: zero, normal,
+  // infinity, NaN and subnormal.
+  struct Case {
+    uint16_t bits;
+    bool negative;
+  };
+  const Case cases[] = {
+      {kPosZero, false},
+      {kNegZero, true},
+      {kOne, false},
+      {kNegOne, true},
+      {kPosInf, false},
+      {kNegInf, true},
+      {kQNaN, false},
+      {kNegQNaN, true},
+      {kMinSubnorm, false},
+      {static_cast<uint16_t>(kMinSubnorm | 0x8000u), true},
+  };
+  for (const Case &c : cases) {
+    const bool actual = float16_t::signbit(H(c.bits));
+    EXPECT_EQ(actual, c.negative) << "bits=0x" << std::hex << c.bits;
+  }
+}
+
+TEST(Float16ClassificationTest, IsSubnormal) {
+  // Boundary encodings of the subnormal range.
+  EXPECT_TRUE(float16_t::is_subnormal(H(kMinSubnorm)));
+  EXPECT_TRUE(float16_t::is_subnormal(H(kMaxSubnorm)));
+
+  // Sweep every non-zero fraction with a zero exponent (1023 magnitudes),
+  // checking the positive and the negative encoding of each.
+  for (uint16_t magnitude = 0x0001; magnitude < 0x0400u; ++magnitude) {
+    const uint16_t negated = static_cast<uint16_t>(magnitude | 0x8000u);
+    EXPECT_TRUE(float16_t::is_subnormal(H(magnitude)));
+    EXPECT_TRUE(float16_t::is_subnormal(H(negated)));
+  }
+
+  // Zeros, normals, infinity and NaN are outside the subnormal class.
+  const uint16_t others[] = {kPosZero, kNegZero, kMinNormal,
+                             kOne,     kPosInf,  kQNaN};
+  for (const uint16_t bits : others) {
+    EXPECT_FALSE(float16_t::is_subnormal(H(bits)))
+        << "bits=0x" << std::hex << bits;
+  }
+}
+
+TEST(Float16ClassificationTest, IsNormal) {
+  // Spot checks on well-known normal values.
+  const uint16_t normal_samples[] = {kMinNormal, kOne, kNegOne, kMaxFinite};
+  for (const uint16_t bits : normal_samples) {
+    EXPECT_TRUE(float16_t::is_normal(H(bits))) << "bits=0x" << std::hex << bits;
+  }
+
+  // Exhaustive positive range: biased exponents 1..30 with all 1024 fraction
+  // values form the contiguous bit range 0x0400 (exp=1, frac=0) through
+  // 0x7BFF (exp=30, frac=0x3FF).
+  for (uint16_t bits = 0x0400; bits <= 0x7BFF; ++bits) {
+    EXPECT_TRUE(float16_t::is_normal(H(bits)));
+  }
+
+  // Zeros, subnormals, infinities and NaNs are not normal.
+  const uint16_t not_normal[] = {kPosZero, kNegZero, kMinSubnorm, kPosInf,
+                                 kNegInf,  kQNaN,    kSNaN};
+  for (const uint16_t bits : not_normal) {
+    EXPECT_FALSE(float16_t::is_normal(H(bits)))
+        << "bits=0x" << std::hex << bits;
+  }
+}
+
+TEST(Float16ClassificationTest, IsFinite) {
+  // Zeros, subnormals and normals are all finite.
+  const uint16_t finite[] = {kPosZero, kNegZero,   kMinSubnorm, kOne,
+                             kNegOne,  kMaxFinite, kMinNormal};
+  for (const uint16_t bits : finite) {
+    EXPECT_TRUE(float16_t::is_finite(H(bits))) << "bits=0x" << std::hex << bits;
+  }
+
+  // Infinities and NaNs (quiet or signaling, either sign) are not.
+  const uint16_t not_finite[] = {kPosInf, kNegInf, kQNaN, kSNaN, kNegQNaN};
+  for (const uint16_t bits : not_finite) {
+    EXPECT_FALSE(float16_t::is_finite(H(bits)))
+        << "bits=0x" << std::hex << bits;
+  }
+}
+
+TEST(Float16ClassificationTest, MutualExclusionAcrossAllBitPatterns) {
+  // Walk all 65536 encodings. Each must belong to exactly one of the five
+  // classes (zero, subnormal, normal, infinity, NaN), and is_finite must be
+  // the complement of "infinity or NaN".
+  for (uint32_t raw = 0; raw < 0x10000u; ++raw) {
+    const float16_t value = float16_t::from_bits(static_cast<uint16_t>(raw));
+    const bool infinite = float16_t::is_inf(value);
+    const bool not_a_number = float16_t::is_nan(value);
+
+    int categories = 0;
+    if (float16_t::is_zero(value)) {
+      ++categories;
+    }
+    if (float16_t::is_subnormal(value)) {
+      ++categories;
+    }
+    if (float16_t::is_normal(value)) {
+      ++categories;
+    }
+    if (infinite) {
+      ++categories;
+    }
+    if (not_a_number) {
+      ++categories;
+    }
+    EXPECT_EQ(categories, 1) << "bits=0x" << std::hex << raw;
+
+    const bool expect_finite = !(infinite || not_a_number);
+    EXPECT_EQ(float16_t::is_finite(value), expect_finite)
+        << "bits=0x" << std::hex << raw;
+  }
+}
+
+// ============================================================
+// to_string tests
+// ============================================================
+
+TEST(Float16ToStringTest, MatchesToFloat) {
+  // The textual form is expected to be exactly what std::to_string yields for
+  // the value widened to float.
+  const uint16_t samples[] = {kPosZero,   kNegZero, kOne,    kNegOne,
+                              kPosInf,    kNegInf,  kQNaN,   kMinSubnorm,
+                              kMaxFinite, 0x4000u,  0x3800u};
+  for (const uint16_t bits : samples) {
+    const float16_t value = float16_t::from_bits(bits);
+    const std::string expected = std::to_string(value.to_float());
+    EXPECT_EQ(float16_t::to_string(value), expected)
+        << "bits=0x" << std::hex << bits;
+  }
+}
+
+TEST(Float16ToStringTest, SpecialValues) {
+  // 1.0 must render as a non-empty string that contains the digit '1'.
+  const std::string one_text = float16_t::to_string(H(kOne));
+  EXPECT_FALSE(one_text.empty());
+  EXPECT_NE(one_text.find('1'), std::string::npos);
+
+  // The exact spelling of +-Inf and NaN is left to std::to_string, so only
+  // require that something is produced.
+  const uint16_t non_finite[] = {kPosInf, kNegInf, kQNaN};
+  for (const uint16_t bits : non_finite) {
+    EXPECT_FALSE(float16_t::to_string(H(bits)).empty())
+        << "bits=0x" << std::hex << bits;
+  }
+}
+
+// ============================================================
+// 3.4 Arithmetic tests
+// ============================================================
+
+// Bit-level wrapper: adds the float16 values encoded by the two raw bit
+// patterns and returns the raw bits of the sum.
+static uint16_t add_bits(uint16_t a, uint16_t b) {
+  const float16_t sum = float16_t::add(H(a), H(b));
+  return sum.to_bits();
+}
+// Bit-level wrapper: subtracts the value encoded by `b` from the value
+// encoded by `a` and returns the raw bits of the difference.
+static uint16_t sub_bits(uint16_t a, uint16_t b) {
+  const float16_t difference = float16_t::sub(H(a), H(b));
+  return difference.to_bits();
+}
+// Bit-level wrapper: multiplies the float16 values encoded by the two raw bit
+// patterns and returns the raw bits of the product.
+static uint16_t mul_bits(uint16_t a, uint16_t b) {
+  const float16_t product = float16_t::mul(H(a), H(b));
+  return product.to_bits();
+}
+// Bit-level wrapper: divides the value encoded by `a` by the value encoded by
+// `b` and returns the raw bits of the quotient.
+static uint16_t div_bits(uint16_t a, uint16_t b) {
+  const float16_t quotient = float16_t::div(H(a), H(b));
+  return quotient.to_bits();
+}
+
+TEST(Float16ArithmeticTest, AddBasic) {
+  // Sums with an exactly representable, bit-for-bit predictable result.
+  struct Case {
+    uint16_t lhs;
+    uint16_t rhs;
+    uint16_t sum;
+  };
+  const Case exact[] = {
+      {kOne, kOne, 0x4000u},          // 1.0 + 1.0 = 2.0
+      {kOne, kNegOne, 0x0000u},       // 1.0 + (-1.0) = +0
+      {kOne, kPosZero, kOne},         // 1.0 + 0 = 1.0
+      {kNegZero, kNegZero, kNegZero}, // -0 + -0 = -0
+      {kPosInf, kOne, kPosInf},       // +Inf + 1.0 = +Inf
+  };
+  for (const Case &c : exact) {
+    EXPECT_HALF_EQ(add_bits(c.lhs, c.rhs), c.sum);
+  }
+
+  // Opposite infinities have no defined sum, and a NaN operand propagates.
+  EXPECT_TRUE(float16_t::is_nan(H(add_bits(kPosInf, kNegInf))));
+  EXPECT_TRUE(float16_t::is_nan(H(add_bits(kQNaN, kOne))));
+}
+
+TEST(Float16ArithmeticTest, SubBasic) {
+  // Differences with a bit-for-bit predictable result.
+  struct Case {
+    uint16_t lhs;
+    uint16_t rhs;
+    uint16_t difference;
+  };
+  const Case exact[] = {
+      {0x4000u, kOne, kOne},     // 2.0 - 1.0 = 1.0
+      {kOne, kOne, kPosZero},    // 1.0 - 1.0 = +0
+      {kPosZero, kOne, kNegOne}, // 0 - 1.0 = -1.0
+  };
+  for (const Case &c : exact) {
+    EXPECT_HALF_EQ(sub_bits(c.lhs, c.rhs), c.difference);
+  }
+
+  // -Inf - (-Inf) is undefined, and a NaN operand propagates.
+  EXPECT_TRUE(float16_t::is_nan(H(sub_bits(kNegInf, kNegInf))));
+  EXPECT_TRUE(float16_t::is_nan(H(sub_bits(kQNaN, kOne))));
+}
+
+TEST(Float16ArithmeticTest, MulBasic) {
+  // Products with a bit-for-bit predictable result.
+  struct Case {
+    uint16_t lhs;
+    uint16_t rhs;
+    uint16_t product;
+  };
+  const Case exact[] = {
+      {0x4000u, 0x4200u, 0x4600u}, // 2.0 * 3.0 = 6.0
+      {kOne, kNegOne, kNegOne},    // 1.0 * -1.0 = -1.0
+      {kNegOne, kNegOne, kOne},    // -1.0 * -1.0 = 1.0
+      {kPosInf, 0x4000u, kPosInf}, // +Inf * 2.0 = +Inf
+  };
+  for (const Case &c : exact) {
+    EXPECT_HALF_EQ(mul_bits(c.lhs, c.rhs), c.product);
+  }
+
+  // 0 * +Inf is undefined, and a NaN operand propagates.
+  EXPECT_TRUE(float16_t::is_nan(H(mul_bits(kPosZero, kPosInf))));
+  EXPECT_TRUE(float16_t::is_nan(H(mul_bits(kQNaN, kOne))));
+}
+
+TEST(Float16ArithmeticTest, DivBasic) {
+  // Quotients with a bit-for-bit predictable result, including division of a
+  // non-zero value by +0.
+  struct Case {
+    uint16_t lhs;
+    uint16_t rhs;
+    uint16_t quotient;
+  };
+  const Case exact[] = {
+      {kOne, 0x4000u, 0x3800u},     // 1.0 / 2.0 = 0.5
+      {0x4600u, 0x4200u, 0x4000u},  // 6.0 / 3.0 = 2.0
+      {kOne, kPosZero, kPosInf},    // 1.0 / 0 = +Inf
+      {kNegOne, kPosZero, kNegInf}, // -1.0 / 0 = -Inf
+  };
+  for (const Case &c : exact) {
+    EXPECT_HALF_EQ(div_bits(c.lhs, c.rhs), c.quotient);
+  }
+
+  // 0 / 0 is undefined, and a NaN operand propagates.
+  EXPECT_TRUE(float16_t::is_nan(H(div_bits(kPosZero, kPosZero))));
+  EXPECT_TRUE(float16_t::is_nan(H(div_bits(kQNaN, kOne))));
+}
+
+TEST(Float16ArithmeticTest, NegAndAbs) {
+  struct Case {
+    uint16_t input;
+    uint16_t output;
+  };
+
+  // neg toggles the sign bit and leaves everything else alone.
+  const Case negations[] = {
+      {kOne, kNegOne},      {kNegOne, kOne},     {kPosZero, kNegZero},
+      {kNegZero, kPosZero}, {kPosInf, kNegInf},  {kNegInf, kPosInf},
+  };
+  for (const Case &c : negations) {
+    EXPECT_HALF_EQ(float16_t::neg(H(c.input)).to_bits(), c.output);
+  }
+  // Negating a positive NaN yields a NaN whose sign bit is set.
+  const float16_t negated_nan = float16_t::neg(H(kQNaN));
+  EXPECT_TRUE(float16_t::is_nan(negated_nan));
+  EXPECT_TRUE(float16_t::signbit(negated_nan));
+
+  // abs clears the sign bit.
+  const Case magnitudes[] = {
+      {kNegOne, kOne},
+      {kOne, kOne},
+      {kNegZero, kPosZero},
+      {kNegInf, kPosInf},
+  };
+  for (const Case &c : magnitudes) {
+    EXPECT_HALF_EQ(float16_t::abs(H(c.input)).to_bits(), c.output);
+  }
+  // abs of a negative NaN stays NaN but loses the sign bit.
+  const float16_t abs_nan = float16_t::abs(H(kNegQNaN));
+  EXPECT_TRUE(float16_t::is_nan(abs_nan));
+  EXPECT_FALSE(float16_t::signbit(abs_nan));
+}
+
+TEST(Float16ArithmeticTest, OperatorOverloads) {
+  const float16_t one = float16_t::from_bits(kOne);
+  const float16_t two = float16_t::from_bits(0x4000u);
+
+  // Binary operators, evaluated into named results first.
+  const float16_t sum = one + one;
+  const float16_t difference = two - one;
+  const float16_t product = two * two;
+  const float16_t quotient = two / two;
+  EXPECT_HALF_EQ(sum.to_bits(), 0x4000u);     // 2.0
+  EXPECT_HALF_EQ(difference.to_bits(), kOne); // 1.0
+  EXPECT_HALF_EQ(product.to_bits(), 0x4400u); // 4.0
+  EXPECT_HALF_EQ(quotient.to_bits(), kOne);   // 1.0
+
+  // Unary operators: minus flips the sign, plus is the identity.
+  const float16_t negated = -one;
+  const float16_t unchanged = +one;
+  EXPECT_HALF_EQ(negated.to_bits(), kNegOne);
+  EXPECT_HALF_EQ(unchanged.to_bits(), kOne);
+}
+
+TEST(Float16ArithmeticTest, CompoundAssignmentOperators) {
+  // Walk one accumulator through 1 -> 2 -> 1 -> 2 -> 1 using each compound
+  // operator once, checking the bits after every step.
+  const float16_t one = H(kOne);
+  const float16_t two = H(0x4000u);
+  float16_t acc = one;
+
+  acc += one;
+  EXPECT_HALF_EQ(acc.to_bits(), 0x4000u); // 1.0 + 1.0 = 2.0
+
+  acc -= one;
+  EXPECT_HALF_EQ(acc.to_bits(), kOne); // 2.0 - 1.0 = 1.0
+
+  acc *= two;
+  EXPECT_HALF_EQ(acc.to_bits(), 0x4000u); // 1.0 * 2.0 = 2.0
+
+  acc /= two;
+  EXPECT_HALF_EQ(acc.to_bits(), kOne); // 2.0 / 2.0 = 1.0
+}
+
+TEST(Float16ArithmeticTest, RoundingThroughFloat32) {
+  // Exercises rounding of arithmetic results back to float16 around 1.0,
+  // where one float16 ULP is 2^-10.
+  const float16_t one = H(kOne);
+
+  // The smallest subnormal (2^-24) is far too small to move 1.0 to the next
+  // representable float16, so the sum must come back as exactly 1.0.
+  const float16_t tiny = H(kMinSubnorm);
+  EXPECT_HALF_EQ((one + tiny).to_bits(), kOne);
+
+  // Adding exactly one ULP (2^-10, built via from_float) must step to the
+  // next encoding after 1.0, i.e. 0x3C01.
+  const float16_t ulp = float16_t::from_float(std::ldexp(1.0f, -10));
+  EXPECT_HALF_EQ(float16_t::add(one, ulp).to_bits(), 0x3C01u);
+}
+
+// ============================================================
+// 3.5 Optional math tests
+// ============================================================
+
+TEST(Float16MathTest, Sqrt) {
+  // Inputs whose square root is exactly representable.
+  struct Case {
+    uint16_t input;
+    uint16_t root;
+  };
+  const Case exact[] = {
+      {0x4400u, 0x4000u},   // sqrt(4.0) = 2.0
+      {kOne, kOne},         // sqrt(1.0) = 1.0
+      {kPosZero, kPosZero}, // sqrt(0) = 0
+      {kPosInf, kPosInf},   // sqrt(+Inf) = +Inf
+  };
+  for (const Case &c : exact) {
+    EXPECT_HALF_EQ(float16_t::sqrt(H(c.input)).to_bits(), c.root);
+  }
+
+  // The square root of a negative number is NaN.
+  EXPECT_TRUE(float16_t::is_nan(float16_t::sqrt(H(kNegOne))));
+}
+
+TEST(Float16MathTest, MinMax) {
+  const float16_t one = H(kOne);
+  const float16_t two = H(0x4000u);
+  const float16_t neg_one = H(kNegOne);
+  const float16_t quiet_nan = H(kQNaN);
+  const float16_t pos_zero = H(kPosZero);
+  const float16_t neg_zero = H(kNegZero);
+
+  // Ordinary ordering between distinct finite values.
+  EXPECT_HALF_EQ(float16_t::min(one, two).to_bits(), kOne);
+  EXPECT_HALF_EQ(float16_t::max(one, two).to_bits(), 0x4000u);
+  EXPECT_HALF_EQ(float16_t::min(neg_one, one).to_bits(), kNegOne);
+  EXPECT_HALF_EQ(float16_t::max(neg_one, one).to_bits(), kOne);
+
+  // fmin/fmax-style NaN handling: a single NaN operand is suppressed and the
+  // numeric operand is returned.
+  EXPECT_HALF_EQ(float16_t::min(one, quiet_nan).to_bits(), kOne);
+  EXPECT_HALF_EQ(float16_t::max(one, quiet_nan).to_bits(), kOne);
+
+  // For -0 versus +0 the sign of the result is not pinned down; only require
+  // that a zero comes back.
+  EXPECT_TRUE(float16_t::is_zero(float16_t::min(neg_zero, pos_zero)));
+  EXPECT_TRUE(float16_t::is_zero(float16_t::max(neg_zero, pos_zero)));
+}
+
+TEST(Float16MathTest, Copysign) {
+  const float16_t one = H(kOne);
+  const float16_t neg_one = H(kNegOne);
+
+  // copysign(1.0, -1.0) = -1.0
+  EXPECT_HALF_EQ(float16_t::copysign(one, neg_one).to_bits(), kNegOne);
+  // copysign(-1.0, 1.0) = 1.0
+  EXPECT_HALF_EQ(float16_t::copysign(neg_one, one).to_bits(), kOne);
+  // copysign(+Inf, -1.0) = -Inf
+  EXPECT_HALF_EQ(float16_t::copysign(H(kPosInf), neg_one).to_bits(), kNegInf);
+
+  // copysign(NaN, -1.0): still a NaN, now carrying the negative sign.
+  const float16_t signed_nan = float16_t::copysign(H(kQNaN), neg_one);
+  EXPECT_TRUE(float16_t::is_nan(signed_nan));
+  EXPECT_TRUE(float16_t::signbit(signed_nan));
+}
+
+TEST(Float16MathTest, FloorCeilTruncRound) {
+  // Shared operands for the rounding-function checks below.
+  const float16_t pos_1_5 = float16_t::from_float(1.5f);
+  const float16_t neg_1_5 = float16_t::from_float(-1.5f);
+  const float16_t pos_one = H(kOne);
+  const float16_t neg_one = H(kNegOne);
+  const float16_t quiet_nan = H(kQNaN);
+
+  // floor: largest integer not above the input; integers are unchanged.
+  EXPECT_EQ(float16_t::floor(pos_1_5).to_float(), 1.0f);
+  EXPECT_EQ(float16_t::floor(neg_1_5).to_float(), -2.0f);
+  EXPECT_EQ(float16_t::floor(pos_one).to_float(), 1.0f);
+  EXPECT_EQ(float16_t::floor(neg_one).to_float(), -1.0f);
+
+  // ceil: smallest integer not below the input.
+  EXPECT_EQ(float16_t::ceil(pos_1_5).to_float(), 2.0f);
+  EXPECT_EQ(float16_t::ceil(neg_1_5).to_float(), -1.0f);
+  EXPECT_EQ(float16_t::ceil(pos_one).to_float(), 1.0f);
+
+  // trunc: drop the fractional part (toward zero).
+  EXPECT_EQ(float16_t::trunc(pos_1_5).to_float(), 1.0f);
+  EXPECT_EQ(float16_t::trunc(neg_1_5).to_float(), -1.0f);
+
+  // round: nearest integer, halfway cases away from zero.
+  EXPECT_EQ(float16_t::round(pos_1_5).to_float(), 2.0f);
+  EXPECT_EQ(float16_t::round(neg_1_5).to_float(), -2.0f);
+  EXPECT_EQ(float16_t::round(float16_t::from_float(1.4f)).to_float(), 1.0f);
+  EXPECT_EQ(float16_t::round(float16_t::from_float(-1.4f)).to_float(), -1.0f);
+
+  // round_to_even: halfway cases go to the even neighbour, so 0.5 -> 0.0 and
+  // +-1.5 -> +-2.0.
+  const float16_t pos_0_5 = float16_t::from_float(0.5f);
+  EXPECT_EQ(float16_t::round_to_even(pos_0_5).to_float(), 0.0f);
+  EXPECT_EQ(float16_t::round_to_even(pos_1_5).to_float(), 2.0f);
+  EXPECT_EQ(float16_t::round_to_even(neg_1_5).to_float(), -2.0f);
+
+  // Infinities pass through floor/ceil as infinities.
+  EXPECT_TRUE(std::isinf(float16_t::floor(H(kPosInf)).to_float()));
+  EXPECT_TRUE(std::isinf(float16_t::ceil(H(kNegInf)).to_float()));
+
+  // NaN propagates through every rounding function.
+  EXPECT_TRUE(float16_t::is_nan(float16_t::floor(quiet_nan)));
+  EXPECT_TRUE(float16_t::is_nan(float16_t::ceil(quiet_nan)));
+  EXPECT_TRUE(float16_t::is_nan(float16_t::trunc(quiet_nan)));
+  EXPECT_TRUE(float16_t::is_nan(float16_t::round(quiet_nan)));
+  EXPECT_TRUE(float16_t::is_nan(float16_t::round_to_even(quiet_nan)));
+}
+
+// ============================================================
+// §3.8 / §6 Buffer write_f16 / read_f16 tests
+// ============================================================
+
+TEST(Float16BufferTest, WriteReadRoundTrip) {
+  // Each pattern is written into a fresh buffer with write_f16 and must come
+  // back bit-identical from read_f16 after rewinding the reader to offset 0.
+  const uint16_t patterns[] = {
+      0x0000, // +0
+      0x8000, // -0
+      0x7C00, // +Inf
+      0xFC00, // -Inf
+      0x7E00, // canonical qNaN
+      0x3C00, // 1.0
+      0xBC00, // -1.0
+      0x7BFF, // 65504 (max finite)
+      0x0001, // min subnormal (2^-24)
+      0x0400, // min normal (2^-14)
+      0x03FF, // max subnormal
+      0x4000, // 2.0
+  };
+  for (const uint16_t expected : patterns) {
+    std::shared_ptr<Buffer> buffer;
+    allocate_buffer(16, &buffer);
+    buffer->write_f16(float16_t::from_bits(expected));
+    buffer->reader_index(0);
+
+    Error status;
+    const float16_t decoded = buffer->read_f16(status);
+    ASSERT_TRUE(status.ok())
+        << "read_f16 error for bits=0x" << std::hex << expected;
+    EXPECT_EQ(decoded.to_bits(), expected)
+        << "round-trip failed for bits=0x" << std::hex << expected;
+  }
+}
+
+TEST(Float16BufferTest, WireFormatGoldenLittleEndian) {
+  // Golden bytes: write_f16 must store the low byte of the 16-bit pattern at
+  // offset 0 and the high byte at offset 1.
+  struct Golden {
+    uint16_t bits;
+    uint8_t first_byte;
+    uint8_t second_byte;
+  };
+  const Golden goldens[] = {
+      {0x3C00, 0x00, 0x3C}, // 1.0
+      {0x7C00, 0x00, 0x7C}, // +Inf
+      {0x8000, 0x00, 0x80}, // -0
+      {0x0001, 0x01, 0x00}, // min subnormal
+      {0x7BFF, 0xFF, 0x7B}, // 65504 max finite
+      {0xFC00, 0x00, 0xFC}, // -Inf
+  };
+  for (const Golden &g : goldens) {
+    std::shared_ptr<Buffer> buffer;
+    allocate_buffer(4, &buffer);
+    buffer->write_f16(float16_t::from_bits(g.bits));
+
+    const uint8_t stored_lo = buffer->get<uint8_t>(0);
+    const uint8_t stored_hi = buffer->get<uint8_t>(1);
+    EXPECT_EQ(stored_lo, g.first_byte)
+        << "lo byte mismatch for bits=0x" << std::hex << g.bits;
+    EXPECT_EQ(stored_hi, g.second_byte)
+        << "hi byte mismatch for bits=0x" << std::hex << g.bits;
+  }
+}
+
+TEST(Float16BufferTest, ReadBoundsErrorOnEmpty) {
+  // A zero-sized buffer cannot supply the two bytes read_f16 needs: the call
+  // must flag an error, and the value it hands back must be all-zero bits.
+  std::shared_ptr<Buffer> empty;
+  allocate_buffer(0, &empty);
+
+  Error status;
+  const float16_t returned = empty->read_f16(status);
+  EXPECT_FALSE(status.ok());
+  EXPECT_EQ(returned.to_bits(), 0x0000u);
+}
+
+TEST(Float16BufferTest, ReadBoundsErrorOnOneByte) {
+  // One byte is still short of the two that read_f16 consumes, so the read
+  // must report an error. The returned value is irrelevant here.
+  std::shared_ptr<Buffer> single_byte;
+  allocate_buffer(1, &single_byte);
+
+  Error status;
+  single_byte->read_f16(status);
+  EXPECT_FALSE(status.ok());
+}
+
+TEST(Float16BufferTest, MultipleValuesSequential) {
+  // Write several values back to back into one buffer, rewind, then read
+  // them out again in the same order.
+  const uint16_t sequence[] = {0x3C00, 0x4000, 0x7BFF, 0x0001, 0x7E00};
+  std::shared_ptr<Buffer> buffer;
+  allocate_buffer(32, &buffer);
+
+  for (const uint16_t bits : sequence) {
+    buffer->write_f16(float16_t::from_bits(bits));
+  }
+
+  buffer->reader_index(0);
+  for (const uint16_t bits : sequence) {
+    Error status;
+    const float16_t decoded = buffer->read_f16(status);
+    ASSERT_TRUE(status.ok());
+    EXPECT_EQ(decoded.to_bits(), bits);
+  }
+}
+
+// ============================================================
+// §3.8 / §6 Serializer<float16_t> type-ID check
+// ============================================================
+
+TEST(Float16SerializerTest, TypeId) {
+  // The float16 serializer must advertise TypeId::FLOAT16, and that
+  // enumerator must keep the numeric value 17.
+  const uint32_t serializer_id = static_cast<uint32_t>(
+      fory::serialization::Serializer<float16_t>::type_id);
+  const uint32_t float16_id = static_cast<uint32_t>(fory::TypeId::FLOAT16);
+  EXPECT_EQ(serializer_id, float16_id);
+  EXPECT_EQ(float16_id, 17u);
+}
+
+// ============================================================
+// §6 Full Fory struct round-trip tests
+// ============================================================
+
+namespace {
+
+// Test struct holding a single float16 field. Equality is bitwise, so NaN
+// payloads and the sign of zero are significant.
+struct Float16Scalar {
+  float16_t value;
+  bool operator==(const Float16Scalar &other) const {
+    const uint16_t mine = value.to_bits();
+    const uint16_t theirs = other.value.to_bits();
+    return mine == theirs;
+  }
+  FORY_STRUCT(Float16Scalar, value);
+};
+
+// Test struct holding a dynamically sized list of float16 values. Equality
+// requires equal length and bitwise-equal elements at every index.
+struct Float16Vector {
+  std::vector<float16_t> values;
+  bool operator==(const Float16Vector &other) const {
+    const size_t count = values.size();
+    if (count != other.values.size()) {
+      return false;
+    }
+    size_t index = 0;
+    while (index < count &&
+           values[index].to_bits() == other.values[index].to_bits()) {
+      ++index;
+    }
+    // Reaching the end means no element differed.
+    return index == count;
+  }
+  FORY_STRUCT(Float16Vector, values);
+};
+
+// Test struct holding a string-keyed map of float16 values. Equality requires
+// the same key set with bitwise-equal values.
+struct Float16Map {
+  std::map<std::string, float16_t> named_values;
+  bool operator==(const Float16Map &other) const {
+    if (named_values.size() != other.named_values.size()) {
+      return false;
+    }
+    // Both maps are ordered by key and have the same size, so walking them
+    // in lockstep finds any missing key or differing value.
+    auto rhs = other.named_values.begin();
+    for (auto lhs = named_values.begin(); lhs != named_values.end();
+         ++lhs, ++rhs) {
+      if (lhs->first != rhs->first) {
+        return false;
+      }
+      if (lhs->second.to_bits() != rhs->second.to_bits()) {
+        return false;
+      }
+    }
+    return true;
+  }
+  FORY_STRUCT(Float16Map, named_values);
+};
+
+// Test struct holding an optional float16. Two instances are equal when both
+// are empty, or both are engaged with bitwise-equal values.
+struct Float16Optional {
+  std::optional<float16_t> opt_value;
+  bool operator==(const Float16Optional &other) const {
+    const bool engaged = opt_value.has_value();
+    if (engaged != other.opt_value.has_value()) {
+      return false;
+    }
+    return !engaged || opt_value->to_bits() == other.opt_value->to_bits();
+  }
+  FORY_STRUCT(Float16Optional, opt_value);
+};
+
+// Test struct holding a fixed-size array of three float16 values, compared
+// element by element on raw bits.
+struct Float16Array {
+  std::array<float16_t, 3> values;
+  bool operator==(const Float16Array &other) const {
+    size_t index = 0;
+    while (index < values.size() &&
+           values[index].to_bits() == other.values[index].to_bits()) {
+      ++index;
+    }
+    // Reaching the end means every slot matched.
+    return index == values.size();
+  }
+  FORY_STRUCT(Float16Array, values);
+};
+
+// Builds the Fory instance shared by the round-trip tests: xlang enabled,
+// reference tracking disabled.
+fory::serialization::Fory make_xlang_fory() {
+  using fory::serialization::Fory;
+  return Fory::builder().xlang(true).track_ref(false).build();
+}
+
+} // namespace
+
+TEST(Float16SerializerTest, ScalarRoundTrip) {
+  // Serializes a struct with one float16 field and checks that deserializing
+  // the bytes reproduces the exact bit pattern, for a set of edge values.
+  auto fory = make_xlang_fory();
+  fory.register_struct<Float16Scalar>(200);
+
+  // +-0, +-1.0, +-Inf, max finite, min subnormal, min normal, canonical qNaN.
+  const uint16_t cases[] = {
+      0x0000, 0x8000, 0x3C00, 0xBC00, 0x7C00,
+      0xFC00, 0x7BFF, 0x0001, 0x0400, 0x7E00,
+  };
+  for (uint16_t bits : cases) {
+    Float16Scalar original{float16_t::from_bits(bits)};
+    auto ser = fory.serialize(original);
+    // ASSERT (not EXPECT): ser.value() below is only meaningful on success.
+    ASSERT_TRUE(ser.ok()) << "serialize failed bits=0x" << std::hex << bits
+                          << ": " << ser.error().to_string();
+    auto deser =
+        fory.deserialize<Float16Scalar>(ser.value().data(), ser.value().size());
+    ASSERT_TRUE(deser.ok()) << "deserialize failed bits=0x" << std::hex << bits
+                            << ": " << deser.error().to_string();
+    // Float16Scalar::operator== compares raw bits.
+    EXPECT_EQ(deser.value(), original)
+        << "round-trip mismatch bits=0x" << std::hex << bits;
+  }
+}
+
+TEST(Float16SerializerTest, VectorRoundTrip) {
+  // Compile-time guard: std::vector<float16_t> has to be serialized through
+  // the typed-array id rather than the generic list id.
+  static_assert(
+      fory::serialization::Serializer<std::vector<float16_t>>::type_id ==
+          fory::TypeId::FLOAT16_ARRAY,
+      "std::vector<float16_t> must use FLOAT16_ARRAY, not LIST");
+
+  auto fory = make_xlang_fory();
+  fory.register_struct<Float16Vector>(201);
+
+  Float16Vector input;
+  input.values = {
+      float16_t::from_float(0.0f),
+      float16_t::from_float(1.0f),
+      float16_t::from_float(-1.0f),
+      float16_t::from_bits(0x7C00), // +Inf
+      float16_t::from_bits(0x7BFF), // max finite
+      float16_t::from_bits(0x0001), // min subnormal
+  };
+
+  auto encoded = fory.serialize(input);
+  ASSERT_TRUE(encoded.ok()) << encoded.error().to_string();
+  auto decoded = fory.deserialize<Float16Vector>(encoded.value().data(),
+                                                 encoded.value().size());
+  ASSERT_TRUE(decoded.ok()) << decoded.error().to_string();
+  EXPECT_EQ(decoded.value(), input);
+}
+
+TEST(Float16SerializerTest, EmptyVectorRoundTrip) {
+  // A default-constructed (empty) vector field must survive a round trip.
+  auto fory = make_xlang_fory();
+  fory.register_struct<Float16Vector>(201);
+
+  const Float16Vector empty_input;
+  auto encoded = fory.serialize(empty_input);
+  ASSERT_TRUE(encoded.ok()) << encoded.error().to_string();
+  auto decoded = fory.deserialize<Float16Vector>(encoded.value().data(),
+                                                 encoded.value().size());
+  ASSERT_TRUE(decoded.ok()) << decoded.error().to_string();
+  EXPECT_EQ(decoded.value(), empty_input);
+}
+
+TEST(Float16SerializerTest, ArrayRoundTrip) {
+  // Compile-time guard: std::array<float16_t, N> has to be serialized through
+  // the typed-array id rather than the generic list id.
+  static_assert(
+      fory::serialization::Serializer<std::array<float16_t, 3>>::type_id ==
+          fory::TypeId::FLOAT16_ARRAY,
+      "std::array<float16_t, N> must use FLOAT16_ARRAY, not generic LIST");
+
+  auto fory = make_xlang_fory();
+  fory.register_struct<Float16Array>(204);
+
+  Float16Array input;
+  input.values = {
+      float16_t::from_float(1.0f),
+      float16_t::from_float(-0.5f),
+      float16_t::from_bits(0x7C00), // +Inf
+  };
+
+  auto encoded = fory.serialize(input);
+  ASSERT_TRUE(encoded.ok()) << encoded.error().to_string();
+  auto decoded = fory.deserialize<Float16Array>(encoded.value().data(),
+                                                encoded.value().size());
+  ASSERT_TRUE(decoded.ok()) << decoded.error().to_string();
+  EXPECT_EQ(decoded.value(), input);
+}
+
+TEST(Float16SerializerTest, MapRoundTrip) {
+  // A string -> float16 map field must round-trip with identical keys and
+  // bit-identical values.
+  auto fory = make_xlang_fory();
+  fory.register_struct<Float16Map>(202);
+
+  Float16Map input;
+  input.named_values = {
+      {"one", float16_t::from_float(1.0f)},
+      {"neg_inf", float16_t::from_bits(0xFC00)},
+      {"max", float16_t::from_bits(0x7BFF)},
+      {"zero", float16_t::from_float(0.0f)},
+  };
+
+  auto encoded = fory.serialize(input);
+  ASSERT_TRUE(encoded.ok()) << encoded.error().to_string();
+  auto decoded = fory.deserialize<Float16Map>(encoded.value().data(),
+                                              encoded.value().size());
+  ASSERT_TRUE(decoded.ok()) << decoded.error().to_string();
+  EXPECT_EQ(decoded.value(), input);
+}
+
+TEST(Float16SerializerTest, OptionalPresentRoundTrip) {
+  // An engaged std::optional<float16_t> field must round-trip to an engaged
+  // optional holding the same bits.
+  auto fory = make_xlang_fory();
+  fory.register_struct<Float16Optional>(203);
+
+  Float16Optional original;
+  original.opt_value = float16_t::from_float(1.5f);
+
+  auto ser = fory.serialize(original);
+  // ASSERT (not EXPECT): ser.value() below is only meaningful on success.
+  ASSERT_TRUE(ser.ok()) << ser.error().to_string();
+  auto deser = fory.deserialize<Float16Optional>(ser.value().data(),
+                                                 ser.value().size());
+  ASSERT_TRUE(deser.ok()) << deser.error().to_string();
+  EXPECT_EQ(deser.value(), original);
+}
+
+TEST(Float16SerializerTest, OptionalAbsentRoundTrip) {
+  // An empty std::optional<float16_t> field must round-trip to an empty
+  // optional.
+  auto fory = make_xlang_fory();
+  fory.register_struct<Float16Optional>(203);
+
+  Float16Optional original; // opt_value is nullopt
+  auto ser = fory.serialize(original);
+  // ASSERT (not EXPECT): ser.value() below is only meaningful on success.
+  ASSERT_TRUE(ser.ok()) << ser.error().to_string();
+  auto deser = fory.deserialize<Float16Optional>(ser.value().data(),
+                                                 ser.value().size());
+  ASSERT_TRUE(deser.ok()) << deser.error().to_string();
+  EXPECT_EQ(deser.value(), original);
+}
+
+TEST(Float16SerializerTest, WireGoldenOnePointZero) {
+  // 1.0 is 0x3C00, so its little-endian encoding is the byte pair
+  // [0x00, 0x3C]. The test only requires that this pair occurs somewhere in
+  // the serialized struct; it does not pin the offset.
+  auto fory = make_xlang_fory();
+  fory.register_struct<Float16Scalar>(200);
+
+  Float16Scalar one_value{float16_t::from_bits(0x3C00)};
+  auto encoded = fory.serialize(one_value);
+  ASSERT_TRUE(encoded.ok()) << encoded.error().to_string();
+
+  const std::vector<uint8_t> &bytes = encoded.value();
+  bool pair_seen = false;
+  // Slide a two-byte window across the payload until the pair shows up.
+  for (size_t next = 1; next < bytes.size() && !pair_seen; ++next) {
+    pair_seen = bytes[next - 1] == 0x00 && bytes[next] == 0x3C;
+  }
+  EXPECT_TRUE(pair_seen)
+      << "wire bytes [0x00, 0x3C] for 1.0 not found in serialized output";
+}
+
+// ---- Comparison static methods ----
+
+TEST(Float16CompareTest, EqualBasic) {
+  struct Pair {
+    float16_t lhs;
+    float16_t rhs;
+  };
+
+  // Equal: identical values, zeros of opposite sign, identical infinities.
+  const Pair equal_pairs[] = {
+      {H(kOne), H(kOne)},
+      {H(kPosZero), H(kNegZero)},
+      {H(kPosInf), H(kPosInf)},
+  };
+  for (const Pair &p : equal_pairs) {
+    EXPECT_TRUE(float16_t::equal(p.lhs, p.rhs))
+        << "lhs=0x" << std::hex << p.lhs.to_bits() << " rhs=0x"
+        << p.rhs.to_bits();
+  }
+
+  // Not equal: anything involving NaN (even NaN with itself), values of
+  // opposite sign, infinities of opposite sign.
+  const Pair unequal_pairs[] = {
+      {H(kQNaN), H(kQNaN)},
+      {H(kQNaN), H(kOne)},
+      {H(kOne), H(kNegOne)},
+      {H(kPosInf), H(kNegInf)},
+  };
+  for (const Pair &p : unequal_pairs) {
+    EXPECT_FALSE(float16_t::equal(p.lhs, p.rhs))
+        << "lhs=0x" << std::hex << p.lhs.to_bits() << " rhs=0x"
+        << p.rhs.to_bits();
+  }
+}
+
+TEST(Float16CompareTest, LessBasic) {
+  struct Pair {
+    float16_t lhs;
+    float16_t rhs;
+  };
+
+  // Strictly ordered pairs.
+  const Pair ascending[] = {
+      {H(kOne), H(0x4000u)},                    // 1.0 < 2.0
+      {H(kNegOne), H(kOne)},                    // -1.0 < 1.0
+      {float16_t::from_float(-2.0f), H(kNegOne)}, // -2.0 < -1.0
+      {H(kNegInf), H(kOne)},                    // -Inf < any finite
+      {H(kOne), H(kPosInf)},                    // finite < +Inf
+  };
+  for (const Pair &p : ascending) {
+    EXPECT_TRUE(float16_t::less(p.lhs, p.rhs))
+        << "lhs=0x" << std::hex << p.lhs.to_bits() << " rhs=0x"
+        << p.rhs.to_bits();
+  }
+
+  // Pairs for which less must be false: reversed order, equal values, any
+  // NaN operand, and the two zeros in either order (+0 == -0).
+  const Pair not_ascending[] = {
+      {H(0x4000u), H(kOne)},      {H(kOne), H(kOne)},
+      {H(kQNaN), H(kOne)},        {H(kOne), H(kQNaN)},
+      {H(kNegZero), H(kPosZero)}, {H(kPosZero), H(kNegZero)},
+  };
+  for (const Pair &p : not_ascending) {
+    EXPECT_FALSE(float16_t::less(p.lhs, p.rhs))
+        << "lhs=0x" << std::hex << p.lhs.to_bits() << " rhs=0x"
+        << p.rhs.to_bits();
+  }
+}
+
+TEST(Float16CompareTest, LessEqBasic) {
+  const float16_t one = H(kOne);
+  const float16_t two = H(0x4000u);
+
+  // Reflexive and strictly-smaller cases hold; the reversed order does not.
+  EXPECT_TRUE(float16_t::less_eq(one, one));
+  EXPECT_TRUE(float16_t::less_eq(one, two));
+  EXPECT_FALSE(float16_t::less_eq(two, one));
+  // A NaN operand makes the comparison false.
+  EXPECT_FALSE(float16_t::less_eq(H(kQNaN), one));
+  // +0 <= -0 holds because the two zeros compare equal.
+  EXPECT_TRUE(float16_t::less_eq(H(kPosZero), H(kNegZero)));
+}
+
+TEST(Float16CompareTest, GreaterAndGreaterEq) {
+  const float16_t one = H(kOne);
+  const float16_t two = H(0x4000u);
+  const float16_t quiet_nan = H(kQNaN);
+
+  // greater: strict, so equal operands and NaN operands are rejected.
+  EXPECT_TRUE(float16_t::greater(two, one));
+  EXPECT_FALSE(float16_t::greater(one, one));
+  EXPECT_FALSE(float16_t::greater(quiet_nan, one));
+
+  // greater_eq: also accepts equal operands, still rejects NaN.
+  EXPECT_TRUE(float16_t::greater_eq(one, one));
+  EXPECT_TRUE(float16_t::greater_eq(two, one));
+  EXPECT_FALSE(float16_t::greater_eq(quiet_nan, one));
+}
+
+TEST(Float16CompareTest, CompareReturnValue) {
+  // compare yields -1 / 0 / +1 for less / equal / greater. Unordered (NaN)
+  // operands and zeros of opposite sign both yield 0.
+  struct Case {
+    uint16_t lhs;
+    uint16_t rhs;
+    int expected;
+  };
+  const Case cases[] = {
+      {kOne, kOne, 0},          {kOne, 0x4000u, -1}, {0x4000u, kOne, 1},
+      {kQNaN, kOne, 0},         {kOne, kQNaN, 0},    {kPosZero, kNegZero, 0},
+  };
+  for (const Case &c : cases) {
+    EXPECT_EQ(float16_t::compare(H(c.lhs), H(c.rhs)), c.expected)
+        << "lhs=0x" << std::hex << c.lhs << " rhs=0x" << c.rhs;
+  }
+}
+
+TEST(Float16CompareTest, OperatorOverloads) {
+  const float16_t one = H(kOne);
+  const float16_t two = H(0x4000u);
+  const float16_t quiet_nan = H(kQNaN);
+  const float16_t pos_zero = H(kPosZero);
+  const float16_t neg_zero = H(kNegZero);
+
+  // Equality and inequality.
+  EXPECT_TRUE(one == one);
+  EXPECT_FALSE(one == two);
+  EXPECT_TRUE(one != two);
+  EXPECT_FALSE(one != one);
+
+  // Strict and non-strict "less".
+  EXPECT_TRUE(one < two);
+  EXPECT_FALSE(two < one);
+  EXPECT_TRUE(one <= one);
+  EXPECT_TRUE(one <= two);
+  EXPECT_FALSE(two <= one);
+
+  // Strict and non-strict "greater".
+  EXPECT_TRUE(two > one);
+  EXPECT_FALSE(one > two);
+  EXPECT_TRUE(two >= one);
+  EXPECT_TRUE(one >= one);
+  EXPECT_FALSE(one >= two);
+
+  // With a NaN operand every comparison is false except !=.
+  EXPECT_FALSE(quiet_nan == one);
+  EXPECT_TRUE(quiet_nan != one);
+  EXPECT_FALSE(quiet_nan < one);
+  EXPECT_FALSE(quiet_nan <= one);
+  EXPECT_FALSE(quiet_nan > one);
+  EXPECT_FALSE(quiet_nan >= one);
+
+  // Zeros of opposite sign compare equal.
+  EXPECT_TRUE(pos_zero == neg_zero);
+  EXPECT_FALSE(pos_zero != neg_zero);
+}
+
+} // namespace
+} // namespace fory
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]