(iceberg-rust) branch main updated: fix: fix float compare order (#1416)

liurenjie1024 Mon, 09 Jun 2025 02:12:25 -0700

This is an automated email from the ASF dual-hosted git repository.

liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git



The following commit(s) were added to refs/heads/main by this push:
     new c74a27ac fix: fix float compare order (#1416)
c74a27ac is described below

commit c74a27ac0d77314926240d325701107e5360f836
Author: ZENOTME <[email protected]>
AuthorDate: Mon Jun 9 17:11:46 2025 +0800

    fix: fix float compare order (#1416)
    
    ## Which issue does this PR close?
    
    Closes #1415
    
    ## What changes are included in this PR?
    
    
    ## Are these changes tested?
    
    
    
    Co-authored-by: ZENOTME <[email protected]>
---
 crates/iceberg/src/spec/values.rs | 104 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 101 insertions(+), 3 deletions(-)

diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs
index 786fe961..b005f7ab 100644
--- a/crates/iceberg/src/spec/values.rs
+++ b/crates/iceberg/src/spec/values.rs
@@ -20,6 +20,7 @@
  */
 
 use std::any::Any;
+use std::cmp::Ordering;
 use std::collections::HashMap;
 use std::fmt::{Display, Formatter};
 use std::hash::Hash;
@@ -29,7 +30,7 @@ use std::str::FromStr;
 pub use _serde::RawLiteral;
 use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
 use num_bigint::BigInt;
-use ordered_float::OrderedFloat;
+use ordered_float::{Float, OrderedFloat};
 use rust_decimal::Decimal;
 use rust_decimal::prelude::ToPrimitive;
 use serde::de::{
@@ -214,6 +215,36 @@ impl<'de> Deserialize<'de> for Datum {
     }
 }
 
+// Compare two floats following the Iceberg float ordering rules:
+//  -NaN < -Infinity < -value < -0 < 0 < value < Infinity < NaN
+fn iceberg_float_cmp<T: Float>(a: T, b: T) -> Option<Ordering> {
+    if a.is_nan() && b.is_nan() {
+        return match (a.is_sign_negative(), b.is_sign_negative()) {
+            (true, false) => Some(Ordering::Less),
+            (false, true) => Some(Ordering::Greater),
+            _ => Some(Ordering::Equal),
+        };
+    }
+
+    if a.is_nan() {
+        return Some(if a.is_sign_negative() {
+            Ordering::Less
+        } else {
+            Ordering::Greater
+        });
+    }
+
+    if b.is_nan() {
+        return Some(if b.is_sign_negative() {
+            Ordering::Greater
+        } else {
+            Ordering::Less
+        });
+    }
+
+    a.partial_cmp(&b)
+}
+
 impl PartialOrd for Datum {
     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
         match (&self.literal, &other.literal, &self.r#type, &other.r#type) {
@@ -241,13 +272,13 @@ impl PartialOrd for Datum {
                 PrimitiveLiteral::Float(other_val),
                 PrimitiveType::Float,
                 PrimitiveType::Float,
-            ) => val.partial_cmp(other_val),
+            ) => iceberg_float_cmp(*val, *other_val),
             (
                 PrimitiveLiteral::Double(val),
                 PrimitiveLiteral::Double(other_val),
                 PrimitiveType::Double,
                 PrimitiveType::Double,
-            ) => val.partial_cmp(other_val),
+            ) => iceberg_float_cmp(*val, *other_val),
             (
                 PrimitiveLiteral::Int(val),
                 PrimitiveLiteral::Int(other_val),
@@ -3845,4 +3876,71 @@ mod tests {
 
         assert_eq!(result, expected);
     }
+
+    #[test]
+    fn test_iceberg_float_order() {
+        // Test float ordering
+        let float_values = vec![
+            Datum::float(f32::NAN),
+            Datum::float(-f32::NAN),
+            Datum::float(f32::MAX),
+            Datum::float(f32::MIN),
+            Datum::float(f32::INFINITY),
+            Datum::float(-f32::INFINITY),
+            Datum::float(1.0),
+            Datum::float(-1.0),
+            Datum::float(0.0),
+            Datum::float(-0.0),
+        ];
+
+        let mut float_sorted = float_values.clone();
+        float_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
+
+        let float_expected = vec![
+            Datum::float(-f32::NAN),
+            Datum::float(-f32::INFINITY),
+            Datum::float(f32::MIN),
+            Datum::float(-1.0),
+            Datum::float(-0.0),
+            Datum::float(0.0),
+            Datum::float(1.0),
+            Datum::float(f32::MAX),
+            Datum::float(f32::INFINITY),
+            Datum::float(f32::NAN),
+        ];
+
+        assert_eq!(float_sorted, float_expected);
+
+        // Test double ordering
+        let double_values = vec![
+            Datum::double(f64::NAN),
+            Datum::double(-f64::NAN),
+            Datum::double(f64::INFINITY),
+            Datum::double(-f64::INFINITY),
+            Datum::double(f64::MAX),
+            Datum::double(f64::MIN),
+            Datum::double(1.0),
+            Datum::double(-1.0),
+            Datum::double(0.0),
+            Datum::double(-0.0),
+        ];
+
+        let mut double_sorted = double_values.clone();
+        double_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
+
+        let double_expected = vec![
+            Datum::double(-f64::NAN),
+            Datum::double(-f64::INFINITY),
+            Datum::double(f64::MIN),
+            Datum::double(-1.0),
+            Datum::double(-0.0),
+            Datum::double(0.0),
+            Datum::double(1.0),
+            Datum::double(f64::MAX),
+            Datum::double(f64::INFINITY),
+            Datum::double(f64::NAN),
+        ];
+
+        assert_eq!(double_sorted, double_expected);
+    }
 }

(iceberg-rust) branch main updated: fix: fix float compare order (#1416)

Reply via email to