Tim-53 commented on code in PR #18663:
URL: https://github.com/apache/datafusion/pull/18663#discussion_r2525551002


##########
datafusion/functions-aggregate/src/percentile_cont.rs:
##########
@@ -747,69 +811,82 @@ impl<T: ArrowNumericType> Accumulator for 
DistinctPercentileContAccumulator<T> {
 /// For percentile p and n values:
 /// - If p * (n-1) is an integer, return the value at that position
 /// - Otherwise, interpolate between the two closest values
-fn calculate_percentile<T: ArrowNumericType>(
+fn calculate_percentiles<T: ArrowNumericType>(
     mut values: Vec<T::Native>,
-    percentile: f64,
-) -> Option<T::Native> {
+    percentiles: &[f64],
+) -> Vec<Option<T::Native>> {
     let cmp = |x: &T::Native, y: &T::Native| x.compare(*y);
 
     let len = values.len();
     if len == 0 {
-        None
+        vec![None; percentiles.len()]
     } else if len == 1 {
-        Some(values[0])
-    } else if percentile == 0.0 {
+        vec![Some(values[0]); percentiles.len()]
+    } else if percentiles[0] == 0.0 {
         // Get minimum value
-        Some(
-            *values
-                .iter()
-                .min_by(|a, b| cmp(a, b))
-                .expect("we checked for len > 0 a few lines above"),
-        )
-    } else if percentile == 1.0 {
+        vec![
+            Some(
+                *values
+                    .iter()
+                    .min_by(|a, b| cmp(a, b))
+                    .expect("we checked for len > 0 a few lines above"),
+            );
+            percentiles.len()
+        ]
+    } else if percentiles[0] == 1.0 {
         // Get maximum value
-        Some(
-            *values
-                .iter()
-                .max_by(|a, b| cmp(a, b))
-                .expect("we checked for len > 0 a few lines above"),
-        )
-    } else {
-        // Calculate the index using the formula: p * (n - 1)
-        let index = percentile * ((len - 1) as f64);
-        let lower_index = index.floor() as usize;
-        let upper_index = index.ceil() as usize;
-
-        if lower_index == upper_index {
-            // Exact index, return the value at that position
-            let (_, value, _) = values.select_nth_unstable_by(lower_index, 
cmp);
-            Some(*value)
-        } else {
-            // Need to interpolate between two values
-            // First, partition at lower_index to get the lower value
-            let (_, lower_value, _) = 
values.select_nth_unstable_by(lower_index, cmp);
-            let lower_value = *lower_value;
-
-            // Then partition at upper_index to get the upper value
-            let (_, upper_value, _) = 
values.select_nth_unstable_by(upper_index, cmp);
-            let upper_value = *upper_value;
-
-            // Linear interpolation using wrapping arithmetic
-            // We use wrapping operations here (matching the approach in 
median.rs) because:
-            // 1. Both values come from the input data, so diff is bounded by 
the value range
-            // 2. fraction is between 0 and 1, and INTERPOLATION_PRECISION is 
small enough
-            //    to prevent overflow when combined with typical numeric ranges
-            // 3. The result is guaranteed to be between lower_value and 
upper_value
-            // 4. For floating-point types, wrapping ops behave the same as 
standard ops
-            let fraction = index - (lower_index as f64);
-            let diff = upper_value.sub_wrapping(lower_value);
-            let interpolated = lower_value.add_wrapping(
-                diff.mul_wrapping(T::Native::usize_as(
-                    (fraction * INTERPOLATION_PRECISION as f64) as usize,
-                ))
-                .div_wrapping(T::Native::usize_as(INTERPOLATION_PRECISION)),
+        vec![
+            Some(
+                *values
+                    .iter()
+                    .max_by(|a, b| cmp(a, b))
+                    .expect("we checked for len > 0 a few lines above"),
             );
-            Some(interpolated)
-        }
+            percentiles.len()
+        ]
+    } else {
+        percentiles
+            .iter()
+            .map(|percentile| {

Review Comment:
   That's actually what i did in 
https://github.com/apache/datafusion/pull/18663/commits/27243fd6d0568f0b122201983c016e5de0e7e44d



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to