zhuqi-lucas commented on code in PR #19064:
URL: https://github.com/apache/datafusion/pull/19064#discussion_r2614159095


##########
datafusion/datasource/src/file_scan_config.rs:
##########
@@ -766,6 +767,107 @@ impl DataSource for FileScanConfig {
             }
         }
     }
+
+    fn try_pushdown_sort(
+        &self,
+        order: &[PhysicalSortExpr],
+    ) -> Result<Option<Arc<dyn DataSource>>> {
+        let current_ordering = match self.output_ordering.first() {
+            Some(ordering) => ordering.as_ref(),
+            None => return Ok(None),
+        };
+
+        // Only support reverse ordering pushdown until now
+        if !is_reverse_ordering(order, current_ordering) {
+            return Ok(None);
+        }
+
+        // Ask the file source if it can handle the sort pushdown
+        let pushdown_result = self.file_source.try_pushdown_sort(order)?;
+
+        let new_file_source = match pushdown_result {
+            SortOrderPushdownResult::Exact { inner }
+            | SortOrderPushdownResult::Inexact { inner } => inner,
+            SortOrderPushdownResult::Unsupported => return Ok(None),
+        };
+
+        let mut new_config = self.clone();
+
+        // Reverse file groups: when scanning in reverse, we need to read files
+        // in reverse order to maintain the correct global ordering
+        new_config.file_groups = new_config
+            .file_groups
+            .into_iter()
+            .map(|group| {
+                let mut files = group.into_inner();
+                files.reverse();
+                files.into()
+            })
+            .collect();
+
+        // Phase 1: DO NOT change output_ordering
+        // The ordering is still the same as before (e.g., ASC) because:
+        // 1. We're only reversing row groups, not rows within groups
+        // 2. This makes the scan "closer" to DESC but not guaranteed
+        // 3. The Sort operator above will still be needed
+        //
+        // Keep the original output_ordering unchanged
+        // new_config.output_ordering = ... (NO CHANGE)
+
+        new_config.file_source = new_file_source;
+
+        Ok(Some(Arc::new(new_config)))
+    }
+}
+
+/// Check if the requested ordering can be satisfied by reversing the current 
ordering.
+///
+/// This function supports **prefix matching**: if the file has ordering [A 
DESC, B ASC]
+/// and we need [A ASC], reversing the scan gives us [A ASC, B DESC], which 
satisfies
+/// the requirement since [A ASC] is a prefix.
+///
+/// # Arguments
+/// * `requested` - The ordering required by the query
+/// * `current` - The natural ordering of the data source (e.g., from file 
metadata)
+///
+/// # Returns
+/// `true` if reversing the current ordering would satisfy the requested 
ordering
+///
+/// # Example
+/// ```text
+/// Current:   [number DESC, letter ASC]
+/// Requested: [number ASC]
+/// Reversed:  [number ASC, letter DESC]  ✓ Prefix match!
+/// ```
+fn is_reverse_ordering(
+    requested: &[PhysicalSortExpr],
+    current: &[PhysicalSortExpr],
+) -> bool {
+    // Allow prefix matching - we can satisfy a prefix of the current ordering
+    // by reversing the scan
+    if requested.len() > current.len() {
+        return false;
+    }
+
+    requested.iter().zip(current.iter()).all(|(req, cur)| {
+        // Check if the expressions are semantically equivalent using 
PhysicalExpr::eq
+        // This is more robust than string comparison as it handles:
+        // - Expression equivalence (not just string representation)
+        // - Complex expressions that might have different string forms but 
same semantics
+        let exprs_match = req.expr.eq(&cur.expr);
+
+        // Now check if the sort options are exactly reversed
+        // For a valid reverse scan:
+        //   - descending must be opposite: ASC ↔ DESC
+        //   - nulls_first must be opposite: NULLS FIRST ↔ NULLS LAST
+        let options_reversed = req.options.descending != cur.options.descending
+            && req.options.nulls_first != cur.options.nulls_first;
+
+        // Both conditions must be true:
+        //   1. Expressions are semantically equivalent
+        //   2. Completely reversed sort options
+        exprs_match && options_reversed
+    })

Review Comment:
   Thanks!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to