alamb commented on code in PR #13947:
URL: https://github.com/apache/datafusion/pull/13947#discussion_r1899592668


##########
datafusion/core/tests/fuzz_cases/pruning.rs:
##########
@@ -38,151 +38,266 @@ use parquet::{
     file::properties::{EnabledStatistics, WriterProperties},
 };
 use rand::seq::SliceRandom;
+use tokio::sync::Mutex;
 use url::Url;
 
 #[tokio::test]
-async fn test_fuzz_utf8() {
-    // Fuzz testing for UTF8 predicate pruning
-    // The basic idea is that query results should always be the same with or without stats/pruning
-    // If we get this right we at least guarantee that there are no incorrect results
-    // There may still be suboptimal pruning or stats but that's something we can try to catch
-    // with more targeted tests.
-
-    // Since we know where the edge cases might be we don't do random black box fuzzing.
-    // Instead we fuzz on specific pre-defined axis:
-    //
-    // - Which characters are in each value. We want to make sure to include characters that when
-    //   incremented, truncated or otherwise manipulated might cause issues.
-    // - The values in each row group. This impacts which min/max stats are generated for each rg.
-    //   We'll generate combinations of the characters with lengths ranging from 1 to 4.
-    // - Truncation of statistics to 1, 2 or 3 characters as well as no truncation.
-
-    let mut rng = rand::thread_rng();
-
-    let characters = [
-        "z",
-        "0",
-        "~",
-        "ß",
-        "℣",
-        "%", // this one is useful for like/not like tests since it will result in randomly inserted wildcards
-        "_", // this one is useful for like/not like tests since it will result in randomly inserted wildcards
-        "\u{7F}",
-        "\u{7FF}",
-        "\u{FF}",
-        "\u{10FFFF}",
-        "\u{D7FF}",
-        "\u{FDCF}",
-        // null character
-        "\u{0}",
-    ];
-
-    let value_lengths = [1, 2, 3];
-
-    // generate all combinations of characters with lengths ranging from 1 to 4
-    let mut values = vec![];
-    for length in &value_lengths {
-        values.extend(
-            characters
-                .iter()
-                .cloned()
-                .combinations(*length)
-                // now get all permutations of each combination
-                .flat_map(|c| c.into_iter().permutations(*length))
-                // and join them into strings
-                .map(|c| c.join("")),
-        );
-    }
+async fn test_utf8_eq() {

Review Comment:
   Each individual predicate now runs in its own test, so each one shows up 
separately in the test output.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to