This is an automated email from the ASF dual-hosted git repository.

haonan pushed a commit to branch iotdb
in repository https://gitbox.apache.org/repos/asf/tsfile.git

commit 014f1ae275b984ace30c96c9100fdd30c57af5b6
Author: Zhihao Shen <[email protected]>
AuthorDate: Thu Jul 18 09:23:59 2024 +0800

    Optimize time and value in filter by TsFile statistics.
---
 .../read/filter/operator/TimeFilterOperators.java  |  32 ++++++-
 .../read/filter/operator/ValueFilterOperators.java | 101 ++++++++++++++++++++-
 .../tsfile/read/filter/StatisticsFilterTest.java   |  68 ++++++++++++++
 3 files changed, 194 insertions(+), 7 deletions(-)

diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/TimeFilterOperators.java b/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/TimeFilterOperators.java
index 2c3214b4..ed0d0c57 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/TimeFilterOperators.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/TimeFilterOperators.java
@@ -20,7 +20,6 @@
 package org.apache.tsfile.read.filter.operator;
 
 import org.apache.tsfile.read.common.TimeRange;
-import org.apache.tsfile.read.filter.basic.DisableStatisticsTimeFilter;
 import org.apache.tsfile.read.filter.basic.Filter;
 import org.apache.tsfile.read.filter.basic.OperatorType;
 import org.apache.tsfile.read.filter.basic.TimeFilter;
@@ -492,16 +491,22 @@ public final class TimeFilterOperators {
   }
 
   // base class for TimeIn, TimeNotIn
-  abstract static class TimeColumnSetFilter extends 
DisableStatisticsTimeFilter {
+  abstract static class TimeColumnSetFilter extends TimeFilter {
 
     protected final Set<Long> candidates;
+    protected final Long candidatesMin;
+    protected final Long candidatesMax;
 
     protected TimeColumnSetFilter(Set<Long> candidates) {
       this.candidates = Objects.requireNonNull(candidates, "candidates cannot 
be null");
+      this.candidatesMin = Collections.min(candidates);
+      this.candidatesMax = Collections.max(candidates);
     }
 
     protected TimeColumnSetFilter(ByteBuffer buffer) {
       this.candidates = ReadWriteIOUtils.readLongSet(buffer);
+      this.candidatesMin = Collections.min(candidates);
+      this.candidatesMax = Collections.max(candidates);
     }
 
     @Override
@@ -548,6 +553,17 @@ public final class TimeFilterOperators {
       return candidates.contains(time);
     }
 
+    @Override
+    public boolean satisfyStartEndTime(long startTime, long endTime) {
+      return startTime <= candidatesMax && endTime >= candidatesMin;
+    }
+
+    @Override
+    public boolean containStartEndTime(long startTime, long endTime) {
+      // Make `allSatisfy` always return false
+      return false;
+    }
+
     @Override
     public List<TimeRange> getTimeRanges() {
       List<TimeRange> res = new ArrayList<>();
@@ -583,6 +599,18 @@ public final class TimeFilterOperators {
       return !candidates.contains(time);
     }
 
+    @Override
+    public boolean satisfyStartEndTime(long startTime, long endTime) {
+      // Make `canSkip` always return false
+      return true;
+    }
+
+    @Override
+    public boolean containStartEndTime(long startTime, long endTime) {
+      // Make `allSatisfy` always return false
+      return false;
+    }
+
     @Override
     public List<TimeRange> getTimeRanges() {
       return Collections.singletonList(new TimeRange(Long.MIN_VALUE, 
Long.MAX_VALUE));
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/ValueFilterOperators.java b/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/ValueFilterOperators.java
index c2b7284c..905c364b 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/ValueFilterOperators.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/ValueFilterOperators.java
@@ -35,6 +35,7 @@ import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.Serializable;
 import java.nio.ByteBuffer;
+import java.util.Collections;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
@@ -706,18 +707,24 @@ public final class ValueFilterOperators {
   }
 
   // base class for ValueIn, ValueNotIn
-  abstract static class ValueColumnSetFilter<T> extends 
DisableStatisticsValueFilter {
+  abstract static class ValueColumnSetFilter<T extends Comparable<T>> extends 
ValueFilter {
 
     protected final Set<T> candidates;
+    protected final T candidatesMin;
+    protected final T candidatesMax;
 
     protected ValueColumnSetFilter(int measurementIndex, Set<T> candidates) {
       super(measurementIndex);
       this.candidates = Objects.requireNonNull(candidates, "candidates cannot 
be null");
+      this.candidatesMin = candidates.size() != 0 ? 
Collections.min(candidates) : null;
+      this.candidatesMax = candidates.size() != 0 ? 
Collections.max(candidates) : null;
     }
 
     protected ValueColumnSetFilter(ByteBuffer buffer) {
       super(buffer);
       candidates = ReadWriteIOUtils.readObjectSet(buffer);
+      this.candidatesMin = candidates.size() != 0 ? 
Collections.min(candidates) : null;
+      this.candidatesMax = candidates.size() != 0 ? 
Collections.max(candidates) : null;
     }
 
     @Override
@@ -753,8 +760,7 @@ public final class ValueFilterOperators {
     }
   }
 
-  public static final class ValueIn<T> extends ValueColumnSetFilter<T> {
-
+  public static final class ValueIn<T extends Comparable<T>> extends 
ValueColumnSetFilter<T> {
     public ValueIn(int measurementIndex, Set<T> candidates) {
       super(measurementIndex, candidates);
     }
@@ -765,7 +771,78 @@ public final class ValueFilterOperators {
 
     @Override
     public boolean valueSatisfy(Object value) {
-      return candidates.contains(value);
+      return candidates.contains((T) value);
+    }
+
+    @Override
+    public boolean canSkip(IMetadata metadata) {
+      Optional<Statistics<? extends Serializable>> statistics =
+          metadata.getMeasurementStatistics(measurementIndex);
+
+      // All values are null, but candidates do not contain null
+      if ((!statistics.isPresent() || isAllNulls(statistics.get())) && 
candidates.size() != 0) {
+        return true;
+      }
+
+      // No value is null, but the only candidate is null (i.e. the candidate set is empty)
+      if (!metadata.hasNullValue(measurementIndex) && candidates.size() == 0) {
+        return true;
+      }
+
+      if (statistics.isPresent()) {
+        T valuesMin = (T) statistics.get().getMinValue();
+        T valuesMax = (T) statistics.get().getMaxValue();
+        // All values are same
+        if (valuesMin.compareTo(valuesMax) == 0) {
+          return !candidates.contains(valuesMin);
+        } else {
+          if (candidates.size() != 0) {
+            // All values are below candidatesMin, or above candidatesMax
+            if (candidatesMin.compareTo(valuesMax) > 0) {
+              return true;
+            }
+            if (candidatesMax.compareTo(valuesMin) < 0) {
+              return true;
+            }
+          }
+        }
+      }
+
+      return false;
+    }
+
+    @Override
+    protected boolean canSkip(Statistics<? extends Serializable> statistics) {
+      throw new NotImplementedException();
+    }
+
+    @Override
+    public boolean allSatisfy(IMetadata metadata) {
+      Optional<Statistics<? extends Serializable>> statistics =
+          metadata.getMeasurementStatistics(measurementIndex);
+
+      // All values are null, and candidate contains null
+      // Note null value cannot be added to set
+      if ((!statistics.isPresent() || isAllNulls(statistics.get())) && 
candidates.size() == 0) {
+        return true;
+      }
+
+      // With statistics present, min == max means every value is identical
+      if (statistics.isPresent()) {
+        T valuesMin = (T) statistics.get().getMinValue();
+        T valuesMax = (T) statistics.get().getMaxValue();
+        // All values are same
+        if (valuesMin.compareTo(valuesMax) == 0) {
+          return candidates.contains(valuesMin);
+        }
+      }
+
+      return false;
+    }
+
+    @Override
+    protected boolean allSatisfy(Statistics<? extends Serializable> 
statistics) {
+      throw new NotImplementedException();
     }
 
     @Override
@@ -777,9 +854,13 @@ public final class ValueFilterOperators {
     public OperatorType getOperatorType() {
       return OperatorType.VALUE_IN;
     }
+
+    private boolean isAllNulls(Statistics<? extends Serializable> statistics) {
+      return statistics.getCount() == 0;
+    }
   }
 
-  public static final class ValueNotIn<T> extends ValueColumnSetFilter<T> {
+  public static final class ValueNotIn<T extends Comparable<T>> extends 
ValueColumnSetFilter<T> {
 
     public ValueNotIn(int measurementIndex, Set<T> candidates) {
       super(measurementIndex, candidates);
@@ -794,6 +875,16 @@ public final class ValueFilterOperators {
       return !candidates.contains(value);
     }
 
+    @Override
+    protected boolean canSkip(Statistics<? extends Serializable> statistics) {
+      return false;
+    }
+
+    @Override
+    protected boolean allSatisfy(Statistics<? extends Serializable> 
statistics) {
+      return false;
+    }
+
     @Override
     public Filter reverse() {
       return new ValueIn<>(measurementIndex, candidates);
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/read/filter/StatisticsFilterTest.java b/java/tsfile/src/test/java/org/apache/tsfile/read/filter/StatisticsFilterTest.java
index 2acf3e35..d967756f 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/read/filter/StatisticsFilterTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/read/filter/StatisticsFilterTest.java
@@ -30,6 +30,9 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.util.HashSet;
+import java.util.Set;
+
 import static org.apache.tsfile.read.filter.FilterTestUtil.newAlignedMetadata;
 import static org.apache.tsfile.read.filter.FilterTestUtil.newMetadata;
 import static org.apache.tsfile.read.filter.operator.Not.CONTAIN_NOT_ERR_MSG;
@@ -280,4 +283,69 @@ public class StatisticsFilterTest {
     Assert.assertFalse(valueIsNotNull.allSatisfy(alignedMetadata2));
     Assert.assertFalse(valueIsNotNull.allSatisfy(alignedMetadata3));
   }
+
+  @Test
+  public void testIn() {
+    // Non-null candidates
+    Set<Long> candidates = new HashSet<>();
+    candidates.add(1L);
+    candidates.add(10L);
+
+    // Time
+    Filter timeIn = TimeFilterApi.in(candidates);
+    Assert.assertFalse(timeIn.canSkip(metadata1));
+    Assert.assertTrue(timeIn.canSkip(metadata2));
+    Assert.assertFalse(timeIn.canSkip(metadata3));
+    Assert.assertFalse(timeIn.allSatisfy(metadata1));
+    Assert.assertFalse(timeIn.allSatisfy(metadata2));
+    Assert.assertFalse(timeIn.allSatisfy(metadata3));
+    // Value
+    Filter valueIn = ValueFilterApi.in(candidates);
+    Assert.assertFalse(valueIn.canSkip(metadata1));
+    Assert.assertTrue(valueIn.canSkip(metadata2));
+    Assert.assertFalse(valueIn.canSkip(metadata3));
+    Assert.assertTrue(valueIn.canSkip(alignedMetadata3));
+    Assert.assertFalse(valueIn.allSatisfy(metadata1));
+    Assert.assertFalse(valueIn.allSatisfy(metadata2));
+    Assert.assertTrue(valueIn.allSatisfy(metadata3));
+    Assert.assertFalse(valueIn.allSatisfy(alignedMetadata3));
+
+    // Null candidate
+    Set<Long> nullCandidate = new HashSet<>();
+    candidates.add(null);
+
+    // Time cannot be null
+    Filter valueIn2 = ValueFilterApi.in(nullCandidate);
+    Assert.assertTrue(valueIn2.canSkip(metadata1));
+    Assert.assertTrue(valueIn2.canSkip(metadata2));
+    Assert.assertTrue(valueIn2.canSkip(metadata3));
+    Assert.assertFalse(valueIn2.canSkip(alignedMetadata3));
+    Assert.assertFalse(valueIn2.allSatisfy(metadata1));
+    Assert.assertFalse(valueIn2.allSatisfy(metadata2));
+    Assert.assertFalse(valueIn2.allSatisfy(metadata3));
+    Assert.assertTrue(valueIn2.allSatisfy(alignedMetadata3));
+  }
+
+  @Test
+  public void testNotIn() {
+    Set<Long> candidates = new HashSet<>();
+    candidates.add(1L);
+    candidates.add(10L);
+
+    Filter timeNotIn = TimeFilterApi.notIn(candidates);
+    Assert.assertFalse(timeNotIn.canSkip(metadata1));
+    Assert.assertFalse(timeNotIn.canSkip(metadata2));
+    Assert.assertFalse(timeNotIn.canSkip(metadata3));
+    Assert.assertFalse(timeNotIn.allSatisfy(metadata1));
+    Assert.assertFalse(timeNotIn.allSatisfy(metadata2));
+    Assert.assertFalse(timeNotIn.allSatisfy(metadata3));
+
+    Filter valueNotIn = ValueFilterApi.notIn(candidates);
+    Assert.assertFalse(valueNotIn.canSkip(metadata1));
+    Assert.assertFalse(valueNotIn.canSkip(metadata2));
+    Assert.assertFalse(valueNotIn.canSkip(metadata3));
+    Assert.assertFalse(valueNotIn.allSatisfy(metadata1));
+    Assert.assertFalse(valueNotIn.allSatisfy(metadata2));
+    Assert.assertFalse(valueNotIn.allSatisfy(metadata3));
+  }
 }

Reply via email to