huaxingao commented on a change in pull request #923:
URL: https://github.com/apache/parquet-mr/pull/923#discussion_r699767560
##########
File path:
parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
##########
@@ -287,6 +291,27 @@ boolean isNullPage(int pageIndex) {
pageIndex -> nullCounts[pageIndex] > 0 ||
matchingIndexes.contains(pageIndex));
}
+ @Override
Review comment:
@gszadovszky I tried this: if the values in a page are <= the max value
in the IN set, and >= the min value in the IN set, then the page might contain
the values in the IN set. I am not sure if this is what you want, so I only
changed `In` for now. Please take a look. Thanks!
##########
File path:
parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java
##########
@@ -247,6 +250,80 @@ public int hashCode() {
}
}
+ // base class for In and NotIn
Review comment:
Fixed. Thanks!
##########
File path:
parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java
##########
@@ -247,6 +250,80 @@ public int hashCode() {
}
}
+ // base class for In and NotIn
+ public static abstract class SetColumnFilterPredicate<T extends
Comparable<T>> implements FilterPredicate, Serializable {
+ private final Column<T> column;
+ private final Set<T> values;
+ private final String toString;
+
+ protected SetColumnFilterPredicate(Column<T> column, Set<T> values) {
+ this.column = Objects.requireNonNull(column, "column cannot be null");
+ this.values = Objects.requireNonNull(values, "values cannot be null");
+ checkArgument(!values.isEmpty(), "values in SetColumnFilterPredicate
shouldn't be empty!");
+
+ String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
Review comment:
Removed.
##########
File path:
parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java
##########
@@ -247,6 +250,80 @@ public int hashCode() {
}
}
+ // base class for In and NotIn
+ public static abstract class SetColumnFilterPredicate<T extends
Comparable<T>> implements FilterPredicate, Serializable {
+ private final Column<T> column;
+ private final Set<T> values;
+ private final String toString;
+
+ protected SetColumnFilterPredicate(Column<T> column, Set<T> values) {
+ this.column = Objects.requireNonNull(column, "column cannot be null");
+ this.values = Objects.requireNonNull(values, "values cannot be null");
+ checkArgument(!values.isEmpty(), "values in SetColumnFilterPredicate
shouldn't be empty!");
+
+ String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
+ StringBuilder str = new StringBuilder();
+ int iter = 0;
+ for (T value : values) {
+ if (iter >= 100) break;
+ str.append(value).append(", ");
+ iter++;
+ }
+ String valueStr = values.size() <= 100 ? str.substring(0, str.length() -
2) : str + "...";
+ this.toString = name + "(" + column.getColumnPath().toDotString() + ", "
+ valueStr + ")";
Review comment:
Fixed. Thanks!
##########
File path:
parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java
##########
@@ -247,6 +250,80 @@ public int hashCode() {
}
}
+ // base class for In and NotIn
+ public static abstract class SetColumnFilterPredicate<T extends
Comparable<T>> implements FilterPredicate, Serializable {
+ private final Column<T> column;
+ private final Set<T> values;
+ private final String toString;
+
+ protected SetColumnFilterPredicate(Column<T> column, Set<T> values) {
+ this.column = Objects.requireNonNull(column, "column cannot be null");
+ this.values = Objects.requireNonNull(values, "values cannot be null");
+ checkArgument(!values.isEmpty(), "values in SetColumnFilterPredicate
shouldn't be empty!");
+
+ String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
+ StringBuilder str = new StringBuilder();
+ int iter = 0;
+ for (T value : values) {
+ if (iter >= 100) break;
+ str.append(value).append(", ");
+ iter++;
+ }
+ String valueStr = values.size() <= 100 ? str.substring(0, str.length() -
2) : str + "...";
+ this.toString = name + "(" + column.getColumnPath().toDotString() + ", "
+ valueStr + ")";
+ }
+
+ public Column<T> getColumn() {
+ return column;
+ }
+
+ public Set<T> getValues() {
+ return values;
+ }
+
+ @Override
+ public String toString() {
+ return toString;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
Review comment:
Yes, but just trying to follow the style at
https://github.com/apache/parquet-mr/blob/master/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java#L150
##########
File path:
parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java
##########
@@ -247,6 +250,80 @@ public int hashCode() {
}
}
+ // base class for In and NotIn
+ public static abstract class SetColumnFilterPredicate<T extends
Comparable<T>> implements FilterPredicate, Serializable {
+ private final Column<T> column;
+ private final Set<T> values;
+ private final String toString;
+
+ protected SetColumnFilterPredicate(Column<T> column, Set<T> values) {
+ this.column = Objects.requireNonNull(column, "column cannot be null");
+ this.values = Objects.requireNonNull(values, "values cannot be null");
+ checkArgument(!values.isEmpty(), "values in SetColumnFilterPredicate
shouldn't be empty!");
+
+ String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
+ StringBuilder str = new StringBuilder();
+ int iter = 0;
+ for (T value : values) {
+ if (iter >= 100) break;
+ str.append(value).append(", ");
+ iter++;
+ }
+ String valueStr = values.size() <= 100 ? str.substring(0, str.length() -
2) : str + "...";
+ this.toString = name + "(" + column.getColumnPath().toDotString() + ", "
+ valueStr + ")";
+ }
+
+ public Column<T> getColumn() {
+ return column;
+ }
+
+ public Set<T> getValues() {
+ return values;
+ }
+
+ @Override
+ public String toString() {
+ return toString;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ SetColumnFilterPredicate<?> that = (SetColumnFilterPredicate<?>) o;
+ return column.equals(that.column) && values.equals(that.values) &&
Objects.equals(toString, that.toString);
Review comment:
Removed toString comparison
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]