javanna commented on code in PR #15653:
URL: https://github.com/apache/lucene/pull/15653#discussion_r3079675830
##########
lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java:
##########
@@ -73,6 +73,43 @@ final void addMissing() {
++missing;
}
+ void merge(DocValuesStats<?> other) {
+ count += other.count;
+ missing += other.missing;
+ if (other.min != null && (min == null || compareMin(other.min, min) < 0)) {
+ copyMin(other);
+ }
+ if (other.max != null && (max == null || compareMax(other.max, max) > 0)) {
+ copyMax(other);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ void copyMin(DocValuesStats<?> other) {
+ min = (T) other.min;
+ }
+
+ @SuppressWarnings("unchecked")
+ void copyMax(DocValuesStats<?> other) {
+ max = (T) other.max;
+ }
+
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ int compareMin(Object a, Object b) {
+ if (a instanceof Number numA && b instanceof Number numB) {
+ return Double.compare(numA.doubleValue(), numB.doubleValue());
Review Comment:
I believe this could lead to precision loss? is the special number case
necessary?
##########
lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStatsCollectorManager.java:
##########
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.misc.search;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.function.Supplier;
+import org.apache.lucene.search.CollectorManager;
+
+/**
+ * A {@link CollectorManager} implementation for {@link
DocValuesStatsCollector}.
+ *
+ * @param <S> the type of {@link DocValuesStats}
+ */
+public class DocValuesStatsCollectorManager<S extends DocValuesStats<?>>
+ implements CollectorManager<DocValuesStatsCollector, S> {
+
+ private final Supplier<S> statsSupplier;
+
+ /**
+ * Creates a new DocValuesStatsCollectorManager.
+ *
+ * @param statsSupplier a supplier that creates new stats instances for each
collector
+ */
+ public DocValuesStatsCollectorManager(Supplier<S> statsSupplier) {
+ this.statsSupplier = statsSupplier;
+ }
+
+ @Override
+ public DocValuesStatsCollector newCollector() throws IOException {
+ return new DocValuesStatsCollector(statsSupplier.get());
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public S reduce(Collection<DocValuesStatsCollector> collectors) throws
IOException {
+ if (collectors.isEmpty()) {
+ return null;
Review Comment:
This is a potential bug that could cause problems down the line? Could we
instead return empty stats? Is it even necessary to have this special case? You
could instead just remove this conditional?
##########
lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java:
##########
@@ -73,6 +73,43 @@ final void addMissing() {
++missing;
}
+ void merge(DocValuesStats<?> other) {
+ count += other.count;
+ missing += other.missing;
+ if (other.min != null && (min == null || compareMin(other.min, min) < 0)) {
+ copyMin(other);
+ }
+ if (other.max != null && (max == null || compareMax(other.max, max) > 0)) {
+ copyMax(other);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ void copyMin(DocValuesStats<?> other) {
+ min = (T) other.min;
+ }
+
+ @SuppressWarnings("unchecked")
+ void copyMax(DocValuesStats<?> other) {
+ max = (T) other.max;
+ }
+
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ int compareMin(Object a, Object b) {
+ if (a instanceof Number numA && b instanceof Number numB) {
+ return Double.compare(numA.doubleValue(), numB.doubleValue());
+ }
+ return ((Comparable) a).compareTo(b);
+ }
+
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ int compareMax(Object a, Object b) {
+ if (a instanceof Number numA && b instanceof Number numB) {
+ return Double.compare(numA.doubleValue(), numB.doubleValue());
+ }
+ return ((Comparable) a).compareTo(b);
Review Comment:
This is the same as compareMin ?
##########
lucene/misc/src/test/org/apache/lucene/misc/search/TestDocValuesStatsCollector.java:
##########
@@ -55,16 +56,21 @@ public class TestDocValuesStatsCollector extends
LuceneTestCase {
Review Comment:
Should we also update `testDocsWithSortedValues` and
`testDocsWithSortedSetValues`?
##########
lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStatsCollectorManager.java:
##########
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.misc.search;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.function.Supplier;
+import org.apache.lucene.search.CollectorManager;
+
+/**
+ * A {@link CollectorManager} implementation for {@link
DocValuesStatsCollector}.
+ *
Review Comment:
Could we expand javadocs to include a usage example for users?
##########
lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java:
##########
@@ -296,6 +433,14 @@ protected void doAccumulate(int count) throws IOException {
public Long sum() {
return sum;
}
+
+ @Override
+ void merge(DocValuesStats<?> other) {
+ super.merge(other);
+ if (other instanceof SortedLongDocValuesStats o) {
Review Comment:
is this type of conditional necessary? Does it ever happen that we get a
different type than `SortedLongDocValuesStats` here? This applies to all merge
methods.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]