jpountz commented on a change in pull request #1440:
URL: https://github.com/apache/lucene-solr/pull/1440#discussion_r413639432



##########
File path: 
lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/package.html
##########
@@ -0,0 +1,25 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+<html>
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+Lucene 7.0 file format.
+</body>
+</html>

Review comment:
       you could use a package-info.java instead. We use the html version in 
backward-codecs because there are otherwise conflicts if the package also 
exists in core, but it looks like you removed the package from core here so we 
could use a package-info.java in backward-codecs?

##########
File path: 
lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
##########
@@ -527,45 +589,63 @@ private void indexPoint(PerField fp, IndexableField 
field) throws IOException {
     fp.pointValuesWriter.addPackedValue(docState.docID, field.binaryValue());
   }
 
-  private void validateIndexSortDVType(Sort indexSort, String fieldName, 
DocValuesType dvType) {
+  private void validateIndexSortDVType(Sort indexSort, String fieldToValidate, 
DocValuesType dvType) throws IOException {
     for (SortField sortField : indexSort.getSort()) {
-      if (sortField.getField().equals(fieldName)) {
-        switch (dvType) {
-          case NUMERIC:
-            if (sortField.getType().equals(SortField.Type.INT) == false &&
-                  sortField.getType().equals(SortField.Type.LONG) == false &&
-                  sortField.getType().equals(SortField.Type.FLOAT) == false &&
-                  sortField.getType().equals(SortField.Type.DOUBLE) == false) {
-              throw new IllegalArgumentException("invalid doc value type:" + 
dvType + " for sortField:" + sortField);
-            }
-            break;
+      IndexSorter sorter = sortField.getIndexSorter();
+      if (sorter == null) {
+        throw new IllegalStateException("Cannot sort index with sort order " + 
sortField);
+      }
+      sorter.getDocComparator(new DocValuesLeafReader() {
+        @Override
+        public NumericDocValues getNumericDocValues(String field) {
+          if (Objects.equals(field, fieldToValidate) && dvType != 
DocValuesType.NUMERIC) {
+            throw new IllegalArgumentException("SortField " + sortField + " 
expected field [" + field + "] to be NUMERIC but it is [" + dvType + "]");
+          }
+          return DocValues.emptyNumeric();
+        }
 
-          case BINARY:
-            throw new IllegalArgumentException("invalid doc value type:" + 
dvType + " for sortField:" + sortField);
+        @Override
+        public BinaryDocValues getBinaryDocValues(String field) {
+          if (Objects.equals(field, fieldToValidate) && dvType != 
DocValuesType.BINARY) {
+            throw new IllegalArgumentException("SortField " + sortField + " 
expected field [" + field + "] to be BINARY but it is [" + dvType + "]");
+          }
+          return DocValues.emptyBinary();
+        }
 
-          case SORTED:
-            if (sortField.getType().equals(SortField.Type.STRING) == false) {
-              throw new IllegalArgumentException("invalid doc value type:" + 
dvType + " for sortField:" + sortField);
-            }
-            break;
+        @Override
+        public SortedDocValues getSortedDocValues(String field) {
+          if (Objects.equals(field, fieldToValidate) && dvType != 
DocValuesType.SORTED) {
+            throw new IllegalArgumentException("SortField " + sortField + " 
expected field [" + field + "] to be SORTED but it is [" + dvType + "]");
+          }
+          return DocValues.emptySorted();
+        }
 
-          case SORTED_NUMERIC:
-            if (sortField instanceof SortedNumericSortField == false) {
-              throw new IllegalArgumentException("invalid doc value type:" + 
dvType + " for sortField:" + sortField);
-            }
-            break;
+        @Override
+        public SortedNumericDocValues getSortedNumericDocValues(String field) {
+          if (Objects.equals(field, fieldToValidate) && dvType != 
DocValuesType.SORTED_NUMERIC) {
+            throw new IllegalArgumentException("SortField " + sortField + " 
expected field [" + field + "] to be SORTED_NUMERIC but it is [" + dvType + 
"]");
+          }
+          return DocValues.emptySortedNumeric(0);
+        }
 
-          case SORTED_SET:
-            if (sortField instanceof SortedSetSortField == false) {
-              throw new IllegalArgumentException("invalid doc value type:" + 
dvType + " for sortField:" + sortField);
-            }
-            break;
+        @Override
+        public SortedSetDocValues getSortedSetDocValues(String field) {
+          if (Objects.equals(field, fieldToValidate) && dvType != 
DocValuesType.SORTED_SET) {
+            throw new IllegalArgumentException("SortField " + sortField + " 
expected field [" + field + "] to be SORTED_SET but it is [" + dvType + "]");
+          }
+          return DocValues.emptySortedSet();
+        }
 
-          default:
-            throw new IllegalArgumentException("invalid doc value type:" + 
dvType + " for sortField:" + sortField);
+        @Override
+        public FieldInfos getFieldInfos() {
+          throw new UnsupportedOperationException();
         }
-        break;
-      }
+
+        @Override
+        public int maxDoc() {
+          return 0;

Review comment:
       I'd have a slight preferenrece for throwing an UOE here, would it work?

##########
File path: lucene/core/src/java/org/apache/lucene/index/SortFieldProvider.java
##########
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.NamedSPILoader;
+
+/**
+ * Reads a named SortField from a segment info file, used to record index sorts
+ */
+public abstract class SortFieldProvider implements NamedSPILoader.NamedSPI {
+
+  private static class Holder {
+    private static final NamedSPILoader<SortFieldProvider> LOADER = new 
NamedSPILoader<>(SortFieldProvider.class);
+
+    static NamedSPILoader<SortFieldProvider> getLoader() {
+      if (LOADER == null) {
+        throw new IllegalStateException("You tried to lookup a 
SortFieldProvider by name before all SortFieldProviders could be initialized. "+
+            "This likely happens if you call SortFieldProvider#forName from a 
SortFieldProviders's ctor.");
+      }
+      return LOADER;
+    }
+  }
+
+  /**
+   * Looks up a SortFieldProvider by name
+   */
+  public static SortFieldProvider forName(String name) {
+    return Holder.getLoader().lookup(name);
+  }
+
+  /**
+   * Lists all available SortFieldProviders
+   */
+  public static Set<String> availableSortFieldProviders() {
+    return Holder.getLoader().availableServices();
+  }
+
+  /**
+   * Reloads the SortFieldProvider list from the given {@link ClassLoader}.
+   * Changes to the list are visible after the method ends, all
+   * iterators ({@link #availableSortFieldProviders()} ()},...) stay 
consistent.
+   *
+   * <p><b>NOTE:</b> Only new SortFieldProviders are added, existing ones are
+   * never removed or replaced.
+   *
+   * <p><em>This method is expensive and should only be called for discovery
+   * of new SortFieldProviders on the given classpath/classloader!</em>
+   */
+  public static void reloadSortFieldProviders(ClassLoader classLoader) {
+    Holder.getLoader().reload(classLoader);
+  }
+
+  public static void serialize(SortField sf, DataOutput output) throws 
IOException {

Review comment:
       javadocs?

##########
File path: 
lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
##########
@@ -94,29 +89,100 @@ public DefaultIndexingChain(DocumentsWriterPerThread 
docWriter) {
     termsHash = new FreqProxTermsWriter(docWriter, termVectorsWriter);
   }
 
+  private LeafReader getDocValuesReader(int maxDoc) {

Review comment:
       ```suggestion
     private LeafReader getDocValuesLeafReader(int maxDoc) {
   ```

##########
File path: 
lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
##########
@@ -79,11 +78,6 @@ public void addValue(int docID, BytesRef value) {
     lastDocID = docID;
   }
 
-  @Override
-  public void finish(int maxDoc) {
-    updateBytesUsed();
-  }

Review comment:
       I wonder if there are undesired consequences of dropping this?

##########
File path: lucene/core/src/java/org/apache/lucene/index/SortFieldProvider.java
##########
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.NamedSPILoader;
+
+/**
+ * Reads a named SortField from a segment info file, used to record index sorts
+ */
+public abstract class SortFieldProvider implements NamedSPILoader.NamedSPI {
+
+  private static class Holder {
+    private static final NamedSPILoader<SortFieldProvider> LOADER = new 
NamedSPILoader<>(SortFieldProvider.class);
+
+    static NamedSPILoader<SortFieldProvider> getLoader() {
+      if (LOADER == null) {
+        throw new IllegalStateException("You tried to lookup a 
SortFieldProvider by name before all SortFieldProviders could be initialized. "+
+            "This likely happens if you call SortFieldProvider#forName from a 
SortFieldProviders's ctor.");
+      }
+      return LOADER;
+    }
+  }
+
+  /**
+   * Looks up a SortFieldProvider by name
+   */
+  public static SortFieldProvider forName(String name) {
+    return Holder.getLoader().lookup(name);
+  }
+
+  /**
+   * Lists all available SortFieldProviders
+   */
+  public static Set<String> availableSortFieldProviders() {
+    return Holder.getLoader().availableServices();
+  }
+
+  /**
+   * Reloads the SortFieldProvider list from the given {@link ClassLoader}.
+   * Changes to the list are visible after the method ends, all
+   * iterators ({@link #availableSortFieldProviders()} ()},...) stay 
consistent.
+   *
+   * <p><b>NOTE:</b> Only new SortFieldProviders are added, existing ones are
+   * never removed or replaced.
+   *
+   * <p><em>This method is expensive and should only be called for discovery
+   * of new SortFieldProviders on the given classpath/classloader!</em>
+   */
+  public static void reloadSortFieldProviders(ClassLoader classLoader) {
+    Holder.getLoader().reload(classLoader);
+  }
+
+  public static void serialize(SortField sf, DataOutput output) throws 
IOException {
+    IndexSorter sorter = sf.getIndexSorter();
+    if (sorter == null) {
+      throw new IllegalArgumentException("Cannot serialize sort field " + sf);
+    }
+    SortFieldProvider provider = 
SortFieldProvider.forName(sorter.getProviderName());
+    provider.writeSortField(sf, output);
+  }
+
+  /** The name this SortFieldProvider is registered under */
+  protected final String name;
+
+  /**
+   * Creates a new SortFieldProvider.
+   * <p>
+   * The provided name will be written into the index segment: in order to
+   * for the segment to be read this class should be registered with Java's
+   * SPI mechanism (registered in META-INF/ of your jar file, etc).
+   * @param name must be all ascii alphanumeric, and less than 128 characters 
in length.
+   */
+  protected SortFieldProvider(String name) {
+    this.name = name;
+  }
+
+  @Override
+  public String getName() {
+    return name;
+  }
+
+  /**
+   * Loads a SortField from serialized bytes
+   */
+  public abstract SortField loadSortField(DataInput in) throws IOException;
+
+  public abstract void writeSortField(SortField sf, DataOutput out) throws 
IOException;

Review comment:
       javadocs?

##########
File path: lucene/core/src/java/org/apache/lucene/search/SortField.java
##########
@@ -392,4 +491,31 @@ public SortField rewrite(IndexSearcher searcher) throws 
IOException {
   public boolean needsScores() {
     return type == Type.SCORE;
   }
+
+  /**
+   * Returns an {@link IndexSorter} used for sorting index segments by this 
SortField.
+   *
+   * If the SortField cannot be used for index sorting (for example, if it 
uses scores or
+   * other query-dependent values) then this method should return {@code null}
+   *
+   * SortFields that implement this method should also implement a constructor 
that
+   * takes a {@link DataInput} for deserialization, to match the {@link 
IndexSorter#serialize(DataOutput)}
+   * method on the returned IndexSorter
+   */

Review comment:
       I suggested it because I was seeing your change as a cleanup to the way 
that we support configuring index sorts. experimental would work for me too, 
what I like is to signal users that we feel free to make breaking changes to 
these APIs.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to