[GitHub] [lucene-solr] mikemccand commented on a change in pull request #1930: LUCENE-9322: add VectorValues to new Lucene90 codec

GitBox Fri, 16 Oct 2020 09:12:17 -0700


mikemccand commented on a change in pull request #1930:
URL: https://github.com/apache/lucene-solr/pull/1930#discussion_r506561868




##########
File path: 
lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
##########
@@ -0,0 +1,344 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.FloatBuffer;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.VectorReader;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Reads vectors from the index segments.
+ */

Review comment:
       Can we mark as `@lucene.experimental`?

##########
File path: lucene/core/src/java/org/apache/lucene/index/VectorValues.java
##########
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This class provides access to per-document floating point vector values 
indexed as {@link
+ * org.apache.lucene.document.VectorField}.
+ */
+public abstract class VectorValues extends DocIdSetIterator {
+
+  /** The maximum length of a vector */
+  public static int MAX_DIMENSIONS = 1024;
+
+  /** Sole constructor */
+  protected VectorValues() {}
+
+  /**
+   * Return the dimension of the vectors
+   */
+  public abstract int dimension();
+
+  /**
+   * TODO: should we use cost() for this? We rely on its always being exactly 
the number
+   * of documents having a value for this field, which is not guaranteed by 
the cost() contract,
+   * but in all the implementations so far they are the same.
+   * @return the number of vectors returned by this iterator
+   */
+  public abstract int size();
+
+  /**
+   * Return the score function used to compare these vectors
+   */
+  public abstract ScoreFunction scoreFunction();
+
+  /**
+   * Return the vector value for the current document ID.
+   * It is illegal to call this method when the iterator is not positioned: 
before advancing, or after failing to advance.
+   * The returned array may be shared across calls, re-used, and modified as 
the iterator advances.
+   * @return the vector value
+   */
+  public abstract float[] vectorValue() throws IOException;
+
+  /**
+   * Return the binary encoded vector value for the current document ID. These 
are the bytes corresponding to the float array
+   * in IEEE 754 standard encoding, encoded using little-endian byte order.
+   * It is illegal to call this method when the iterator is not positioned: 
before advancing, or after failing to advance.
+   * The returned storage may be shared across calls, re-used and modified as 
the iterator advances.
+   * @return the binary value
+   */
+  public BytesRef binaryValue() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Return a random access interface over this iterator's vectors. Calling 
the RandomAccess methods will
+   * have no effect on the progress of the iteration or the values returned by 
this iterator. Successive calls
+   * will retrieve independent copies that do not overwrite each others' 
returned values.
+   */
+  public abstract RandomAccess randomAccess();
+
+  /**
+   * Provides random access to vectors by dense ordinal.
+   */
+  public interface RandomAccess {

Review comment:
       Also `@lucene.experimental`?

##########
File path: lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
##########
@@ -0,0 +1,650 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.VectorField;
+import org.apache.lucene.index.VectorValues.ScoreFunction;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+/** Test Indexing/IndexWriter with vectors */
+public class TestVectorValues extends LuceneTestCase {
+
+  private IndexWriterConfig createIndexWriterConfig() {
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    iwc.setCodec(Codec.forName("Lucene90"));
+    return iwc;
+  }
+
+  // Suddenly add vectors to an existing field:
+  public void testUpgradeFieldToVectors() throws Exception {
+    try (Directory dir = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(newStringField("dim", "foo", Store.NO));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+    }
+  }
+
+  public void testFieldConstructor() {
+    float[] v = new float[1];
+    VectorField field = new VectorField("f", v);
+    assertEquals(1, field.fieldType().vectorDimension());
+    assertEquals(ScoreFunction.EUCLIDEAN, 
field.fieldType().vectorScoreFunction());
+    assertSame(v, field.vectorValue());
+  }
+
+  public void testFieldConstructorExceptions() {
+    expectThrows(IllegalArgumentException.class, () -> new VectorField(null, 
new float[1]));
+    expectThrows(IllegalArgumentException.class, () -> new VectorField("f", 
null));
+    expectThrows(IllegalArgumentException.class, () -> new VectorField("f", 
new float[1], null));
+    expectThrows(IllegalArgumentException.class, () -> new VectorField("f", 
new float[0]));
+    expectThrows(IllegalArgumentException.class, () -> new VectorField("f", 
new float[VectorValues.MAX_DIMENSIONS + 1]));
+  }
+
+  public void testFieldSetValue() {
+    VectorField field = new VectorField("f", new float[1]);
+    float[] v1 = new float[1];
+    field.setVectorValue(v1);
+    assertSame(v1, field.vectorValue());
+    expectThrows(IllegalArgumentException.class, () -> 
field.setVectorValue(new float[2]));
+    expectThrows(NullPointerException.class, () -> field.setVectorValue(null));
+  }
+
+  // Illegal schema change tests:
+
+  public void testIllegalDimChangeTwoDocs() throws Exception {
+    try (Directory dir = newDirectory();
+         IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+      Document doc = new Document();
+      doc.add(new VectorField("dim", new float[4], ScoreFunction.DOT_PRODUCT));
+      w.addDocument(doc);
+      if (random().nextBoolean()) {
+        // sometimes test with two segments
+        w.commit();
+      }
+
+      Document doc2 = new Document();
+      doc2.add(new VectorField("dim", new float[3], 
ScoreFunction.DOT_PRODUCT));
+      IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+          () -> w.addDocument(doc2));
+      assertEquals("cannot change vector dimension from 4 to 3 for 
field=\"dim\"", expected.getMessage());
+    }
+  }
+
+  public void testIllegalScoreFunctionChange() throws Exception {
+    try (Directory dir = newDirectory();
+         IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+      Document doc = new Document();
+      doc.add(new VectorField("dim", new float[4], ScoreFunction.DOT_PRODUCT));
+      w.addDocument(doc);
+      if (random().nextBoolean()) {
+        // sometimes test with two segments
+        w.commit();
+      }
+
+      Document doc2 = new Document();
+      doc2.add(new VectorField("dim", new float[4], ScoreFunction.EUCLIDEAN));
+      IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+          () -> w.addDocument(doc2));
+      assertEquals("cannot change vector score function from DOT_PRODUCT to 
EUCLIDEAN for field=\"dim\"", expected.getMessage());
+    }
+  }
+
+  public void testIllegalDimChangeTwoWriters() throws Exception {
+    try (Directory dir = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+
+      try (IndexWriter w2 = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc2 = new Document();
+        doc2.add(new VectorField("dim", new float[1], 
ScoreFunction.DOT_PRODUCT));
+        IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+            () -> w2.addDocument(doc2));
+        assertEquals("cannot change vector dimension from 4 to 1 for 
field=\"dim\"", expected.getMessage());
+      }
+    }
+  }
+
+  public void testIllegalScoreFunctionChangeTwoWriters() throws Exception {
+    try (Directory dir = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+
+      try (IndexWriter w2 = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc2 = new Document();
+        doc2.add(new VectorField("dim", new float[4], 
ScoreFunction.EUCLIDEAN));
+        IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+            () -> w2.addDocument(doc2));
+        assertEquals("cannot change vector score function from DOT_PRODUCT to 
EUCLIDEAN for field=\"dim\"", expected.getMessage());
+      }
+    }
+  }
+
+  public void testIllegalDimChangeViaAddIndexesDirectory() throws Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[5], 
ScoreFunction.DOT_PRODUCT));
+        w2.addDocument(doc);
+        IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+            () -> w2.addIndexes(new Directory[]{dir}));
+        assertEquals("cannot change vector dimension from 5 to 4 for 
field=\"dim\"", expected.getMessage());
+      }
+    }
+  }
+
+  public void testIllegalScoreFunctionChangeViaAddIndexesDirectory() throws 
Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], ScoreFunction.EUCLIDEAN));
+        w2.addDocument(doc);
+        IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+            () -> w2.addIndexes(dir));
+        assertEquals("cannot change vector score function from EUCLIDEAN to 
DOT_PRODUCT for field=\"dim\"", expected.getMessage());
+      }
+    }
+  }
+
+  public void testIllegalDimChangeViaAddIndexesCodecReader() throws Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[5], 
ScoreFunction.DOT_PRODUCT));
+        w2.addDocument(doc);
+        try (DirectoryReader r = DirectoryReader.open(dir)) {
+          IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+              () -> w2.addIndexes(new CodecReader[]{(CodecReader) 
getOnlyLeafReader(r)}));
+          assertEquals("cannot change vector dimension from 5 to 4 for 
field=\"dim\"", expected.getMessage());
+        }
+      }
+    }
+  }
+
+  public void testIllegalScoreFunctionChangeViaAddIndexesCodecReader() throws 
Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], ScoreFunction.EUCLIDEAN));
+        w2.addDocument(doc);
+        try (DirectoryReader r = DirectoryReader.open(dir)) {
+          IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+              () -> w2.addIndexes(new CodecReader[]{(CodecReader) 
getOnlyLeafReader(r)}));
+          assertEquals("cannot change vector score function from EUCLIDEAN to 
DOT_PRODUCT for field=\"dim\"", expected.getMessage());
+        }
+      }
+    }
+  }
+
+  public void testIllegalDimChangeViaAddIndexesSlowCodecReader() throws 
Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[5], 
ScoreFunction.DOT_PRODUCT));
+        w2.addDocument(doc);
+        try (DirectoryReader r = DirectoryReader.open(dir)) {
+          IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+              () -> TestUtil.addIndexesSlowly(w2, r));
+          assertEquals("cannot change vector dimension from 5 to 4 for 
field=\"dim\"", expected.getMessage());
+        }
+      }
+    }
+  }
+
+  public void testIllegalScoreFunctionChangeViaAddIndexesSlowCodecReader() 
throws Exception {

Review comment:
       We have good coverage of illegal `addIndexes` usage, but do we have a 
test case of a successful usage, adding in an index with vectors to an index 
without, and vice versa, and then both?

##########
File path: lucene/core/src/java/org/apache/lucene/index/VectorValues.java
##########
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This class provides access to per-document floating point vector values 
indexed as {@link
+ * org.apache.lucene.document.VectorField}.
+ */
+public abstract class VectorValues extends DocIdSetIterator {
+
+  /** The maximum length of a vector */
+  public static int MAX_DIMENSIONS = 1024;
+
+  /** Sole constructor */
+  protected VectorValues() {}
+
+  /**
+   * Return the dimension of the vectors
+   */
+  public abstract int dimension();
+
+  /**
+   * TODO: should we use cost() for this? We rely on its always being exactly 
the number
+   * of documents having a value for this field, which is not guaranteed by 
the cost() contract,
+   * but in all the implementations so far they are the same.
+   * @return the number of vectors returned by this iterator
+   */
+  public abstract int size();
+
+  /**
+   * Return the score function used to compare these vectors
+   */
+  public abstract ScoreFunction scoreFunction();
+
+  /**
+   * Return the vector value for the current document ID.
+   * It is illegal to call this method when the iterator is not positioned: 
before advancing, or after failing to advance.
+   * The returned array may be shared across calls, re-used, and modified as 
the iterator advances.
+   * @return the vector value
+   */
+  public abstract float[] vectorValue() throws IOException;
+
+  /**
+   * Return the binary encoded vector value for the current document ID. These 
are the bytes corresponding to the float array
+   * in IEEE 754 standard encoding, encoded using little-endian byte order.
+   * It is illegal to call this method when the iterator is not positioned: 
before advancing, or after failing to advance.
+   * The returned storage may be shared across calls, re-used and modified as 
the iterator advances.
+   * @return the binary value
+   */
+  public BytesRef binaryValue() throws IOException {

Review comment:
       I wonder if we should reserve the right to change this binary format?  
It feels like we are exposing implementation details of the underlying Codec 
storage?  E.g. what if Codec does lossy compression (one or two byte floats) in 
the future?

##########
File path: 
lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorWriter.java
##########
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.VectorWriter;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+/**
+ * Writes vector values and knn graphs to index segments.
+ */

Review comment:
       Can we mark as `@lucene.experimental`?

##########
File path: lucene/core/src/java/org/apache/lucene/index/VectorValues.java
##########
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This class provides access to per-document floating point vector values 
indexed as {@link
+ * org.apache.lucene.document.VectorField}.
+ */
+public abstract class VectorValues extends DocIdSetIterator {
+
+  /** The maximum length of a vector */
+  public static int MAX_DIMENSIONS = 1024;
+
+  /** Sole constructor */
+  protected VectorValues() {}
+
+  /**
+   * Return the dimension of the vectors
+   */
+  public abstract int dimension();
+
+  /**
+   * TODO: should we use cost() for this? We rely on its always being exactly 
the number
+   * of documents having a value for this field, which is not guaranteed by 
the cost() contract,
+   * but in all the implementations so far they are the same.
+   * @return the number of vectors returned by this iterator
+   */
+  public abstract int size();
+
+  /**
+   * Return the score function used to compare these vectors
+   */
+  public abstract ScoreFunction scoreFunction();
+
+  /**
+   * Return the vector value for the current document ID.
+   * It is illegal to call this method when the iterator is not positioned: 
before advancing, or after failing to advance.
+   * The returned array may be shared across calls, re-used, and modified as 
the iterator advances.
+   * @return the vector value
+   */
+  public abstract float[] vectorValue() throws IOException;

Review comment:
       Hmm, is there an API somewhere to let me get the vector ordinal for the 
current Lucene `docId`?

##########
File path: lucene/core/src/java/org/apache/lucene/index/VectorValues.java
##########
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This class provides access to per-document floating point vector values 
indexed as {@link
+ * org.apache.lucene.document.VectorField}.
+ */

Review comment:
       Can we mark as `@lucene.experimental`?

##########
File path: lucene/core/src/java/org/apache/lucene/index/VectorValuesWriter.java
##########
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.codecs.VectorWriter;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/** Buffers up pending vector value(s) per doc, then flushes when segment 
flushes. */
+class VectorValuesWriter {
+
+  private final FieldInfo fieldInfo;
+  private final Counter iwBytesUsed;
+  private final List<float[]> vectors = new ArrayList<>();
+  private final DocsWithFieldSet docsWithField;
+
+  private int lastDocID = -1;
+
+  private long bytesUsed;
+
+  VectorValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
+    this.fieldInfo = fieldInfo;
+    this.iwBytesUsed = iwBytesUsed;
+    this.docsWithField = new DocsWithFieldSet();
+    this.bytesUsed = docsWithField.ramBytesUsed();
+    if (iwBytesUsed != null) {
+      iwBytesUsed.addAndGet(bytesUsed);
+    }
+  }
+
+  /**
+   * Adds a value for the given document. Only a single value may be added.
+   * @param docID the value is added to this document
+   * @param vectorValue the value to add
+   * @throws IllegalArgumentException if a value has already been added to the 
given document
+   */
+  public void addValue(int docID, float[] vectorValue) {
+    if (docID == lastDocID) {
+      throw new IllegalArgumentException("VectorValuesField \"" + 
fieldInfo.name + "\" appears more than once in this document (only one value is 
allowed per field)");
+    }
+    if (vectorValue.length != fieldInfo.getVectorDimension()) {
+      throw new IllegalArgumentException("Attempt to index a vector of 
dimension " + vectorValue.length +
+          " but \"" + fieldInfo.name + "\" has dimension " + 
fieldInfo.getVectorDimension());
+    }
+    assert docID > lastDocID;
+    docsWithField.add(docID);
+    vectors.add(ArrayUtil.copyOfSubArray(vectorValue, 0, vectorValue.length));
+    updateBytesUsed();
+    lastDocID = docID;
+  }
+
+  private void updateBytesUsed() {
+    final long newBytesUsed = docsWithField.ramBytesUsed()
+            + vectors.size() * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + 
RamUsageEstimator.NUM_BYTES_ARRAY_HEADER)
+            + vectors.size() * vectors.get(0).length * Float.BYTES;
+    if (iwBytesUsed != null) {
+      iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
+    }
+    bytesUsed = newBytesUsed;
+  }
+
+  /**
+   * Flush this field's values to storage, sorting the values in accordance 
with sortMap
+   * @param sortMap specifies the order of documents being flushed, or null if 
they are to be flushed in docid order
+   * @param vectorWriter the Codec's vector writer that handles the actual 
encoding and I/O
+   * @throws IOException if there is an error writing the field and its values
+   */
+  public void flush(Sorter.DocMap sortMap, VectorWriter vectorWriter) throws 
IOException {
+    VectorValues vectorValues = new BufferedVectorValues(docsWithField, 
vectors, fieldInfo.getVectorDimension(), fieldInfo.getVectorScoreFunction());
+    if (sortMap != null) {
+      vectorWriter.writeField(fieldInfo, new SortingVectorValues(vectorValues, 
sortMap));
+    } else {
+      vectorWriter.writeField(fieldInfo, vectorValues);
+    }
+  }
+
+  private static class SortingVectorValues extends VectorValues {
+
+    private final VectorValues delegate;
+    private final VectorValues.RandomAccess randomAccess;
+    private final int[] docIdOffsets;
+    private final int[] ordMap;
+    private int docId = -1;
+
+    SortingVectorValues(VectorValues delegate, Sorter.DocMap sortMap) throws 
IOException {
+      this.delegate = delegate;
+      randomAccess = delegate.randomAccess();
+      docIdOffsets = new int[sortMap.size()];
+
+      int offset = 1; // 0 means no vector for this (field, document)
+      int docID;
+      while ((docID = delegate.nextDoc()) != NO_MORE_DOCS) {
+        int newDocID = sortMap.oldToNew(docID);
+        docIdOffsets[newDocID] = offset++;
+      }
+
+      // set up ordMap to map from new dense ordinal to old dense ordinal
+      ordMap = new int[offset - 1];
+      int ord = 0;
+      for (int docIdOffset : docIdOffsets) {
+        if (docIdOffset != 0) {
+          ordMap[ord++] = docIdOffset - 1;
+        }
+      }
+      assert ord == ordMap.length;
+    }
+
+    @Override
+    public int docID() {
+      return docId;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      while (docId < docIdOffsets.length - 1) {
+        ++docId;
+        if (docIdOffsets[docId] != 0) {
+          return docId;
+        }
+      }
+      docId = NO_MORE_DOCS;
+      return docId;
+    }
+
+    @Override
+    public BytesRef binaryValue() throws IOException {
+      return randomAccess.binaryValue(docIdOffsets[docId] - 1);
+    }
+
+    @Override
+    public float[] vectorValue() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int dimension() {
+      return delegate.dimension();
+    }
+
+    @Override
+    public int size() {
+      return delegate.size();
+    }
+
+    @Override
+    public ScoreFunction scoreFunction() {
+      return delegate.scoreFunction();
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long cost() {
+      return size();
+    }
+
+    @Override
+    public RandomAccess randomAccess() {
+      RandomAccess ra = delegate.randomAccess();
+      return new RandomAccess() {
+
+        @Override
+        public int size() {
+          return delegate.size();
+        }
+
+        @Override
+        public int dimension() {
+          return delegate.dimension();
+        }
+
+        @Override
+        public ScoreFunction scoreFunction() {
+          return delegate.scoreFunction();
+        }
+
+        @Override
+        public float[] vectorValue(int targetOrd) throws IOException {
+          return ra.vectorValue(ordMap[targetOrd]);
+        }
+
+        @Override
+        public BytesRef binaryValue(int targetOrd) {
+          throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public TopDocs search(float[] target, int k, int fanout) {
+          throw new UnsupportedOperationException();
+        }
+      };
+    }
+  }
+
+  private static class BufferedVectorValues extends VectorValues implements 
VectorValues.RandomAccess {
+
+    final DocsWithFieldSet docsWithField;
+
+    // These are always the vectors of a VectorValuesWriter, which are copied 
when added to it
+    final List<float[]> vectors;
+    final VectorValues.ScoreFunction scoreFunction;
+    final int dimension;
+
+    final ByteBuffer buffer;
+    final BytesRef binaryValue;
+    final ByteBuffer raBuffer;
+    final BytesRef raBinaryValue;
+
+    DocIdSetIterator docsWithFieldIter;
+    int ord = -1;
+
+    BufferedVectorValues(DocsWithFieldSet docsWithField, List<float[]> 
vectorsArray, int dimension, VectorValues.ScoreFunction scoreFunction) {

Review comment:
       Hmm, rename to `vectorsList` or just `vectors`? (It is a `List`, not an array.)

##########
File path: lucene/core/src/java/org/apache/lucene/index/VectorValues.java
##########
@@ -0,0 +1,264 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Access to per-document vector value.
+ */
+public abstract class VectorValues extends DocIdSetIterator {
+
+  /** The maximum length of a vector */
+  public static int MAX_DIMENSIONS = 1024;
+
+  /** Sole constructor */
+  protected VectorValues() {}
+
+  /**
+   * Return the dimension of the vectors
+   */
+  public abstract int dimension();
+
+  /**
+   * TODO: should we use cost() for this? We rely on its always being exactly 
the number
+   * of documents having a value for this field, which is not guaranteed by 
the cost() contract,
+   * but in all the implementations so far they are the same.
+   * @return the number of vectors returned by this iterator
+   */
+  public abstract int size();
+
+  /**
+   * Return the score function used to compare these vectors
+   */
+  public abstract ScoreFunction scoreFunction();
+
+  /**
+   * Return the vector value for the current document ID.
+   * It is illegal to call this method after the iterator failed to advance.
+   * @return the vector value
+   */
+  public abstract float[] vectorValue() throws IOException;
+
+  /**
+   * Return the binary encoded vector value for the current document ID.
+   * It is illegal to call this method after the iterator failed to advance.
+   * @return the binary value
+   */
+  public BytesRef binaryValue() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Return a random access interface over this iterator's vectors.
+   */
+  public abstract RandomAccess randomAccess();
+
+  /**
+   * Provides random access to vectors by dense ordinal
+   */
+  public interface RandomAccess {
+
+    /**
+     * Return the vector value as a floating point array.
+     * @param targetOrd a valid ordinal, &ge; 0 and &lt; {@link #size()}.
+     */
+    float[] vectorValue(int targetOrd) throws IOException;
+
+    /**
+     * Return the vector value as a byte array; these are the bytes 
corresponding to the float array
+     * encoded using little-endian byte order.
+     * @param targetOrd a valid ordinal, &ge; 0 and &lt; {@link #size()}.
+     */
+    BytesRef binaryValue(int targetOrd) throws IOException;
+
+    /**
+     * Return the k nearest neighbor documents as determined by comparison of 
their vector values
+     * for this field, to the given vector, by the field's score function. If 
the score function is
+     * reversed, lower values indicate nearer vectors, otherwise higher scores 
indicate nearer
+     * vectors. Unlike relevance scores, vector scores may be negative.
+     * @param target the vector-valued query
+     * @param k      the number of docs to return
+     * @param fanout control the accuracy/speed tradeoff - larger values give 
better recall at higher cost
+     * @return the k nearest neighbor documents, along with their 
(scoreFunction-specific) scores.
+     */
+    TopDocs search(float[] target, int k, int fanout) throws IOException;
+  }
+
+  /**
+   * Score function. This is used during indexing and searching of the vectors 
to determine the nearest neighbors.
+   * Score values may be negative. By default high scores indicate nearer 
documents, unless the function is reversed.
+   */
+  public enum ScoreFunction {
+    /** No distance function is used. Note: {@link 
VectorValues.RandomAccess#search(float[], int, int)}

Review comment:
       Thanks, I added a comment.  Really we cannot/should not address that 
issue until just before the release, when we have the "final" vector format we 
intend to release as 9.0.0.

##########
File path: lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
##########
@@ -0,0 +1,650 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.VectorField;
+import org.apache.lucene.index.VectorValues.ScoreFunction;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+/** Test Indexing/IndexWriter with vectors */
+public class TestVectorValues extends LuceneTestCase {
+
+  private IndexWriterConfig createIndexWriterConfig() {
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    iwc.setCodec(Codec.forName("Lucene90"));
+    return iwc;
+  }
+
+  // Suddenly add vectors to an existing field:
+  public void testUpgradeFieldToVectors() throws Exception {
+    try (Directory dir = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(newStringField("dim", "foo", Store.NO));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+    }
+  }
+
+  public void testFieldConstructor() {
+    float[] v = new float[1];
+    VectorField field = new VectorField("f", v);
+    assertEquals(1, field.fieldType().vectorDimension());
+    assertEquals(ScoreFunction.EUCLIDEAN, 
field.fieldType().vectorScoreFunction());
+    assertSame(v, field.vectorValue());
+  }
+
+  public void testFieldConstructorExceptions() {
+    expectThrows(IllegalArgumentException.class, () -> new VectorField(null, 
new float[1]));
+    expectThrows(IllegalArgumentException.class, () -> new VectorField("f", 
null));
+    expectThrows(IllegalArgumentException.class, () -> new VectorField("f", 
new float[1], null));
+    expectThrows(IllegalArgumentException.class, () -> new VectorField("f", 
new float[0]));
+    expectThrows(IllegalArgumentException.class, () -> new VectorField("f", 
new float[VectorValues.MAX_DIMENSIONS + 1]));
+  }
+
+  public void testFieldSetValue() {
+    VectorField field = new VectorField("f", new float[1]);
+    float[] v1 = new float[1];
+    field.setVectorValue(v1);
+    assertSame(v1, field.vectorValue());
+    expectThrows(IllegalArgumentException.class, () -> 
field.setVectorValue(new float[2]));
+    expectThrows(NullPointerException.class, () -> field.setVectorValue(null));
+  }
+
+  // Illegal schema change tests:
+
+  public void testIllegalDimChangeTwoDocs() throws Exception {
+    try (Directory dir = newDirectory();
+         IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+      Document doc = new Document();
+      doc.add(new VectorField("dim", new float[4], ScoreFunction.DOT_PRODUCT));
+      w.addDocument(doc);
+      if (random().nextBoolean()) {
+        // sometimes test with two segments
+        w.commit();
+      }
+
+      Document doc2 = new Document();
+      doc2.add(new VectorField("dim", new float[3], 
ScoreFunction.DOT_PRODUCT));
+      IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+          () -> w.addDocument(doc2));
+      assertEquals("cannot change vector dimension from 4 to 3 for 
field=\"dim\"", expected.getMessage());
+    }
+  }
+
+  public void testIllegalScoreFunctionChange() throws Exception {
+    try (Directory dir = newDirectory();
+         IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+      Document doc = new Document();
+      doc.add(new VectorField("dim", new float[4], ScoreFunction.DOT_PRODUCT));
+      w.addDocument(doc);
+      if (random().nextBoolean()) {
+        // sometimes test with two segments
+        w.commit();
+      }
+
+      Document doc2 = new Document();
+      doc2.add(new VectorField("dim", new float[4], ScoreFunction.EUCLIDEAN));
+      IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+          () -> w.addDocument(doc2));
+      assertEquals("cannot change vector score function from DOT_PRODUCT to 
EUCLIDEAN for field=\"dim\"", expected.getMessage());
+    }
+  }
+
+  public void testIllegalDimChangeTwoWriters() throws Exception {
+    try (Directory dir = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+
+      try (IndexWriter w2 = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc2 = new Document();
+        doc2.add(new VectorField("dim", new float[1], 
ScoreFunction.DOT_PRODUCT));
+        IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+            () -> w2.addDocument(doc2));
+        assertEquals("cannot change vector dimension from 4 to 1 for 
field=\"dim\"", expected.getMessage());
+      }
+    }
+  }
+
+  public void testIllegalScoreFunctionChangeTwoWriters() throws Exception {
+    try (Directory dir = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+
+      try (IndexWriter w2 = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc2 = new Document();
+        doc2.add(new VectorField("dim", new float[4], 
ScoreFunction.EUCLIDEAN));
+        IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+            () -> w2.addDocument(doc2));
+        assertEquals("cannot change vector score function from DOT_PRODUCT to 
EUCLIDEAN for field=\"dim\"", expected.getMessage());
+      }
+    }
+  }
+
+  public void testIllegalDimChangeViaAddIndexesDirectory() throws Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[5], 
ScoreFunction.DOT_PRODUCT));
+        w2.addDocument(doc);
+        IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+            () -> w2.addIndexes(new Directory[]{dir}));
+        assertEquals("cannot change vector dimension from 5 to 4 for 
field=\"dim\"", expected.getMessage());
+      }
+    }
+  }
+
+  public void testIllegalScoreFunctionChangeViaAddIndexesDirectory() throws 
Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], ScoreFunction.EUCLIDEAN));
+        w2.addDocument(doc);
+        IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+            () -> w2.addIndexes(dir));
+        assertEquals("cannot change vector score function from EUCLIDEAN to 
DOT_PRODUCT for field=\"dim\"", expected.getMessage());
+      }
+    }
+  }
+
+  public void testIllegalDimChangeViaAddIndexesCodecReader() throws Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[5], 
ScoreFunction.DOT_PRODUCT));
+        w2.addDocument(doc);
+        try (DirectoryReader r = DirectoryReader.open(dir)) {
+          IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+              () -> w2.addIndexes(new CodecReader[]{(CodecReader) 
getOnlyLeafReader(r)}));
+          assertEquals("cannot change vector dimension from 5 to 4 for 
field=\"dim\"", expected.getMessage());
+        }
+      }
+    }
+  }
+
+  public void testIllegalScoreFunctionChangeViaAddIndexesCodecReader() throws 
Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], ScoreFunction.EUCLIDEAN));
+        w2.addDocument(doc);
+        try (DirectoryReader r = DirectoryReader.open(dir)) {
+          IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+              () -> w2.addIndexes(new CodecReader[]{(CodecReader) 
getOnlyLeafReader(r)}));
+          assertEquals("cannot change vector score function from EUCLIDEAN to 
DOT_PRODUCT for field=\"dim\"", expected.getMessage());
+        }
+      }
+    }
+  }
+
+  public void testIllegalDimChangeViaAddIndexesSlowCodecReader() throws 
Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[5], 
ScoreFunction.DOT_PRODUCT));
+        w2.addDocument(doc);
+        try (DirectoryReader r = DirectoryReader.open(dir)) {
+          IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+              () -> TestUtil.addIndexesSlowly(w2, r));
+          assertEquals("cannot change vector dimension from 5 to 4 for 
field=\"dim\"", expected.getMessage());
+        }
+      }
+    }
+  }
+
+  public void testIllegalScoreFunctionChangeViaAddIndexesSlowCodecReader() 
throws Exception {
+    try (Directory dir = newDirectory();
+         Directory dir2 = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], 
ScoreFunction.DOT_PRODUCT));
+        w.addDocument(doc);
+      }
+      try (IndexWriter w2 = new IndexWriter(dir2, createIndexWriterConfig())) {
+        Document doc = new Document();
+        doc.add(new VectorField("dim", new float[4], ScoreFunction.EUCLIDEAN));
+        w2.addDocument(doc);
+        try (DirectoryReader r = DirectoryReader.open(dir)) {
+          IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+              () -> TestUtil.addIndexesSlowly(w2, r));
+          assertEquals("cannot change vector score function from EUCLIDEAN to 
DOT_PRODUCT for field=\"dim\"", expected.getMessage());
+        }
+      }
+    }
+  }
+
+  public void testIllegalMultipleValues() throws Exception {
+    try (Directory dir = newDirectory();
+         IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+      Document doc = new Document();
+      doc.add(new VectorField("dim", new float[4], ScoreFunction.DOT_PRODUCT));
+      doc.add(new VectorField("dim", new float[4], ScoreFunction.DOT_PRODUCT));
+      IllegalArgumentException expected = 
expectThrows(IllegalArgumentException.class,
+          () -> w.addDocument(doc));
+      assertEquals("VectorValuesField \"dim\" appears more than once in this 
document (only one value is allowed per field)",
+          expected.getMessage());
+    }
+  }
+
+  public void testIllegalDimensionTooLarge() throws Exception {
+    try (Directory dir = newDirectory();
+         IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+      Document doc = new Document();
+      expectThrows(IllegalArgumentException.class,
+          () -> doc.add(new VectorField("dim", new 
float[VectorValues.MAX_DIMENSIONS + 1], ScoreFunction.DOT_PRODUCT)));
+
+      Document doc2 = new Document();
+      doc2.add(new VectorField("dim", new float[1], ScoreFunction.EUCLIDEAN));
+      w.addDocument(doc2);
+    }
+  }
+
+  public void testIllegalEmptyVector() throws Exception {

Review comment:
       Thank you for all the corner-case illegal tests!




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [lucene-solr] mikemccand commented on a change in pull request #1930: LUCENE-9322: add VectorValues to new Lucene90 codec

Reply via email to