mikemccand commented on issue #13880:
URL: https://github.com/apache/lucene/issues/13880#issuecomment-2402543191
Hmm, well, I coded that up, on top of my PR from #13874, with this diff:
```
raptorlake:912x[fix_back_compat]$ git diff
diff --git
a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt8HnswBackwardsCompatibility.java
b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt8HnswBackwardsCompatibility.java
index d4bb2d8b24f..321811b7dc9 100644
---
a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt8HnswBackwardsCompatibility.java
+++
b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt8HnswBackwardsCompatibility.java
@@ -23,17 +23,22 @@ import java.io.IOException;
import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.KnnVectorsReader;
import
org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;
+import
org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsReader;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.IndexSearcher;
@@ -145,4 +150,19 @@ public class TestInt8HnswBackwardsCompatibility extends
BackwardsCompatibilityTe
assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
}
}
+
+ // #13880: make sure the BWC index really contains quantized HNSW not
float32
+ public void testIndexIsReallyQuantized() throws Exception {
+ try (DirectoryReader reader = DirectoryReader.open(directory)) {
+ for (LeafReaderContext leafContext : reader.leaves()) {
+ KnnVectorsReader knnVectorsReader = ((CodecReader)
leafContext.reader()).getVectorReader();
+ assertTrue("expected PerFieldKnnVectorsFormat.FieldsReader but got:
" + knnVectorsReader,
+ knnVectorsReader instanceof
PerFieldKnnVectorsFormat.FieldsReader);
+
+ KnnVectorsReader forField =
((PerFieldKnnVectorsFormat.FieldsReader)
knnVectorsReader).getFieldReader(KNN_VECTOR_FIELD);
+ assertTrue("KnnVectorsReader should be quantized, but got: " +
forField,
+ forField instanceof
Lucene99ScalarQuantizedVectorsReader);
+ }
+ }
+ }
}
```
I wanted to confirm that the test passes with the PR that fixed the BWC
indices to actually use quantized HNSW. But it fails, with exceptions like
this:
```
2> NOTE: reproduce with: gradlew test --tests
TestInt8HnswBackwardsCompatibility.testIndexIsQuantized
-Dtests.seed=848561A0021D6FE4 -Dtests.locale=en-JM -Dtests.timezone=Chile/Continental
-Dtests.asserts=true -Dtests.file.encoding=UTF-8
> java.lang.AssertionError: KnnVectorsReader should be quantized, but
got: org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader@425e674f
> at
__randomizedtesting.SeedInfo.seed([848561A0021D6FE4:B93E033BC6EAF741]:0)
> at [email protected]/org.junit.Assert.fail(Assert.java:89)
> at [email protected]/org.junit.Assert.assertTrue(Assert.java:42)
> at
org.apache.lucene.backward_index.TestInt8HnswBackwardsCompatibility.testIndexIsQuantized(TestInt8HnswBackwardsCompatibility.java:163)
> at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> at
[email protected]/com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1758)
> at
[email protected]/com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:946)
> at
[email protected]/com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:982)
> at
[email protected]/com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:996)
> at
[email protected]/org.apache.lucene.tests.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:48)
> at
[email protected]/org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43)
> at
[email protected]/org.apache.lucene.tests.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:45)
> at
[email protected]/org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60)
> at
[email protected]/org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44)
> at
[email protected]/org.junit.rules.RunRules.evaluate(RunRules.java:20)
> at
[email protected]/com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
> at
[email protected]/com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:390)
> at
[email protected]/com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:843)
> at
[email protected]/com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:490)
> at
[email protected]/com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:955)
> at
[email protected]/com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:840)
> at
[email protected]/com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:891)
> at
[email protected]/com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:902)
> at
[email protected]/org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43)
> at
[email protected]/com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
> at
[email protected]/org.apache.lucene.tests.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:38)
> at
[email protected]/com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40)
> at
[email protected]/com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40)
> at
[email protected]/com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
> at
[email protected]/com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
> at
[email protected]/org.apache.lucene.tests.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53)
> at
[email protected]/org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43)
> at
[email protected]/org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44)
> at
[email protected]/org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60)
> at
[email protected]/org.apache.lucene.tests.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:47)
> at
[email protected]/org.junit.rules.RunRules.evaluate(RunRules.java:20)
> at
[email protected]/com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
> at
[email protected]/com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:390)
> at
[email protected]/com.carrotsearch.randomizedtesting.ThreadLeakControl.lambda$forkTimeoutingTask$0(ThreadLeakControl.java:850)
> at java.base/java.lang.Thread.run(Thread.java:829)
```
[Aside: it's annoying that the `reproduce with:` line does not say which of
the parameterized versions it ran with ... this seems like a bug in
RandomizedTesting? CC @dweiss. I have to scroll up to see which zip file it
is testing...]
So now I am confused ... I need to take a break (I'm at Community Over Code
in Denver, and the first session is about to start!!). Maybe after the break
I'll mull it over and understand why the new test case is failing on the zip
files that I thought I had fixed!!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]