diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 399bd8d1a2f5..400b700ca401 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -303,6 +303,8 @@ Other * GITHUB#15481: The `reverse` field of SortField is now final. If you have subclassed SortField, you should set `reverse` in the super constructor. (Alan Woodward) +* GITHUB#15476: Enforce fallback support for float vector retrieval in quantized KNN vector formats. (Pulkit Gupta) + * GITHUB#15513: Update documentation in DefaultBloomFilterFactory to reflect changes made in GITHUB#11900 (Greg Miller) Build diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene102/TestLucene102BinaryQuantizedVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene102/TestLucene102BinaryQuantizedVectorsFormat.java index f2b07786967d..db2aaf1f0625 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene102/TestLucene102BinaryQuantizedVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene102/TestLucene102BinaryQuantizedVectorsFormat.java @@ -186,4 +186,9 @@ public void testQuantizedVectorsWriteAndRead() throws IOException { } } } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene102/TestLucene102HnswBinaryQuantizedVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene102/TestLucene102HnswBinaryQuantizedVectorsFormat.java index e7139e93b7c5..4cb007c5f838 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene102/TestLucene102HnswBinaryQuantizedVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene102/TestLucene102HnswBinaryQuantizedVectorsFormat.java @@ -177,4 +177,9 @@ public void testSimpleOffHeapSize() throws IOException { } } } + + @Override + 
protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java index b4840c9fd5b2..271b8046d3ad 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java @@ -88,4 +88,9 @@ public void testMergingWithDifferentByteKnnFields() { public void testMismatchedFields() throws Exception { // requires byte support } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java index 7bf2d426eacb..5d466d2685a4 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java @@ -87,4 +87,9 @@ public void testMergingWithDifferentByteKnnFields() { public void testMismatchedFields() throws Exception { // requires byte support } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java index 192f70a63972..7c2f91966b59 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java +++ 
b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java @@ -77,4 +77,9 @@ public void testMergingWithDifferentByteKnnFields() { public void testMismatchedFields() throws Exception { // requires byte support } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene94/TestLucene94HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene94/TestLucene94HnswVectorsFormat.java index 393c4a427e25..1335f58e9bde 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene94/TestLucene94HnswVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene94/TestLucene94HnswVectorsFormat.java @@ -38,4 +38,9 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { "Lucene94RWHnswVectorsFormat(name=Lucene94RWHnswVectorsFormat, maxConn=10, beamWidth=20)"; assertEquals(expectedString, customCodec.getKnnVectorsFormatForField("bogus_field").toString()); } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene95/TestLucene95HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene95/TestLucene95HnswVectorsFormat.java index a080e3bff7f7..00572e5995c8 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene95/TestLucene95HnswVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene95/TestLucene95HnswVectorsFormat.java @@ -38,4 +38,9 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { "Lucene95RWHnswVectorsFormat(name=Lucene95RWHnswVectorsFormat, maxConn=10, beamWidth=20)"; assertEquals(expectedString, 
customCodec.getKnnVectorsFormatForField("bogus_field").toString()); } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java index fb2ca112a0ab..6d9679428112 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java @@ -369,4 +369,9 @@ public void testVectorSimilarityFuncs() { var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList(); assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues); } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99HnswScalarQuantizedVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99HnswScalarQuantizedVectorsFormat.java index e2019719792f..948ada4ac3da 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99HnswScalarQuantizedVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99HnswScalarQuantizedVectorsFormat.java @@ -64,4 +64,9 @@ public void testSimpleOffHeapSize() throws IOException { } } } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java 
b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java index ee9765f2ac0e..2244b71d7bf6 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java @@ -35,7 +35,6 @@ import org.apache.lucene.document.KnnFloatVectorField; import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -48,7 +47,6 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; -import org.apache.lucene.tests.store.BaseDirectoryWrapper; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.quantization.QuantizedByteVectorValues; @@ -86,21 +84,6 @@ private Codec getCodec(float confidenceInterval) { confidenceInterval, bits, bits == 4 ? 
random().nextBoolean() : false)); } - protected List getRandomFloatVector(int numVectors, int dim, boolean normalize) { - List vectors = new ArrayList<>(numVectors); - for (int i = 0; i < numVectors; i++) { - float[] vec = randomVector(dim); - if (normalize) { - float[] copy = new float[vec.length]; - System.arraycopy(vec, 0, copy, 0, copy.length); - VectorUtil.l2normalize(copy); - vec = copy; - } - vectors.add(vec); - } - return vectors; - } - public void testSearch() throws Exception { try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { @@ -219,75 +202,74 @@ public void testQuantizedVectorsWriteAndRead() throws Exception { } } - public void testReadQuantizedVectorWithEmptyRawVectors() throws Exception { - String vectorFieldName = "vec1"; - int numVectors = 1 + random().nextInt(50); - int dim = random().nextInt(64) + 1; - if (dim % 2 == 1) { - dim++; - } - VectorSimilarityFunction similarityFunction = randomSimilarity(); - List vectors = - getRandomFloatVector( - numVectors, dim, similarityFunction == VectorSimilarityFunction.COSINE); + public void testToString() { + FilterCodec customCodec = + new FilterCodec("foo", Codec.getDefault()) { + @Override + public KnnVectorsFormat knnVectorsFormat() { + return new Lucene99ScalarQuantizedVectorsFormat(0.9f, (byte) 4, false); + } + }; + String expectedPattern = + "Lucene99ScalarQuantizedVectorsFormat(name=Lucene99ScalarQuantizedVectorsFormat, confidenceInterval=0.9, bits=4, compress=false, flatVectorScorer=%s, rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=%s))"; + var defaultScorer = + format( + Locale.ROOT, + expectedPattern, + "ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())", + "DefaultFlatVectorScorer()"); + var memSegScorer = + format( + Locale.ROOT, + expectedPattern, + "Lucene99MemorySegmentScalarQuantizedVectorScorer()", + "Lucene99MemorySegmentFlatVectorsScorer()"); + assertThat(customCodec.knnVectorsFormat().toString(), 
is(oneOf(defaultScorer, memSegScorer))); + } - try (BaseDirectoryWrapper dir = newDirectory(); - IndexWriter w = - new IndexWriter( - dir, - new IndexWriterConfig() - .setMaxBufferedDocs(numVectors + 1) - .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH) - .setMergePolicy(NoMergePolicy.INSTANCE) - .setUseCompoundFile(false) - .setCodec(getCodec(1f)))) { - dir.setCheckIndexOnClose(false); + public void testLimits() { + expectThrows( + IllegalArgumentException.class, + () -> new Lucene99ScalarQuantizedVectorsFormat(1.1f, 7, false)); + expectThrows( + IllegalArgumentException.class, + () -> new Lucene99ScalarQuantizedVectorsFormat(null, -1, false)); + expectThrows( + IllegalArgumentException.class, + () -> new Lucene99ScalarQuantizedVectorsFormat(null, 5, false)); + expectThrows( + IllegalArgumentException.class, + () -> new Lucene99ScalarQuantizedVectorsFormat(null, 9, false)); + } - for (int i = 0; i < numVectors; i++) { - Document doc = new Document(); - doc.add(new KnnFloatVectorField(vectorFieldName, vectors.get(i), similarityFunction)); - w.addDocument(doc); - } - w.commit(); + @Override + public void testRandomWithUpdatesAndGraph() { + // graph not supported + } - simulateEmptyRawVectors(dir); + @Override + public void testSearchWithVisitedLimit() { + // search not supported + } - try (IndexReader reader = DirectoryReader.open(w)) { - LeafReader r = getOnlyLeafReader(reader); - if (r instanceof CodecReader codecReader) { - KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); - knnVectorsReader = knnVectorsReader.unwrapReaderForField(vectorFieldName); - if (knnVectorsReader instanceof Lucene99ScalarQuantizedVectorsReader quantizedReader) { - FloatVectorValues floatVectorValues = - quantizedReader.getFloatVectorValues(vectorFieldName); - if (floatVectorValues instanceof OffHeapQuantizedFloatVectorValues) { - KnnVectorValues.DocIndexIterator iter = floatVectorValues.iterator(); - for (int docId = iter.nextDoc(); docId != NO_MORE_DOCS; docId = 
iter.nextDoc()) { - float[] dequantizedVector = floatVectorValues.vectorValue(iter.index()); - for (int i = 0; i < dim; i++) { - assertEquals( - "docId=" + docId + " i=" + i, - dequantizedVector[i], - vectors.get(docId)[i], - 0.2f); - } - } - } else { - fail("floatVectorValues is not OffHeapQuantizedFloatVectorValues"); - } - } else { - System.out.println("Vector READER:: " + knnVectorsReader.toString()); - fail("reader is not Lucene99ScalarQuantizedVectorsReader"); - } - } else { - fail("reader is not CodecReader"); - } - } - } + @Override + protected boolean supportsFloatVectorFallback() { + return true; + } + + @Override + protected int getQuantizationBits() { + return bits; + } + + @Override + protected Codec getCodecForFloatVectorFallbackTest() { + return getCodec(1f); } /** Simulates empty raw vectors by modifying index files. */ - private void simulateEmptyRawVectors(Directory dir) throws Exception { + @Override + protected void simulateEmptyRawVectors(Directory dir) throws Exception { final String[] indexFiles = dir.listAll(); final String RAW_VECTOR_EXTENSION = "vec"; final String VECTOR_META_EXTENSION = "vemf"; @@ -357,54 +339,4 @@ private void updateVectorMetadataFile(Directory dir, String fileName) throws Exc CodecUtil.writeFooter(out); } } - - public void testToString() { - FilterCodec customCodec = - new FilterCodec("foo", Codec.getDefault()) { - @Override - public KnnVectorsFormat knnVectorsFormat() { - return new Lucene99ScalarQuantizedVectorsFormat(0.9f, (byte) 4, false); - } - }; - String expectedPattern = - "Lucene99ScalarQuantizedVectorsFormat(name=Lucene99ScalarQuantizedVectorsFormat, confidenceInterval=0.9, bits=4, compress=false, flatVectorScorer=%s, rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=%s))"; - var defaultScorer = - format( - Locale.ROOT, - expectedPattern, - "ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())", - "DefaultFlatVectorScorer()"); - var memSegScorer = - format( - Locale.ROOT, - 
expectedPattern, - "Lucene99MemorySegmentScalarQuantizedVectorScorer()", - "Lucene99MemorySegmentFlatVectorsScorer()"); - assertThat(customCodec.knnVectorsFormat().toString(), is(oneOf(defaultScorer, memSegScorer))); - } - - public void testLimits() { - expectThrows( - IllegalArgumentException.class, - () -> new Lucene99ScalarQuantizedVectorsFormat(1.1f, 7, false)); - expectThrows( - IllegalArgumentException.class, - () -> new Lucene99ScalarQuantizedVectorsFormat(null, -1, false)); - expectThrows( - IllegalArgumentException.class, - () -> new Lucene99ScalarQuantizedVectorsFormat(null, 5, false)); - expectThrows( - IllegalArgumentException.class, - () -> new Lucene99ScalarQuantizedVectorsFormat(null, 9, false)); - } - - @Override - public void testRandomWithUpdatesAndGraph() { - // graph not supported - } - - @Override - public void testSearchWithVisitedLimit() { - // search not supported - } } diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextKnnVectorsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextKnnVectorsFormat.java index ea12e789cb4b..d9de99f82349 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextKnnVectorsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextKnnVectorsFormat.java @@ -41,4 +41,9 @@ public void testRandomBytes() throws Exception { public void testSortedIndexBytes() throws Exception { // unimplemented } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java index 4a4652e402de..40a38497ba96 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java +++ 
b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104HnswScalarQuantizedVectorsFormat.java @@ -206,4 +206,9 @@ public void testSimpleOffHeapSize() throws IOException { } } } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java index 32d5b07ad3e0..43755f8f220c 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java @@ -23,19 +23,15 @@ import static org.hamcrest.Matchers.oneOf; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.Locale; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; -import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding; import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnFloatVectorField; -import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexReader; @@ -43,7 +39,6 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.KnnFloatVectorQuery; @@ -55,9 +50,7 @@ import 
org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; -import org.apache.lucene.tests.store.BaseDirectoryWrapper; import org.apache.lucene.tests.util.TestUtil; -import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; import org.junit.Before; @@ -218,91 +211,19 @@ public void testQuantizedVectorsWriteAndRead() throws IOException { } } - protected List getRandomFloatVector(int numVectors, int dim, boolean normalize) { - List vectors = new ArrayList<>(numVectors); - for (int i = 0; i < numVectors; i++) { - float[] vec = randomVector(dim); - if (normalize) { - float[] copy = new float[vec.length]; - System.arraycopy(vec, 0, copy, 0, copy.length); - VectorUtil.l2normalize(copy); - vec = copy; - } - vectors.add(vec); - } - return vectors; + @Override + protected boolean supportsFloatVectorFallback() { + return true; } - public void testReadQuantizedVectorWithEmptyRawVectors() throws Exception { - String vectorFieldName = "vec1"; - int numVectors = 1 + random().nextInt(50); - int dim = random().nextInt(64) + 1; - if (dim % 2 == 1) { - dim++; - } - float eps = (1f / (float) (1 << (encoding.getBits()))); - VectorSimilarityFunction similarityFunction = randomSimilarity(); - List vectors = - getRandomFloatVector( - numVectors, dim, similarityFunction == VectorSimilarityFunction.COSINE); - - try (BaseDirectoryWrapper dir = newDirectory(); - IndexWriter w = - new IndexWriter( - dir, - new IndexWriterConfig() - .setMaxBufferedDocs(numVectors + 1) - .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH) - .setMergePolicy(NoMergePolicy.INSTANCE) - .setUseCompoundFile(false) - .setCodec(getCodec()))) { - dir.setCheckIndexOnClose(false); - - for (int i = 0; i < numVectors; i++) { - Document doc = new Document(); - doc.add(new KnnFloatVectorField(vectorFieldName, vectors.get(i), similarityFunction)); - w.addDocument(doc); - } - 
w.commit(); - - simulateEmptyRawVectors(dir); - - try (IndexReader reader = DirectoryReader.open(w)) { - LeafReader r = getOnlyLeafReader(reader); - if (r instanceof CodecReader codecReader) { - KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); - knnVectorsReader = knnVectorsReader.unwrapReaderForField(vectorFieldName); - if (knnVectorsReader instanceof Lucene104ScalarQuantizedVectorsReader quantizedReader) { - FloatVectorValues floatVectorValues = - quantizedReader.getFloatVectorValues(vectorFieldName); - if (floatVectorValues instanceof OffHeapScalarQuantizedFloatVectorValues) { - KnnVectorValues.DocIndexIterator iter = floatVectorValues.iterator(); - for (int docId = iter.nextDoc(); docId != NO_MORE_DOCS; docId = iter.nextDoc()) { - float[] dequantizedVector = floatVectorValues.vectorValue(iter.index()); - float mae = 0; - for (int i = 0; i < dim; i++) { - mae += Math.abs(dequantizedVector[i] - vectors.get(docId)[i]); - } - mae /= dim; - assertTrue( - "bits: " + encoding.getBits() + " mae: " + mae + " > eps: " + eps, mae <= eps); - } - } else { - fail("floatVectorValues is not OffHeapScalarQuantizedFloatVectorValues"); - } - } else { - System.out.println("Vector READER:: " + knnVectorsReader.toString()); - fail("reader is not Lucene104ScalarQuantizedVectorsReader"); - } - } else { - fail("reader is not CodecReader"); - } - } - } + @Override + protected int getQuantizationBits() { + return encoding.getBits(); } /** Simulates empty raw vectors by modifying index files. 
*/ - private void simulateEmptyRawVectors(Directory dir) throws Exception { + @Override + protected void simulateEmptyRawVectors(Directory dir) throws Exception { final String[] indexFiles = dir.listAll(); final String RAW_VECTOR_EXTENSION = "vec"; final String VECTOR_META_EXTENSION = "vemf"; diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormat.java index 54ecccaa28f3..fb85f1c51595 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormat.java @@ -95,4 +95,9 @@ public void testSimpleOffHeapSize() throws IOException { } } } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormatV0.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormatV0.java index e30cafdaf680..57e64c30f565 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormatV0.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormatV0.java @@ -33,4 +33,9 @@ protected Codec getCodec() { new Lucene99HnswVectorsFormat( DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null, 0)); } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java index 5d7ccdb3055d..60548dd4e356 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java @@ 
-340,4 +340,9 @@ public int getMaxDimensions(String fieldName) { return 32; } } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/faiss/TestFaissKnnVectorsFormat.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/faiss/TestFaissKnnVectorsFormat.java index f66eae680db1..bd4bcf9c70fa 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/faiss/TestFaissKnnVectorsFormat.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/faiss/TestFaissKnnVectorsFormat.java @@ -138,4 +138,9 @@ public void testLargeVectorData() throws IOException { Collections.nCopies(numDocs, List.of(new KnnFloatVectorField("vector", largeVector)))); } } + + @Override + protected boolean supportsFloatVectorFallback() { + return false; + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java index dacfeeaf2661..8d338bc9d494 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -94,6 +95,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.tests.codecs.asserting.AssertingKnnVectorsFormat; +import org.apache.lucene.tests.store.BaseDirectoryWrapper; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -123,6 +125,20 @@ public void init() { similarityFunction = randomSimilarity(); } + protected abstract 
boolean supportsFloatVectorFallback(); + + /** + * Returns the number of bits used for quantization to compute epsilon tolerance of float + * quantization errors in test cases. Default is 8 bits; override in subclasses if needed. + */ + protected int getQuantizationBits() { + return 8; + } + + protected Codec getCodecForFloatVectorFallbackTest() { + return getCodec(); // Default implementation + } + @Override protected void addRandomFields(Document doc) { switch (vectorEncoding) { @@ -1913,6 +1929,100 @@ public void testVectorValuesReportCorrectDocs() throws Exception { } } + /** + * Builds a list of {@code numVectors} vectors, each obtained from {@code randomVector(dim)} and + * passed through {@code VectorUtil.l2normalize} when {@code normalize} is true. + */ + private List<float[]> getRandomFloatVector(int numVectors, int dim, boolean normalize) { + List<float[]> vectors = new ArrayList<>(numVectors); + for (int i = 0; i < numVectors; i++) { + float[] vec = randomVector(dim); + if (normalize) { + VectorUtil.l2normalize(vec); + } + vectors.add(vec); + } + return vectors; + } + + /** + * Tests reading quantized vectors when raw vector data is empty. Verifies that scalar quantized + * formats can properly dequantize vectors and maintain accuracy within expected error bounds even + * when the original raw vector file is empty or corrupted.
+ */ + public void testReadQuantizedVectorWithEmptyRawVectors() throws Exception { + assumeTrue("Test only applies to scalar quantized formats", supportsFloatVectorFallback()); + + String vectorFieldName = "vec1"; + int numVectors = 1 + random().nextInt(50); + int dim = random().nextInt(64) + 1; + if (dim % 2 == 1) { + dim++; + } + float eps = (1f / (float) (1 << getQuantizationBits())); + VectorSimilarityFunction similarityFunction = randomSimilarity(); + List<float[]> vectors = + getRandomFloatVector( + numVectors, dim, similarityFunction == VectorSimilarityFunction.COSINE); + + try (BaseDirectoryWrapper dir = newDirectory()) { + dir.setCheckIndexOnClose(false); + + try (IndexWriter w = + new IndexWriter( + dir, + new IndexWriterConfig() + .setMaxBufferedDocs(numVectors + 1) + .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH) + .setMergePolicy(NoMergePolicy.INSTANCE) + .setUseCompoundFile(false) + .setCodec(getCodecForFloatVectorFallbackTest()))) { + for (int i = 0; i < numVectors; i++) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField(vectorFieldName, vectors.get(i), similarityFunction)); + w.addDocument(doc); + } + } + simulateEmptyRawVectors(dir); + + try (IndexReader reader = DirectoryReader.open(dir)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + knnVectorsReader = knnVectorsReader.unwrapReaderForField(vectorFieldName); + FloatVectorValues floatVectorValues = + knnVectorsReader.getFloatVectorValues(vectorFieldName); + if (floatVectorValues.size() > 0) { + KnnVectorValues.DocIndexIterator iter = floatVectorValues.iterator(); + for (int docId = iter.nextDoc(); docId != NO_MORE_DOCS; docId = iter.nextDoc()) { + float[] dequantizedVector = floatVectorValues.vectorValue(iter.index()); + float mae = 0; + for (int i = 0; i < dim; i++) { + mae += Math.abs(dequantizedVector[i] - vectors.get(docId)[i]); + } + mae /= dim; + assertTrue(
+ "bits: " + getQuantizationBits() + " mae: " + mae + " > eps: " + eps, mae <= eps); + } + } else { + fail("floatVectorValues size should be non zero"); + } + } else { + fail("reader is not CodecReader"); + } + } + } + } + + /** + * Simulates empty raw vectors by modifying index files. Override in codecs that support + * FloatVector fallback. + */ + protected void simulateEmptyRawVectors(Directory dir) throws Exception { + throw new UnsupportedOperationException( + "simulateEmptyRawVectors must be implemented by codecs that support FloatVector fallback"); + } + public void testMismatchedFields() throws Exception { Directory dir1 = newDirectory(); IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig());