Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,8 @@ Other
* GITHUB#15481: The `reverse` field of SortField is now final. If you have subclassed SortField,
you should set `reverse` in the super constructor. (Alan Woodward)

* GITHUB#15476: Enforce fallback support for float vector retrieval in quantized KNN vector formats. (Pulkit Gupta)

* GITHUB#15513: Update documentation in DefaultBloomFilterFactory to reflect changes made in GITHUB#11900 (Greg Miller)

Build
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,4 +186,9 @@ public void testQuantizedVectorsWriteAndRead() throws IOException {
}
}
}

@Override
protected boolean supportsFloatVectorFallback() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At first I thought, "huh, maybe the base class should just have a default `return false;` impl", but then I realized that's bad: we want new formats to have to explicitly think about the question "do I support regenerating float[] from compressed/quantized forms?" rather than inherit a dangerous default (which would risk losing the feature again).

If Lucene had PQ (product quantization) working in a new KnnVectorsWriter, which I think both reduces dimensionality and quantizes scalar values, it could in theory re-hydrate float[]?

Copy link
Contributor Author

@Pulkitg64 Pulkitg64 Dec 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we want new formats to have to explicitly think about this question

Yes, that's right :) And if any new test class implements the function and returns true, then the test case will fail with an empty float vector error. This would prompt the user to implement the fallback in the codec.

return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -177,4 +177,9 @@ public void testSimpleOffHeapSize() throws IOException {
}
}
}

@Override
protected boolean supportsFloatVectorFallback() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,9 @@ public void testMergingWithDifferentByteKnnFields() {
public void testMismatchedFields() throws Exception {
// requires byte support
}

@Override
protected boolean supportsFloatVectorFallback() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,9 @@ public void testMergingWithDifferentByteKnnFields() {
public void testMismatchedFields() throws Exception {
// requires byte support
}

@Override
protected boolean supportsFloatVectorFallback() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,9 @@ public void testMergingWithDifferentByteKnnFields() {
public void testMismatchedFields() throws Exception {
// requires byte support
}

@Override
protected boolean supportsFloatVectorFallback() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
"Lucene94RWHnswVectorsFormat(name=Lucene94RWHnswVectorsFormat, maxConn=10, beamWidth=20)";
assertEquals(expectedString, customCodec.getKnnVectorsFormatForField("bogus_field").toString());
}

@Override
protected boolean supportsFloatVectorFallback() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
"Lucene95RWHnswVectorsFormat(name=Lucene95RWHnswVectorsFormat, maxConn=10, beamWidth=20)";
assertEquals(expectedString, customCodec.getKnnVectorsFormatForField("bogus_field").toString());
}

@Override
protected boolean supportsFloatVectorFallback() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -369,4 +369,9 @@ public void testVectorSimilarityFuncs() {
var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues);
}

@Override
protected boolean supportsFloatVectorFallback() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,9 @@ public void testSimpleOffHeapSize() throws IOException {
}
}
}

@Override
protected boolean supportsFloatVectorFallback() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
Expand All @@ -48,7 +47,6 @@
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.apache.lucene.tests.store.BaseDirectoryWrapper;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
Expand Down Expand Up @@ -86,21 +84,6 @@ private Codec getCodec(float confidenceInterval) {
confidenceInterval, bits, bits == 4 ? random().nextBoolean() : false));
}

protected List<float[]> getRandomFloatVector(int numVectors, int dim, boolean normalize) {
List<float[]> vectors = new ArrayList<>(numVectors);
for (int i = 0; i < numVectors; i++) {
float[] vec = randomVector(dim);
if (normalize) {
float[] copy = new float[vec.length];
System.arraycopy(vec, 0, copy, 0, copy.length);
VectorUtil.l2normalize(copy);
vec = copy;
}
vectors.add(vec);
}
return vectors;
}

public void testSearch() throws Exception {
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Expand Down Expand Up @@ -219,75 +202,74 @@ public void testQuantizedVectorsWriteAndRead() throws Exception {
}
}

public void testReadQuantizedVectorWithEmptyRawVectors() throws Exception {
String vectorFieldName = "vec1";
int numVectors = 1 + random().nextInt(50);
int dim = random().nextInt(64) + 1;
if (dim % 2 == 1) {
dim++;
}
VectorSimilarityFunction similarityFunction = randomSimilarity();
List<float[]> vectors =
getRandomFloatVector(
numVectors, dim, similarityFunction == VectorSimilarityFunction.COSINE);
public void testToString() {
FilterCodec customCodec =
new FilterCodec("foo", Codec.getDefault()) {
@Override
public KnnVectorsFormat knnVectorsFormat() {
return new Lucene99ScalarQuantizedVectorsFormat(0.9f, (byte) 4, false);
}
};
String expectedPattern =
"Lucene99ScalarQuantizedVectorsFormat(name=Lucene99ScalarQuantizedVectorsFormat, confidenceInterval=0.9, bits=4, compress=false, flatVectorScorer=%s, rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=%s))";
var defaultScorer =
format(
Locale.ROOT,
expectedPattern,
"ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())",
"DefaultFlatVectorScorer()");
var memSegScorer =
format(
Locale.ROOT,
expectedPattern,
"Lucene99MemorySegmentScalarQuantizedVectorScorer()",
"Lucene99MemorySegmentFlatVectorsScorer()");
assertThat(customCodec.knnVectorsFormat().toString(), is(oneOf(defaultScorer, memSegScorer)));
}

try (BaseDirectoryWrapper dir = newDirectory();
IndexWriter w =
new IndexWriter(
dir,
new IndexWriterConfig()
.setMaxBufferedDocs(numVectors + 1)
.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
.setMergePolicy(NoMergePolicy.INSTANCE)
.setUseCompoundFile(false)
.setCodec(getCodec(1f)))) {
dir.setCheckIndexOnClose(false);
public void testLimits() {
expectThrows(
IllegalArgumentException.class,
() -> new Lucene99ScalarQuantizedVectorsFormat(1.1f, 7, false));
expectThrows(
IllegalArgumentException.class,
() -> new Lucene99ScalarQuantizedVectorsFormat(null, -1, false));
expectThrows(
IllegalArgumentException.class,
() -> new Lucene99ScalarQuantizedVectorsFormat(null, 5, false));
expectThrows(
IllegalArgumentException.class,
() -> new Lucene99ScalarQuantizedVectorsFormat(null, 9, false));
}

for (int i = 0; i < numVectors; i++) {
Document doc = new Document();
doc.add(new KnnFloatVectorField(vectorFieldName, vectors.get(i), similarityFunction));
w.addDocument(doc);
}
w.commit();
@Override
public void testRandomWithUpdatesAndGraph() {
// graph not supported
}

simulateEmptyRawVectors(dir);
@Override
public void testSearchWithVisitedLimit() {
// search not supported
}

try (IndexReader reader = DirectoryReader.open(w)) {
LeafReader r = getOnlyLeafReader(reader);
if (r instanceof CodecReader codecReader) {
KnnVectorsReader knnVectorsReader = codecReader.getVectorReader();
knnVectorsReader = knnVectorsReader.unwrapReaderForField(vectorFieldName);
if (knnVectorsReader instanceof Lucene99ScalarQuantizedVectorsReader quantizedReader) {
FloatVectorValues floatVectorValues =
quantizedReader.getFloatVectorValues(vectorFieldName);
if (floatVectorValues instanceof OffHeapQuantizedFloatVectorValues) {
KnnVectorValues.DocIndexIterator iter = floatVectorValues.iterator();
for (int docId = iter.nextDoc(); docId != NO_MORE_DOCS; docId = iter.nextDoc()) {
float[] dequantizedVector = floatVectorValues.vectorValue(iter.index());
for (int i = 0; i < dim; i++) {
assertEquals(
"docId=" + docId + " i=" + i,
dequantizedVector[i],
vectors.get(docId)[i],
0.2f);
}
}
} else {
fail("floatVectorValues is not OffHeapQuantizedFloatVectorValues");
}
} else {
System.out.println("Vector READER:: " + knnVectorsReader.toString());
fail("reader is not Lucene99ScalarQuantizedVectorsReader");
}
} else {
fail("reader is not CodecReader");
}
}
}
@Override
protected boolean supportsFloatVectorFallback() {
return true;
}

@Override
protected int getQuantizationBits() {
return bits;
}

@Override
protected Codec getCodecForFloatVectorFallbackTest() {
return getCodec(1f);
}

/** Simulates empty raw vectors by modifying index files. */
private void simulateEmptyRawVectors(Directory dir) throws Exception {
@Override
protected void simulateEmptyRawVectors(Directory dir) throws Exception {
final String[] indexFiles = dir.listAll();
final String RAW_VECTOR_EXTENSION = "vec";
final String VECTOR_META_EXTENSION = "vemf";
Expand Down Expand Up @@ -357,54 +339,4 @@ private void updateVectorMetadataFile(Directory dir, String fileName) throws Exc
CodecUtil.writeFooter(out);
}
}

public void testToString() {
FilterCodec customCodec =
new FilterCodec("foo", Codec.getDefault()) {
@Override
public KnnVectorsFormat knnVectorsFormat() {
return new Lucene99ScalarQuantizedVectorsFormat(0.9f, (byte) 4, false);
}
};
String expectedPattern =
"Lucene99ScalarQuantizedVectorsFormat(name=Lucene99ScalarQuantizedVectorsFormat, confidenceInterval=0.9, bits=4, compress=false, flatVectorScorer=%s, rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=%s))";
var defaultScorer =
format(
Locale.ROOT,
expectedPattern,
"ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())",
"DefaultFlatVectorScorer()");
var memSegScorer =
format(
Locale.ROOT,
expectedPattern,
"Lucene99MemorySegmentScalarQuantizedVectorScorer()",
"Lucene99MemorySegmentFlatVectorsScorer()");
assertThat(customCodec.knnVectorsFormat().toString(), is(oneOf(defaultScorer, memSegScorer)));
}

public void testLimits() {
expectThrows(
IllegalArgumentException.class,
() -> new Lucene99ScalarQuantizedVectorsFormat(1.1f, 7, false));
expectThrows(
IllegalArgumentException.class,
() -> new Lucene99ScalarQuantizedVectorsFormat(null, -1, false));
expectThrows(
IllegalArgumentException.class,
() -> new Lucene99ScalarQuantizedVectorsFormat(null, 5, false));
expectThrows(
IllegalArgumentException.class,
() -> new Lucene99ScalarQuantizedVectorsFormat(null, 9, false));
}

@Override
public void testRandomWithUpdatesAndGraph() {
// graph not supported
}

@Override
public void testSearchWithVisitedLimit() {
// search not supported
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,9 @@ public void testRandomBytes() throws Exception {
public void testSortedIndexBytes() throws Exception {
// unimplemented
}

@Override
protected boolean supportsFloatVectorFallback() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -206,4 +206,9 @@ public void testSimpleOffHeapSize() throws IOException {
}
}
}

@Override
protected boolean supportsFloatVectorFallback() {
return false;
}
}
Loading