Skip to content

Commit 1f659da

Browse files
authored
Handle corner case in dynamic index with insufficient valid search results (#164)
This PR addresses a rare scenario in dynamic indexes where the search buffer may not be populated with enough results. This typically occurs when a large number of vectors have been deleted, resulting in a sparsely connected graph. To ensure robustness, the buffer is now supplemented with additional results when needed. This is a corner case and is not expected to occur frequently, but handling it improves stability and consistency in edge conditions.
1 parent 91b0816 commit 1f659da

File tree

6 files changed

+60
-16
lines changed

6 files changed

+60
-16
lines changed

include/svs/index/vamana/dynamic_index.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,8 @@ class MutableVamanaIndex {
491491
scratch.buffer,
492492
scratch.scratch,
493493
query,
494-
greedy_search_closure(scratch.prefetch_parameters, cancel)
494+
greedy_search_closure(scratch.prefetch_parameters, cancel),
495+
*this
495496
);
496497
}
497498

@@ -514,7 +515,7 @@ class MutableVamanaIndex {
514515
sp.prefetch_lookahead_, sp.prefetch_step_};
515516

516517
// Legalize search buffer for this search.
517-
if (buffer.target() < num_neighbors) {
518+
if (buffer.target_capacity() < num_neighbors) {
518519
buffer.change_maxsize(num_neighbors);
519520
}
520521
auto scratch = extensions::per_thread_batch_search_setup(data_, distance_);
@@ -527,6 +528,7 @@ class MutableVamanaIndex {
527528
results,
528529
threads::UnitRange{is},
529530
greedy_search_closure(prefetch_parameters, cancel),
531+
*this,
530532
cancel
531533
);
532534
}

include/svs/index/vamana/dynamic_search_buffer.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,11 +193,14 @@ template <typename Idx, typename Cmp = std::less<>> class MutableBuffer {
193193
/// @brief Return the number of valid elements currently contained in the buffer.
194194
size_t valid() const { return valid_; }
195195

196-
/// @brief Return the target number of valid candidates.
197-
size_t target() const { return valid_capacity_; }
196+
/// @brief Return the target valid capacity, which corresponds to the buffer capacity.
197+
size_t target_capacity() const { return valid_capacity_; }
198+
199+
/// @brief Return the target number of valid candidates, which corresponds to the search window.
200+
size_t target_window() const { return target_valid_; }
198201

199202
/// @brief Return whether or not the buffer contains its target number of candidates.
200-
bool full() const { return valid() == target(); }
203+
bool full() const { return valid() == target_capacity(); }
201204

202205
/// @brief Return the candidate at index `i`.
203206
///
@@ -577,7 +580,7 @@ template <typename Idx, typename Cmp = std::less<>> class MutableBuffer {
577580
/// If the number of valid candidates is *less* than the target, a negative number
578581
/// is returned.
579582
int64_t slack() const {
580-
return lib::narrow_cast<int64_t>(valid()) - lib::narrow_cast<int64_t>(target());
583+
return lib::narrow_cast<int64_t>(valid()) - lib::narrow_cast<int64_t>(target_capacity());
581584
}
582585

583586
/// Return the index of the first preceding valid candidate beginning at the provided
@@ -597,7 +600,7 @@ template <typename Idx, typename Cmp = std::less<>> class MutableBuffer {
597600
template <typename Idx, typename Cmp>
598601
std::ostream& operator<<(std::ostream& io, const MutableBuffer<Idx, Cmp>& buffer) {
599602
return io << "MutableBuffer<" << datatype_v<Idx> << ">("
600-
<< "target_valid = " << buffer.target()
603+
<< "target_valid = " << buffer.target_capacity()
601604
<< ", best_unvisited = " << buffer.best_unvisited()
602605
<< ", valid = " << buffer.valid() << ", size = " << buffer.size() << ")";
603606
}

include/svs/index/vamana/extensions.h

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -410,19 +410,44 @@ struct VamanaSingleSearchType {
410410
typename SearchBuffer,
411411
typename Scratch,
412412
typename Query,
413-
typename Search>
413+
typename Search,
414+
typename Index>
414415
void operator()(
415416
const Data& data,
416417
SearchBuffer& search_buffer,
417418
Scratch& scratch,
418419
const Query& query,
419420
const Search& search,
421+
const Index& index,
420422
const lib::DefaultPredicate& cancel = lib::Returns(lib::Const<false>())
421423
) const {
422-
svs::svs_invoke(*this, data, search_buffer, scratch, query, search, cancel);
424+
svs::svs_invoke(*this, data, search_buffer, scratch, query, search, index, cancel);
423425
}
424426
};
425427

428+
/// In rare cases, the search buffer may not be filled with enough results.
429+
/// This can occur in dynamic indexes when many vectors have been deleted
430+
/// and the graph becomes sparsely connected. It's a corner case and should
431+
/// not happen frequently, but when it does, we may need to supplement the buffer
432+
/// with additional results.
433+
template <typename Index, typename SearchBuffer, typename Query>
434+
void check_and_supplement_search_buffer(
435+
const Index& index, SearchBuffer& search_buffer, const Query& query
436+
) {
437+
if (search_buffer.valid() < search_buffer.target_window() &&
438+
search_buffer.valid() < index.size()) {
439+
for (auto external_id : index.external_ids()) {
440+
auto internal_id = index.translate_external_id(external_id);
441+
auto dist = index.get_distance(external_id, query);
442+
auto builder = index.internal_search_builder();
443+
search_buffer.insert(builder(internal_id, dist));
444+
if (search_buffer.valid() >= search_buffer.target_window()) {
445+
break;
446+
}
447+
}
448+
}
449+
}
450+
426451
/// Customization point object for processing single queries.
427452
inline constexpr VamanaSingleSearchType single_search{};
428453

@@ -434,14 +459,16 @@ template <
434459
typename SearchBuffer,
435460
typename Distance,
436461
typename Query,
437-
typename Search>
462+
typename Search,
463+
typename Index>
438464
SVS_FORCE_INLINE void svs_invoke(
439465
svs::tag_t<single_search>,
440466
const Data& SVS_UNUSED(dataset),
441467
SearchBuffer& search_buffer,
442468
Distance& distance,
443469
const Query& query,
444470
const Search& search,
471+
const Index& index,
445472
const lib::DefaultPredicate& cancel = lib::Returns(lib::Const<false>())
446473
) {
447474
// Check if request to cancel the search
@@ -451,6 +478,10 @@ SVS_FORCE_INLINE void svs_invoke(
451478
// Perform graph search.
452479
auto accessor = data::GetDatumAccessor();
453480
search(query, accessor, distance, search_buffer);
481+
482+
if constexpr (Index::needs_id_translation) {
483+
check_and_supplement_search_buffer(index, search_buffer, query);
484+
}
454485
}
455486

456487
///
@@ -488,7 +519,8 @@ struct VamanaPerThreadBatchSearchType {
488519
typename Scratch,
489520
data::ImmutableMemoryDataset Queries,
490521
std::integral I,
491-
typename Search>
522+
typename Search,
523+
typename Index>
492524
SVS_FORCE_INLINE void operator()(
493525
const Data& data,
494526
SearchBuffer& search_buffer,
@@ -497,6 +529,7 @@ struct VamanaPerThreadBatchSearchType {
497529
QueryResultView<I>& result,
498530
threads::UnitRange<size_t> thread_indices,
499531
const Search& search,
532+
const Index& index,
500533
const lib::DefaultPredicate& cancel = lib::Returns(lib::Const<false>())
501534
) const {
502535
svs::svs_invoke(
@@ -508,6 +541,7 @@ struct VamanaPerThreadBatchSearchType {
508541
result,
509542
thread_indices,
510543
search,
544+
index,
511545
cancel
512546
);
513547
}
@@ -523,7 +557,8 @@ template <
523557
typename Distance,
524558
typename Queries,
525559
std::integral I,
526-
typename Search>
560+
typename Search,
561+
typename Index>
527562
void svs_invoke(
528563
svs::tag_t<per_thread_batch_search>,
529564
const Data& dataset,
@@ -533,6 +568,7 @@ void svs_invoke(
533568
QueryResultView<I>& result,
534569
threads::UnitRange<size_t> thread_indices,
535570
const Search& search,
571+
const Index& index,
536572
const lib::DefaultPredicate& cancel = lib::Returns(lib::Const<false>())
537573
) {
538574
// Fallback implementation
@@ -544,7 +580,7 @@ void svs_invoke(
544580
}
545581
// Perform search - results will be queued in the search buffer.
546582
single_search(
547-
dataset, search_buffer, distance, queries.get_datum(i), search, cancel
583+
dataset, search_buffer, distance, queries.get_datum(i), search, index, cancel
548584
);
549585

550586
// Copy back results.

include/svs/index/vamana/index.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,8 @@ class VamanaIndex {
510510
scratch.buffer,
511511
scratch.scratch,
512512
query,
513-
greedy_search_closure(scratch.prefetch_parameters, cancel)
513+
greedy_search_closure(scratch.prefetch_parameters, cancel),
514+
*this
514515
);
515516
}
516517

@@ -592,6 +593,7 @@ class VamanaIndex {
592593
result,
593594
threads::UnitRange{is},
594595
greedy_search_closure(prefetch_parameters, cancel),
596+
*this,
595597
cancel
596598
);
597599
}

include/svs/index/vamana/iterator.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,8 @@ template <typename Index, typename QueryType> class BatchIterator {
303303
scratchspace_.buffer,
304304
scratchspace_.scratch,
305305
lib::as_const_span(query_),
306-
search_closure
306+
search_closure,
307+
*parent_
307308
);
308309
});
309310

tests/svs/index/vamana/search_buffer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,7 @@ CATCH_TEST_CASE("MutableBuffer", "[core][search_buffer]") {
736736
CATCH_SECTION("Full Buffer") {
737737
// We should be able to add elements to the buffer.
738738
// Valid elements should only be appended until 4 have been added.
739-
CATCH_REQUIRE(b.target() == 4);
739+
CATCH_REQUIRE(b.target_capacity() == 4);
740740
CATCH_REQUIRE(b.size() == 0);
741741
CATCH_REQUIRE(b.valid() == 0);
742742
CATCH_REQUIRE(!b.full());

0 commit comments

Comments
 (0)