@@ -129,8 +129,10 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
129129 label_offset_ = size_links_level0_ + data_size_;
130130 offsetLevel0_ = 0 ;
131131
132+ // Allocate 64 extra bytes (one cache line) per chunk so that prefetching
133+ // just past the chunk's last element stays within allocated memory.
132134 data_level0_memory_ = ChunkedArray (
133- size_data_per_element_, k_elements_per_chunk, max_elements);
135+ size_data_per_element_, k_elements_per_chunk, max_elements, 64 );
134136
135137 cur_element_count = 0 ;
136138
@@ -141,7 +143,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
141143 maxlevel_ = -1 ;
142144
143145 linkLists_ = ChunkedArray (
144- sizeof (void *), k_elements_per_chunk, max_elements);
146+ sizeof (void *), k_elements_per_chunk, max_elements, 0 );
145147
146148 size_links_per_element_ = maxM_ * sizeof (tableint) + sizeof (linklistsizeint);
147149 mult_ = 1 / log (1.0 * M_);
@@ -226,7 +228,6 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
226228 return (data_level0_memory_[internal_id] + offsetData_);
227229 }
228230
229-
230231 int getRandomLevel (double reverse_size) {
231232 std::uniform_real_distribution<double > distribution (0.0 , 1.0 );
232233 double r = -log (distribution (level_generator_)) * reverse_size;
@@ -286,36 +287,26 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
286287 }
287288 size_t size = getListCount ((linklistsizeint*)data);
288289 tableint *datal = (tableint *) (data + 1 );
289- #ifdef USE_SSE
290- #if HNSWLIB_USE_PREFETCH
291- _mm_prefetch ((char *) (visited_array + *(data + 1 )), _MM_HINT_T0);
292- _mm_prefetch ((char *) (visited_array + *(data + 1 ) + 64 ), _MM_HINT_T0);
293- _mm_prefetch (getDataByInternalId (*datal), _MM_HINT_T0);
294- _mm_prefetch (getDataByInternalId (*(datal + 1 )), _MM_HINT_T0);
295- #endif
296- #endif
290+ HNSWLIB_MM_PREFETCH ((char *) (visited_array + *(data + 1 )), _MM_HINT_T0);
291+ HNSWLIB_MM_PREFETCH ((char *) (visited_array + *(data + 1 ) + 64 ), _MM_HINT_T0);
292+ HNSWLIB_MM_PREFETCH (getDataByInternalId (*datal), _MM_HINT_T0);
293+ HNSWLIB_MM_PREFETCH (getDataByInternalId (*(datal + 1 )), _MM_HINT_T0);
297294
298295 for (size_t j = 0 ; j < size; j++) {
299296 tableint candidate_id = *(datal + j);
300297// if (candidate_id == 0) continue;
301- #ifdef USE_SSE
302- #if HNSWLIB_USE_PREFETCH
303- _mm_prefetch ((char *) (visited_array + *(datal + j + 1 )), _MM_HINT_T0);
304- _mm_prefetch (getDataByInternalId (*(datal + j + 1 )), _MM_HINT_T0);
305- #endif
306- #endif
298+ if (j + 1 < size) {
299+ HNSWLIB_MM_PREFETCH ((char *) (visited_array + *(datal + j + 1 )), _MM_HINT_T0);
300+ HNSWLIB_MM_PREFETCH (getDataByInternalId (*(datal + j + 1 )), _MM_HINT_T0);
301+ }
307302 if (visited_array[candidate_id] == visited_array_tag) continue ;
308303 visited_array[candidate_id] = visited_array_tag;
309304 char *currObj1 = (getDataByInternalId (candidate_id));
310305
311306 dist_t dist1 = fstdistfunc_ (data_point, currObj1, dist_func_param_);
312307 if (top_candidates.size () < ef_construction_ || lowerBound > dist1) {
313308 candidateSet.emplace (-dist1, candidate_id);
314- #ifdef USE_SSE
315- #if HNSWLIB_USE_PREFETCH
316- _mm_prefetch (getDataByInternalId (candidateSet.top ().second ), _MM_HINT_T0);
317- #endif
318- #endif
309+ HNSWLIB_MM_PREFETCH (getDataByInternalId (candidateSet.top ().second ), _MM_HINT_T0);
319310
320311 if (!isMarkedDeleted (candidate_id))
321312 top_candidates.emplace (dist1, candidate_id);
@@ -396,25 +387,18 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
396387 metric_distance_computations+=size;
397388 }
398389
399- #ifdef USE_SSE
400- #if HNSWLIB_USE_PREFETCH
401- _mm_prefetch ((char *) (visited_array + *(data + 1 )), _MM_HINT_T0);
402- _mm_prefetch ((char *) (visited_array + *(data + 1 ) + 64 ), _MM_HINT_T0);
403- _mm_prefetch (data_level0_memory_[*(data + 1 )] + offsetData_, _MM_HINT_T0);
404- _mm_prefetch ((char *) (data + 2 ), _MM_HINT_T0);
405- #endif
406- #endif
390+ HNSWLIB_MM_PREFETCH ((char *) (visited_array + *(data + 1 )), _MM_HINT_T0);
391+ HNSWLIB_MM_PREFETCH ((char *) (visited_array + *(data + 1 ) + 64 ), _MM_HINT_T0);
392+ HNSWLIB_MM_PREFETCH (data_level0_memory_[*(data + 1 )] + offsetData_, _MM_HINT_T0);
393+ HNSWLIB_MM_PREFETCH ((char *) (data + 2 ), _MM_HINT_T0);
407394
408395 for (size_t j = 1 ; j <= size; j++) {
409396 int candidate_id = *(data + j);
410- // if (candidate_id == 0) continue;
411- #ifdef USE_SSE
412- #if HNSWLIB_USE_PREFETCH
413- _mm_prefetch ((char *) (visited_array + *(data + j + 1 )), _MM_HINT_T0);
414- _mm_prefetch (data_level0_memory_[*(data + j + 1 )] + offsetData_,
415- _MM_HINT_T0); // //////////
416- #endif
417- #endif
397+ if (j < size) {
398+ HNSWLIB_MM_PREFETCH ((char *) (visited_array + *(data + j + 1 )), _MM_HINT_T0);
399+ HNSWLIB_MM_PREFETCH (data_level0_memory_[*(data + j + 1 )] + offsetData_,
400+ _MM_HINT_T0);
401+ }
418402 if (!(visited_array[candidate_id] == visited_array_tag)) {
419403 visited_array[candidate_id] = visited_array_tag;
420404
@@ -430,13 +414,9 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
430414
431415 if (flag_consider_candidate) {
432416 candidate_set.emplace (-dist, candidate_id);
433- #ifdef USE_SSE
434- #if HNSWLIB_USE_PREFETCH
435- _mm_prefetch (data_level0_memory_[candidate_set.top ().second ] +
417+ HNSWLIB_MM_PREFETCH (data_level0_memory_[candidate_set.top ().second ] +
436418 offsetLevel0_, // /////////
437419 _MM_HINT_T0); // //////////////////////
438- #endif
439- #endif
440420
441421 if (bare_bone_search ||
442422 (!isMarkedDeleted (candidate_id) && ((!isIdAllowed) || (*isIdAllowed)(getExternalLabel (candidate_id))))) {
@@ -822,7 +802,8 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
822802 data_level0_memory_ = ChunkedArray (
823803 size_data_per_element_,
824804 k_elements_per_chunk,
825- max_elements);
805+ max_elements,
806+ 64 );
826807 data_level0_memory_.readFromStream (input, cur_element_count);
827808
828809 size_links_per_element_ = maxM_ * sizeof (tableint) + sizeof (linklistsizeint);
@@ -833,7 +814,9 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
833814
834815 visited_list_pool_.reset (new VisitedListPool (1 , max_elements));
835816
836- linkLists_.resize (max_elements);
817+ linkLists_ = ChunkedArray (
818+ sizeof (void *), k_elements_per_chunk, max_elements, 0 );
819+
837820 element_levels_ = std::vector<int >(max_elements);
838821 revSize_ = 1.0 / mult_;
839822 ef_ = 10 ;
@@ -1126,17 +1109,9 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
11261109 data = get_linklist_at_level (currObj, level);
11271110 int size = getListCount (data);
11281111 tableint *datal = (tableint *) (data + 1 );
1129- #ifdef USE_SSE
1130- #if HNSWLIB_USE_PREFETCH
1131- _mm_prefetch (getDataByInternalId (*datal), _MM_HINT_T0);
1132- #endif
1133- #endif
1112+ HNSWLIB_MM_PREFETCH (getDataByInternalId (*datal), _MM_HINT_T0);
11341113 for (int i = 0 ; i < size; i++) {
1135- #ifdef USE_SSE
1136- #if HNSWLIB_USE_PREFETCH
1137- _mm_prefetch (getDataByInternalId (*(datal + i + 1 )), _MM_HINT_T0);
1138- #endif
1139- #endif
1114+ HNSWLIB_MM_PREFETCH (getDataByInternalId (*(datal + i + 1 )), _MM_HINT_T0);
11401115 tableint cand = datal[i];
11411116 dist_t d = fstdistfunc_ (dataPoint, getDataByInternalId (cand), dist_func_param_);
11421117 if (d < curdist) {
@@ -1523,7 +1498,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
15231498 if (isMarkedDeleted (internalId)) {
15241499 unsigned char *ll_cur = ((unsigned char *)get_linklist0 (internalId)) + 2 ;
15251500 *ll_cur &= ~DELETE_MARK;
1526- num_deleted_ -= 1 ;
1501+ num_deleted_ -= 1 ;
15271502 if (allow_replace_deleted_) {
15281503 std::unique_lock <std::mutex> lock_deleted_elements (deleted_elements_lock);
15291504 deleted_elements.erase (internalId);
0 commit comments