Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions faiss/gpu/impl/PQCodeDistances-inl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@
namespace faiss {
namespace gpu {

// LAUNCH_BOUND is the first argument given to __launch_bounds__ on the
// pqCodeDistances kernel, i.e. the maximum number of threads per block the
// kernel may be launched with.
// It is 320 on AMD ROCm builds whose wavefront size macro equals 64, and 288
// in every other configuration (including all non-ROCm builds).
// NOTE(review): 288 = 9 * 32 and 320 = 5 * 64, so both values are presumably
// chosen as whole multiples of the respective warp width -- confirm.
#if defined(USE_AMD_ROCM) && __AMDGCN_WAVEFRONT_SIZE == 64u
#define LAUNCH_BOUND 320
#else
#define LAUNCH_BOUND 288
#endif
// Compile-time thread-per-block ceiling used as the first argument of
// __launch_bounds__ on the pqCodeDistances kernel.
// Yields 288 when kWarpSize is 32 and 320 for any other warp width.
__device__ constexpr inline int getLaunchBound() {
    return (kWarpSize != 32) ? 320 : 288;
}

// Kernel responsible for calculating distance from residual vector to
// each product quantizer code centroid
Expand All @@ -33,7 +31,7 @@ template <
typename CentroidT,
int DimsPerSubQuantizer,
bool L2Distance>
__global__ void __launch_bounds__(LAUNCH_BOUND, 3) pqCodeDistances(
__global__ void __launch_bounds__(getLaunchBound(), 3) pqCodeDistances(
Tensor<float, 2, true> queries,
int queriesPerBlock,
Tensor<CentroidT, 2, true> coarseCentroids,
Expand Down
11 changes: 11 additions & 0 deletions faiss/gpu/utils/DeviceDefs.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,28 @@
#pragma once

#include <cuda.h>
#ifdef USE_AMD_ROCM
#include <rocm-core/rocm_version.h> // ROCm version macros
#if ROCM_VERSION_MAJOR >= 7
#include <rocprim/intrinsics/arch.hpp> // rocprim::arch::wavefront
#endif
#endif

namespace faiss {
namespace gpu {

#ifdef USE_AMD_ROCM

// Define kWarpSize, the warp (wavefront) width constant for this build.
// How the value is obtained depends on the ROCm major version.
#if ROCM_VERSION_MAJOR < 7
// Before ROCm 7: chosen at preprocessing time from the compiler-provided
// __AMDGCN_WAVEFRONT_SIZE macro. Any value other than 32 selects 64.
#if __AMDGCN_WAVEFRONT_SIZE == 32u
constexpr int kWarpSize = 32;
#else
constexpr int kWarpSize = 64;
#endif
#else
// ROCm 7.0 and above
// The width is taken from rocPRIM's rocprim::arch::wavefront::max_size()
// instead of the preprocessor macro. Unlike the pre-7 variants, this
// definition carries the __device__ qualifier.
// NOTE(review): max_size() presumably reports the largest wavefront width
// among the compiled targets rather than the width of the running device --
// confirm against the rocPRIM documentation.
constexpr __device__ int kWarpSize = rocprim::arch::wavefront::max_size();
#endif

// This is a memory barrier for intra-warp writes to shared memory.
__forceinline__ __device__ void warpFence() {
Expand Down