Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,15 @@ project(faiss
DESCRIPTION "A library for efficient similarity search and clustering of dense vectors."
HOMEPAGE_URL "https://github.com/facebookresearch/faiss"
LANGUAGES ${FAISS_LANGUAGES})

# Force the OpenMP_CUDA_* cache variables so MSVC+CUDA builds don't abort when FAISS_ENABLE_GPU=ON
if(FAISS_ENABLE_GPU)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am just starting the CI workflows now, but I think these blocks should only be applied when compiling on Windows.

set(OpenMP_CUDA_FOUND TRUE CACHE BOOL "Force OpenMP CUDA lookup" FORCE)
set(OpenMP_CUDA_FLAGS "" CACHE STRING "Empty OpenMP CUDA flags" FORCE)
set(OpenMP_CUDA_LIB_NAMES "" CACHE STRING "Empty OpenMP CUDA libs" FORCE)
endif()


include(GNUInstallDirs)

set(CMAKE_CXX_STANDARD 17)
Expand Down
8 changes: 7 additions & 1 deletion faiss/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,13 @@ if(FAISS_USE_LTO)
endif()
endif()

find_package(OpenMP REQUIRED)
#find_package(OpenMP REQUIRED)
find_package(OpenMP REQUIRED COMPONENTS CXX)
find_package(OpenMP OPTIONAL_COMPONENTS CUDA)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CUDA may not be included for non-GPU builds, so, similar to the comment above, if you need OpenMP for Windows CUDA builds, let's put this lookup behind a condition.

if(NOT OpenMP_CUDA_FOUND AND FAISS_ENABLE_GPU)
message(WARNING "OpenMP CUDA‐offload not found; building without it.")
endif()

target_link_libraries(faiss PRIVATE OpenMP::OpenMP_CXX)
target_link_libraries(faiss_avx2 PRIVATE OpenMP::OpenMP_CXX)
target_link_libraries(faiss_avx512 PRIVATE OpenMP::OpenMP_CXX)
Expand Down
10 changes: 5 additions & 5 deletions faiss/gpu/impl/IVFPQ.cu
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ void IVFPQ::appendVectors_(
// (vec x numSubQuantizer x dimPerSubQuantizer)
// transpose to
// (numSubQuantizer x vec x dimPerSubQuantizer)
auto residualsView = ivfCentroidResiduals.view<3>(
auto residualsView = ivfCentroidResiduals.template view<3>(
{ivfCentroidResiduals.getSize(0),
numSubQuantizers_,
dimPerSubQuantizer_});
Expand Down Expand Up @@ -217,7 +217,7 @@ void IVFPQ::appendVectors_(

// Now, we have the nearest sub-q centroid for each slice of the
// residual vector.
auto closestSubQIndex8View = closestSubQIndex8.view<2>(
auto closestSubQIndex8View = closestSubQIndex8.template view<2>(
{numSubQuantizers_, ivfCentroidResiduals.getSize(0)});

// The encodings are finally a transpose of this data
Expand Down Expand Up @@ -443,7 +443,7 @@ void IVFPQ::precomputeCodes_(Index* quantizer) {

// View (centroid id)(sub q)(code id) as
// (centroid id)(sub q * code id)
auto coarsePQProductTransposedView = coarsePQProductTransposed.view<2>(
auto coarsePQProductTransposedView = coarsePQProductTransposed.template view<2>(
{ivfCentroids_.getSize(0),
numSubQuantizers_ * numSubQuantizerCodes_});

Expand All @@ -453,7 +453,7 @@ void IVFPQ::precomputeCodes_(Index* quantizer) {
{
// Compute ||y_R||^2 by treating
// (sub q)(code id)(sub dim) as (sub q * code id)(sub dim)
auto pqCentroidsMiddleCodeView = pqCentroidsMiddleCode_.view<2>(
auto pqCentroidsMiddleCodeView = pqCentroidsMiddleCode_.template view<2>(
{numSubQuantizers_ * numSubQuantizerCodes_,
dimPerSubQuantizer_});
DeviceTensor<float, 1, true> subQuantizerNorms(
Expand Down Expand Up @@ -639,7 +639,7 @@ void IVFPQ::runPQPrecomputedCodes_(
// These allocations within are only temporary, so release them when
// we're done to maximize free space
{
auto querySubQuantizerView = queries.view<3>(
auto querySubQuantizerView = queries.template view<3>(
{queries.getSize(0), numSubQuantizers_, dimPerSubQuantizer_});
DeviceTensor<float, 3, true> queriesTransposed(
resources_,
Expand Down
12 changes: 6 additions & 6 deletions faiss/gpu/impl/PQCodeDistances-inl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ void runPQCodeDistancesMM(
// Perform a batch MM:
// (sub q) x {(q * c)(sub dim) x (sub dim)(code)} =>
// (sub q) x {(q * c)(code)}
auto residualView3 = residual.view<3>(
auto residualView3 = residual.template view<3>(
{pqCentroids.getSize(0),
coarseIndices.getSize(0) * coarseIndices.getSize(1),
pqCentroids.getSize(1)});
Expand Down Expand Up @@ -490,15 +490,15 @@ void runPQCodeDistancesMM(
{pqCentroids.getSize(0) * coarseIndices.getSize(0) *
coarseIndices.getSize(1)});

auto residualView2 = residual.view<2>(
auto residualView2 = residual.template view<2>(
{pqCentroids.getSize(0) * coarseIndices.getSize(0) *
coarseIndices.getSize(1),
pqCentroids.getSize(1)});

runL2Norm(residualView2, true, residualNorms, true, stream);

// Sum ||q - c||^2 along rows
auto residualDistanceView2 = residualDistance.view<2>(
auto residualDistanceView2 = residualDistance.template view<2>(
{pqCentroids.getSize(0) * coarseIndices.getSize(0) *
coarseIndices.getSize(1),
pqCentroids.getSize(2)});
Expand All @@ -509,7 +509,7 @@ void runPQCodeDistancesMM(
// Transpose (sub q)(q * c)(code) to (q * c)(sub q)(code) (which
// is where we build our output distances). L2 version of this has an added
// -2 multiplicative factor
auto outCodeDistancesView = outCodeDistancesF.view<3>(
auto outCodeDistancesView = outCodeDistancesF.template view<3>(
{coarseIndices.getSize(0) * coarseIndices.getSize(1),
outCodeDistances.getSize(2),
outCodeDistances.getSize(3)});
Expand All @@ -529,7 +529,7 @@ void runPQCodeDistancesMM(

runTransposeAny(pqCentroids, 1, 2, pqCentroidsTranspose, stream);

auto pqCentroidsTransposeView = pqCentroidsTranspose.view<2>(
auto pqCentroidsTransposeView = pqCentroidsTranspose.template view<2>(
{pqCentroids.getSize(0) * pqCentroids.getSize(2),
pqCentroids.getSize(1)});

Expand All @@ -544,7 +544,7 @@ void runPQCodeDistancesMM(

// View output as (q * c)(sub q * code), and add centroid norm to
// each row
auto outDistancesCodeViewCols = outCodeDistancesView.view<2>(
auto outDistancesCodeViewCols = outCodeDistancesView.template view<2>(
{coarseIndices.getSize(0) * coarseIndices.getSize(1),
outCodeDistances.getSize(2) * outCodeDistances.getSize(3)});

Expand Down
Loading