Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "(gdb) Debug build_memory_index",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/apps/build_memory_index",
"args": [
"--data_type",
"float",
"--dist_fn",
"l2",
"--data_path",
"data/sift/sift_learn.fbin",
"--index_path_prefix",
"data/sift/index_sift_learn_R32_L50_A1.2",
"-R",
"32",
"-L",
"50",
"--alpha",
"1.2"
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}/build",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
],
"preLaunchTask": "build-debug"
},
{
"name": "(gdb) Launch simple_test",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/apps/simple_test",
"args": [
"${workspaceFolder}/test_data.bin"
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
],
"preLaunchTask": "generate-test-data"
},
{
"name": "(gdb) Launch test_streaming_scenario",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/apps/test_streaming_scenario",
"args": [
"--data_type",
"float",
"--dist_fn",
"l2",
"--index_path_prefix",
"test_index",
"--data_path",
"YOUR_DATA_PATH.bin",
"--active_window",
"10000",
"--consolidate_interval",
"1000",
"--start_point_norm",
"1.0"
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
],
"preLaunchTask": "build-debug"
}
]
}
75 changes: 75 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{
"files.associations": {
"iostream": "cpp",
"ostream": "cpp",
"cctype": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"any": "cpp",
"array": "cpp",
"atomic": "cpp",
"strstream": "cpp",
"bit": "cpp",
"bitset": "cpp",
"chrono": "cpp",
"codecvt": "cpp",
"compare": "cpp",
"complex": "cpp",
"concepts": "cpp",
"condition_variable": "cpp",
"cstdint": "cpp",
"deque": "cpp",
"list": "cpp",
"map": "cpp",
"set": "cpp",
"string": "cpp",
"unordered_map": "cpp",
"unordered_set": "cpp",
"vector": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"optional": "cpp",
"random": "cpp",
"ratio": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"utility": "cpp",
"fstream": "cpp",
"future": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"mutex": "cpp",
"new": "cpp",
"numbers": "cpp",
"semaphore": "cpp",
"shared_mutex": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"stop_token": "cpp",
"streambuf": "cpp",
"thread": "cpp",
"cfenv": "cpp",
"cinttypes": "cpp",
"typeindex": "cpp",
"typeinfo": "cpp",
"variant": "cpp"
}
}
29 changes: 29 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"version": "2.0.0",
"tasks": [
{
    // Default build task: configures the project in Debug mode and builds it
    // incrementally inside ./build (relative to the task's working directory).
    "label": "build-debug",
    "type": "shell",
    //"command": "rm -rf build && mkdir build && cd build && cmake .. && make -j",
    // `mkdir -p` keeps the task working on a fresh checkout where build/ does not
    // exist yet, without wiping an existing build tree.
    "command": "mkdir -p build && cd build && cmake -DCMAKE_BUILD_TYPE=Debug .. && make -j",
    "group": {
        "kind": "build",
        "isDefault": true
    },
    "problemMatcher": [
        "$gcc"
    ],
    "detail": "Configure and build the DiskANN project for debugging."
},
{
"label": "generate-test-data",
"type": "shell",
"command": "${workspaceFolder}/build/apps/utils/generate_test_data test_data.bin 10000 128",
"dependsOn": [
"build-debug"
],
"problemMatcher": [],
"detail": "Generate a binary data file for testing."
}
]
}
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
cmake_minimum_required(VERSION 3.15)
project(diskann)

# Default to a Debug build when no build type was given on the command line
# (or is already cached), for easier debugging setup.
# NOTE(review): this makes a plain `cmake ..` produce unoptimized binaries;
# confirm that Debug is the intended default for non-debugging builds as well.
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Debug CACHE STRING "Choose the type of build (Debug, Release, etc.).")
endif()

set(CMAKE_STANDARD 17)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
Expand Down
5 changes: 5 additions & 0 deletions apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ target_link_libraries(test_streaming_scenario ${PROJECT_NAME} ${DISKANN_TOOLS_TC
add_executable(test_insert_deletes_consolidate test_insert_deletes_consolidate.cpp)
target_link_libraries(test_insert_deletes_consolidate ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options)

# Minimal build-and-search program, mainly intended for stepping through the
# index code in a debugger.
add_executable(simple_test simple_test.cpp)
target_link_libraries(simple_test ${PROJECT_NAME})
# Always emit debug symbols for this target, whatever the build type.
# -g is a GCC/Clang flag, so it is skipped for MSVC.
if (NOT MSVC)
    target_compile_options(simple_test PRIVATE -g)
endif()

if (NOT MSVC)
install(TARGETS build_memory_index
build_stitched_index
Expand All @@ -37,6 +41,7 @@ if (NOT MSVC)
range_search_disk_index
test_streaming_scenario
test_insert_deletes_consolidate
simple_test
RUNTIME
)
endif()
61 changes: 61 additions & 0 deletions apps/simple_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#include <iostream>
#include "index.h"
#include "utils.h"

int main(int argc, char **argv)
{
if (argc != 2)
{
std::cout << "Usage: " << argv[0] << " <data_file>" << std::endl;
return 1;
}

std::string data_path = argv[1];
diskann::Metric metric = diskann::Metric::L2;
size_t num_points, dim;
diskann::get_bin_metadata(data_path, num_points, dim);

// Build parameters
unsigned R = 24;
unsigned L = 100;
float alpha = 1.2f;
unsigned num_threads = 4;

// Create index
auto write_params =
diskann::IndexWriteParametersBuilder(L, R).with_num_threads(num_threads).with_alpha(alpha).build();
auto search_params = std::make_shared<diskann::IndexSearchParams>(L, num_threads);
diskann::Index<float, uint32_t> index(metric, dim, num_points,
std::make_shared<diskann::IndexWriteParameters>(write_params), search_params);
index.build(data_path.c_str(), num_points);

// Search parameters
unsigned Lsearch = 100;
unsigned K = 10; // Number of neighbors to search for
unsigned num_queries = 1;
std::vector<uint32_t> query_result_ids(num_queries * K);
std::vector<float> query_result_dists(num_queries * K);

// Load query data (using the first point from the dataset as a query)
float *query_data = nullptr;
diskann::alloc_aligned((void **)&query_data, dim * sizeof(float), 8 * sizeof(float));
std::ifstream reader(data_path, std::ios::binary);
reader.seekg(2 * sizeof(int)); // Skip npts and dim
reader.read((char *)query_data, dim * sizeof(float));
reader.close();

// Add a pause to allow the debugger to attach properly
// std::cout << "\nPress Enter to start the search..." << std::endl;
// std::cin.get();

// Perform search
index.search(query_data, K, Lsearch, query_result_ids.data(), query_result_dists.data());

std::cout << "Search complete. Found " << K << " neighbors." << std::endl;
std::cout << "Nearest neighbor ID: " << query_result_ids[0] << " with distance " << query_result_dists[0]
<< std::endl;

diskann::aligned_free(query_data);

return 0;
}
5 changes: 4 additions & 1 deletion apps/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ target_link_libraries(generate_synthetic_labels ${PROJECT_NAME} Boost::program_o
add_executable(stats_label_data stats_label_data.cpp)
target_link_libraries(stats_label_data ${PROJECT_NAME} Boost::program_options)

# Standalone helper that writes a random binary test dataset. Its source only
# includes C++ standard library headers, so no link libraries are listed.
add_executable(generate_test_data generate_test_data.cpp)

if (NOT MSVC)
include(GNUInstallDirs)
install(TARGETS fvecs_to_bin
Expand Down Expand Up @@ -105,6 +107,7 @@ if (NOT MSVC)
create_disk_layout
generate_synthetic_labels
stats_label_data
generate_test_data
RUNTIME
)
endif()
endif()
45 changes: 45 additions & 0 deletions apps/utils/generate_test_data.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include <exception>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
#include <vector>

/**
 * Writes a reproducible random dataset to a binary file.
 *
 * File layout: `num_points` (int), `dim` (int), then `num_points * dim`
 * floats drawn uniformly from [0, 1) in row order. The generator is seeded
 * with a fixed value, so the same arguments always produce the same file.
 *
 * Failures (negative sizes, file cannot be opened, write error) are reported
 * on stderr and the success message is not printed; nothing is thrown for them.
 *
 * @param filename   Path of the file to create or overwrite.
 * @param num_points Number of vectors to write; must be non-negative.
 * @param dim        Number of floats per vector; must be non-negative.
 */
void generate_random_data(const std::string& filename, int num_points, int dim) {
    // Reject negative sizes before touching the file system: a negative dim
    // would otherwise be converted to a huge unsigned buffer size.
    if (num_points < 0 || dim < 0) {
        std::cerr << "Error: num_points and dim must be non-negative (got " << num_points << ", " << dim << ")"
                  << std::endl;
        return;
    }

    std::ofstream writer(filename, std::ios::binary);
    if (!writer) {
        std::cerr << "Error opening file for writing: " << filename << std::endl;
        return;
    }

    writer.write(reinterpret_cast<const char*>(&num_points), sizeof(int));
    writer.write(reinterpret_cast<const char*>(&dim), sizeof(int));

    std::mt19937 rng(42); // Seed for reproducibility
    std::uniform_real_distribution<float> dist(0.0f, 1.0f);

    std::vector<float> buffer(static_cast<std::vector<float>::size_type>(dim));
    // Stop early once the stream has failed; further writes would be no-ops.
    for (int i = 0; i < num_points && writer; ++i) {
        for (float& value : buffer) {
            value = dist(rng);
        }
        writer.write(reinterpret_cast<const char*>(buffer.data()),
                     static_cast<std::streamsize>(buffer.size() * sizeof(float)));
    }

    // close() flushes; a failure here (e.g. disk full) is reflected in the stream state.
    writer.close();
    if (!writer) {
        std::cerr << "Error writing data to file: " << filename << std::endl;
        return;
    }

    std::cout << "Successfully generated " << num_points << " points of dimension " << dim << " to " << filename << std::endl;
}

/**
 * Command-line entry point: generate_test_data <output_filename> <num_points> <dimensions>
 *
 * Parses the two size arguments as positive integers and hands them to
 * generate_random_data.
 *
 * @return 0 once generation has been attempted, 1 when the argument count is
 *         wrong or a size argument is not a positive integer.
 */
int main(int argc, char** argv) {
    if (argc != 4) {
        std::cout << "Usage: " << argv[0] << " <output_filename> <num_points> <dimensions>" << std::endl;
        return 1;
    }

    // Parses `text` as a strictly positive int. Rejects non-numeric input,
    // out-of-range values and trailing characters (e.g. "12abc") instead of
    // letting std::stoi throw or silently accept a prefix.
    const auto parse_positive_int = [](const std::string& text, int& out) {
        try {
            std::string::size_type consumed = 0;
            out = std::stoi(text, &consumed);
            return consumed == text.size() && out > 0;
        } catch (const std::exception&) {
            return false;
        }
    };

    const std::string filename = argv[1];
    int num_points = 0;
    int dim = 0;
    if (!parse_positive_int(argv[2], num_points) || !parse_positive_int(argv[3], dim)) {
        std::cerr << "Error: <num_points> and <dimensions> must be positive integers (got '" << argv[2] << "' and '"
                  << argv[3] << "')" << std::endl;
        return 1;
    }

    generate_random_data(filename, num_points, dim);

    return 0;
}
Binary file added test_data.bin
Binary file not shown.