Skip to content

Commit 4cccd87

Browse files
committed
update
1 parent 3e4ac37 commit 4cccd87

File tree

3 files changed

+26
-32
lines changed

3 files changed

+26
-32
lines changed

glass/memory.hpp

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,21 @@
33
#include <sys/mman.h>
44

55
#include <cstdint>
6+
#include <cstdio>
67
#include <cstdlib>
78
#include <cstring>
89

9-
#include "glass/common.hpp"
10-
1110
namespace glass {
1211

1312
constexpr size_t size_64B = 64;
1413
constexpr size_t size_2M = 2 * 1024 * 1024;
1514
constexpr size_t size_1G = 1 * 1024 * 1024 * 1024;
1615

17-
template <size_t alignment>
18-
inline void *align_alloc_memory(size_t nbytes, bool set = true, uint8_t x = 0) {
16+
inline void *align_alloc_memory(size_t alignment, size_t nbytes, bool set = true, uint8_t x = 0) {
1917
size_t len = (nbytes + alignment - 1) / alignment * alignment;
20-
if (alignment == size_1G) {
21-
printf("Allocating %.2fG memory for %.2fG data\n", double(len) / size_1G, double(nbytes) / size_1G);
22-
}
2318
auto p = std::aligned_alloc(alignment, len);
24-
if constexpr (alignment >= size_2M) {
19+
if (alignment >= size_2M) {
20+
printf("Allocate %.2fMB for %.2fMB data\n", double(len) / 1024 / 1024, double(nbytes) / 1024 / 1024);
2521
madvise(p, len, MADV_HUGEPAGE);
2622
}
2723
if (set) {
@@ -31,12 +27,12 @@ inline void *align_alloc_memory(size_t nbytes, bool set = true, uint8_t x = 0) {
3127
}
3228

3329
inline void *align_alloc(size_t nbytes, bool set = true, uint8_t x = 0) {
34-
if (nbytes >= size_1G) {
35-
return align_alloc_memory<size_1G>(nbytes, set, x);
30+
if (nbytes >= size_1G / 2) {
31+
return align_alloc_memory(size_1G, nbytes, set, x);
3632
} else if (nbytes >= size_2M) {
37-
return align_alloc_memory<size_2M>(nbytes, set, x);
33+
return align_alloc_memory(size_2M, nbytes, set, x);
3834
} else {
39-
return align_alloc_memory<size_64B>(nbytes, set, x);
35+
return align_alloc_memory(size_64B, nbytes, set, x);
4036
}
4137
}
4238

glass/quant/computer.hpp

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ concept SymComputerConcept = ComputerBaseConcept<Computer> && requires(Computer
2929

3030
template <StorageConcept Storage>
3131
struct Computer {
32-
const Storage &tensor;
32+
const Storage &storage;
3333

34-
explicit Computer(const Tensor &tensor) : tensor(tensor) {}
34+
explicit Computer(const Storage &storage) : storage(storage) {}
3535

36-
void prefetch(int32_t u, int32_t lines) const { tensor.prefetch(u, lines); }
36+
void prefetch(int32_t u, int32_t lines) const { storage.prefetch(u, lines); }
3737
};
3838

3939
struct MemCpyTag {};
@@ -51,12 +51,12 @@ struct ComputerImpl : Computer<Storage> {
5151
mutable int64_t dist_cmps_{};
5252
mutable int64_t mem_read_bytes_{};
5353

54-
ComputerImpl(const Storage &tensor, const S *query, const auto &encoder, Args &&...args)
55-
: Computer<Storage>(tensor), args(std::forward<Args>(args)...) {
54+
ComputerImpl(const Storage &storage, const S *query, const auto &encoder, Args &&...args)
55+
: Computer<Storage>(storage), args(std::forward<Args>(args)...) {
5656
if constexpr (std::is_same_v<std::decay_t<decltype(encoder)>, MemCpyTag>) {
5757
static_assert(std::is_same_v<S, X>);
58-
q = (X *)align_alloc(this->tensor.dim_align() * sizeof(X));
59-
memcpy(q, query, this->tensor.dim() * sizeof(X));
58+
q = (X *)align_alloc(this->storage.dim_align() * sizeof(X));
59+
memcpy(q, query, this->storage.dim() * sizeof(X));
6060
} else {
6161
encoder((const S *)query, q);
6262
}
@@ -66,11 +66,10 @@ struct ComputerImpl : Computer<Storage> {
6666

6767
GLASS_INLINE dist_type operator()(const Y *p) const {
6868
dist_cmps_++;
69-
mem_read_bytes_ += this->tensor.code_size();
70-
return std::apply([&](auto &&...args) { return dist_func(q, p, this->tensor.dim_align(), args...); }, args);
69+
return std::apply([&](auto &&...args) { return dist_func(q, p, this->storage.dim_align(), args...); }, args);
7170
}
7271

73-
GLASS_INLINE dist_type operator()(int32_t u) const { return operator()((const Y *)this->tensor.get(u)); }
72+
GLASS_INLINE dist_type operator()(int32_t u) const { return operator()((const Y *)this->storage.get(u)); }
7473

7574
GLASS_INLINE size_t dist_cmps() const { return dist_cmps_; }
7675

@@ -86,15 +85,15 @@ struct SymComputerImpl : Computer<Storage> {
8685

8786
std::tuple<Args...> args;
8887

89-
SymComputerImpl(const Storage &tensor, Args &&...args)
90-
: Computer<Storage>(tensor), args(std::forward<Args>(args)...) {}
88+
SymComputerImpl(const Storage &storage, Args &&...args)
89+
: Computer<Storage>(storage), args(std::forward<Args>(args)...) {}
9190

9291
GLASS_INLINE dist_type operator()(const X *x, const X *y) const {
93-
return std::apply([&](auto &&...args) { return dist_func(x, y, this->tensor.dim_align(), args...); }, args);
92+
return std::apply([&](auto &&...args) { return dist_func(x, y, this->storage.dim_align(), args...); }, args);
9493
}
9594

9695
GLASS_INLINE dist_type operator()(int32_t u, int32_t v) const {
97-
return operator()((const X *)this->tensor.get(u), (const X *)this->tensor.get(v));
96+
return operator()((const X *)this->storage.get(u), (const X *)this->storage.get(v));
9897
}
9998
};
10099

glass/searcher/graph_searcher.hpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ struct GraphSearcher : public SearcherBase {
3939
int32_t sample_points_num;
4040
std::vector<float> optimize_queries;
4141

42-
mutable std::vector<LinearPool<typename Quant::ComputerType::dist_type, Bitset<>>> pools;
42+
mutable std::vector<LinearPool<typename Quant::ComputerType::dist_type, TwoLevelBitset<>>> pools;
4343

4444
GraphSearcher(Graph<int32_t> g)
4545
: graph(std::move(g)), graph_po(graph.K / 16), pools(std::thread::hardware_concurrency()) {
@@ -151,7 +151,6 @@ struct GraphSearcher : public SearcherBase {
151151
latencies.resize(nq);
152152
}
153153
std::atomic<int64_t> total_dist_cmps{0};
154-
std::atomic<int64_t> total_mem_read_bytes{0};
155154
#pragma omp parallel for schedule(dynamic)
156155
for (int32_t i = 0; i < nq; ++i) {
157156
std::chrono::high_resolution_clock::time_point start;
@@ -170,15 +169,15 @@ struct GraphSearcher : public SearcherBase {
170169
if (stats_enabled) {
171170
auto end = std::chrono::high_resolution_clock::now();
172171
latencies[i] = std::chrono::duration<float, std::milli>(end - start).count();
173-
total_dist_cmps.fetch_add(computer.dist_cmps());
174-
total_mem_read_bytes.fetch_add(computer.mem_read_bytes());
172+
total_dist_cmps.fetch_add(computer.dist_cmps(), std::memory_order_relaxed);
175173
}
176174
}
177175
if (stats_enabled) {
178176
std::sort(latencies.begin(), latencies.end());
179177
stats.p99_latency_ms = latencies.empty() ? 0.0f : latencies[static_cast<size_t>(0.99 * nq)];
180-
stats.avg_dist_comps = (double)total_dist_cmps.load() / nq;
181-
stats.mem_read_bytes = total_mem_read_bytes.load();
178+
size_t total_dist = total_dist_cmps.load(std::memory_order_relaxed);
179+
stats.avg_dist_comps = (double)total_dist / nq;
180+
stats.mem_read_bytes = total_dist * quant.code_size();
182181
}
183182
}
184183

0 commit comments

Comments
 (0)