
Commit 6296ac6

Fix bug in HyperClockCache ApplyToEntries; cleanup (#10768)
Summary: We have seen some rare crash test failures in HyperClockCache, and the source could certainly be a bug fixed in this change, in ClockHandleTable::ConstApplyToEntriesRange. It wasn't properly accounting for the fact that incrementing the acquire counter could be ineffective, due to parallel updates. (When incrementing the acquire counter is ineffective, it is incorrect to then decrement it.)

This change includes some other minor clean-up in HyperClockCache, and adds stats_dump_period_sec with a much lower period to the crash test. Stats dumping should be the primary caller of ApplyToEntries, in collecting cache entry stats.

Pull Request resolved: #10768

Test Plan: Haven't been able to reproduce the failure, but the code should be in a better state (bug fix and improved crash test).

Reviewed By: anand1976

Differential Revision: D40034747

Pulled By: anand1976

fbshipit-source-id: a06fcefe146e17ee35001984445cedcf3b63eb68
1 parent 59495ff commit 6296ac6
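The essence of the fix is easiest to see in isolation. The sketch below is not the RocksDB source: the bit layout, constants, and the VisitIfShareable helper are simplified stand-ins. It shows the pattern ConstApplyToEntriesRange now follows: optimistically bump the acquire counter, then invoke the callback and undo the increment only if the state observed at the increment had the Shareable bit set; otherwise the increment was ineffective and must not be matched by a decrement.

// Standalone C++17 sketch of the corrected acquire/undo pattern.
// The field layout here is illustrative only, not the real ClockHandle layout.
#include <atomic>
#include <cstdint>
#include <iostream>

constexpr int kAcquireCounterShift = 0;
constexpr int kReleaseCounterShift = 16;
constexpr int kStateShift = 32;
constexpr uint64_t kCounterMask = 0xFFFF;
constexpr uint64_t kStateShareableBit = 0x1;
constexpr uint64_t kAcquireIncrement = uint64_t{1} << kAcquireCounterShift;

// Outstanding references = acquires minus releases, modulo the counter width.
inline uint64_t GetRefcount(uint64_t meta) {
  return ((meta >> kAcquireCounterShift) - (meta >> kReleaseCounterShift)) &
         kCounterMask;
}

template <typename Func>
void VisitIfShareable(std::atomic<uint64_t>& meta, Func func) {
  // Optimistically take a reference; safe even if the entry changed meanwhile.
  uint64_t old_meta =
      meta.fetch_add(kAcquireIncrement, std::memory_order_acquire);
  if ((old_meta >> kStateShift) & kStateShareableBit) {
    // We really hold a reference, so the entry is pinned while func runs...
    func();
    // ...and it is safe to give the reference back afterwards.
    meta.fetch_sub(kAcquireIncrement, std::memory_order_release);
  }
  // Else: the entry was not in a Shareable state, so (per the comment added in
  // the diff) the increment had no effect and must NOT be undone; the old code
  // decremented unconditionally, which is the bug being fixed.
}

int main() {
  // One Shareable entry with zero outstanding references.
  std::atomic<uint64_t> meta{kStateShareableBit << kStateShift};
  VisitIfShareable(meta, [&meta]() {
    std::cout << "refs during visit: " << GetRefcount(meta.load()) << "\n";  // 1
  });
  std::cout << "refs after visit: " << GetRefcount(meta.load()) << "\n";  // 0
  return 0;
}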

File tree: 6 files changed (+52, -28 lines)

HISTORY.md

Lines changed: 4 additions & 0 deletions
@@ -1,4 +1,8 @@
 # Rocksdb Change Log
+## Unreleased
+### Bug Fixes
+* Fixed a memory safety bug in experimental HyperClockCache (#10768)
+
 ## 7.7.2 (10/05/2022)
 ### Bug Fixes
 * Fixed a bug in iterator refresh that was not freeing up SuperVersion, which could cause excessive resource pinning (#10770).

cache/clock_cache.cc

Lines changed: 39 additions & 28 deletions
@@ -23,6 +23,12 @@ namespace ROCKSDB_NAMESPACE {
 
 namespace hyper_clock_cache {
 
+inline uint64_t GetRefcount(uint64_t meta) {
+  return ((meta >> ClockHandle::kAcquireCounterShift) -
+          (meta >> ClockHandle::kReleaseCounterShift)) &
+         ClockHandle::kCounterMask;
+}
+
 static_assert(sizeof(ClockHandle) == 64U,
               "Expecting size / alignment with common cache line size");
 
@@ -49,6 +55,7 @@ ClockHandleTable::~ClockHandleTable() {
        break;
      case ClockHandle::kStateInvisible:  // rare but possible
      case ClockHandle::kStateVisible:
+        assert(GetRefcount(h.meta) == 0);
        h.FreeData();
 #ifndef NDEBUG
        Rollback(h.hash, &h);
@@ -562,10 +569,7 @@ bool ClockHandleTable::Release(ClockHandle* h, bool useful,
   }
   // Take ownership if no refs
   do {
-    uint64_t refcount = ((old_meta >> ClockHandle::kAcquireCounterShift) -
-                         (old_meta >> ClockHandle::kReleaseCounterShift)) &
-                        ClockHandle::kCounterMask;
-    if (refcount != 0) {
+    if (GetRefcount(old_meta) != 0) {
       // Not last ref at some point in time during this Release call
       // Correct for possible (but rare) overflow
       CorrectNearOverflow(old_meta, h->meta);
@@ -622,6 +626,8 @@ void ClockHandleTable::Ref(ClockHandle& h) {
 
   assert((old_meta >> ClockHandle::kStateShift) &
          ClockHandle::kStateShareableBit);
+  // Must have already had a reference
+  assert(GetRefcount(old_meta) > 0);
   (void)old_meta;
 }
 
@@ -671,10 +677,7 @@ void ClockHandleTable::Erase(const CacheKeyBytes& key, uint32_t hash) {
         old_meta &= ~(uint64_t{ClockHandle::kStateVisibleBit}
                       << ClockHandle::kStateShift);
         for (;;) {
-          uint64_t refcount =
-              ((old_meta >> ClockHandle::kAcquireCounterShift) -
-               (old_meta >> ClockHandle::kReleaseCounterShift)) &
-              ClockHandle::kCounterMask;
+          uint64_t refcount = GetRefcount(old_meta);
           assert(refcount > 0);
           if (refcount > 1) {
             // Not last ref at some point in time during this Erase call
@@ -683,8 +686,10 @@ void ClockHandleTable::Erase(const CacheKeyBytes& key, uint32_t hash) {
                                     std::memory_order_release);
             break;
           } else if (h->meta.compare_exchange_weak(
-                         old_meta, uint64_t{ClockHandle::kStateConstruction}
-                                       << ClockHandle::kStateShift)) {
+                         old_meta,
+                         uint64_t{ClockHandle::kStateConstruction}
+                             << ClockHandle::kStateShift,
+                         std::memory_order_acq_rel)) {
             // Took ownership
             assert(hash == h->hash);
             // TODO? Delay freeing?
@@ -740,20 +745,32 @@ void ClockHandleTable::ConstApplyToEntriesRange(
   for (uint32_t i = index_begin; i < index_end; i++) {
     ClockHandle& h = array_[i];
 
+    // Note: to avoid using compare_exchange, we have to be extra careful.
     uint64_t old_meta = h.meta.load(std::memory_order_relaxed);
     // Check if it's an entry visible to lookups
     if ((old_meta >> ClockHandle::kStateShift) & check_state_mask) {
-      // Increment acquire counter
+      // Increment acquire counter. Note: it's possible that the entry has
+      // completely changed since we loaded old_meta, but incrementing acquire
+      // count is always safe. (Similar to optimistic Lookup here.)
       old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement,
                                   std::memory_order_acquire);
-      // Double-check
-      if ((old_meta >> ClockHandle::kStateShift) & check_state_mask) {
-        func(h);
+      // Check whether we actually acquired a reference.
+      if ((old_meta >> ClockHandle::kStateShift) &
+          ClockHandle::kStateShareableBit) {
+        // Apply func if appropriate
+        if ((old_meta >> ClockHandle::kStateShift) & check_state_mask) {
+          func(h);
+        }
+        // Pretend we never took the reference
+        h.meta.fetch_sub(ClockHandle::kAcquireIncrement,
+                         std::memory_order_release);
+        // No net change, so don't need to check for overflow
+      } else {
+        // For other states, incrementing the acquire counter has no effect
+        // so we don't need to undo it. Furthermore, we cannot safely undo
+        // it because we did not acquire a read reference to lock the
+        // entry in a Shareable state.
       }
-      // Pretend we never took the reference
-      h.meta.fetch_sub(ClockHandle::kAcquireIncrement,
-                       std::memory_order_release);
-      // No net change, so don't need to check for overflow
     }
   }
 }
@@ -763,12 +780,9 @@ void ClockHandleTable::EraseUnRefEntries() {
     ClockHandle& h = array_[i];
 
     uint64_t old_meta = h.meta.load(std::memory_order_relaxed);
-    uint64_t refcount = ((old_meta >> ClockHandle::kAcquireCounterShift) -
-                         (old_meta >> ClockHandle::kReleaseCounterShift)) &
-                        ClockHandle::kCounterMask;
     if (old_meta & (uint64_t{ClockHandle::kStateShareableBit}
                     << ClockHandle::kStateShift) &&
-        refcount == 0 &&
+        GetRefcount(old_meta) == 0 &&
         h.meta.compare_exchange_strong(old_meta,
                                        uint64_t{ClockHandle::kStateConstruction}
                                            << ClockHandle::kStateShift,
@@ -877,13 +891,12 @@ void ClockHandleTable::Evict(size_t requested_charge, size_t* freed_charge,
      // Only clock update entries with no outstanding refs
      continue;
    }
-    if (!(meta >> ClockHandle::kStateShift &
+    if (!((meta >> ClockHandle::kStateShift) &
          ClockHandle::kStateShareableBit)) {
      // Only clock update Shareable entries
      continue;
    }
-    // ModTableSize(old_clock_pointer + i));
-    if (meta >> ClockHandle::kStateShift == ClockHandle::kStateVisible &&
+    if ((meta >> ClockHandle::kStateShift == ClockHandle::kStateVisible) &&
        acquire_count > 0) {
      // Decrement clock
      uint64_t new_count = std::min(acquire_count - 1,
@@ -1101,9 +1114,7 @@ size_t ClockCacheShard::GetPinnedUsage() const {
   table_.ConstApplyToEntriesRange(
       [&table_pinned_usage, charge_metadata](const ClockHandle& h) {
         uint64_t meta = h.meta.load(std::memory_order_relaxed);
-        uint64_t refcount = ((meta >> ClockHandle::kAcquireCounterShift) -
-                             (meta >> ClockHandle::kReleaseCounterShift)) &
-                            ClockHandle::kCounterMask;
+        uint64_t refcount = GetRefcount(meta);
         // Holding one ref for ConstApplyToEntriesRange
         assert(refcount > 0);
         if (refcount > 1) {
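A recurring change above is replacing four hand-expanded copies of the refcount computation with the new GetRefcount helper. Its modular arithmetic is worth a second look: the standalone sketch below (with an illustrative 16-bit counter layout and a hypothetical Pack helper, not the real ClockHandle layout) checks that the acquire-minus-release difference stays correct even after the stored counters wrap. The real code additionally guards against counters drifting toward overflow with CorrectNearOverflow, visible in the Release hunk above.

// Standalone sketch of the refcount arithmetic; constants are illustrative.
#include <cassert>
#include <cstdint>
#include <iostream>

constexpr int kAcquireCounterShift = 0;
constexpr int kReleaseCounterShift = 16;
constexpr uint64_t kCounterMask = 0xFFFF;

uint64_t GetRefcount(uint64_t meta) {
  return ((meta >> kAcquireCounterShift) - (meta >> kReleaseCounterShift)) &
         kCounterMask;
}

// Hypothetical helper: pack (possibly wrapped) acquire/release counts.
uint64_t Pack(uint64_t acquires, uint64_t releases) {
  return ((acquires & kCounterMask) << kAcquireCounterShift) |
         ((releases & kCounterMask) << kReleaseCounterShift);
}

int main() {
  // 5 acquires, 3 releases -> 2 outstanding references.
  assert(GetRefcount(Pack(5, 3)) == 2);
  // Wrapped counters: 0x10003 acquires (stored as 3) vs 0xFFFF releases still
  // yields the true count of 4 outstanding references.
  assert(GetRefcount(Pack(0x10003, 0xFFFF)) == 4);
  std::cout << "refcount arithmetic holds across counter wraparound\n";
  return 0;
}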

db_stress_tool/db_stress_common.h

Lines changed: 1 addition & 0 deletions
@@ -173,6 +173,7 @@ DECLARE_bool(mock_direct_io);
 DECLARE_bool(statistics);
 DECLARE_bool(sync);
 DECLARE_bool(use_fsync);
+DECLARE_uint64(stats_dump_period_sec);
 DECLARE_uint64(bytes_per_sync);
 DECLARE_uint64(wal_bytes_per_sync);
 DECLARE_int32(kill_random_test);
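The stress-tool side of the change is ordinary gflags wiring: DECLARE_uint64 in the shared header above, DEFINE_uint64 in db_stress_gflags.cc below, and reads through FLAGS_stats_dump_period_sec. A minimal standalone sketch of that pattern (not RocksDB code; it collapses the declare/define split into one file):

// Hypothetical gflags example showing the DEFINE/FLAGS_ pattern used here.
#include <gflags/gflags.h>
#include <iostream>

DEFINE_uint64(stats_dump_period_sec, 600,
              "Gap between printing stats to log in seconds");

int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
  // e.g. ./example --stats_dump_period_sec=10
  std::cout << "stats_dump_period_sec = " << FLAGS_stats_dump_period_sec
            << std::endl;
  return 0;
}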

db_stress_tool/db_stress_gflags.cc

Lines changed: 4 additions & 0 deletions
@@ -1052,4 +1052,8 @@ DEFINE_bool(
     "be preserved similarly under `FLAGS_expected_values_dir/unverified` when "
     "`--expected_values_dir` is nonempty.");
 
+DEFINE_uint64(stats_dump_period_sec,
+              ROCKSDB_NAMESPACE::Options().stats_dump_period_sec,
+              "Gap between printing stats to log in seconds");
+
 #endif  // GFLAGS

db_stress_tool/db_stress_test_base.cc

Lines changed: 2 additions & 0 deletions
@@ -3051,6 +3051,8 @@ void InitializeOptionsFromFlags(
   options.experimental_mempurge_threshold =
       FLAGS_experimental_mempurge_threshold;
   options.periodic_compaction_seconds = FLAGS_periodic_compaction_seconds;
+  options.stats_dump_period_sec =
+      static_cast<unsigned int>(FLAGS_stats_dump_period_sec);
   options.ttl = FLAGS_compaction_ttl;
   options.enable_pipelined_write = FLAGS_enable_pipelined_write;
   options.enable_write_thread_adaptive_yield =
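For context, the flag ultimately lands in a plain Options field. Below is a minimal application-side sketch, not part of this commit, assuming the default rocksdb namespace and an example database path, of setting the same option directly:

// Hypothetical example: dump statistics to the info log every 10 seconds,
// the aggressive value the crash test can now pick.
#include <rocksdb/db.h>
#include <rocksdb/options.h>

#include <cassert>

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.stats_dump_period_sec = 10;  // default is 600

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/stats_dump_example", &db);
  assert(s.ok());
  // ... do work; periodic stats dumps appear in the LOG file ...
  delete db;
  return 0;
}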

tools/db_crashtest.py

Lines changed: 2 additions & 0 deletions
@@ -131,6 +131,8 @@
     "use_multiget" : lambda: random.randint(0, 1),
     "periodic_compaction_seconds" :
         lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
+    # 0 = never (used by some), 10 = often (for threading bugs), 600 = default
+    "stats_dump_period_sec" : lambda: random.choice([0, 10, 600]),
     "compaction_ttl" : lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
     # Test small max_manifest_file_size in a smaller chance, as most of the
     # time we want manifest history to be preserved to help debug
