Skip to content

Commit 53b185f

Browse files
committed
Add e2e test to reproduce issue #19406
Signed-off-by: Miancheng Lin <[email protected]>
1 parent 5122d43 commit 53b185f

File tree

2 files changed

+128
-0
lines changed

2 files changed

+128
-0
lines changed

server/storage/mvcc/kvstore_compaction.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ func (s *store) scheduleCompaction(compactMainRev, prevCompactRev int64) (KeyVal
4949

5050
tx := s.b.BatchTx()
5151
tx.LockOutsideApply()
52+
// gofail: var compactAfterAcquiredBatchTxLock struct{}
5253
keys, values := tx.UnsafeRange(schema.Key, last, end, int64(batchNum))
5354
for i := range keys {
5455
rev = BytesToRev(keys[i])
@@ -63,6 +64,7 @@ func (s *store) scheduleCompaction(compactMainRev, prevCompactRev int64) (KeyVal
6364
// gofail: var compactBeforeSetFinishedCompact struct{}
6465
UnsafeSetFinishedCompact(tx, compactMainRev)
6566
tx.Unlock()
67+
dbCompactionPauseMs.Observe(float64(time.Since(start) / time.Millisecond))
6668
// gofail: var compactAfterSetFinishedCompact struct{}
6769
hash := h.Hash()
6870
size, sizeInUse := s.b.Size(), s.b.SizeInUse()

tests/e2e/reproduce_19406_test.go

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// Copyright 2025 The etcd Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package e2e
16+
17+
import (
18+
"context"
19+
"fmt"
20+
"io"
21+
"net/http"
22+
"strconv"
23+
"strings"
24+
"testing"
25+
"time"
26+
27+
clientv3 "go.etcd.io/etcd/client/v3"
28+
"go.etcd.io/etcd/pkg/v3/stringutil"
29+
"go.etcd.io/etcd/tests/v3/framework/e2e"
30+
31+
"github.com/stretchr/testify/require"
32+
)
33+
34+
// TestReproduce19406 reproduces the issue: https://github.com/etcd-io/etcd/issues/19406
35+
func TestReproduce19406(t *testing.T) {
36+
e2e.BeforeTest(t)
37+
38+
compactionSleepInterval := 100 * time.Millisecond
39+
ctx := context.TODO()
40+
41+
clus, cerr := e2e.NewEtcdProcessCluster(ctx, t,
42+
e2e.WithClusterSize(1),
43+
e2e.WithGoFailEnabled(true),
44+
e2e.WithCompactionBatchLimit(1),
45+
e2e.WithCompactionSleepInterval(compactionSleepInterval),
46+
)
47+
require.NoError(t, cerr)
48+
t.Cleanup(func() { require.NoError(t, clus.Stop()) })
49+
50+
// Produce some data
51+
cli := newClient(t, clus.EndpointsGRPC(), e2e.ClientConfig{})
52+
valueSize := 10
53+
var latestRevision int64
54+
55+
produceKeyNum := 20
56+
for i := 0; i <= produceKeyNum; i++ {
57+
resp, err := cli.Put(ctx, fmt.Sprintf("%d", i), stringutil.RandString(uint(valueSize)))
58+
require.NoError(t, err)
59+
latestRevision = resp.Header.Revision
60+
}
61+
62+
// Sleep for PerCompactionInterationInterval to simulate a single iteration of compaction lasting at least this duration.
63+
PerCompactionInterationInterval := compactionSleepInterval
64+
require.NoError(t, clus.Procs[0].Failpoints().SetupHTTP(ctx, "compactAfterAcquiredBatchTxLock",
65+
fmt.Sprintf(`sleep("%s")`, PerCompactionInterationInterval)))
66+
67+
// start compaction
68+
t.Log("start compaction...")
69+
_, err := cli.Compact(ctx, latestRevision, clientv3.WithCompactPhysical())
70+
require.NoError(t, err)
71+
t.Log("finished compaction...")
72+
73+
// Validate that total compaction sleep interval
74+
// Compaction runs in batches. During each batch, it acquires a lock, releases it at the end,
75+
// and then waits for a compactionSleepInterval before starting the next batch. This pause
76+
// allows PUT requests to be processed.
77+
// Therefore, the total compaction sleep interval larger or equal to
78+
// (compaction iteration number - 1) * compactionSleepInterval
79+
httpEndpoint := clus.EndpointsHTTP()[0]
80+
totalKeys := produceKeyNum + 1
81+
pauseDuration, totalDuration, err := GetEtcdCompactionMetrics(t, httpEndpoint)
82+
require.NoError(t, err)
83+
actualSleepInterval := time.Duration(totalDuration-pauseDuration) * time.Millisecond
84+
expectSleepInterval := compactionSleepInterval * time.Duration(totalKeys)
85+
t.Logf("db_compaction_pause_duration: %.2f db_compaction_total_duration: %.2f, totalKeys: %d",
86+
pauseDuration, totalDuration, totalKeys)
87+
require.GreaterOrEqualf(t, actualSleepInterval, expectSleepInterval,
88+
"expect total compact sleep interval larger than (%v) but got (%v)",
89+
expectSleepInterval, actualSleepInterval)
90+
}
91+
92+
// GetEtcdCompactionMetrics fetches httpEndpoint+"/metrics" and returns the
// values of the
// etcd_debugging_mvcc_db_compaction_pause_duration_milliseconds_sum and
// etcd_debugging_mvcc_db_compaction_total_duration_milliseconds_sum samples.
//
// A transport failure or a failure to read the response body aborts the test
// through t.Fatalf. A non-200 response, an unparsable sample value, or a
// metric that is absent from the response is reported through the returned
// error, in which case both durations are zero.
func GetEtcdCompactionMetrics(t *testing.T, httpEndpoint string) (pauseDuration, totalDuration float64, err error) {
	t.Helper()

	const (
		pauseSumMetric = `etcd_debugging_mvcc_db_compaction_pause_duration_milliseconds_sum`
		totalSumMetric = `etcd_debugging_mvcc_db_compaction_total_duration_milliseconds_sum`
	)

	// Make an HTTP request to fetch all Prometheus metrics.
	url := httpEndpoint + "/metrics"
	resp, err := http.Get(url)
	if err != nil {
		t.Fatalf("fetch error: %v", err)
	}
	defer resp.Body.Close()

	// An error page must not be mistaken for a metrics payload.
	if resp.StatusCode != http.StatusOK {
		return 0, 0, fmt.Errorf("fetching %s: unexpected status %s", url, resp.Status)
	}

	b, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Fatalf("fetch error: reading %s: %v", url, err)
	}

	var pauseFound, totalFound bool
	for _, l := range strings.Split(string(b), "\n") {
		// A sample line is "<name> <value>"; anything else is skipped.
		parts := strings.Fields(l)
		if len(parts) != 2 {
			continue
		}
		if strings.Contains(l, pauseSumMetric) {
			pauseDuration, err = strconv.ParseFloat(parts[1], 64)
			if err != nil {
				return 0, 0, fmt.Errorf("invalid pause sum value: %w", err)
			}
			pauseFound = true
		}
		if strings.Contains(l, totalSumMetric) {
			totalDuration, err = strconv.ParseFloat(parts[1], 64)
			if err != nil {
				return 0, 0, fmt.Errorf("invalid total sum value: %w", err)
			}
			totalFound = true
		}
	}

	// Report a missing metric explicitly instead of returning a zero that
	// looks like a real measurement.
	if !pauseFound {
		return 0, 0, fmt.Errorf("metric %s not found at %s", pauseSumMetric, url)
	}
	if !totalFound {
		return 0, 0, fmt.Errorf("metric %s not found at %s", totalSumMetric, url)
	}
	return pauseDuration, totalDuration, nil
}

0 commit comments

Comments
 (0)