Skip to content

Commit 53de874

Browse files
committed
Merge pull request #799 from mattrjacobs/fix-rolling-percentile-write-thread-safety
Unit test demonstrating unsafe usage of IntCountsHistogram on write path, and corresponding fix
2 parents d065208 + 2639ce4 commit 53de874

File tree

2 files changed

+59
-10
lines changed

2 files changed

+59
-10
lines changed

hystrix-core/src/main/java/com/netflix/hystrix/util/HystrixRollingPercentile.java

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
import java.util.concurrent.atomic.AtomicReferenceArray;
2424
import java.util.concurrent.locks.ReentrantLock;
2525

26+
import org.HdrHistogram.Histogram;
2627
import org.HdrHistogram.IntCountsHistogram;
28+
import org.HdrHistogram.Recorder;
2729
import org.slf4j.Logger;
2830
import org.slf4j.LoggerFactory;
2931

@@ -281,29 +283,26 @@ public void reset() {
281283
buckets.clear();
282284
}
283285

284-
private static class PercentileBucketData {
285-
private final IntCountsHistogram histogram;
286+
/*package-private*/ static class PercentileBucketData {
287+
final Recorder recorder;
288+
final AtomicReference<Histogram> stableHistogram = new AtomicReference<Histogram>(null);
286289

287290
public PercentileBucketData() {
288-
this.histogram = new IntCountsHistogram(4);
291+
this.recorder = new Recorder(4);
289292
}
290293

291294
public void addValue(int... latency) {
292295
for (int l: latency) {
293-
histogram.recordValue(l);
296+
recorder.recordValue(l);
294297
}
295298
}
296-
297-
public int length() {
298-
return (int) histogram.getTotalCount();
299-
}
300299
}
301300

302301
/**
303302
* @NotThreadSafe
304303
*/
305304
/* package for testing */ static class PercentileSnapshot {
306-
private final IntCountsHistogram aggregateHistogram;
305+
/* package-private*/ final IntCountsHistogram aggregateHistogram;
307306
private final long count;
308307
private final int mean;
309308
private final int p0;
@@ -331,7 +330,10 @@ public int length() {
331330
/* package for testing */ PercentileSnapshot(Bucket[] buckets) {
332331
aggregateHistogram = new IntCountsHistogram(4);
333332
for (Bucket bucket: buckets) {
334-
aggregateHistogram.add(bucket.bucketData.histogram);
333+
PercentileBucketData bucketData = bucket.bucketData;
334+
//if stable snapshot not already generated, generate it now
335+
bucketData.stableHistogram.compareAndSet(null, bucketData.recorder.getIntervalHistogram());
336+
aggregateHistogram.add(bucket.bucketData.stableHistogram.get());
335337
}
336338

337339
count = aggregateHistogram.getTotalCount();

hystrix-core/src/test/java/com/netflix/hystrix/util/HystrixRollingPercentileTest.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,16 @@ public void testSampleDataOverTime1() {
162162
MockedTime time = new MockedTime();
163163
HystrixRollingPercentile p = new HystrixRollingPercentile(time, timeInMilliseconds, numberOfBuckets, enabled);
164164
int previousTime = 0;
165+
166+
int maxSoFar = -1;
167+
165168
for (int i = 0; i < SampleDataHolder1.data.length; i++) {
166169
int timeInMillisecondsSinceStart = SampleDataHolder1.data[i][0];
167170
int latency = SampleDataHolder1.data[i][1];
171+
if (latency > maxSoFar) {
172+
System.out.println("New MAX latency : " + latency);
173+
maxSoFar = latency;
174+
}
168175
time.increment(timeInMillisecondsSinceStart - previousTime);
169176
previousTime = timeInMillisecondsSinceStart;
170177
p.addValue(latency);
@@ -181,6 +188,8 @@ public void testSampleDataOverTime1() {
181188
System.out.println("Median: " + p.getPercentile(50));
182189
System.out.println("Median: " + p.getPercentile(50));
183190

191+
System.out.println("MAX : " + p.currentPercentileSnapshot.aggregateHistogram.getMaxValue());
192+
184193
/*
185194
* In a loop as a use case was found where very different values were calculated in subsequent requests.
186195
*/
@@ -216,6 +225,8 @@ public void testSampleDataOverTime2() {
216225
System.out.println("99.5th: " + p.getPercentile(99.5));
217226
System.out.println("99.99: " + p.getPercentile(99.99));
218227

228+
System.out.println("MAX : " + p.currentPercentileSnapshot.aggregateHistogram.getMaxValue());
229+
219230
if (p.getPercentile(50) > 90 || p.getPercentile(50) < 50) {
220231
fail("We expect around 60-70 but got: " + p.getPercentile(50));
221232
}
@@ -394,6 +405,42 @@ public void run() {
394405
System.out.println(p.getMean() + " : " + p.getPercentile(50) + " : " + p.getPercentile(75) + " : " + p.getPercentile(90) + " : " + p.getPercentile(95) + " : " + p.getPercentile(99));
395406
}
396407

408+
@Test
409+
public void testWriteThreadSafety() {
410+
final MockedTime time = new MockedTime();
411+
final HystrixRollingPercentile p = new HystrixRollingPercentile(time, HystrixProperty.Factory.asProperty(100), HystrixProperty.Factory.asProperty(25), HystrixProperty.Factory.asProperty(true));
412+
413+
final int NUM_THREADS = 1000;
414+
final int NUM_ITERATIONS = 1000000;
415+
416+
final CountDownLatch latch = new CountDownLatch(NUM_THREADS);
417+
418+
final Random r = new Random();
419+
420+
final AtomicInteger added = new AtomicInteger(0);
421+
422+
for (int i = 0; i < NUM_THREADS; i++) {
423+
threadPool.submit(new Runnable() {
424+
@Override
425+
public void run() {
426+
for (int j = 1; j < NUM_ITERATIONS / NUM_THREADS + 1; j++) {
427+
int nextInt = r.nextInt(100);
428+
p.addValue(nextInt);
429+
added.getAndIncrement();
430+
}
431+
latch.countDown();
432+
}
433+
});
434+
}
435+
436+
try {
437+
latch.await(100, TimeUnit.SECONDS);
438+
assertEquals(added.get(), p.buckets.peekLast().bucketData.recorder.getIntervalHistogram().getTotalCount());
439+
} catch (InterruptedException ex) {
440+
fail("Timeout on all threads writing percentiles");
441+
}
442+
}
443+
397444
@Test
398445
public void testThreadSafetyMulti() {
399446
for (int i = 0; i < 100; i++) {

0 commit comments

Comments
 (0)