Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ internal class DataStreamsWriter : IDataStreamsWriter
private readonly long _bucketDurationMs;
private readonly BoundedConcurrentQueue<StatsPoint> _buffer = new(queueLimit: 10_000);
private readonly BoundedConcurrentQueue<BacklogPoint> _backlogBuffer = new(queueLimit: 10_000);
private readonly TimeSpan _waitTimeSpan = TimeSpan.FromMilliseconds(10);
private readonly ManualResetEventSlim _resetEvent = new(false);
private readonly TimeSpan _waitTimeSpan = TimeSpan.FromMilliseconds(15);
private readonly DataStreamsAggregator _aggregator;
private readonly IDiscoveryService _discoveryService;
private readonly IDataStreamsApi _api;
Expand Down Expand Up @@ -155,6 +156,7 @@ public async Task DisposeAsync()
#endif
await FlushAndCloseAsync().ConfigureAwait(false);
_flushSemaphore.Dispose();
_resetEvent.Dispose();
}

private async Task FlushAndCloseAsync()
Expand Down Expand Up @@ -313,13 +315,8 @@ private async Task ProcessQueueLoopAsync()
continue;
}

// The logic is copied from https://github.com/dotnet/runtime/blob/main/src/libraries/Common/tests/System/Threading/Tasks/TaskTimeoutExtensions.cs#L26
// and modified to avoid dealing with exceptions
var tcs = new TaskCompletionSource<bool>();
using (new Timer(s => ((TaskCompletionSource<bool>)s!).SetResult(true), tcs, _waitTimeSpan, Timeout.InfiniteTimeSpan))
{
await Task.WhenAny(_processExit.Task, tcs.Task).ConfigureAwait(false);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We aren't responding / reacting to _processExit.Task anymore but I don't think a big deal

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We aren't responding / reacting to _processExit.Task anymore but I don't think a big deal

Yeah, given it's only a 15ms wait, I think that's fine, but it does make me wonder... a 15ms wait period makes this is a pretty fast spinning loop, no? 😕For context, we flush traces every 1000ms (1s)... On low-core (1) machines would this not potentially be taking up a lot of thread/CPU time?

What's more, the _resetEvent.Wait() that's never triggered is effectively a Thread.Sleep(). And doing a Thread.Sleep on a thread pool thread is generally not a good idea, as it blocks anything else doing work. @tonyredondo did a bunch of work to remove all the Thread.Sleep() from our async code for exactly this reason. If we do want to dedicate a thread to this, I think we need to turn this loop into a sync loop and create a dedicated long-running thread, but that might be more work than we'd like.

Overall, I don't know if we have any evidence that the TCS and Tasks here are the source of the perf issue yet? and I wonder if it's the potentially small value of _waitTimeSpan that's causing the issue due to this loop spinning too fast? 🤔 Have we considered alternatives like increasing the buffering wait time? Or taking an approach similar to the agent writer where we have a longer buffering period, with dedicated flushing when the buffers get to a certain "fullness"? It'll be a trade-off obviously, but I'm not sure which is best

}
// _resetEvent is never set, we simply wait for a timeout
_resetEvent.Wait(_waitTimeSpan);
}
}

Expand Down
Loading