Skip to content

Commit d6dc8c0

Browse files
authored
[Profiler] Fix missing CPU samples (#4046)
1 parent ec43699 commit d6dc8c0

File tree

4 files changed

+72
-9
lines changed

4 files changed

+72
-9
lines changed

profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/OsSpecificApi.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,15 +155,17 @@ uint64_t GetThreadCpuTime(ManagedThreadInfo* pThreadInfo)
155155
return cpuTime;
156156
}
157157

158-
bool IsRunning(ManagedThreadInfo* pThreadInfo, uint64_t& cpuTime)
158+
bool IsRunning(ManagedThreadInfo* pThreadInfo, uint64_t& cpuTime, bool& failed)
159159
{
160160
bool isRunning = false;
161161
if (!GetCpuInfo(pThreadInfo->GetOsThreadId(), isRunning, cpuTime))
162162
{
163163
cpuTime = 0;
164+
failed = true;
164165
return false;
165166
}
166167

168+
failed = false;
167169
return isRunning;
168170
}
169171

profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/OsSpecificApi.cpp

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,38 @@ uint64_t GetThreadCpuTime(ManagedThreadInfo* pThreadInfo)
3535
FILETIME creationTime, exitTime = {}; // not used here
3636
FILETIME kernelTime = {};
3737
FILETIME userTime = {};
38+
static bool isFirstError = true;
3839

3940
if (::GetThreadTimes(pThreadInfo->GetOsThreadHandle(), &creationTime, &exitTime, &kernelTime, &userTime))
4041
{
4142
uint64_t milliseconds = GetTotalMilliseconds(userTime) + GetTotalMilliseconds(kernelTime);
4243
return milliseconds;
4344
}
44-
45+
else
46+
{
47+
DWORD errorCode = GetLastError();
48+
if (isFirstError && (errorCode != ERROR_INVALID_HANDLE)) // expected invalid handle case
49+
{
50+
isFirstError = false;
51+
LPVOID msgBuffer;
52+
53+
if (errorCode == 0)
54+
{
55+
Log::Error("GetThreadCpuTime() error calling GetThreadTimes (last error = 0)");
56+
}
57+
else
58+
{
59+
FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
60+
NULL, errorCode, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPTSTR)&msgBuffer, 0, NULL);
61+
62+
if (msgBuffer != NULL)
63+
{
64+
Log::Error("GetThreadCpuTime() error calling GetThreadTimes (last error = 0x", std::hex, errorCode, std::dec, "): ", (LPCTSTR)msgBuffer);
65+
LocalFree(msgBuffer);
66+
}
67+
}
68+
}
69+
}
4570
return 0;
4671
}
4772

@@ -131,8 +156,11 @@ bool IsRunning(ULONG threadState)
131156
// If some callstacks show non cpu-bound frames at the top, return true only for Running state
132157
}
133158

134-
bool IsRunning(ManagedThreadInfo* pThreadInfo, uint64_t& cpuTime)
159+
bool IsRunning(ManagedThreadInfo* pThreadInfo, uint64_t& cpuTime, bool& failed)
135160
{
161+
failed = true;
162+
cpuTime = 0;
163+
136164
if (NtQueryInformationThread == nullptr)
137165
{
138166
if (!InitializeNtQueryInformationThreadCallback())
@@ -144,14 +172,41 @@ bool IsRunning(ManagedThreadInfo* pThreadInfo, uint64_t& cpuTime)
144172
SYSTEM_THREAD_INFORMATION sti = {0};
145173
auto size = sizeof(SYSTEM_THREAD_INFORMATION);
146174
ULONG buflen = 0;
175+
static bool isFirstError = true;
147176
NTSTATUS lResult = NtQueryInformationThread(pThreadInfo->GetOsThreadHandle(), SYSTEMTHREADINFORMATION, &sti, static_cast<ULONG>(size), &buflen);
177+
// deal with an invalid thread handle case (thread might have died)
148178
if (lResult != 0)
149179
{
180+
#if BIT64 // Windows 64-bit
181+
if (isFirstError && (lResult != STATUS_INVALID_HANDLE))
182+
{
183+
isFirstError = false;
184+
LPVOID msgBuffer;
185+
DWORD errorCode = GetLastError();
186+
187+
if (errorCode == 0)
188+
{
189+
Log::Error("IsRunning() error 0x", std::hex, lResult, " calling NtQueryInformationThread");
190+
}
191+
else
192+
{
193+
FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
194+
NULL, errorCode, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPTSTR)&msgBuffer, 0, NULL);
195+
196+
if (msgBuffer != NULL)
197+
{
198+
Log::Error("IsRunning() error 0x", std::hex, lResult, " calling NtQueryInformationThread (last error = 0x", std::hex, errorCode, std::dec, "): ", (LPTSTR)msgBuffer);
199+
LocalFree(msgBuffer);
200+
}
201+
}
202+
}
203+
#endif
150204
// This always happens in 32 bit so uses another API to at least get the CPU consumption
151205
cpuTime = GetThreadCpuTime(pThreadInfo);
152206
return false;
153207
}
154208

209+
failed = false;
155210
cpuTime = GetTotalMilliseconds(sti.UserTime) + GetTotalMilliseconds(sti.KernelTime);
156211

157212
return IsRunning(sti.ThreadState);

profiler/src/ProfilerEngine/Datadog.Profiler.Native/OsSpecificApi.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,6 @@ namespace OsSpecificApi
1919
{
2020
std::unique_ptr<StackFramesCollectorBase> CreateNewStackFramesCollectorInstance(ICorProfilerInfo4* pCorProfilerInfo, IConfiguration const* pConfiguration);
2121
uint64_t GetThreadCpuTime(ManagedThreadInfo* pThreadInfo);
22-
bool IsRunning(ManagedThreadInfo* pThreadInfo, uint64_t& cpuTime);
22+
bool IsRunning(ManagedThreadInfo* pThreadInfo, uint64_t& cpuTime, bool& failed);
2323
int32_t GetProcessorCount();
2424
} // namespace OsSpecificApi

profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoop.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -244,15 +244,21 @@ void StackSamplerLoop::CpuProfilingIteration()
244244
_targetThread = _pManagedThreadList->LoopNext(_iteratorCpuTime);
245245
if (_targetThread != nullptr)
246246
{
247+
// detect Windows API call failure
248+
bool failure = false;
249+
247250
// sample only if the thread is currently running on a core
248251
uint64_t currentConsumption = 0;
249252
uint64_t lastConsumption = _targetThread->GetCpuConsumptionMilliseconds();
250-
bool isRunning = OsSpecificApi::IsRunning(_targetThread.get(), currentConsumption);
251-
// Note: it is not possible to get this information on Windows 32-bit
252-
// so true is returned if this thread consumed some CPU since
253-
// the last iteration
253+
bool isRunning = OsSpecificApi::IsRunning(_targetThread.get(), currentConsumption, failure);
254+
// Note: it is not possible to get this information on Windows 32-bit or in some cases in 64-bit
255+
// so isRunning should be true if this thread consumed some CPU since the last iteration
254256
#if _WINDOWS
255-
#if BIT64 // nothing to do for Windows 64-bit
257+
#if BIT64 // Windows 64-bit
258+
if (failure)
259+
{
260+
isRunning = (lastConsumption < currentConsumption);
261+
}
256262
#else // Windows 32-bit
257263
isRunning = (lastConsumption < currentConsumption);
258264
#endif

0 commit comments

Comments
 (0)