@@ -43,7 +43,14 @@ static const size_t kPageSize_ = 4096;
4343static atomic_uint8_t amdgpu_runtime_shutdown{0 };
4444static atomic_uint8_t amdgpu_event_registered{0 };
4545
46- // Check if AMDGPU runtime shutdown state
46+ # define LOAD_HSA_FUNC_WITH_ERROR_CHECK (func, name, success ) \
47+ func = (decltype (func))dlsym(RTLD_NEXT, name); \
48+ if (!func) { \
49+ VReport (2 , " Amdgpu Init: Failed to load " #name " function\n " ); \
50+ success = false ; \
51+ }
52+
53+ // Check AMDGPU runtime shutdown state
4754bool AmdgpuMemFuncs::IsAmdgpuRuntimeShutdown () {
4855 return static_cast <bool >(
4956 atomic_load (&amdgpu_runtime_shutdown, memory_order_acquire));
@@ -54,38 +61,43 @@ void AmdgpuMemFuncs::NotifyAmdgpuRuntimeShutdown() {
5461 uint8_t shutdown = 0 ;
5562 if (atomic_compare_exchange_strong (&amdgpu_runtime_shutdown, &shutdown, 1 ,
5663 memory_order_acq_rel)) {
57- VReport (1 , " Amdgpu Allocator: AMDGPU runtime shutdown detected\n " );
64+ VReport (2 , " Amdgpu Allocator: AMDGPU runtime shutdown detected\n " );
5865 }
5966}
6067
6168bool AmdgpuMemFuncs::Init () {
62- hsa_amd.memory_pool_allocate =
63- (decltype (hsa_amd.memory_pool_allocate ))dlsym (
64- RTLD_NEXT, " hsa_amd_memory_pool_allocate" );
65- hsa_amd.memory_pool_free = (decltype (hsa_amd.memory_pool_free ))dlsym (
66- RTLD_NEXT, " hsa_amd_memory_pool_free" );
67- hsa_amd.pointer_info = (decltype (hsa_amd.pointer_info ))dlsym (
68- RTLD_NEXT, " hsa_amd_pointer_info" );
69- hsa_amd.vmem_address_reserve_align =
70- (decltype (hsa_amd.vmem_address_reserve_align ))dlsym (
71- RTLD_NEXT, " hsa_amd_vmem_address_reserve_align" );
72- hsa_amd.vmem_address_free = (decltype (hsa_amd.vmem_address_free ))dlsym (
73- RTLD_NEXT, " hsa_amd_vmem_address_free" );
74- hsa_amd.register_system_event_handler =
75- (decltype (hsa_amd.register_system_event_handler ))dlsym (
76- RTLD_NEXT, " hsa_amd_register_system_event_handler" );
77- if (!hsa_amd.memory_pool_allocate || !hsa_amd.memory_pool_free ||
78- !hsa_amd.pointer_info || !hsa_amd.vmem_address_reserve_align ||
79- !hsa_amd.vmem_address_free || !hsa_amd.register_system_event_handler )
69+ bool success = true ;
70+ LOAD_HSA_FUNC_WITH_ERROR_CHECK (hsa_amd.memory_pool_allocate ,
71+ " hsa_amd_memory_pool_allocate" , success);
72+ LOAD_HSA_FUNC_WITH_ERROR_CHECK (hsa_amd.memory_pool_free ,
73+ " hsa_amd_memory_pool_free" , success);
74+ LOAD_HSA_FUNC_WITH_ERROR_CHECK (hsa_amd.pointer_info , " hsa_amd_pointer_info" ,
75+ success);
76+ LOAD_HSA_FUNC_WITH_ERROR_CHECK (hsa_amd.vmem_address_reserve_align ,
77+ " hsa_amd_vmem_address_reserve_align" , success);
78+ LOAD_HSA_FUNC_WITH_ERROR_CHECK (hsa_amd.vmem_address_free ,
79+ " hsa_amd_vmem_address_free" , success);
80+ LOAD_HSA_FUNC_WITH_ERROR_CHECK (hsa_amd.register_system_event_handler ,
81+ " hsa_amd_register_system_event_handler" ,
82+ success);
83+ if (!success) {
84+ VReport (1 , " Amdgpu Init: Failed to load AMDGPU runtime functions\n " );
8085 return false ;
86+ }
8187 return true ;
8288}
8389
8490void *AmdgpuMemFuncs::Allocate (uptr size, uptr alignment,
8591 DeviceAllocationInfo *da_info) {
8692 // Do not allocate if AMDGPU runtime is shutdown
87- if (IsAmdgpuRuntimeShutdown ())
93+ if (IsAmdgpuRuntimeShutdown ()) {
94+ VReport (1 ,
95+ " Amdgpu Allocate: Runtime shutdown, skipping allocation for size "
96+ " %zu alignment %zu\n " ,
97+ size, alignment);
8898 return nullptr ;
99+ }
100+
89101 AmdgpuAllocationInfo *aa_info =
90102 reinterpret_cast <AmdgpuAllocationInfo *>(da_info);
91103 if (!aa_info->memory_pool .handle ) {
@@ -104,8 +116,14 @@ void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
104116
105117void AmdgpuMemFuncs::Deallocate (void *p) {
106118 // Deallocate does nothing after AMDGPU runtime shutdown
107- if (IsAmdgpuRuntimeShutdown ())
119+ if (IsAmdgpuRuntimeShutdown ()) {
120+ VReport (
121+ 1 ,
122+ " Amdgpu Deallocate: Runtime shutdown, skipping deallocation for %p\n " ,
123+ reinterpret_cast <void *>(p));
108124 return ;
125+ }
126+
109127 DevicePointerInfo DevPtrInfo;
110128 if (AmdgpuMemFuncs::GetPointerInfo (reinterpret_cast <uptr>(p), &DevPtrInfo)) {
111129 if (DevPtrInfo.type == HSA_EXT_POINTER_TYPE_HSA) {
@@ -118,6 +136,14 @@ void AmdgpuMemFuncs::Deallocate(void *p) {
118136}
119137
120138bool AmdgpuMemFuncs::GetPointerInfo (uptr ptr, DevicePointerInfo* ptr_info) {
139+ // GetPointerInfo returns false after AMDGPU runtime shutdown
140+ if (IsAmdgpuRuntimeShutdown ()) {
141+ VReport (1 ,
142+ " Amdgpu GetPointerInfo: Runtime shutdown, skipping query for %p\n " ,
143+ reinterpret_cast <void *>(ptr));
144+ return false ;
145+ }
146+
121147 hsa_amd_pointer_info_t info;
122148 info.size = sizeof (hsa_amd_pointer_info_t );
123149 hsa_status_t status =
@@ -138,9 +164,11 @@ bool AmdgpuMemFuncs::GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info) {
138164 // Register shutdown system event handler only once
139165 // TODO: Register multiple event handlers if needed in future
140166void AmdgpuMemFuncs::RegisterSystemEventHandlers () {
141- // Check if already registered
142- if (atomic_load (&amdgpu_event_registered, memory_order_acquire) == 0 ) {
143- // Callback to just detect runtime shutdown
167+ uint8_t registered = 0 ;
168+ // Check if shutdown event handler is already registered
169+ if (atomic_compare_exchange_strong (&amdgpu_event_registered, &registered, 1 ,
170+ memory_order_acq_rel)) {
171+ // Callback to detect and notify AMDGPU runtime shutdown
144172 hsa_amd_system_event_callback_t callback = [](const hsa_amd_event_t * event,
145173 void * data) {
146174 if (!event)
@@ -149,12 +177,20 @@ void AmdgpuMemFuncs::RegisterSystemEventHandlers() {
149177 AmdgpuMemFuncs::NotifyAmdgpuRuntimeShutdown ();
150178 return HSA_STATUS_SUCCESS;
151179 };
152- // Register the callback
180+ // Register the event callback
153181 hsa_status_t status =
154182 hsa_amd.register_system_event_handler (callback, nullptr );
155- // Mark as registered if successful
183+ // Report the outcome; on failure clear the flag so registration can be retried
156184 if (status == HSA_STATUS_SUCCESS)
157- atomic_store (&amdgpu_event_registered, 1 , memory_order_release);
185+ VReport (
186+ 1 ,
187+ " Amdgpu RegisterSystemEventHandlers: Registered shutdown event \n " );
188+ else {
189+ VReport (1 ,
190+ " Amdgpu RegisterSystemEventHandlers: Failed to register shutdown "
191+ " event \n " );
192+ atomic_store (&amdgpu_event_registered, 0 , memory_order_release);
193+ }
158194 }
159195}
160196
0 commit comments