diff --git a/onnxruntime/core/providers/openvino/ov_allocator.cc b/onnxruntime/core/providers/openvino/ov_allocator.cc index 0e5ff8ff98efb..431f5730c0342 100644 --- a/onnxruntime/core/providers/openvino/ov_allocator.cc +++ b/onnxruntime/core/providers/openvino/ov_allocator.cc @@ -10,12 +10,6 @@ namespace onnxruntime { using namespace openvino_ep; -constexpr size_t default_alignment = 4096; - -static inline size_t align_up(size_t size, size_t pow2_alignment) { - return (size + pow2_alignment - 1) & ~(pow2_alignment - 1); -} - OVRTAllocator::OVRTAllocator(ov::Core& core, OrtDevice::DeviceType device_type, OrtDevice::DeviceId device_id, const char* name) : IAllocator(OrtMemoryInfo(name, OrtAllocatorType::OrtDeviceAllocator, OrtDevice(device_type, OrtDevice::MemType::DEFAULT, device_id), device_id, OrtMemTypeCPUInput)), core_(core) { if (device_type == OrtDevice::NPU) { remote_ctx_ = core_.get_default_context("NPU").as(); @@ -26,16 +20,11 @@ OVRTAllocator::OVRTAllocator(ov::Core& core, OrtDevice::DeviceType device_type, void* OVRTAllocator::Alloc(size_t size) { try { - size_t alloc_size = align_up(size + sizeof(ov::Tensor*) + default_alignment, default_alignment); ov::Tensor* tensor = new ov::Tensor(remote_ctx_.create_host_tensor(ov::element::Type_t::u8, - {alloc_size})); - uintptr_t data_ptr = reinterpret_cast(tensor->data()); - - ov::Tensor** ptr = reinterpret_cast(align_up(data_ptr + sizeof(ov::Tensor*), default_alignment)); - ptr[-1] = tensor; - - return reinterpret_cast(ptr); - + {size})); + std::unique_lock lock(mutex_); + allocated_.insert({tensor->data(), tensor}); + return reinterpret_cast(tensor->data()); } catch (const ov::Exception& e) { ORT_THROW(std::string("Alloc failed: ") + e.what()); } @@ -43,8 +32,14 @@ void* OVRTAllocator::Alloc(size_t size) { void OVRTAllocator::Free(void* p) { try { - ov::Tensor** ptr = reinterpret_cast(p); - delete ptr[-1]; + std::unique_lock lock(mutex_); + auto it = allocated_.find(p); + if (it != allocated_.end()) { + ov::Tensor* tensor = it->second; + allocated_.erase(it); + lock.unlock(); + delete tensor; + } } catch (const ov::Exception& e) { ORT_THROW(std::string("Free failed: ") + e.what()); } diff --git a/onnxruntime/core/providers/openvino/ov_allocator.h b/onnxruntime/core/providers/openvino/ov_allocator.h index 083cfc4d5aed3..f6e87111f47ff 100644 --- a/onnxruntime/core/providers/openvino/ov_allocator.h +++ b/onnxruntime/core/providers/openvino/ov_allocator.h @@ -3,9 +3,12 @@ #ifdef USE_OVEP_NPU_MEMORY #pragma once +#include + #include "core/common/inlined_containers.h" #include "core/framework/allocator.h" #include "openvino/runtime/remote_context.hpp" +#include "core/common/inlined_containers.h" namespace onnxruntime { @@ -18,6 +21,8 @@ class OVRTAllocator : public IAllocator { private: ov::Core& core_; ov::RemoteContext remote_ctx_; + InlinedHashMap allocated_; + std::mutex mutex_; }; } // namespace onnxruntime