Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 12 additions & 17 deletions onnxruntime/core/providers/openvino/ov_allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,6 @@ namespace onnxruntime {

using namespace openvino_ep;

// Alignment, in bytes, that Alloc() uses both for padding the requested size
// and for aligning the pointer it hands out.
constexpr size_t default_alignment = 4096;

// Rounds `size` up to the next multiple of `pow2_alignment`.
// `pow2_alignment` is expected to be a power of two (as its name says): the
// result is computed with a bit mask, which only yields a multiple in that case.
static inline size_t align_up(size_t size, size_t pow2_alignment) {
  const size_t low_bits = pow2_alignment - 1;
  const size_t bumped = size + low_bits;
  return bumped & ~low_bits;
}

OVRTAllocator::OVRTAllocator(ov::Core& core, OrtDevice::DeviceType device_type, OrtDevice::DeviceId device_id, const char* name) : IAllocator(OrtMemoryInfo(name, OrtAllocatorType::OrtDeviceAllocator, OrtDevice(device_type, OrtDevice::MemType::DEFAULT, device_id), device_id, OrtMemTypeCPUInput)), core_(core) {
if (device_type == OrtDevice::NPU) {
remote_ctx_ = core_.get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();
Expand All @@ -26,25 +20,26 @@ OVRTAllocator::OVRTAllocator(ov::Core& core, OrtDevice::DeviceType device_type,

void* OVRTAllocator::Alloc(size_t size) {
try {
size_t alloc_size = align_up(size + sizeof(ov::Tensor*) + default_alignment, default_alignment);
ov::Tensor* tensor = new ov::Tensor(remote_ctx_.create_host_tensor(ov::element::Type_t::u8,
{alloc_size}));
uintptr_t data_ptr = reinterpret_cast<uintptr_t>(tensor->data());

ov::Tensor** ptr = reinterpret_cast<ov::Tensor**>(align_up(data_ptr + sizeof(ov::Tensor*), default_alignment));
ptr[-1] = tensor;

return reinterpret_cast<void*>(ptr);

{size}));
std::unique_lock lock(mutex_);
allocated_.insert({tensor->data(), tensor});
return reinterpret_cast<void*>(tensor->data());
} catch (const ov::Exception& e) {
ORT_THROW(std::string("Alloc failed: ") + e.what());
}
}

void OVRTAllocator::Free(void* p) {
try {
ov::Tensor** ptr = reinterpret_cast<ov::Tensor**>(p);
delete ptr[-1];
std::unique_lock lock(mutex_);
auto it = allocated_.find(p);
if (it != allocated_.end()) {
ov::Tensor* tensor = it->second;
allocated_.erase(it);
lock.unlock();
delete tensor;
}
} catch (const ov::Exception& e) {
ORT_THROW(std::string("Free failed: ") + e.what());
}
Expand Down
5 changes: 5 additions & 0 deletions onnxruntime/core/providers/openvino/ov_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
#ifdef USE_OVEP_NPU_MEMORY
#pragma once

#include <mutex>

#include "core/common/inlined_containers.h"
#include "core/framework/allocator.h"
#include "openvino/runtime/remote_context.hpp"
#include "core/common/inlined_containers.h"

namespace onnxruntime {

Expand All @@ -18,6 +21,8 @@ class OVRTAllocator : public IAllocator {
private:
ov::Core& core_;
ov::RemoteContext remote_ctx_;
InlinedHashMap<void*, ov::Tensor*> allocated_;
std::mutex mutex_;
};

} // namespace onnxruntime
Expand Down