From 89b6bd1ceac70cdc643f204d6e71208ab5c8fda1 Mon Sep 17 00:00:00 2001 From: ankitm3k Date: Wed, 11 Jun 2025 15:35:05 +0530 Subject: [PATCH 1/6] feat: Enable EpContext OVIR Encapsulation --- .../core/providers/openvino/backend_utils.cc | 27 +++++++++++++++ .../core/providers/openvino/backend_utils.h | 2 ++ .../openvino/backends/basic_backend.cc | 3 +- .../openvino/onnx_ctx_model_helper.cc | 12 ++++++- .../core/providers/openvino/ov_interface.cc | 34 ++++++++++++++++--- .../core/providers/openvino/ov_interface.h | 1 + 6 files changed, 73 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 49eedfb3e4fcd..180a4ded67fe7 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -440,6 +440,33 @@ void DestroyOVTensors(SharedContext::SharedWeights::Metadata::Map& metadata_map) metadata_map.clear(); } +bool IsModelStreamXML(std::istream& model_stream) { + std::streampos originalPos = model_stream.tellg(); + + // first, get the total size of model_stream in bytes + model_stream.seekg(0, std::ios::end); + auto end_pos = model_stream.tellg(); + // Restore the stream position + model_stream.seekg(originalPos); + auto total_size = end_pos - originalPos; + + // Choose 32 bytes to hold content of: + // ' header_check_len); + + // read 32 bytes into header + std::string header(header_check_len, '\0'); + model_stream.read(&header[0], header_check_len); + // Clear any read errors + model_stream.clear(); + // Restore the stream position + model_stream.seekg(originalPos); + + // return true if the header starts with '& performanceMap, void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName); +bool IsModelStreamXML(std::istream& model_stream); + } // namespace backend_utils } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 3105c307706ad..17cd8433fb1d7 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -76,7 +76,8 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr exe_network_ = OVCore::Get()->ImportModel(*model_stream, hw_target, device_config, - subgraph_context_.subgraph_name); + enable_causallm, + session_context_.onnx_model_path_name.string()); model_stream.reset(); // Delete stream after it is no longer needed } else if (!session_context_.has_external_weights && !subgraph_context_.has_dynamic_input_shape && diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc index 7bd4f8d96cc55..2e88b6d07dee0 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -7,6 +7,7 @@ #include #include "core/providers/openvino/onnx_ctx_model_helper.h" +#include "core/providers/openvino/backend_utils.h" namespace onnxruntime { namespace openvino_ep { @@ -123,6 +124,16 @@ std::unique_ptr EPCtxHandler::GetModelBlobStream(const std::filesy ORT_ENFORCE(std::filesystem::exists(blob_filepath), "Blob file not found: ", blob_filepath.string()); result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in)); } + + bool isXML = backend_utils::IsModelStreamXML(*result); + if (!isXML) { + // If the model stream is not an XML (i.e. precompiled blob), the OpenVINO SDK version that it was + // exported with must match the version that is currently running. + ORT_ENFORCE((attrs.count(EP_SDK_VER) == 1) && (attrs.at(EP_SDK_VER).s() == openvino_sdk_version_), + "EPCtx blob was exported / is compatible with with OpenVINO SDK version " + attrs.at(EP_SDK_VER).s() + + ", but OpenVINO SDK version currently in use is " + openvino_sdk_version_); + } + LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node"; return result; } @@ -142,7 +153,6 @@ bool EPCtxHandler::CheckForOVEPCtxNode(const Node& node) const { if (node.OpType() == EPCONTEXT_OP) { auto& attrs = node.GetAttributes(); bool result = (attrs.count(SOURCE) == 1) && (attrs.at(SOURCE).s() == kOpenVINOExecutionProvider); - result &= (attrs.count(EP_SDK_VER) == 1) && (attrs.at(EP_SDK_VER).s() == openvino_sdk_version_); result &= attrs.count(EMBED_MODE) == 1; result &= attrs.count(EP_CACHE_CONTEXT) == 1; return result; diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 3afe38ad12e71..fa5900022c8d6 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -95,10 +95,10 @@ OVExeNetwork OVCore::StatefulCompileModel(std::shared_ptr& model, LogBasicModelInfo(model); } - LOGS_DEFAULT(INFO) << log_tag << "Converting from Stateless OV Model to Stateful OV Model" << std::endl; bool model_status = IsStateful(model); LOGS_DEFAULT(INFO) << log_tag << "Model IsStateful() Status:\t" << (model_status ? "True" : "False"); if (!model_status) { + LOGS_DEFAULT(INFO) << log_tag << "Converting from Stateless OV Model to Stateful OV Model" << std::endl; PatchStatefulDecoder(model); } @@ -193,14 +193,40 @@ OVExeNetwork OVCore::CompileModel(const std::string& onnx_model, OVExeNetwork OVCore::ImportModel(std::istream& model_stream, std::string hw_target, const ov::AnyMap& device_config, + bool enable_causallm, std::string name) { try { - ov::CompiledModel obj; - obj = core.import_model(model_stream, hw_target, device_config); + OVExeNetwork exe; + + bool isXML = backend_utils::IsModelStreamXML(model_stream); + + if (!isXML) { + auto obj = core.import_model(model_stream, hw_target, device_config); + exe = OVExeNetwork(obj, hw_target); + } else { + // If the model is XML, we need to load it with the XML content in read_model() + // where weights from bin file is directly consumed + std::string xml_file_name = name; + if (name.size() >= 5 && name.substr(name.size() - 5) == ".onnx") { + xml_file_name.replace(name.size() - 5, 5, ".xml"); + } else { + throw std::runtime_error("Invalid model name. Make sure *.onnx, *.xml, and *.bin carry the same name."); + } + + // Load the model explicitly with XML contents + std::shared_ptr model = core.read_model(xml_file_name); + + if (enable_causallm) { + exe = OVCore::Get()->StatefulCompileModel(model, hw_target, device_config); + } else { + auto obj = core.compile_model(model, hw_target, device_config); + exe = OVExeNetwork(obj, hw_target); + } + } + #ifndef NDEBUG printDebugInfo(exe.Get()); #endif - OVExeNetwork exe(obj, hw_target); return exe; } catch (const Exception& e) { ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index 82a8c27fa035c..ea9a60666c97d 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -82,6 +82,7 @@ struct OVCore : WeakSingleton { OVExeNetwork ImportModel(std::istream& model_stream, std::string hw_target, const ov::AnyMap& device_config, + bool enable_causallm, std::string name); std::vector GetAvailableDevices() const; std::vector GetAvailableDevices(const std::string& device_type) const; From 143f4c1f06216c6545662bcb52abdc718bdbeb12 Mon Sep 17 00:00:00 2001 From: ankitm3k Date: Tue, 17 Jun 2025 20:03:41 +0530 Subject: [PATCH 2/6] fix: refactor EpCtx OVIR parsing logic to use ep.context_file_path --- .../providers/openvino/backend_manager.cc | 2 + .../openvino/backends/basic_backend.cc | 24 +++++--- .../core/providers/openvino/contexts.h | 1 + .../openvino/onnx_ctx_model_helper.cc | 29 +++++++++- .../openvino/onnx_ctx_model_helper.h | 1 + .../core/providers/openvino/ov_interface.cc | 58 +++++++++++++++---- .../core/providers/openvino/ov_interface.h | 6 ++ 7 files changed, 102 insertions(+), 19 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index cb7acfd2ca95a..82a13f22aa0e6 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -43,6 +43,8 @@ BackendManager::BackendManager(SessionContext& session_context, session_context_(session_context), shared_context_{shared_context} { subgraph_context_.is_ep_ctx_graph = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(subgraph); + // If the graph contains a OVIR wrapped node, we check if it has xml file attribute + subgraph_context_.is_ep_ctx_ovir_encapsulated = ep_ctx_handle_.CheckEPCacheContextAttribute(subgraph, "xml"); bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos || session_context_.device_type.find("GPU") != std::string::npos; diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 17cd8433fb1d7..aaf8aa1883469 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -71,13 +71,23 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr !session_context_.so_disable_cpu_ep_fallback && !subgraph_context_.is_ep_ctx_graph); if (subgraph_context_.is_ep_ctx_graph) { - // If the blob is held in an EPContext node, then skip FE+Compile - // and directly move on to creating a backend with the executable blob - exe_network_ = OVCore::Get()->ImportModel(*model_stream, - hw_target, - device_config, - enable_causallm, - session_context_.onnx_model_path_name.string()); + if (subgraph_context_.is_ep_ctx_ovir_encapsulated) { + // If the EPContext node with OVIR Encapsulation, then create + // an executable network from EP_CACHE_CONTEXT using read_model() & compile_model() + exe_network_ = OVCore::Get()->ImportEPCtxOVIREncapsulation(*model_stream, + hw_target, + device_config, + enable_causallm, + session_context_.so_context_file_path, + subgraph_context_.subgraph_name); + } else { + // If the blob is held in an EPContext node, then skip FE+Compile + // and directly move on to creating a backend with the executable blob + exe_network_ = OVCore::Get()->ImportModel(*model_stream, + hw_target, + device_config, + subgraph_context_.subgraph_name); + } model_stream.reset(); // Delete stream after it is no longer needed } else if (!session_context_.has_external_weights && !subgraph_context_.has_dynamic_input_shape && diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 09d48a5e916e1..e2369cf728ea6 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -137,6 +137,7 @@ struct SubGraphContext { string_index_map_t output_names; std::string model_precision; bool is_ep_ctx_graph = false; + bool is_ep_ctx_ovir_encapsulated = false; }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc index 2e88b6d07dee0..49a4cb0a7e95a 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -130,7 +130,7 @@ std::unique_ptr EPCtxHandler::GetModelBlobStream(const std::filesy // If the model stream is not an XML (i.e. precompiled blob), the OpenVINO SDK version that it was // exported with must match the version that is currently running. ORT_ENFORCE((attrs.count(EP_SDK_VER) == 1) && (attrs.at(EP_SDK_VER).s() == openvino_sdk_version_), - "EPCtx blob was exported / is compatible with with OpenVINO SDK version " + attrs.at(EP_SDK_VER).s() + + "EPCtx blob was exported / is compatible with OpenVINO SDK version " + attrs.at(EP_SDK_VER).s() + ", but OpenVINO SDK version currently in use is " + openvino_sdk_version_); } @@ -165,5 +165,32 @@ InlinedVector EPCtxHandler::GetEPCtxNodes() const { return InlinedVector(epctx_nodes.begin(), epctx_nodes.end()); } +// Check if graph's only node is EPContext & EP_CACHE_CONTEXT attribute has target extension. +// @param graph_viewer: The graph to inspect. +// @param target_attr_extn: The string to search for in the EP_CACHE_CONTEXT attribute. +// @return true if the node exists, is of the correct type, and the attribute contains the extension; false otherwise. +bool EPCtxHandler::CheckEPCacheContextAttribute(const GraphViewer& graph_viewer, const std::string& target_attr_extn) const { + // Only check if the graph has exactly one node + if (graph_viewer.NumberOfNodes() != 1) { + return false; + } + // Get the first node in topological order + auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin(); + const Node* node = graph_viewer.GetNode(first_index); + if (!node) { + return false; + } + // Check OpType and required attributes + if (node->OpType() != EPCONTEXT_OP) { + return false; + } + const auto& attrs = node->GetAttributes(); + auto it = attrs.find(EP_CACHE_CONTEXT); + if (it != attrs.end()) { + return it->second().s().find(target_attr_extn) != std::string::npos; + } + return false; +} + } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h index ff978bd6534d8..b9ddb40a7a233 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h @@ -33,6 +33,7 @@ class EPCtxHandler { std::string&& model_blob_str) const; std::unique_ptr GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const; InlinedVector GetEPCtxNodes() const; + bool CheckEPCacheContextAttribute(const GraphViewer& graph_viewer, const std::string& target_attr_extn) const; private: const std::string openvino_sdk_version_; diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index fa5900022c8d6..e920de0b35dc2 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -191,30 +191,66 @@ OVExeNetwork OVCore::CompileModel(const std::string& onnx_model, } OVExeNetwork OVCore::ImportModel(std::istream& model_stream, + std::string hw_target, + const ov::AnyMap& device_config, + std::string name) { + try { + ov::CompiledModel obj; + obj = core.import_model(model_stream, hw_target, device_config); +#ifndef NDEBUG + printDebugInfo(exe.Get()); +#endif + OVExeNetwork exe(obj, hw_target); + return exe; + } catch (const Exception& e) { + ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); + } catch (...) { + ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); + } +} + +OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream, std::string hw_target, const ov::AnyMap& device_config, bool enable_causallm, + std::filesystem::path context_file_path, std::string name) { try { OVExeNetwork exe; bool isXML = backend_utils::IsModelStreamXML(model_stream); - if (!isXML) { - auto obj = core.import_model(model_stream, hw_target, device_config); - exe = OVExeNetwork(obj, hw_target); - } else { + ORT_ENFORCE(!context_file_path.string().empty(), + "The session option ep.context_file_path is not set for EPContext node with OVIR Encapsulation. " + "Current value: '" + context_file_path.string() + "'"); + + // Helper function to check if file exists and is readable + const auto check_file_access = [&context_file_path](const std::filesystem::path& path) { + try { + const auto status = std::filesystem::status(path); + if (!std::filesystem::exists(status)) { + ORT_THROW(log_tag + "Required file missing: " + path.string()); + } + std::ifstream file(path); + if (!file.is_open()) { + ORT_THROW(log_tag + "Required file not readable: " + path.string()); + } + } catch (const std::exception& e) { + ORT_THROW(log_tag + "Exception while checking file access for: " + path.string() + " - " + e.what()); + } + }; + + if (isXML) { // If the model is XML, we need to load it with the XML content in read_model() // where weights from bin file is directly consumed - std::string xml_file_name = name; - if (name.size() >= 5 && name.substr(name.size() - 5) == ".onnx") { - xml_file_name.replace(name.size() - 5, 5, ".xml"); - } else { - throw std::runtime_error("Invalid model name. Make sure *.onnx, *.xml, and *.bin carry the same name."); - } + auto xml_file_path = context_file_path.parent_path() / (context_file_path.stem().string() + ".xml"); + + check_file_access(xml_file_path); + + LOGS_DEFAULT(INFO) << log_tag << "Reading OVIR from XML file path: " << xml_file_path.string(); // Load the model explicitly with XML contents - std::shared_ptr model = core.read_model(xml_file_name); + std::shared_ptr model = core.read_model(xml_file_path.string()); if (enable_causallm) { exe = OVCore::Get()->StatefulCompileModel(model, hw_target, device_config); diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index ea9a60666c97d..1fd0144cc8f70 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -80,10 +80,16 @@ struct OVCore : WeakSingleton { const std::string& name); // OV Interface for Import model Stream OVExeNetwork ImportModel(std::istream& model_stream, + std::string hw_target, + const ov::AnyMap& device_config, + std::string name); + OVExeNetwork ImportEPCtxOVIREncapsulation(std::istream& model_stream, std::string hw_target, const ov::AnyMap& device_config, bool enable_causallm, + std::filesystem::path context_file_path, std::string name); + std::vector GetAvailableDevices() const; std::vector GetAvailableDevices(const std::string& device_type) const; void SetCache(const std::string& cache_dir_path); From 04cce1b28889aa43be65048a1d16fcfc1b27107a Mon Sep 17 00:00:00 2001 From: ankitm3k Date: Wed, 18 Jun 2025 14:02:59 +0530 Subject: [PATCH 3/6] fix: Fix logic for parsing model_file_path --- .../openvino/backends/basic_backend.cc | 19 ++++++++++-- .../core/providers/openvino/ov_interface.cc | 29 +++++++------------ .../core/providers/openvino/ov_interface.h | 9 +++--- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index aaf8aa1883469..e80a355f2a317 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -72,14 +72,29 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr !subgraph_context_.is_ep_ctx_graph); if (subgraph_context_.is_ep_ctx_graph) { if (subgraph_context_.is_ep_ctx_ovir_encapsulated) { + // model_file_path will use so_context_file_path if the onnx_model_path_name is not available, + // especially in case of CreateSessionFormArray() where user must explicitly + // specify absolute path for so_context_file_path. + auto model_file_path = [this]() { + if (!session_context_.onnx_model_path_name.empty() && + std::filesystem::exists(session_context_.onnx_model_path_name)) return session_context_.onnx_model_path_name; + + ORT_ENFORCE(!session_context_.so_context_file_path.empty() && + std::filesystem::path(session_context_.so_context_file_path).is_absolute() && + std::filesystem::exists(session_context_.so_context_file_path), + log_tag + "Context file path must be non-empty & absolute, when using CreateSessionFormArray() API explicitly." + "Please set a valid absolute path for ep.context_file_path in session options."); + // Return absolute context file path as input to ImportEPCtxOVIREncapsulation() function. + return session_context_.so_context_file_path; + + }; // If the EPContext node with OVIR Encapsulation, then create // an executable network from EP_CACHE_CONTEXT using read_model() & compile_model() exe_network_ = OVCore::Get()->ImportEPCtxOVIREncapsulation(*model_stream, hw_target, device_config, enable_causallm, - session_context_.so_context_file_path, - subgraph_context_.subgraph_name); + model_file_path()); } else { // If the blob is held in an EPContext node, then skip FE+Compile // and directly move on to creating a backend with the executable blob diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index e920de0b35dc2..00b8d2152dfb1 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -38,7 +38,6 @@ void printDebugInfo(const ov::CompiledModel& obj) { continue; OPENVINO_SUPPRESS_DEPRECATED_END std::cout << " " << item2.first << ": " << item2.second.as() << std::endl; - } } } else { std::cout << " " << cfg << ": " << prop.as() << std::endl; @@ -210,29 +209,23 @@ OVExeNetwork OVCore::ImportModel(std::istream& model_stream, } OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream, - std::string hw_target, - const ov::AnyMap& device_config, - bool enable_causallm, - std::filesystem::path context_file_path, - std::string name) { + std::string& hw_target, + const ov::AnyMap& device_config, + bool enable_causallm, + std::filesystem::path model_file_path) { try { OVExeNetwork exe; bool isXML = backend_utils::IsModelStreamXML(model_stream); - ORT_ENFORCE(!context_file_path.string().empty(), - "The session option ep.context_file_path is not set for EPContext node with OVIR Encapsulation. " - "Current value: '" + context_file_path.string() + "'"); - // Helper function to check if file exists and is readable - const auto check_file_access = [&context_file_path](const std::filesystem::path& path) { + const auto check_file_access = [&model_file_path](const std::filesystem::path& path) { try { - const auto status = std::filesystem::status(path); - if (!std::filesystem::exists(status)) { - ORT_THROW(log_tag + "Required file missing: " + path.string()); + if (!std::filesystem::exists(path) || std::filesystem::is_empty(path)) { + ORT_THROW(log_tag + "Required file missing or empty: " + path.string()); } std::ifstream file(path); - if (!file.is_open()) { + if (!file) { ORT_THROW(log_tag + "Required file not readable: " + path.string()); } } catch (const std::exception& e) { @@ -243,7 +236,7 @@ OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream, if (isXML) { // If the model is XML, we need to load it with the XML content in read_model() // where weights from bin file is directly consumed - auto xml_file_path = context_file_path.parent_path() / (context_file_path.stem().string() + ".xml"); + auto xml_file_path = model_file_path.parent_path() / (model_file_path.stem().string() + ".xml"); check_file_access(xml_file_path); @@ -265,9 +258,9 @@ OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream, #endif return exe; } catch (const Exception& e) { - ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); + ORT_THROW(log_tag + " Exception while Loading Network from OVIR model file: " + model_file_path.string() + e.what()); } catch (...) { - ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); + ORT_THROW(log_tag + " Exception while Loading Network from OVIR model file: " + model_file_path.string()); } } diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index 1fd0144cc8f70..3208b071743d6 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -84,11 +84,10 @@ struct OVCore : WeakSingleton { const ov::AnyMap& device_config, std::string name); OVExeNetwork ImportEPCtxOVIREncapsulation(std::istream& model_stream, - std::string hw_target, - const ov::AnyMap& device_config, - bool enable_causallm, - std::filesystem::path context_file_path, - std::string name); + std::string& hw_target, + const ov::AnyMap& device_config, + bool enable_causallm, + std::filesystem::path model_file_path); std::vector GetAvailableDevices() const; std::vector GetAvailableDevices(const std::string& device_type) const; From 9a77fcda77d62d1551a313eaad9f383d65a7930c Mon Sep 17 00:00:00 2001 From: ankitm3k Date: Wed, 18 Jun 2025 15:21:36 +0530 Subject: [PATCH 4/6] fix: enable EPCtx OVIR encapsulation compiled blob caching --- onnxruntime/core/providers/openvino/backend_manager.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 82a13f22aa0e6..f40ae2b02d78f 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -199,9 +199,10 @@ BackendManager::BackendManager(SessionContext& session_context, } } } - if (session_context_.so_context_enable && !subgraph_context_.is_ep_ctx_graph) { + if (session_context_.so_context_enable && + (subgraph_context_.is_ep_ctx_ovir_encapsulated || !subgraph_context_.is_ep_ctx_graph)) { auto status = onnxruntime::openvino_ep::BackendManager::ExportCompiledBlobAsEPCtxNode(subgraph); - if ((!status.IsOK())) { + if (!status.IsOK()) { ORT_THROW(status); } } From e40331e83a44baf73c2625187abf27cab148ea1b Mon Sep 17 00:00:00 2001 From: ankitm3k Date: Thu, 19 Jun 2025 17:46:04 +0530 Subject: [PATCH 5/6] fix: fix merge conflicts --- onnxruntime/core/providers/openvino/ov_interface.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 63631f83b1515..f5028a218b0aa 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -211,7 +211,7 @@ OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream, const ov::AnyMap& device_config, bool enable_causallm, std::filesystem::path model_file_path) { - try { + return OvExceptionBoundary([&]() { OVExeNetwork exe; bool isXML = backend_utils::IsModelStreamXML(model_stream); @@ -255,13 +255,11 @@ OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream, printDebugInfo(exe.Get()); #endif return exe; - } catch (const Exception& e) { - ORT_THROW(log_tag + " Exception while Loading Network from OVIR model file: " + model_file_path.string() + e.what()); - } catch (...) { - ORT_THROW(log_tag + " Exception while Loading Network from OVIR model file: " + model_file_path.string()); - } + }, + "Exception while Loading Network from OVIR model file: {}", model_file_path.string()); } + void OVCore::SetCache(const std::string& cache_dir_path) { core.set_property(ov::cache_dir(cache_dir_path)); } From 01a26b74e65fd23b0d0660c8785c45379170a597 Mon Sep 17 00:00:00 2001 From: ankitm3k Date: Mon, 23 Jun 2025 19:03:34 +0530 Subject: [PATCH 6/6] fix: fix bugs --- onnxruntime/core/providers/openvino/backend_manager.cc | 5 +++-- .../core/providers/openvino/backends/basic_backend.cc | 6 +++--- onnxruntime/core/providers/openvino/ov_interface.cc | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index b35d3ef5cf02f..8887b183c4396 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -43,8 +43,9 @@ BackendManager::BackendManager(SessionContext& session_context, session_context_(session_context), shared_context_{shared_context} { subgraph_context_.is_ep_ctx_graph = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(subgraph); - // If the graph contains a OVIR wrapped node, we check if it has xml file attribute - subgraph_context_.is_ep_ctx_ovir_encapsulated = ep_ctx_handle_.CheckEPCacheContextAttribute(subgraph, "xml"); + // If the graph contains a OVIR wrapped node, we check if it has matching xml file name attribute + subgraph_context_.is_ep_ctx_ovir_encapsulated = ep_ctx_handle_.CheckEPCacheContextAttribute(subgraph, + session_context_.onnx_model_path_name.filename().replace_extension("xml").string()); subgraph_context_.model_precision = [&](const GraphViewer& graph_viewer) { // return empty if graph has no inputs or if types are not one of FP32/FP16 diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 9b7ea11e5eef5..00a18bb0a45b6 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -82,9 +82,9 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr ORT_ENFORCE(!session_context_.so_context_file_path.empty() && std::filesystem::path(session_context_.so_context_file_path).is_absolute() && - std::filesystem::exists(session_context_.so_context_file_path), - log_tag + "Context file path must be non-empty & absolute, when using CreateSessionFormArray() API explicitly." - "Please set a valid absolute path for ep.context_file_path in session options."); + std::filesystem::exists(session_context_.so_context_file_path), log_tag + + "Context file path must be non-empty & absolute, when using CreateSessionFormArray() API explicitly." + " Please set a valid absolute path for ep.context_file_path in session options."); // Return absolute context file path as input to ImportEPCtxOVIREncapsulation() function. return session_context_.so_context_file_path; diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index f5028a218b0aa..306fa6113b347 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -197,10 +197,10 @@ OVExeNetwork OVCore::ImportModel(std::istream& model_stream, return OvExceptionBoundary([&]() { ov::CompiledModel obj; obj = core.import_model(model_stream, hw_target, device_config); + OVExeNetwork exe(obj, hw_target); #ifndef NDEBUG printDebugInfo(exe.Get()); #endif - OVExeNetwork exe(obj, hw_target); return exe; }, "Exception while Loading Network for graph {}", name);