From 2d9ac13097ee5602cb7014fa08e591b877a09224 Mon Sep 17 00:00:00 2001
From: bfilipek
Date: Wed, 21 May 2025 06:05:45 -0700
Subject: [PATCH 1/3] update the statement so that we run CreateModelWithStrippedQDQNodes on GPU

---
 onnxruntime/core/providers/openvino/backend_manager.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 9ef7e4b86db5f..4fa7aa79e620f 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -381,7 +381,8 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
   const auto& onnx_model_path_name = subgraph.ModelPath();
 
   // QDQ stripping enabled only for the NPU
-  if (session_context_.device_type.find("NPU") != std::string::npos &&
+  if ((session_context_.device_type.find("NPU") != std::string::npos ||
+       session_context_.device_type.find("GPU") != std::string::npos) &&
       (enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
     std::unique_ptr model;
     Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_.shared_weights);

From 0cfc14ae4970d787dd579ad403b039362b7a6e68 Mon Sep 17 00:00:00 2001
From: bfilipek
Date: Thu, 22 May 2025 04:12:04 -0700
Subject: [PATCH 2/3] ensure the capability checks are also updated

---
 onnxruntime/core/providers/openvino/ov_versions/capability.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index bbe5d5a4b966c..46d2f6e02c70e 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -34,7 +34,7 @@ GetCapability::GetCapability(const EPCtxHandler& ep_ctx_handler,
     graph_viewer_(graph_viewer_param),
     device_type_(std::move(device_type_param)) {
   bool npu_qdq_optimizer_enabled = false;
-  if (device_type_.find("NPU") != std::string::npos) {
+  if (device_type_.find("NPU") != std::string::npos || device_type_.find("GPU") != std::string::npos) {
     device_type_ = "CPU";
     if (enable_qdq_optimizer) npu_qdq_optimizer_enabled = true;
   }

From e9eb972fd4e608e62e6c5f790b34566e0934c2a6 Mon Sep 17 00:00:00 2001
From: bfilipek
Date: Fri, 23 May 2025 01:04:27 -0700
Subject: [PATCH 3/3] update the comment

Signed-off-by: bfilipek
---
 onnxruntime/core/providers/openvino/backend_manager.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 4fa7aa79e620f..cf8e11826ce8b 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -380,7 +380,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
 #endif
   const auto& onnx_model_path_name = subgraph.ModelPath();
 
-  // QDQ stripping enabled only for the NPU
+  // QDQ stripping enabled only for the NPU and experimentally on the GPU
   if ((session_context_.device_type.find("NPU") != std::string::npos ||
        session_context_.device_type.find("GPU") != std::string::npos) &&
       (enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {