diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 8887b183c4396..e150a7cd00ec6 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -45,7 +45,7 @@ BackendManager::BackendManager(SessionContext& session_context, subgraph_context_.is_ep_ctx_graph = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(subgraph); // If the graph contains a OVIR wrapped node, we check if it has matching xml file name attribute subgraph_context_.is_ep_ctx_ovir_encapsulated = ep_ctx_handle_.CheckEPCacheContextAttribute(subgraph, - session_context_.onnx_model_path_name.filename().replace_extension("xml").string()); + session_context_.onnx_model_path_name.filename().replace_extension("xml").string()); subgraph_context_.model_precision = [&](const GraphViewer& graph_viewer) { // return empty if graph has no inputs or if types are not one of FP32/FP16 @@ -91,21 +91,20 @@ BackendManager::BackendManager(SessionContext& session_context, std::string device_type = session_context_.device_type; auto& sw = shared_context_.shared_weights; - if (session_context_.so_share_ep_contexts) { + if (session_context_.so_share_ep_contexts && !sw.metadata.empty()) { std::filesystem::path weight_filename = session_context_.onnx_model_path_name.parent_path(); - if (sw.external_weight_filename.empty() && !sw.metadata.empty()) { + if (sw.external_weight_filename.empty()) { // Reasonable assumption that all metadata entries have the same external file location sw.external_weight_filename = sw.metadata.begin()->second.location; } weight_filename /= sw.external_weight_filename; std::ifstream weight_file(weight_filename); - if (weight_file) { - if (!sw.mapped_weights) { - sw.mapped_weights = std::make_unique(weight_filename); - } - backend_utils::CreateOVTensors(session_context_.device_type, sw.metadata, *sw.mapped_weights); + ORT_ENFORCE(weight_file, "Initializer file not found: ", weight_filename.string()); + if (!sw.mapped_weights) { + sw.mapped_weights = std::make_unique(weight_filename); } + backend_utils::CreateOVTensors(session_context_.device_type, sw.metadata, *sw.mapped_weights); } if (ModelHasSymbolicInputDims(subgraph)) { @@ -196,7 +195,7 @@ BackendManager::BackendManager(SessionContext& session_context, } } if (session_context_.so_context_enable && - (subgraph_context_.is_ep_ctx_ovir_encapsulated || !subgraph_context_.is_ep_ctx_graph)) { + (subgraph_context_.is_ep_ctx_ovir_encapsulated || !subgraph_context_.is_ep_ctx_graph)) { auto status = onnxruntime::openvino_ep::BackendManager::ExportCompiledBlobAsEPCtxNode(subgraph); if (!status.IsOK()) { ORT_THROW(status); diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 00a18bb0a45b6..ee74a1b1ee4b3 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -78,24 +78,24 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr // specify absolute path for so_context_file_path. 
auto model_file_path = [this]() { if (!session_context_.onnx_model_path_name.empty() && - std::filesystem::exists(session_context_.onnx_model_path_name)) return session_context_.onnx_model_path_name; + std::filesystem::exists(session_context_.onnx_model_path_name)) return session_context_.onnx_model_path_name; ORT_ENFORCE(!session_context_.so_context_file_path.empty() && - std::filesystem::path(session_context_.so_context_file_path).is_absolute() && - std::filesystem::exists(session_context_.so_context_file_path), log_tag + - "Context file path must be non-empty & absolute, when using CreateSessionFormArray() API explicitly." - " Please set a valid absolute path for ep.context_file_path in session options."); + std::filesystem::path(session_context_.so_context_file_path).is_absolute() && + std::filesystem::exists(session_context_.so_context_file_path), + log_tag + + "Context file path must be non-empty & absolute, when using CreateSessionFormArray() API explicitly." + " Please set a valid absolute path for ep.context_file_path in session options."); // Return absolute context file path as input to ImportEPCtxOVIREncapsulation() function. return session_context_.so_context_file_path; - }; // If the EPContext node with OVIR Encapsulation, then create // an executable network from EP_CACHE_CONTEXT using read_model() & compile_model() exe_network_ = OVCore::Get()->ImportEPCtxOVIREncapsulation(*model_stream, - hw_target, - device_config, - enable_causallm, - model_file_path()); + hw_target, + device_config, + enable_causallm, + model_file_path()); } else { // If the blob is held in an EPContext node, then skip FE+Compile // and directly move on to creating a backend with the executable blob diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index e2369cf728ea6..6a2b375d733f9 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -64,6 +64,7 @@ class SharedContext : public WeakSingleton { fs::path external_weight_filename; std::unique_ptr mapped_weights; Metadata::Map metadata; + fs::path metadata_filepath; } shared_weights; }; diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc index 49a4cb0a7e95a..9e70756a254aa 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -131,7 +131,7 @@ std::unique_ptr EPCtxHandler::GetModelBlobStream(const std::filesy // exported with must match the version that is currently running. 
ORT_ENFORCE((attrs.count(EP_SDK_VER) == 1) && (attrs.at(EP_SDK_VER).s() == openvino_sdk_version_), "EPCtx blob was exported / is compatible with OpenVINO SDK version " + attrs.at(EP_SDK_VER).s() + - ", but OpenVINO SDK version currently in use is " + openvino_sdk_version_); + ", but OpenVINO SDK version currently in use is " + openvino_sdk_version_); } LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node"; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 5c8293a213f40..7f6a7909f1dec 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -102,15 +102,24 @@ common::Status OpenVINOExecutionProvider::Compile( graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain); } - // Temporary code to read metadata before it moves to the .bin - auto& metadata = shared_context_->shared_weights.metadata; - if (session_context_.so_share_ep_contexts && metadata.empty()) { - // Metadata is always read from model location, this could be a source or epctx model - fs::path metadata_filename = session_context_.onnx_model_path_name.parent_path() / "metadata.bin"; - std::ifstream file(metadata_filename, std::ios::binary); - if (file) { - file >> metadata; + // The block below is executed during EP context model inference + auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory + if (session_context_.so_share_ep_contexts && + !session_context_.so_context_enable && + metadata.empty()) { + fs::path context_model_file_path = session_context_.so_context_file_path; + if (context_model_file_path.empty()) { + // If ep.context_file_path is not set the input model path is used + context_model_file_path = session_context_.onnx_model_path_name; } + + // Metadata is always read from model location, this could be a source or epctx model + fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin"; + fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename; + std::ifstream file(metadata_file_path, std::ios::binary); + ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string()); + shared_context_->shared_weights.metadata_filepath = metadata_file_path; + file >> metadata; } struct OpenVINOEPFunctionState { @@ -173,22 +182,29 @@ common::Status OpenVINOExecutionProvider::Compile( } } - if (session_context_.so_share_ep_contexts) { - fs::path metadata_filename; - if (session_context_.so_context_file_path.empty()) { - metadata_filename = session_context_.onnx_model_path_name.parent_path() / "metadata.bin"; - } else { - metadata_filename = session_context_.so_context_file_path.parent_path() / "metadata.bin"; + // The block below is executed during EP context model generation + if (session_context_.so_context_enable && + session_context_.so_share_ep_contexts && + !metadata.empty()) { + // For models after the first the metadata name comes from the shared context + fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath; + if (metadata_file_path.empty()) { + metadata_file_path = session_context_.so_context_file_path; + if (metadata_file_path.empty()) { + metadata_file_path = session_context_.onnx_model_path_name; + } + auto metadata_filename = metadata_file_path.stem().string() + "_metadata.bin"; + metadata_file_path.replace_filename(metadata_filename); + 
shared_context_->shared_weights.metadata_filepath = metadata_file_path; } // Metadata is generated only for shared contexts - // If saving metadata then save it to the provided path or ose the original model path + // If saving metadata then save it to the provided path or use the original model path // Multiple calls to Compile() will update the metadata and for the last call // the resulting file will contain the aggregated content - std::ofstream file(metadata_filename, std::ios::binary); - if (file) { - file << metadata; - } + std::ofstream file{metadata_file_path, std::ios::binary}; + ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path); + file << metadata; } return status; diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 306fa6113b347..918940b9d9917 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -16,7 +16,7 @@ namespace onnxruntime { namespace openvino_ep { template <typename Func, typename... Args> -inline auto OvExceptionBoundary(Func &&func, std::format_string<Args...>&& fmt, Args&&... args) { +inline auto OvExceptionBoundary(Func&& func, std::format_string<Args...>&& fmt, Args&&... args) { try { return func(); } catch (const ov::Exception& e) { @@ -47,462 +47,462 @@ void printDebugInfo(const ov::CompiledModel& obj) { continue; OPENVINO_SUPPRESS_DEPRECATED_END std::cout << " " << item2.first << ": " << item2.second.as<std::string>() << std::endl; + } + } + else { + std::cout << " " << cfg << ": " << prop.as<std::string>() << std::endl; } - } else { - std::cout << " " << cfg << ": " << prop.as<std::string>() << std::endl; } } } -} #endif
device_config; - - if (onnxruntime::openvino_ep::backend_utils::IsDebugEnabled()) { - std::cout << "Stateless OV Model Statistic:" << std::endl; - LogBasicModelInfo(model); } - bool model_status = IsStateful(model); - LOGS_DEFAULT(INFO) << log_tag << "Model IsStateful() Status:\t" << (model_status ? "True" : "False"); - if (!model_status) { - LOGS_DEFAULT(INFO) << log_tag << "Converting from Stateless OV Model to Stateful OV Model" << std::endl; - PatchStatefulDecoder(model); - } - - if (onnxruntime::openvino_ep::backend_utils::IsDebugEnabled()) { - std::cout << "Stateful OV Model Statistic:" << std::endl; - LogBasicModelInfo(model); + std::shared_ptr OVCore::ReadModel(std::string && model, const std::string& model_path) { + return OvExceptionBoundary([&]() { + std::istringstream modelStringStream(std::move(model)); + std::istream& modelStream = modelStringStream; + // Try to load with FrontEndManager + ov::frontend::FrontEndManager manager; + ov::frontend::FrontEnd::Ptr FE; + ov::frontend::InputModel::Ptr inputModel; + + ov::AnyVector params{&modelStream, model_path}; + + FE = manager.load_by_model(params); + if (FE) { + inputModel = FE->load(params); + return FE->convert(inputModel); + } else { + ORT_THROW(log_tag + "Unknown exception while Reading network"); + } + }, + "Exception while Reading network"); } - auto kv_pos = GetKVAxesPos(model); + OVExeNetwork OVCore::StatefulCompileModel(std::shared_ptr & model, + std::string & hw_target, + const ov::AnyMap& device_config) { + ov::CompiledModel compiled_model; + ov::AnyMap config = device_config; - if (hw_target.find("NPU") != std::string::npos) { - KVDesc kv_desc; - auto parse_genai_config = [&](const std::string& key, unsigned int default_value) { - return (config.count(key) && !config.at(key).empty() && config.at(key).as() != "0") ? config.at(key).as() : default_value; - }; - - kv_desc.max_prompt_len = parse_genai_config("MAX_PROMPT_LEN", CausalLMConfig().max_prompt_len); - kv_desc.min_response_len = parse_genai_config("MIN_RESPONSE_LEN", CausalLMConfig().min_response_len); + if (onnxruntime::openvino_ep::backend_utils::IsDebugEnabled()) { + std::cout << "Stateless OV Model Statistic:" << std::endl; + LogBasicModelInfo(model); + } - // For compilation, MAX_PROMPT_LEN & MIN_RESPONSE_LEN should not be 0 - if (kv_desc.max_prompt_len == 0 || kv_desc.min_response_len == 0) { - ORT_THROW(log_tag + "MAX_PROMPT_LEN and MIN_RESPONSE_LEN cannot be 0 or empty"); + bool model_status = IsStateful(model); + LOGS_DEFAULT(INFO) << log_tag << "Model IsStateful() Status:\t" << (model_status ? "True" : "False"); + if (!model_status) { + LOGS_DEFAULT(INFO) << log_tag << "Converting from Stateless OV Model to Stateful OV Model" << std::endl; + PatchStatefulDecoder(model); } if (onnxruntime::openvino_ep::backend_utils::IsDebugEnabled()) { - std::cout << "kv_pos.batch = " << kv_pos.batch << std::endl; - std::cout << "kv_pos.seq_len = " << kv_pos.seq_len << std::endl; - std::cout << "kv_desc.max_prompt_len:\t" << kv_desc.max_prompt_len << std::endl; - std::cout << "kv_desc.min_response_len:\t" << kv_desc.min_response_len << std::endl; + std::cout << "Stateful OV Model Statistic:" << std::endl; + LogBasicModelInfo(model); } - UpdateNPUConfig(config, kv_pos, kv_desc); - } else { - // This patches the OV IR model so that it only produces the logits required for sampling. - // Actually either way that happens within NPUW::LLMCompiledModel creation for NPU device, - // while this is here mostly to align this behavior for other devices viz. (CPU, GPU). 
- ApplySliceBeforeMatmulTransformation(model); - } + auto kv_pos = GetKVAxesPos(model); - LOGS_DEFAULT(INFO) << log_tag << "Compiling OV Model using Stateful Transformation flow"; - compiled_model = OVCore::Get()->core.compile_model(model, hw_target, config); - OVExeNetwork exe(compiled_model, hw_target, true); - return exe; -} + if (hw_target.find("NPU") != std::string::npos) { + KVDesc kv_desc; + auto parse_genai_config = [&](const std::string& key, unsigned int default_value) { + return (config.count(key) && !config.at(key).empty() && config.at(key).as() != "0") ? config.at(key).as() : default_value; + }; + + kv_desc.max_prompt_len = parse_genai_config("MAX_PROMPT_LEN", CausalLMConfig().max_prompt_len); + kv_desc.min_response_len = parse_genai_config("MIN_RESPONSE_LEN", CausalLMConfig().min_response_len); -OVExeNetwork OVCore::CompileModel(std::shared_ptr& ie_cnn_network, - std::string& hw_target, - ov::AnyMap& device_config, - bool enable_causallm, - const std::string& name) { - return OvExceptionBoundary([&]() { - OVExeNetwork exe; - if (enable_causallm) { - auto mutable_model = ie_cnn_network->clone(); - exe = OVCore::Get()->StatefulCompileModel(mutable_model, hw_target, device_config); + // For compilation, MAX_PROMPT_LEN & MIN_RESPONSE_LEN should not be 0 + if (kv_desc.max_prompt_len == 0 || kv_desc.min_response_len == 0) { + ORT_THROW(log_tag + "MAX_PROMPT_LEN and MIN_RESPONSE_LEN cannot be 0 or empty"); + } + + if (onnxruntime::openvino_ep::backend_utils::IsDebugEnabled()) { + std::cout << "kv_pos.batch = " << kv_pos.batch << std::endl; + std::cout << "kv_pos.seq_len = " << kv_pos.seq_len << std::endl; + std::cout << "kv_desc.max_prompt_len:\t" << kv_desc.max_prompt_len << std::endl; + std::cout << "kv_desc.min_response_len:\t" << kv_desc.min_response_len << std::endl; + } + + UpdateNPUConfig(config, kv_pos, kv_desc); } else { - auto obj = core.compile_model(ie_cnn_network, hw_target, device_config); - exe = OVExeNetwork(obj, hw_target); + // This patches the OV IR model so that it only produces the logits required for sampling. + // Actually either way that happens within NPUW::LLMCompiledModel creation for NPU device, + // while this is here mostly to align this behavior for other devices viz. (CPU, GPU). 
+ ApplySliceBeforeMatmulTransformation(model); } + LOGS_DEFAULT(INFO) << log_tag << "Compiling OV Model using Stateful Transformation flow"; + compiled_model = OVCore::Get()->core.compile_model(model, hw_target, config); + OVExeNetwork exe(compiled_model, hw_target, true); + return exe; + } + + OVExeNetwork OVCore::CompileModel(std::shared_ptr & ie_cnn_network, + std::string & hw_target, + ov::AnyMap & device_config, + bool enable_causallm, + const std::string& name) { + return OvExceptionBoundary([&]() { + OVExeNetwork exe; + if (enable_causallm) { + auto mutable_model = ie_cnn_network->clone(); + exe = OVCore::Get()->StatefulCompileModel(mutable_model, hw_target, device_config); + } else { + auto obj = core.compile_model(ie_cnn_network, hw_target, device_config); + exe = OVExeNetwork(obj, hw_target); + } + #ifndef NDEBUG - printDebugInfo(exe.Get()); + printDebugInfo(exe.Get()); #endif - return exe; - }, - "Exception while Loading Network for graph {}", name); -} + return exe; + }, + "Exception while Loading Network for graph {}", name); + } -OVExeNetwork OVCore::CompileModel(const std::string& onnx_model, - std::string& hw_target, - ov::AnyMap& device_config, - const std::string& name) { - return OvExceptionBoundary([&]() { - ov::CompiledModel obj; + OVExeNetwork OVCore::CompileModel(const std::string& onnx_model, + std::string& hw_target, + ov::AnyMap& device_config, + const std::string& name) { + return OvExceptionBoundary([&]() { + ov::CompiledModel obj; - obj = core.compile_model(onnx_model, ov::Tensor(), hw_target, device_config); + obj = core.compile_model(onnx_model, ov::Tensor(), hw_target, device_config); #ifndef NDEBUG - printDebugInfo(obj); + printDebugInfo(obj); #endif - OVExeNetwork exe(obj, hw_target); - return exe; - }, - "Exception while Loading Network for graph {}", name); -} + OVExeNetwork exe(obj, hw_target); + return exe; + }, + "Exception while Loading Network for graph {}", name); + } -OVExeNetwork OVCore::ImportModel(std::istream& model_stream, - std::string hw_target, - const ov::AnyMap& device_config, - std::string name) { - return OvExceptionBoundary([&]() { - ov::CompiledModel obj; - obj = core.import_model(model_stream, hw_target, device_config); - OVExeNetwork exe(obj, hw_target); + OVExeNetwork OVCore::ImportModel(std::istream & model_stream, + std::string hw_target, + const ov::AnyMap& device_config, + std::string name) { + return OvExceptionBoundary([&]() { + ov::CompiledModel obj; + obj = core.import_model(model_stream, hw_target, device_config); + OVExeNetwork exe(obj, hw_target); #ifndef NDEBUG - printDebugInfo(exe.Get()); + printDebugInfo(exe.Get()); #endif - return exe; - }, - "Exception while Loading Network for graph {}", name); -} + return exe; + }, + "Exception while Loading Network for graph {}", name); + } -OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream, - std::string& hw_target, - const ov::AnyMap& device_config, - bool enable_causallm, - std::filesystem::path model_file_path) { - return OvExceptionBoundary([&]() { - OVExeNetwork exe; - - bool isXML = backend_utils::IsModelStreamXML(model_stream); - - // Helper function to check if file exists and is readable - const auto check_file_access = [&model_file_path](const std::filesystem::path& path) { - try { - if (!std::filesystem::exists(path) || std::filesystem::is_empty(path)) { - ORT_THROW(log_tag + "Required file missing or empty: " + path.string()); - } - std::ifstream file(path); - if (!file) { - ORT_THROW(log_tag + "Required file not readable: " + 
path.string()); + OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream & model_stream, + std::string & hw_target, + const ov::AnyMap& device_config, + bool enable_causallm, + std::filesystem::path model_file_path) { + return OvExceptionBoundary([&]() { + OVExeNetwork exe; + + bool isXML = backend_utils::IsModelStreamXML(model_stream); + + // Helper function to check if file exists and is readable + const auto check_file_access = [&model_file_path](const std::filesystem::path& path) { + try { + if (!std::filesystem::exists(path) || std::filesystem::is_empty(path)) { + ORT_THROW(log_tag + "Required file missing or empty: " + path.string()); + } + std::ifstream file(path); + if (!file) { + ORT_THROW(log_tag + "Required file not readable: " + path.string()); + } + } catch (const std::exception& e) { + ORT_THROW(log_tag + "Exception while checking file access for: " + path.string() + " - " + e.what()); } - } catch (const std::exception& e) { - ORT_THROW(log_tag + "Exception while checking file access for: " + path.string() + " - " + e.what()); - } - }; + }; - if (isXML) { - // If the model is XML, we need to load it with the XML content in read_model() - // where weights from bin file is directly consumed - auto xml_file_path = model_file_path.parent_path() / (model_file_path.stem().string() + ".xml"); + if (isXML) { + // If the model is XML, we need to load it with the XML content in read_model() + // where weights from bin file is directly consumed + auto xml_file_path = model_file_path.parent_path() / (model_file_path.stem().string() + ".xml"); - check_file_access(xml_file_path); + check_file_access(xml_file_path); - LOGS_DEFAULT(INFO) << log_tag << "Reading OVIR from XML file path: " << xml_file_path.string(); + LOGS_DEFAULT(INFO) << log_tag << "Reading OVIR from XML file path: " << xml_file_path.string(); - // Load the model explicitly with XML contents - std::shared_ptr model = core.read_model(xml_file_path.string()); + // Load the model explicitly with XML contents + std::shared_ptr model = core.read_model(xml_file_path.string()); - if (enable_causallm) { - exe = OVCore::Get()->StatefulCompileModel(model, hw_target, device_config); - } else { - auto obj = core.compile_model(model, hw_target, device_config); - exe = OVExeNetwork(obj, hw_target); + if (enable_causallm) { + exe = OVCore::Get()->StatefulCompileModel(model, hw_target, device_config); + } else { + auto obj = core.compile_model(model, hw_target, device_config); + exe = OVExeNetwork(obj, hw_target); + } } - } #ifndef NDEBUG - printDebugInfo(exe.Get()); + printDebugInfo(exe.Get()); #endif - return exe; - }, - "Exception while Loading Network from OVIR model file: {}", model_file_path.string()); -} - - -void OVCore::SetCache(const std::string& cache_dir_path) { - core.set_property(ov::cache_dir(cache_dir_path)); -} - -std::vector OVCore::GetAvailableDevices() const { - std::vector available_devices = core.get_available_devices(); - return available_devices; -} - -std::vector OVCore::GetAvailableDevices(const std::string& device_type) const { - std::vector available_devices; - std::vector devicesIDs; - // Uses logic from OpenVINO to only return available devices of the specified type (e.g. CPU, NPU or GPU) - try { - devicesIDs = core.get_property(device_type, ov::available_devices); - } catch (const ov::Exception&) { - // plugin is not created by e.g. 
invalid env - // Empty device list will be returned - } catch (const std::exception& ex) { - ORT_THROW(log_tag + "An exception occurred while trying to create the ", - device_type, - " device: ", - ex.what()); - } catch (...) { - ORT_THROW(log_tag + "Unknown exception occurred while trying to create the ", - device_type, - " device"); + return exe; + }, + "Exception while Loading Network from OVIR model file: {}", model_file_path.string()); } - if (devicesIDs.size() > 1 || - (devicesIDs.size() == 1 && devicesIDs[0] == "0")) { - for (const auto& deviceID : devicesIDs) { - available_devices.push_back(device_type + '.' + deviceID); - } - } - if (!devicesIDs.empty()) { - available_devices.push_back(device_type); + void OVCore::SetCache(const std::string& cache_dir_path) { + core.set_property(ov::cache_dir(cache_dir_path)); } - return available_devices; -} - -void OVCore::SetStreams(const std::string& device_type, int num_streams) { - core.set_property(device_type, {ov::num_streams(num_streams)}); -} + std::vector OVCore::GetAvailableDevices() const { + std::vector available_devices = core.get_available_devices(); + return available_devices; + } -std::shared_ptr OVExeNetwork::CreateInferRequest() { - return OvExceptionBoundary([&]() { - auto infReq = compiled_model_obj.create_infer_request(); - std::shared_ptr ovInfReq; - if (is_stateful_causallm) { - ovInfReq = std::make_shared(std::move(infReq), target_device); - } else { - ovInfReq = std::make_shared(std::move(infReq)); + std::vector OVCore::GetAvailableDevices(const std::string& device_type) const { + std::vector available_devices; + std::vector devicesIDs; + // Uses logic from OpenVINO to only return available devices of the specified type (e.g. CPU, NPU or GPU) + try { + devicesIDs = core.get_property(device_type, ov::available_devices); + } catch (const ov::Exception&) { + // plugin is not created by e.g. invalid env + // Empty device list will be returned + } catch (const std::exception& ex) { + ORT_THROW(log_tag + "An exception occurred while trying to create the ", + device_type, + " device: ", + ex.what()); + } catch (...) { + ORT_THROW(log_tag + "Unknown exception occurred while trying to create the ", + device_type, + " device"); } - return ovInfReq; - }, - - "Exception while creating InferRequest object"); -} -OVTensorPtr OVInferRequest::GetTensor(const std::string& input_name) { - return OvExceptionBoundary([&]() { - auto tobj = ovInfReq.get_tensor(input_name); - OVTensorPtr blob = std::make_shared(tobj); - return blob; - }, - " Cannot access IE Blob for input: {}", input_name); -} + if (devicesIDs.size() > 1 || + (devicesIDs.size() == 1 && devicesIDs[0] == "0")) { + for (const auto& deviceID : devicesIDs) { + available_devices.push_back(device_type + '.' 
+ deviceID); + } + } + if (!devicesIDs.empty()) { + available_devices.push_back(device_type); + } -std::string OVInferRequest::GetInputTensorName(uint32_t index) { - return OvExceptionBoundary([&]() { - const auto& model = ovInfReq.get_compiled_model(); - return *model.input(index).get_names().begin(); - }, - " Cannot access IE Blob for input number: {}", index); -} + return available_devices; + } -void OVInferRequest::SetTensor(const std::string& name, OVTensorPtr& blob) { - OvExceptionBoundary([&]() { - ovInfReq.set_tensor(name, *(blob.get())); - }, - " Cannot set Remote Blob for output: {}", name); -} + void OVCore::SetStreams(const std::string& device_type, int num_streams) { + core.set_property(device_type, {ov::num_streams(num_streams)}); + } -uint32_t OVInferRequest::GetNumInputs() { - return static_cast(ovInfReq.get_compiled_model().inputs().size()); -} + std::shared_ptr OVExeNetwork::CreateInferRequest() { + return OvExceptionBoundary([&]() { + auto infReq = compiled_model_obj.create_infer_request(); + std::shared_ptr ovInfReq; + if (is_stateful_causallm) { + ovInfReq = std::make_shared(std::move(infReq), target_device); + } else { + ovInfReq = std::make_shared(std::move(infReq)); + } + return ovInfReq; + }, -void OVInferRequest::Infer() { - OvExceptionBoundary([&]() { - ovInfReq.infer(); - }, - "In Error Couldn't start Inference"); -} + "Exception while creating InferRequest object"); + } -StatefulOVInferRequest::StatefulOVInferRequest(ov::InferRequest infer_request, std::string device) - : OVInferRequest(std::move(infer_request)), target_device(device) { - bool gpu_or_npu = ((device.find("NPU") != std::string::npos) || (device.find("GPU") != std::string::npos)); - if (gpu_or_npu) { - prefill_use_full_chat_history = true; + OVTensorPtr OVInferRequest::GetTensor(const std::string& input_name) { + return OvExceptionBoundary([&]() { + auto tobj = ovInfReq.get_tensor(input_name); + OVTensorPtr blob = std::make_shared(tobj); + return blob; + }, + " Cannot access IE Blob for input: {}", input_name); } -} -void StatefulOVInferRequest::FillTensor(const std::string& tensor_name, const ov::element::Type& type, - const std::vector& shape, int32_t fill_value) { - ov::Tensor tensor = ov::Tensor(type, shape); - std::fill_n(tensor.data(), tensor.get_size(), fill_value); - ovInfReq.set_tensor(tensor_name, tensor); -} + std::string OVInferRequest::GetInputTensorName(uint32_t index) { + return OvExceptionBoundary([&]() { + const auto& model = ovInfReq.get_compiled_model(); + return *model.input(index).get_names().begin(); + }, + " Cannot access IE Blob for input number: {}", index); + } -void StatefulOVInferRequest::CacheTensor(const std::string& tensor_name, std::vector& cache) { - auto tensor = ovInfReq.get_tensor(tensor_name); - auto* pData = tensor.data(); - for (size_t i = 0; i < tensor.get_size(); i++) { - cache.emplace_back(pData[i]); + void OVInferRequest::SetTensor(const std::string& name, OVTensorPtr& blob) { + OvExceptionBoundary([&]() { + ovInfReq.set_tensor(name, *(blob.get())); + }, + " Cannot set Remote Blob for output: {}", name); } -} -void StatefulOVInferRequest::SetTensorFromCache(const std::string& tensor_name, - const std::vector& cache_data) { - auto tensor = ovInfReq.get_tensor(tensor_name); - auto new_shape = tensor.get_shape(); - new_shape[1] = cache_data.size(); + uint32_t OVInferRequest::GetNumInputs() { + return static_cast(ovInfReq.get_compiled_model().inputs().size()); + } - auto new_tensor = ov::Tensor(tensor.get_element_type(), new_shape); - auto* pNewData = 
new_tensor.data(); - std::memcpy(pNewData, cache_data.data(), cache_data.size() * sizeof(int64_t)); + void OVInferRequest::Infer() { + OvExceptionBoundary([&]() { + ovInfReq.infer(); + }, + "In Error Couldn't start Inference"); + } - ovInfReq.set_tensor(tensor_name, new_tensor); -} + StatefulOVInferRequest::StatefulOVInferRequest(ov::InferRequest infer_request, std::string device) + : OVInferRequest(std::move(infer_request)), target_device(device) { + bool gpu_or_npu = ((device.find("NPU") != std::string::npos) || (device.find("GPU") != std::string::npos)); + if (gpu_or_npu) { + prefill_use_full_chat_history = true; + } + } -std::optional StatefulOVInferRequest::FindTensor(const std::string& tensor_name) { - // Check if tensor exists by examining input names in the compiled model - const auto& model = ovInfReq.get_compiled_model(); - bool tensor_exists = false; + void StatefulOVInferRequest::FillTensor(const std::string& tensor_name, const ov::element::Type& type, + const std::vector& shape, int32_t fill_value) { + ov::Tensor tensor = ov::Tensor(type, shape); + std::fill_n(tensor.data(), tensor.get_size(), fill_value); + ovInfReq.set_tensor(tensor_name, tensor); + } - for (const auto& input : model.inputs()) { - const auto& names = input.get_names(); - if (names.find(tensor_name) != names.end()) { - tensor_exists = true; - break; + void StatefulOVInferRequest::CacheTensor(const std::string& tensor_name, std::vector& cache) { + auto tensor = ovInfReq.get_tensor(tensor_name); + auto* pData = tensor.data(); + for (size_t i = 0; i < tensor.get_size(); i++) { + cache.emplace_back(pData[i]); } } - if (tensor_exists) { - return ovInfReq.get_tensor(tensor_name); - } + void StatefulOVInferRequest::SetTensorFromCache(const std::string& tensor_name, + const std::vector& cache_data) { + auto tensor = ovInfReq.get_tensor(tensor_name); + auto new_shape = tensor.get_shape(); + new_shape[1] = cache_data.size(); - return std::nullopt; -} + auto new_tensor = ov::Tensor(tensor.get_element_type(), new_shape); + auto* pNewData = new_tensor.data(); + std::memcpy(pNewData, cache_data.data(), cache_data.size() * sizeof(int64_t)); -void StatefulOVInferRequest::PreProcessInferRequest() { - // Workaround: Setting the value here as it cannot be set at the ORT GenAI layer currently. - // TODO(ankit): Address this issue and implement the fix at the appropriate layer. - FillTensor("beam_idx", ov::element::i32, {1}, 0); + ovInfReq.set_tensor(tensor_name, new_tensor); + } - // If 'prefill use full chat history' mode is enabled, we need to cache input_ids and position_ids. 
- if (prefill_use_full_chat_history) { - auto input_ids_tensor = ovInfReq.get_tensor("input_ids"); - CacheTensor("input_ids", cached_input_ids); + std::optional StatefulOVInferRequest::FindTensor(const std::string& tensor_name) { + // Check if tensor exists by examining input names in the compiled model + const auto& model = ovInfReq.get_compiled_model(); + bool tensor_exists = false; - // "position_ids" (GQA with Rotary Embeddings doesnt have position_ids) - check if exists - auto position_ids_opt = FindTensor("position_ids"); - bool has_position_ids = position_ids_opt.has_value(); + for (const auto& input : model.inputs()) { + const auto& names = input.get_names(); + if (names.find(tensor_name) != names.end()) { + tensor_exists = true; + break; + } + } - if (has_position_ids) { - CacheTensor("position_ids", cached_position_ids); + if (tensor_exists) { + return ovInfReq.get_tensor(tensor_name); } - // If we're about to run the prefill model - if (input_ids_tensor.get_size() > 1) { - // Check if the size of the current "input_ids" tensor does not match the size of the cached "input_ids". - // This indicates that we are running a subsequent prompt (not the initial prefill). - if (input_ids_tensor.get_shape()[1] != cached_input_ids.size()) { - // Clear the internal KVCache state. For NPU device, this operation is a no-op. - ovInfReq.reset_state(); + return std::nullopt; + } + + void StatefulOVInferRequest::PreProcessInferRequest() { + // Workaround: Setting the value here as it cannot be set at the ORT GenAI layer currently. + // TODO(ankit): Address this issue and implement the fix at the appropriate layer. + FillTensor("beam_idx", ov::element::i32, {1}, 0); + + // If 'prefill use full chat history' mode is enabled, we need to cache input_ids and position_ids. + if (prefill_use_full_chat_history) { + auto input_ids_tensor = ovInfReq.get_tensor("input_ids"); + CacheTensor("input_ids", cached_input_ids); - // Set tensors using cached values - SetTensorFromCache("input_ids", cached_input_ids); + // "position_ids" (GQA with Rotary Embeddings doesnt have position_ids) - check if exists + auto position_ids_opt = FindTensor("position_ids"); + bool has_position_ids = position_ids_opt.has_value(); - // Only set position_ids if it exists and we have cached values - if (has_position_ids && !cached_position_ids.empty()) { - SetTensorFromCache("position_ids", cached_position_ids); + if (has_position_ids) { + CacheTensor("position_ids", cached_position_ids); + } + + // If we're about to run the prefill model + if (input_ids_tensor.get_size() > 1) { + // Check if the size of the current "input_ids" tensor does not match the size of the cached "input_ids". + // This indicates that we are running a subsequent prompt (not the initial prefill). + if (input_ids_tensor.get_shape()[1] != cached_input_ids.size()) { + // Clear the internal KVCache state. For NPU device, this operation is a no-op. 
+ ovInfReq.reset_state(); + + // Set tensors using cached values + SetTensorFromCache("input_ids", cached_input_ids); + + // Only set position_ids if it exists and we have cached values + if (has_position_ids && !cached_position_ids.empty()) { + SetTensorFromCache("position_ids", cached_position_ids); + } } } } } -} -void StatefulOVInferRequest::Infer() { - PreProcessInferRequest(); - OVInferRequest::Infer(); -} + void StatefulOVInferRequest::Infer() { + PreProcessInferRequest(); + OVInferRequest::Infer(); + } -void StatefulOVInferRequest::RewindKVCache(size_t index) { - LOGS_DEFAULT(INFO) << log_tag << "RewindKVCache: Rewinding OpenVINO-internal KVCache state to index=" << index; + void StatefulOVInferRequest::RewindKVCache(size_t index) { + LOGS_DEFAULT(INFO) << log_tag << "RewindKVCache: Rewinding OpenVINO-internal KVCache state to index=" << index; - if (prefill_use_full_chat_history) { - // Clear the internal KVCache state. For NPU device, this operation is a no-op. - ovInfReq.reset_state(); + if (prefill_use_full_chat_history) { + // Clear the internal KVCache state. For NPU device, this operation is a no-op. + ovInfReq.reset_state(); - // Resize the cached "input_ids" and "position_ids" to the specified index. - if (cached_input_ids.size() > index) { - cached_input_ids.resize(index); - } + // Resize the cached "input_ids" and "position_ids" to the specified index. + if (cached_input_ids.size() > index) { + cached_input_ids.resize(index); + } - if (cached_position_ids.size() > index) { - cached_position_ids.resize(index); - } - } else { - if (index == 0) { - // In this case, since we're resetting the entire KVCache, simply reset the state. - ovInfReq.reset_state(); + if (cached_position_ids.size() > index) { + cached_position_ids.resize(index); + } } else { - // Retrieve KVCache states and trim them to the specified index. - // The following logic is adapted from: - // https://github.com/openvinotoolkit/openvino.genai/blob/releases/2025/1/src/cpp/src/utils.cpp#L329 - auto states = ovInfReq.query_state(); - for (auto& state : states) { - ov::Tensor old_tensor = state.get_state(); - // Tensor shape: [batch_size, num_kv_heads, seq_len, head_size] - auto shape = old_tensor.get_shape(); - - if (shape[2] > index) { - // Update the sequence length dimension to the specified index. - shape[2] = index; - - ov::Coordinate new_shape_begin{0, 0, 0, 0}; - ov::Coordinate new_shape_end{shape}; - - // Create a trimmed tensor with the updated shape. - auto trimmed_tensor = ov::Tensor(old_tensor, new_shape_begin, new_shape_end); - - // Copy the trimmed tensor into a new tensor and update the state. - ov::Tensor new_tensor(old_tensor.get_element_type(), shape); - trimmed_tensor.copy_to(new_tensor); - - state.set_state(new_tensor); + if (index == 0) { + // In this case, since we're resetting the entire KVCache, simply reset the state. + ovInfReq.reset_state(); + } else { + // Retrieve KVCache states and trim them to the specified index. + // The following logic is adapted from: + // https://github.com/openvinotoolkit/openvino.genai/blob/releases/2025/1/src/cpp/src/utils.cpp#L329 + auto states = ovInfReq.query_state(); + for (auto& state : states) { + ov::Tensor old_tensor = state.get_state(); + // Tensor shape: [batch_size, num_kv_heads, seq_len, head_size] + auto shape = old_tensor.get_shape(); + + if (shape[2] > index) { + // Update the sequence length dimension to the specified index. 
+ shape[2] = index; + + ov::Coordinate new_shape_begin{0, 0, 0, 0}; + ov::Coordinate new_shape_end{shape}; + + // Create a trimmed tensor with the updated shape. + auto trimmed_tensor = ov::Tensor(old_tensor, new_shape_begin, new_shape_end); + + // Copy the trimmed tensor into a new tensor and update the state. + ov::Tensor new_tensor(old_tensor.get_element_type(), shape); + trimmed_tensor.copy_to(new_tensor); + + state.set_state(new_tensor); + } } } } } -} } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index 0e019342bc86e..fb1757199698b 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -117,7 +117,7 @@ class OVInferRequest { const void* ort_ptr; }; - protected: + protected: ov::InferRequest ovInfReq; std::unordered_map bindings_cache_; @@ -127,7 +127,7 @@ class OVInferRequest { std::string GetInputTensorName(uint32_t index); // Set tensor described param_info and ort_ptr. Overrides shape in param_info with shape_override. Call infer req tensor if ort_ptr is last set. - void SetTensor(const std::string& name, const ov::element::Type &type, const ov::Shape& shape, void* ort_ptr) { + void SetTensor(const std::string& name, const ov::element::Type& type, const ov::Shape& shape, void* ort_ptr) { auto& cached_binding = bindings_cache_[name]; if (cached_binding.ort_ptr != ort_ptr) { auto tensor_ptr = std::make_shared(type, shape, const_cast(ort_ptr)); diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc index 45ea822685710..88ddde8610c6e 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc @@ -38,7 +38,7 @@ GetCapability::GetCapability(const EPCtxHandler& ep_ctx_handler, device_type_ = "CPU"; if (enable_qdq_optimizer) npu_qdq_optimizer_enabled = true; } else if (enable_qdq_optimizer && device_type_.find("GPU") != std::string::npos) { - npu_qdq_optimizer_enabled = true; // see data_ops.cc ~615 where we check for int16 types for gpu, this may change to a better approach later + npu_qdq_optimizer_enabled = true; // see data_ops.cc ~615 where we check for int16 types for gpu, this may change to a better approach later } #if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 5 diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 99d6e4b7ab5ef..27d8dd7822c41 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -615,7 +615,7 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) { } // experimentally for GPU and qdq stripping mode allow int16 types if (npu_qdq_optimizer_enabled_ && (dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16 || dtype == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16)) - return true; + return true; } #ifndef NDEBUG if (openvino_ep::backend_utils::IsDebugEnabled()) {