@@ -167,7 +167,10 @@ BackendManager::BackendManager(SessionContext& session_context,
167167 exception_str.find (" intel_npu" ) != std::string::npos) {
168168 // Handle NPU device related errors
169169#ifndef NDEBUG
170- ORT_THROW (exception_str + " \n Model needs to be recompiled\n " );
170+ std::string suffix = session_context_.so_disable_cpu_ep_fallback ?
171+ " \n Model failed to compile on NPU. Enable CPU fallback or try another device.\n " :
172+ " \n Model needs to be recompiled\n " ;
173+ ORT_THROW (exception_str + suffix);
171174#else
172175 std::string error_message = " UNKNOWN NPU ERROR" ;
173176 std::string error_code = " code 0x0" ;
@@ -180,7 +183,10 @@ BackendManager::BackendManager(SessionContext& session_context,
180183 if (std::regex_search (exception_str, matches, error_code_pattern)) {
181184 error_code = matches[0 ];
182185 }
183- throw std::runtime_error (error_message + " , " + error_code + " \n Model needs to be recompiled\n " );
186+ std::string suffix = session_context_.so_disable_cpu_ep_fallback ?
187+ " \n Model failed to compile on NPU. Enable CPU fallback or try another device.\n " :
188+ " \n Model needs to be recompiled\n " ;
189+ throw std::runtime_error (error_message + " , " + error_code + suffix);
184190#endif
185191 } else {
186192 ORT_THROW (exception_str);
@@ -628,8 +634,8 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
628634 // proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions.
629635#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025))
630636 constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32 ;
631- const bool include_initializer_data_in_proto = !(session_context_.has_external_weights &&
632- external_initializers_offset_and_length.size () > 1 &&
637+ const bool include_initializer_data_in_proto = !(session_context_.has_external_weights &&
638+ external_initializers_offset_and_length.size () > 1 &&
633639 extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE);
634640#else
635641 const bool include_initializer_data_in_proto = true ;
@@ -639,7 +645,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
639645 auto model = subgraph.CreateModel (logger);
640646 auto model_proto = model->ToProto ();
641647 model_proto->set_ir_version (ONNX_NAMESPACE::Version::IR_VERSION);
642- subgraph.ToProto (*model_proto->mutable_graph (), /* include_initializers*/ true ,
648+ subgraph.ToProto (*model_proto->mutable_graph (), /* include_initializers*/ true ,
643649 /* include_outer_scope_args*/ true , /* execution_order*/ 0 , /* include_initializer_data*/ include_initializer_data_in_proto);
644650
645651 print_model_proto_duration ();
@@ -878,7 +884,25 @@ void BackendManager::Compute(OrtKernelContext* context) {
878884 ORT_THROW (msg);
879885 }
880886 } else {
881- ORT_THROW (ex.what ());
887+ std::string exception_str = ex.what ();
888+ if (session_context_.so_disable_cpu_ep_fallback ){
889+ std::string error_message = " UNKNOWN NPU ERROR" ;
890+ std::string error_code = " code 0x0" ;
891+ std::regex error_message_pattern (R"( \bZE_\w*\b)" );
892+ std::regex error_code_pattern (" code 0x[0-9a-fA-F]+" );
893+ std::smatch matches;
894+ if (std::regex_search (exception_str, matches, error_message_pattern)) {
895+ error_message = matches[0 ];
896+ }
897+ if (std::regex_search (exception_str, matches, error_code_pattern)) {
898+ error_code = matches[0 ];
899+ }
900+ std::string suffix = " \n Model failed to compile on NPU. Enable CPU fallback or try another device.\n " ;
901+ throw std::runtime_error (error_message + " , " + error_code + suffix);
902+ }
903+ else {
904+ ORT_THROW (exception_str);
905+ }
882906 }
883907#endif
884908 }
0 commit comments