
Commit 2652479

Merge pull request #826 from intel/sync_msft_10102025
Sync with Microsoft ONNX Runtime - 10/10/2025
2 parents d102554 + 9a5e182 commit 2652479

File tree

96 files changed (+1067 / -669 lines)


.github/workflows/gradle-wrapper-validation.yml

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ jobs:
     runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Ubuntu2204-AMD-CPU"]
     steps:
       - uses: actions/checkout@v5
-      - uses: gradle/actions/wrapper-validation@v4
+      - uses: gradle/actions/wrapper-validation@v5
 concurrency:
   group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
   cancel-in-progress: true

VERSION_NUMBER

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-1.23.0
+1.24.0

cmake/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ option(onnxruntime_USE_VSINPU "Build with VSINPU support" OFF)
 cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
 option(onnxruntime_USE_LEAN_ATTENTION "Build lean attention kernel for scaled dot product attention" OFF)
 cmake_dependent_option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
-cmake_dependent_option(onnxruntime_USE_FPA_INTB_GEMM "Build FpA IntB gemm cuda kernels" ON "onnxruntime_USE_CUDA" OFF)
+option(onnxruntime_USE_FPA_INTB_GEMM "Build FpA IntB gemm cuda kernels" OFF)

 option(onnxruntime_BUILD_FOR_NATIVE_MACHINE "Enable this option for turning on optimization specific to this machine" OFF)
 option(onnxruntime_USE_AVX "Use AVX instructions" OFF)

cmake/onnxruntime_mlas.cmake

Lines changed: 15 additions & 9 deletions
@@ -5,6 +5,9 @@ set(MLAS_ROOT ${ONNXRUNTIME_ROOT}/core/mlas)
 set(MLAS_SRC_DIR ${MLAS_ROOT}/lib)
 set(MLAS_INC_DIR ${MLAS_ROOT}/inc)

+# mlas_private_compile_definitions contains compile definitions that are private to onnxruntime_mlas and targets which
+# use internal MLAS headers like mlasi.h.
+set(mlas_private_compile_definitions)
 #
 # All hardware agnostic source files here
 # hardware specific files would cause trouble in
@@ -133,9 +136,9 @@ function(setup_mlas_source_for_windows)
   )

   if (onnxruntime_USE_ARM_NEON_NCHWC)
-    setup_arm_neon_nchwc()
+    setup_arm_neon_nchwc()
   endif()
-
+
   if (onnxruntime_USE_KLEIDIAI)
     setup_kleidiai()
   endif()
@@ -293,11 +296,12 @@ endfunction()

 function (setup_arm_neon_nchwc)
   target_sources(onnxruntime_mlas PRIVATE
-    ${MLAS_SRC_DIR}/sconv.h
+    ${MLAS_SRC_DIR}/sconv.h
     ${MLAS_SRC_DIR}/sconv_kernel_neon.cpp
     ${MLAS_SRC_DIR}/spool_kernel_neon.cpp
   )
-  target_compile_definitions(onnxruntime_mlas PRIVATE MLAS_USE_ARM_NEON_NCHWC)
+  list(APPEND mlas_private_compile_definitions MLAS_USE_ARM_NEON_NCHWC)
+  set(mlas_private_compile_definitions ${mlas_private_compile_definitions} PARENT_SCOPE)
 endfunction ()

 if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
@@ -445,25 +449,25 @@ else()
       ${MLAS_SRC_DIR}/eltwise_kernel_neon.cpp
       ${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
     )
-
+
     # Conditionally add the SVE implementation if compiler supports it
     if (onnxruntime_USE_SVE)
       list(APPEND mlas_platform_srcs ${MLAS_SRC_DIR}/sve/mlasi_sve.h)
       list(APPEND mlas_platform_srcs ${MLAS_SRC_DIR}/sve/elementwise_sve.cpp)
       set_source_files_properties(${MLAS_SRC_DIR}/sve/elementwise_sve.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+sve+fp16 ")
-      target_compile_definitions(onnxruntime_mlas PRIVATE MLAS_USE_SVE)
+      list(APPEND mlas_private_compile_definitions MLAS_USE_SVE)
     endif()

     if (onnxruntime_USE_ARM_NEON_NCHWC)
-      setup_arm_neon_nchwc()
+      setup_arm_neon_nchwc()
     endif()
-
+
     if (onnxruntime_USE_KLEIDIAI)
       setup_kleidiai()
     endif()
     set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8.cpp
                                 PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+dotprod")
-    set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
+    set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
                                 PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+i8mm ")

     if (NOT APPLE)
@@ -806,6 +810,8 @@ foreach(mlas_target ${ONNXRUNTIME_MLAS_LIBS})
   target_include_directories(${mlas_target} PRIVATE ${MLAS_INC_DIR} ${MLAS_SRC_DIR})
   onnxruntime_add_include_to_target(${mlas_target} ${GSL_TARGET})

+  target_compile_definitions(${mlas_target} PRIVATE ${mlas_private_compile_definitions})
+
   set_target_properties(${mlas_target} PROPERTIES FOLDER "ONNXRuntime")
 endforeach()

cmake/onnxruntime_test_pch.cmake

Lines changed: 5 additions & 3 deletions
@@ -5,9 +5,11 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
   target_precompile_headers(onnxruntime_test_all PRIVATE
     "${CMAKE_CURRENT_SOURCE_DIR}/test_pch.h"
   )
-  target_precompile_headers(onnxruntime_provider_test PRIVATE
-    "${CMAKE_CURRENT_SOURCE_DIR}/test_pch.h"
-  )
+  if (TARGET onnxruntime_provider_test)
+    target_precompile_headers(onnxruntime_provider_test PRIVATE
+      "${CMAKE_CURRENT_SOURCE_DIR}/test_pch.h"
+    )
+  endif()
 endif()

 # Exclude certain files that might conflict with PCH

cmake/onnxruntime_unittests.cmake

Lines changed: 8 additions & 9 deletions
@@ -1228,6 +1228,11 @@ block()
     LIBS ${onnxruntime_provider_test_libs}
     DEPENDS ${onnxruntime_provider_test_deps}
   )
+  if (UNIX AND (onnxruntime_USE_TENSORRT OR onnxruntime_USE_NV))
+    # The test_main.cc includes NvInfer.h where it has many deprecated declarations
+    # simply ignore them for TensorRT EP build
+    set_property(TARGET onnxruntime_provider_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+  endif()

   # enable dynamic plugin EP usage
   target_compile_definitions(onnxruntime_provider_test PRIVATE ORT_UNIT_TEST_ENABLE_DYNAMIC_PLUGIN_EP_USAGE)
@@ -1325,9 +1330,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     ${BENCHMARK_DIR}/layer_normalization.cc)
   target_include_directories(onnxruntime_benchmark PRIVATE ${ONNXRUNTIME_ROOT} ${onnxruntime_graph_header} ${ONNXRUNTIME_ROOT}/core/mlas/inc)
   target_compile_definitions(onnxruntime_benchmark PRIVATE BENCHMARK_STATIC_DEFINE)
-  if (onnxruntime_USE_SVE)
-    target_compile_definitions(onnxruntime_benchmark PRIVATE MLAS_USE_SVE)
-  endif()
+  target_compile_definitions(onnxruntime_benchmark PRIVATE ${mlas_private_compile_definitions})
   if(WIN32)
     target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd4141>"
                                                          "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4141>")
@@ -1355,9 +1358,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   target_include_directories(onnxruntime_mlas_benchmark PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc)
   target_link_libraries(onnxruntime_mlas_benchmark PRIVATE benchmark::benchmark onnxruntime_util ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common ${CMAKE_DL_LIBS})
   target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE BENCHMARK_STATIC_DEFINE)
-  if (onnxruntime_USE_SVE)
-    target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE MLAS_USE_SVE)
-  endif()
+  target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE ${mlas_private_compile_definitions})
   if(WIN32)
     target_link_libraries(onnxruntime_mlas_benchmark PRIVATE debug Dbghelp)
     # Avoid using new and delete. But this is a benchmark program, it's ok if it has a chance to leak.
@@ -1655,9 +1656,7 @@ endif()
       XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO"
     )
   endif()
-  if (onnxruntime_USE_SVE)
-    target_compile_definitions(onnxruntime_mlas_test PRIVATE MLAS_USE_SVE)
-  endif()
+  target_compile_definitions(onnxruntime_mlas_test PRIVATE ${mlas_private_compile_definitions})
   target_include_directories(onnxruntime_mlas_test PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${ONNXRUNTIME_ROOT}
                                                            ${CMAKE_CURRENT_BINARY_DIR})
   target_link_libraries(onnxruntime_mlas_test PRIVATE GTest::gtest GTest::gmock ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common)

docs/python/README.rst

Lines changed: 5 additions & 0 deletions
@@ -8,6 +8,11 @@ For more information on ONNX Runtime, please see `aka.ms/onnxruntime <https://ak
 Changes
 -------

+1.24.0
+^^^^^^
+
+Release Notes : https://github.com/Microsoft/onnxruntime/releases/tag/v1.24.0
+
 1.23.0
 ^^^^^^

docs/python/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -21,4 +21,4 @@ onnx
 sphinx_exec_code
 sphinx_tabs
 furo
-torch
+torch >= 2.6.0

js/.nvmrc

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+v24.9.0

js/common/lib/inference-session.ts

Lines changed: 16 additions & 0 deletions
@@ -245,7 +245,23 @@ export declare namespace InferenceSession {
   }
   export interface WebGpuExecutionProviderOption extends ExecutionProviderOption {
     readonly name: 'webgpu';
+
+    /**
+     * Specify the preferred layout when running layout sensitive operators.
+     *
+     * @default 'NCHW'
+     */
     preferredLayout?: 'NCHW' | 'NHWC';
+
+    /**
+     * Specify a list of node names that should be executed on CPU even when WebGPU EP is used.
+     */
+    forceCpuNodeNames?: readonly string[];
+
+    /**
+     * Specify an optional WebGPU device to be used by the WebGPU execution provider.
+     */
+    device?: TryGetGlobalType<'GPUDevice'>;
   }

   // #region WebNN options
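The options added above are consumed at session-creation time in onnxruntime-web. A minimal sketch of how they might be wired up follows; the model path, the node name passed to forceCpuNodeNames, and the use of navigator.gpu to obtain a device are illustrative assumptions, not part of this diff:

import * as ort from 'onnxruntime-web';

async function createWebGpuSession(): Promise<ort.InferenceSession> {
  // Optionally bring your own WebGPU device; if omitted, the WebGPU EP acquires one itself.
  // navigator.gpu needs WebGPU-capable browser typings (e.g. @webgpu/types).
  const adapter = await navigator.gpu.requestAdapter();
  const device = adapter ? await adapter.requestDevice() : undefined;

  const webgpuOptions: ort.InferenceSession.WebGpuExecutionProviderOption = {
    name: 'webgpu',
    preferredLayout: 'NHWC',                // layout for layout-sensitive operators; defaults to 'NCHW'
    forceCpuNodeNames: ['final_transpose'], // hypothetical node pinned to CPU
    device,                                 // optional pre-created GPUDevice
  };

  // 'model.onnx' is a placeholder model path.
  return ort.InferenceSession.create('model.onnx', {
    executionProviders: [webgpuOptions],
  });
}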
