
Commit 2652479

Merge pull request #826 from intel/sync_msft_10102025
Sync with Microsoft ONNX Runtime - 10/10/2025
2 parents d102554 + 9a5e182 commit 2652479

File tree

96 files changed (+1067 / -669 lines)


.github/workflows/gradle-wrapper-validation.yml

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ jobs:
     runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Ubuntu2204-AMD-CPU"]
     steps:
       - uses: actions/checkout@v5
-      - uses: gradle/actions/wrapper-validation@v4
+      - uses: gradle/actions/wrapper-validation@v5
 concurrency:
   group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
   cancel-in-progress: true

VERSION_NUMBER

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-1.23.0
+1.24.0

cmake/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ option(onnxruntime_USE_VSINPU "Build with VSINPU support" OFF)
 cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
 option(onnxruntime_USE_LEAN_ATTENTION "Build lean attention kernel for scaled dot product attention" OFF)
 cmake_dependent_option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
-cmake_dependent_option(onnxruntime_USE_FPA_INTB_GEMM "Build FpA IntB gemm cuda kernels" ON "onnxruntime_USE_CUDA" OFF)
+option(onnxruntime_USE_FPA_INTB_GEMM "Build FpA IntB gemm cuda kernels" OFF)

 option(onnxruntime_BUILD_FOR_NATIVE_MACHINE "Enable this option for turning on optimization specific to this machine" OFF)
 option(onnxruntime_USE_AVX "Use AVX instructions" OFF)

cmake/onnxruntime_mlas.cmake

Lines changed: 15 additions & 9 deletions
@@ -5,6 +5,9 @@ set(MLAS_ROOT ${ONNXRUNTIME_ROOT}/core/mlas)
 set(MLAS_SRC_DIR ${MLAS_ROOT}/lib)
 set(MLAS_INC_DIR ${MLAS_ROOT}/inc)

+# mlas_private_compile_definitions contains compile definitions that are private to onnxruntime_mlas and targets which
+# use internal MLAS headers like mlasi.h.
+set(mlas_private_compile_definitions)
 #
 # All hardware agnostic source files here
 # hardware specific files would cause trouble in
@@ -133,9 +136,9 @@ function(setup_mlas_source_for_windows)
   )

   if (onnxruntime_USE_ARM_NEON_NCHWC)
-    setup_arm_neon_nchwc()
+    setup_arm_neon_nchwc()
   endif()
-
+
   if (onnxruntime_USE_KLEIDIAI)
     setup_kleidiai()
   endif()
@@ -293,11 +296,12 @@ endfunction()

 function (setup_arm_neon_nchwc)
   target_sources(onnxruntime_mlas PRIVATE
-    ${MLAS_SRC_DIR}/sconv.h
+    ${MLAS_SRC_DIR}/sconv.h
     ${MLAS_SRC_DIR}/sconv_kernel_neon.cpp
     ${MLAS_SRC_DIR}/spool_kernel_neon.cpp
   )
-  target_compile_definitions(onnxruntime_mlas PRIVATE MLAS_USE_ARM_NEON_NCHWC)
+  list(APPEND mlas_private_compile_definitions MLAS_USE_ARM_NEON_NCHWC)
+  set(mlas_private_compile_definitions ${mlas_private_compile_definitions} PARENT_SCOPE)
 endfunction ()

 if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
@@ -445,25 +449,25 @@ else()
       ${MLAS_SRC_DIR}/eltwise_kernel_neon.cpp
       ${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
     )
-
+
     # Conditionally add the SVE implementation if compiler supports it
     if (onnxruntime_USE_SVE)
       list(APPEND mlas_platform_srcs ${MLAS_SRC_DIR}/sve/mlasi_sve.h)
       list(APPEND mlas_platform_srcs ${MLAS_SRC_DIR}/sve/elementwise_sve.cpp)
       set_source_files_properties(${MLAS_SRC_DIR}/sve/elementwise_sve.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+sve+fp16 ")
-      target_compile_definitions(onnxruntime_mlas PRIVATE MLAS_USE_SVE)
+      list(APPEND mlas_private_compile_definitions MLAS_USE_SVE)
     endif()

     if (onnxruntime_USE_ARM_NEON_NCHWC)
-      setup_arm_neon_nchwc()
+      setup_arm_neon_nchwc()
     endif()
-
+
     if (onnxruntime_USE_KLEIDIAI)
       setup_kleidiai()
     endif()
     set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8.cpp
                                 PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+dotprod")
-    set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
+    set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
                                 PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+i8mm ")

     if (NOT APPLE)
@@ -806,6 +810,8 @@ foreach(mlas_target ${ONNXRUNTIME_MLAS_LIBS})
   target_include_directories(${mlas_target} PRIVATE ${MLAS_INC_DIR} ${MLAS_SRC_DIR})
   onnxruntime_add_include_to_target(${mlas_target} ${GSL_TARGET})

+  target_compile_definitions(${mlas_target} PRIVATE ${mlas_private_compile_definitions})
+
   set_target_properties(${mlas_target} PROPERTIES FOLDER "ONNXRuntime")
 endforeach()

cmake/onnxruntime_test_pch.cmake

Lines changed: 5 additions & 3 deletions
@@ -5,9 +5,11 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
   target_precompile_headers(onnxruntime_test_all PRIVATE
     "${CMAKE_CURRENT_SOURCE_DIR}/test_pch.h"
   )
-  target_precompile_headers(onnxruntime_provider_test PRIVATE
-    "${CMAKE_CURRENT_SOURCE_DIR}/test_pch.h"
-  )
+  if (TARGET onnxruntime_provider_test)
+    target_precompile_headers(onnxruntime_provider_test PRIVATE
+      "${CMAKE_CURRENT_SOURCE_DIR}/test_pch.h"
+    )
+  endif()
 endif()

 # Exclude certain files that might conflict with PCH

cmake/onnxruntime_unittests.cmake

Lines changed: 8 additions & 9 deletions
@@ -1228,6 +1228,11 @@ block()
     LIBS ${onnxruntime_provider_test_libs}
     DEPENDS ${onnxruntime_provider_test_deps}
   )
+  if (UNIX AND (onnxruntime_USE_TENSORRT OR onnxruntime_USE_NV))
+    # The test_main.cc includes NvInfer.h where it has many deprecated declarations
+    # simply ignore them for TensorRT EP build
+    set_property(TARGET onnxruntime_provider_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+  endif()

   # enable dynamic plugin EP usage
   target_compile_definitions(onnxruntime_provider_test PRIVATE ORT_UNIT_TEST_ENABLE_DYNAMIC_PLUGIN_EP_USAGE)
@@ -1325,9 +1330,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     ${BENCHMARK_DIR}/layer_normalization.cc)
   target_include_directories(onnxruntime_benchmark PRIVATE ${ONNXRUNTIME_ROOT} ${onnxruntime_graph_header} ${ONNXRUNTIME_ROOT}/core/mlas/inc)
   target_compile_definitions(onnxruntime_benchmark PRIVATE BENCHMARK_STATIC_DEFINE)
-  if (onnxruntime_USE_SVE)
-    target_compile_definitions(onnxruntime_benchmark PRIVATE MLAS_USE_SVE)
-  endif()
+  target_compile_definitions(onnxruntime_benchmark PRIVATE ${mlas_private_compile_definitions})
   if(WIN32)
     target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd4141>"
                                                          "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4141>")
@@ -1355,9 +1358,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   target_include_directories(onnxruntime_mlas_benchmark PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc)
   target_link_libraries(onnxruntime_mlas_benchmark PRIVATE benchmark::benchmark onnxruntime_util ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common ${CMAKE_DL_LIBS})
   target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE BENCHMARK_STATIC_DEFINE)
-  if (onnxruntime_USE_SVE)
-    target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE MLAS_USE_SVE)
-  endif()
+  target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE ${mlas_private_compile_definitions})
   if(WIN32)
     target_link_libraries(onnxruntime_mlas_benchmark PRIVATE debug Dbghelp)
     # Avoid using new and delete. But this is a benchmark program, it's ok if it has a chance to leak.
@@ -1655,9 +1656,7 @@ endif()
       XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO"
     )
   endif()
-  if (onnxruntime_USE_SVE)
-    target_compile_definitions(onnxruntime_mlas_test PRIVATE MLAS_USE_SVE)
-  endif()
+  target_compile_definitions(onnxruntime_mlas_test PRIVATE ${mlas_private_compile_definitions})
   target_include_directories(onnxruntime_mlas_test PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${ONNXRUNTIME_ROOT}
                                                            ${CMAKE_CURRENT_BINARY_DIR})
   target_link_libraries(onnxruntime_mlas_test PRIVATE GTest::gtest GTest::gmock ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common)

docs/python/README.rst

Lines changed: 5 additions & 0 deletions
@@ -8,6 +8,11 @@ For more information on ONNX Runtime, please see `aka.ms/onnxruntime <https://ak
 Changes
 -------

+1.24.0
+^^^^^^
+
+Release Notes : https://github.com/Microsoft/onnxruntime/releases/tag/v1.24.0
+
 1.23.0
 ^^^^^^

docs/python/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -21,4 +21,4 @@ onnx
 sphinx_exec_code
 sphinx_tabs
 furo
-torch
+torch >= 2.6.0

js/.nvmrc

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+v24.9.0

js/common/lib/inference-session.ts

Lines changed: 16 additions & 0 deletions
@@ -245,7 +245,23 @@ export declare namespace InferenceSession {
   }
   export interface WebGpuExecutionProviderOption extends ExecutionProviderOption {
     readonly name: 'webgpu';
+
+    /**
+     * Specify the preferred layout when running layout sensitive operators.
+     *
+     * @default 'NCHW'
+     */
     preferredLayout?: 'NCHW' | 'NHWC';
+
+    /**
+     * Specify a list of node names that should be executed on CPU even when WebGPU EP is used.
+     */
+    forceCpuNodeNames?: readonly string[];
+
+    /**
+     * Specify an optional WebGPU device to be used by the WebGPU execution provider.
+     */
+    device?: TryGetGlobalType<'GPUDevice'>;
   }

   // #region WebNN options
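The options added above are consumed at session-creation time in onnxruntime-web. A minimal sketch of how they might be wired up follows; the model path, the node name passed to forceCpuNodeNames, and the use of navigator.gpu to obtain a device are illustrative assumptions, not part of this diff:

import * as ort from 'onnxruntime-web';

async function createWebGpuSession(): Promise<ort.InferenceSession> {
  // Optionally bring your own WebGPU device; if omitted, the WebGPU EP acquires one itself.
  // navigator.gpu needs WebGPU-capable browser typings (e.g. @webgpu/types).
  const adapter = await navigator.gpu.requestAdapter();
  const device = adapter ? await adapter.requestDevice() : undefined;

  const webgpuOptions: ort.InferenceSession.WebGpuExecutionProviderOption = {
    name: 'webgpu',
    preferredLayout: 'NHWC',                // layout for layout-sensitive operators; defaults to 'NCHW'
    forceCpuNodeNames: ['final_transpose'], // hypothetical node pinned to CPU
    device,                                 // optional pre-created GPUDevice
  };

  // 'model.onnx' is a placeholder model path.
  return ort.InferenceSession.create('model.onnx', {
    executionProviders: [webgpuOptions],
  });
}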
