Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
978bfca
Fix CPU EP Tile 0D overvalidation (#25821)
fdwr Oct 2, 2025
8af9f58
Fix Local Attention off by 1 bug (#25927)
aciddelgado Oct 2, 2025
4cb138a
Add CMake mlas_private_compile_definitions variable for internal MLAS…
edgchen1 Oct 3, 2025
be655f6
Add skip_pip_install to build.py (#25982)
qti-hungjuiw Oct 3, 2025
591d554
Bump gradle/actions from 4 to 5 (#26244)
dependabot[bot] Oct 6, 2025
4a6c0e5
[js/node] enable handling EP options for WebGPU EP (#26099)
fs-eire Oct 7, 2025
5ed340f
Move macOS build jobs to a dedicated pool (#26252)
snnn Oct 7, 2025
74c1afd
Update check_emulator_running_using_avd_name function (#26251)
snnn Oct 7, 2025
ffe1693
Ignore deprecation warnings when building TRT/NV EP (#26250)
kevinch-nv Oct 7, 2025
11b23ad
[CUDA] replace 90a-virtual by 90-virtual for forward compatible (#26230)
tianleiwu Oct 7, 2025
def8a93
Fix minimal build (make it build with the recent changes) (#26222)
yuslepukhin Oct 7, 2025
f8c92c2
Add check for ARM64 SME to MlasDynamicQGemmBatch() unit tests. (#26253)
edgchen1 Oct 8, 2025
a0cc084
Add basic detection of whether a GPU is discrete on Linux (#26155)
edgchen1 Oct 8, 2025
bb7c295
Update the version number in the main branch to 1.24 (#26256)
snnn Oct 8, 2025
a60c307
upgrade torch version for component governance (#26220)
vraspar Oct 8, 2025
535f1f3
[webgpu] Split large inputs into smaller buffers to bypass maxStorage…
xiaofeihan1 Oct 9, 2025
0453cbf
[VitisAI] Fix OrtShapeInferContext for optional inputs (#26199)
zpye Oct 9, 2025
81ff69b
Allow all single-element tensors as value for ConstantOfShape (#26227)
justinchuby Oct 9, 2025
1d0ee39
[js/webgpu] enable handling EP options for WebGPU EP (#26093)
fs-eire Oct 9, 2025
e94153e
[WebGPU] bug fix related to bounds checking in conv kernel (#26268)
prathikr Oct 9, 2025
9a5e182
Merge branch 'master' into sync_msft_10102025
Jaswanth51 Oct 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/gradle-wrapper-validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Ubuntu2204-AMD-CPU"]
steps:
- uses: actions/checkout@v5
- uses: gradle/actions/wrapper-validation@v4
- uses: gradle/actions/wrapper-validation@v5
concurrency:
group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
cancel-in-progress: true
2 changes: 1 addition & 1 deletion VERSION_NUMBER
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.23.0
1.24.0
2 changes: 1 addition & 1 deletion cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ option(onnxruntime_USE_VSINPU "Build with VSINPU support" OFF)
cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
option(onnxruntime_USE_LEAN_ATTENTION "Build lean attention kernel for scaled dot product attention" OFF)
cmake_dependent_option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
cmake_dependent_option(onnxruntime_USE_FPA_INTB_GEMM "Build FpA IntB gemm cuda kernels" ON "onnxruntime_USE_CUDA" OFF)
option(onnxruntime_USE_FPA_INTB_GEMM "Build FpA IntB gemm cuda kernels" OFF)

option(onnxruntime_BUILD_FOR_NATIVE_MACHINE "Enable this option for turning on optimization specific to this machine" OFF)
option(onnxruntime_USE_AVX "Use AVX instructions" OFF)
Expand Down
24 changes: 15 additions & 9 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ set(MLAS_ROOT ${ONNXRUNTIME_ROOT}/core/mlas)
set(MLAS_SRC_DIR ${MLAS_ROOT}/lib)
set(MLAS_INC_DIR ${MLAS_ROOT}/inc)

# mlas_private_compile_definitions contains compile definitions that are private to onnxruntime_mlas and targets which
# use internal MLAS headers like mlasi.h.
set(mlas_private_compile_definitions)
#
# All hardware agnostic source files here
# hardware specific files would cause trouble in
Expand Down Expand Up @@ -133,9 +136,9 @@ function(setup_mlas_source_for_windows)
)

if (onnxruntime_USE_ARM_NEON_NCHWC)
setup_arm_neon_nchwc()
setup_arm_neon_nchwc()
endif()

if (onnxruntime_USE_KLEIDIAI)
setup_kleidiai()
endif()
Expand Down Expand Up @@ -293,11 +296,12 @@ endfunction()

function (setup_arm_neon_nchwc)
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/sconv.h
${MLAS_SRC_DIR}/sconv.h
${MLAS_SRC_DIR}/sconv_kernel_neon.cpp
${MLAS_SRC_DIR}/spool_kernel_neon.cpp
)
target_compile_definitions(onnxruntime_mlas PRIVATE MLAS_USE_ARM_NEON_NCHWC)
list(APPEND mlas_private_compile_definitions MLAS_USE_ARM_NEON_NCHWC)
set(mlas_private_compile_definitions ${mlas_private_compile_definitions} PARENT_SCOPE)
endfunction ()

if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
Expand Down Expand Up @@ -445,25 +449,25 @@ else()
${MLAS_SRC_DIR}/eltwise_kernel_neon.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
)

# Conditionally add the SVE implementation if compiler supports it
if (onnxruntime_USE_SVE)
list(APPEND mlas_platform_srcs ${MLAS_SRC_DIR}/sve/mlasi_sve.h)
list(APPEND mlas_platform_srcs ${MLAS_SRC_DIR}/sve/elementwise_sve.cpp)
set_source_files_properties(${MLAS_SRC_DIR}/sve/elementwise_sve.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+sve+fp16 ")
target_compile_definitions(onnxruntime_mlas PRIVATE MLAS_USE_SVE)
list(APPEND mlas_private_compile_definitions MLAS_USE_SVE)
endif()

if (onnxruntime_USE_ARM_NEON_NCHWC)
setup_arm_neon_nchwc()
setup_arm_neon_nchwc()
endif()

if (onnxruntime_USE_KLEIDIAI)
setup_kleidiai()
endif()
set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8.cpp
PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+dotprod")
set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+i8mm ")

if (NOT APPLE)
Expand Down Expand Up @@ -806,6 +810,8 @@ foreach(mlas_target ${ONNXRUNTIME_MLAS_LIBS})
target_include_directories(${mlas_target} PRIVATE ${MLAS_INC_DIR} ${MLAS_SRC_DIR})
onnxruntime_add_include_to_target(${mlas_target} ${GSL_TARGET})

target_compile_definitions(${mlas_target} PRIVATE ${mlas_private_compile_definitions})

set_target_properties(${mlas_target} PROPERTIES FOLDER "ONNXRuntime")
endforeach()

Expand Down
8 changes: 5 additions & 3 deletions cmake/onnxruntime_test_pch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
target_precompile_headers(onnxruntime_test_all PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/test_pch.h"
)
target_precompile_headers(onnxruntime_provider_test PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/test_pch.h"
)
if (TARGET onnxruntime_provider_test)
target_precompile_headers(onnxruntime_provider_test PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/test_pch.h"
)
endif()
endif()

# Exclude certain files that might conflict with PCH
Expand Down
17 changes: 8 additions & 9 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1228,6 +1228,11 @@ block()
LIBS ${onnxruntime_provider_test_libs}
DEPENDS ${onnxruntime_provider_test_deps}
)
if (UNIX AND (onnxruntime_USE_TENSORRT OR onnxruntime_USE_NV))
# The test_main.cc includes NvInfer.h where it has many deprecated declarations
# simply ignore them for TensorRT EP build
set_property(TARGET onnxruntime_provider_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
endif()

# enable dynamic plugin EP usage
target_compile_definitions(onnxruntime_provider_test PRIVATE ORT_UNIT_TEST_ENABLE_DYNAMIC_PLUGIN_EP_USAGE)
Expand Down Expand Up @@ -1325,9 +1330,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
${BENCHMARK_DIR}/layer_normalization.cc)
target_include_directories(onnxruntime_benchmark PRIVATE ${ONNXRUNTIME_ROOT} ${onnxruntime_graph_header} ${ONNXRUNTIME_ROOT}/core/mlas/inc)
target_compile_definitions(onnxruntime_benchmark PRIVATE BENCHMARK_STATIC_DEFINE)
if (onnxruntime_USE_SVE)
target_compile_definitions(onnxruntime_benchmark PRIVATE MLAS_USE_SVE)
endif()
target_compile_definitions(onnxruntime_benchmark PRIVATE ${mlas_private_compile_definitions})
if(WIN32)
target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd4141>"
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4141>")
Expand Down Expand Up @@ -1355,9 +1358,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
target_include_directories(onnxruntime_mlas_benchmark PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc)
target_link_libraries(onnxruntime_mlas_benchmark PRIVATE benchmark::benchmark onnxruntime_util ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common ${CMAKE_DL_LIBS})
target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE BENCHMARK_STATIC_DEFINE)
if (onnxruntime_USE_SVE)
target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE MLAS_USE_SVE)
endif()
target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE ${mlas_private_compile_definitions})
if(WIN32)
target_link_libraries(onnxruntime_mlas_benchmark PRIVATE debug Dbghelp)
# Avoid using new and delete. But this is a benchmark program, it's ok if it has a chance to leak.
Expand Down Expand Up @@ -1655,9 +1656,7 @@ endif()
XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO"
)
endif()
if (onnxruntime_USE_SVE)
target_compile_definitions(onnxruntime_mlas_test PRIVATE MLAS_USE_SVE)
endif()
target_compile_definitions(onnxruntime_mlas_test PRIVATE ${mlas_private_compile_definitions})
target_include_directories(onnxruntime_mlas_test PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${ONNXRUNTIME_ROOT}
${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(onnxruntime_mlas_test PRIVATE GTest::gtest GTest::gmock ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common)
Expand Down
5 changes: 5 additions & 0 deletions docs/python/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ For more information on ONNX Runtime, please see `aka.ms/onnxruntime <https://ak
Changes
-------

1.24.0
^^^^^^

Release Notes : https://github.com/Microsoft/onnxruntime/releases/tag/v1.24.0

1.23.0
^^^^^^

Expand Down
2 changes: 1 addition & 1 deletion docs/python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ onnx
sphinx_exec_code
sphinx_tabs
furo
torch
torch >= 2.6.0
1 change: 1 addition & 0 deletions js/.nvmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
v24.9.0
16 changes: 16 additions & 0 deletions js/common/lib/inference-session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,23 @@ export declare namespace InferenceSession {
}
export interface WebGpuExecutionProviderOption extends ExecutionProviderOption {
readonly name: 'webgpu';

/**
* Specify the preferred layout when running layout sensitive operators.
*
* @default 'NCHW'
*/
preferredLayout?: 'NCHW' | 'NHWC';

/**
* Specify a list of node names that should be executed on CPU even when WebGPU EP is used.
*/
forceCpuNodeNames?: readonly string[];

/**
* Specify an optional WebGPU device to be used by the WebGPU execution provider.
*/
device?: TryGetGlobalType<'GPUDevice'>;
}

// #region WebNN options
Expand Down
2 changes: 1 addition & 1 deletion js/common/lib/version.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
// This file is generated by /js/scripts/update-version.ts
// Do not modify file content manually.

export const version = '1.23.0';
export const version = '1.24.0';
4 changes: 2 additions & 2 deletions js/common/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion js/common/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"license": "MIT",
"type": "module",
"name": "onnxruntime-common",
"version": "1.23.0",
"version": "1.24.0",
"repository": {
"url": "https://github.com/Microsoft/onnxruntime.git",
"type": "git"
Expand Down
2 changes: 1 addition & 1 deletion js/node/lib/version.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
// This file is generated by /js/scripts/update-version.ts
// Do not modify file content manually.

export const version = '1.23.0';
export const version = '1.24.0';
6 changes: 3 additions & 3 deletions js/node/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion js/node/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
6
]
},
"version": "1.23.0",
"version": "1.24.0",
"dependencies": {
"adm-zip": "^0.5.16",
"global-agent": "^3.0.0",
Expand Down
2 changes: 1 addition & 1 deletion js/node/script/install-metadata-versions.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
// This file is generated by /js/scripts/update-version.ts
// Do not modify file content manually.

module.exports = { nuget: [{ feed: 'nuget', version: '1.23.0' }] };
module.exports = { nuget: [{ feed: 'nuget', version: '1.24.0' }] };
35 changes: 30 additions & 5 deletions js/node/src/session_options_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,37 @@ void ParseExecutionProviders(const Napi::Array epList, Ort::SessionOptions& sess
for (const auto& nameIter : obj.GetPropertyNames()) {
Napi::Value nameVar = nameIter.second;
std::string name = nameVar.As<Napi::String>().Utf8Value();
if (name != "name") {
Napi::Value valueVar = obj.Get(nameVar);
ORT_NAPI_THROW_TYPEERROR_IF(!valueVar.IsString(), epList.Env(), "Invalid argument: sessionOptions.executionProviders must be a string or an object with property 'name'.");
std::string value = valueVar.As<Napi::String>().Utf8Value();
webgpu_options[name] = value;
Napi::Value valueVar = obj.Get(nameVar);
std::string value;
if (name == "preferredLayout" ||
name == "validationMode" ||
name == "storageBufferCacheMode" ||
name == "uniformBufferCacheMode" ||
name == "queryResolveBufferCacheMode" ||
name == "defaultBufferCacheMode") {
ORT_NAPI_THROW_TYPEERROR_IF(!valueVar.IsString(), epList.Env(),
"Invalid argument: \"", name, "\" must be a string.");
value = valueVar.As<Napi::String>().Utf8Value();
} else if (name == "forceCpuNodeNames") {
ORT_NAPI_THROW_TYPEERROR_IF(!valueVar.IsArray(), epList.Env(),
"Invalid argument: \"forceCpuNodeNames\" must be a string array.");
auto arr = valueVar.As<Napi::Array>();
for (uint32_t i = 0; i < arr.Length(); i++) {
Napi::Value v = arr[i];
ORT_NAPI_THROW_TYPEERROR_IF(!v.IsString(), epList.Env(),
"Invalid argument: elements of \"forceCpuNodeNames\" must be strings.");
if (i > 0) {
value += '\n';
}
value += v.As<Napi::String>().Utf8Value();
}
} else {
// unrecognized option
ORT_NAPI_THROW_TYPEERROR_IF(name != "name", epList.Env(),
"Invalid argument: WebGPU EP has an unrecognized option: '", name, "'.");
continue;
}
webgpu_options[name] = value;
}
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion js/react_native/lib/version.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
// This file is generated by /js/scripts/update-version.ts
// Do not modify file content manually.

export const version = '1.23.0';
export const version = '1.24.0';
6 changes: 3 additions & 3 deletions js/react_native/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion js/react_native/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
"registry": "https://registry.npmjs.org/"
},
"source": "lib/index",
"version": "1.23.0",
"version": "1.24.0",
"main": "dist/commonjs/index",
"homepage": "https://github.com/microsoft/onnxruntime/blob/main/js/react_native/README.md",
"files": [
Expand Down
2 changes: 1 addition & 1 deletion js/web/lib/version.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
// This file is generated by /js/scripts/update-version.ts
// Do not modify file content manually.

export const version = '1.23.0';
export const version = '1.24.0';
Loading
Loading