Skip to content

Commit 9194c4f

Browse files
committed
Add `channel_wised_quantize` argument to MatMulNBitsQuantizer
1 parent 02e5d04 commit 9194c4f

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

onnxruntime/python/tools/quantization/matmul_nbits_quantizer.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1189,6 +1189,7 @@ def __init__(
1189 1189
quant_format=QuantFormat.QOperator,
1190 1190
op_types_to_quantize: tuple[str, ...] | None = None,
1191 1191
quant_axes: tuple[tuple[str, int], ...] | None = None,
1192 +
channel_wised_quantize: bool = False,
1192 1193
algo_config: WeightOnlyQuantConfig | None = None,
1193 1194
):
1194 1195
if nodes_to_exclude is None:
@@ -1211,6 +1212,7 @@ def __init__(
1211 1212
op_types_to_quantize=op_types_to_quantize,
1212 1213
quant_axes=quant_axes,
1213 1214
bits=4, # default to 4 bits
1215 +
channel_wised_quantize=channel_wised_quantize,
1214 1216
)
1215 1217

1216 1218
self.algo_config = algo_config

0 commit comments

Comments
 (0)