vllm-project
diff --git a/examples/awq/llama_example.py b/examples/awq/llama_example.py
Lines changed: 3 additions & 1 deletion
@@ -50,7 +50,9 @@ def tokenize(sample):
 
 # Configure the quantization algorithm to run.
 recipe = [
-    AWQModifier(ignore=["lm_head"], scheme="W4A16_ASYM", targets=["Linear"]),
+    AWQModifier(
+        ignore=["lm_head"], scheme="W4A16_ASYM", targets=["Linear"], duo_scaling="both"
+    ),
 ]
 
 # Apply algorithms.
Original file line number	Diff line number	Diff line change
`@@ -50,7 +50,9 @@ def tokenize(sample):`
`50`	`50`
`51`	`51`	`# Configure the quantization algorithm to run.`
`52`	`52`	`recipe = [`
`53`		`-    AWQModifier(ignore=["lm_head"], scheme="W4A16_ASYM", targets=["Linear"]),`
	`53`	`+    AWQModifier(`
	`54`	`+        ignore=["lm_head"], scheme="W4A16_ASYM", targets=["Linear"], duo_scaling="both"`
	`55`	`+    ),`
`54`	`56`	`]`
`55`	`57`
`56`	`58`	`# Apply algorithms.`