diff --git a/calibration/calibrate_model.sh b/calibration/calibrate_model.sh index 5acca0149..55b1f5e34 100755 --- a/calibration/calibrate_model.sh +++ b/calibration/calibrate_model.sh @@ -43,7 +43,7 @@ create_measure_config() { if [[ $model_name_lower =~ ^mixtral ]]; then tmp_config="{\"method\": \"HOOKS\",\"mode\": \"MEASURE\",\"observer\": \"maxabs\",\"allowlist\": {\"types\": [], \"names\": []},\"blocklist\": {\"types\": [], \"names\": [\"self_attn\", \"lm_head\"]},\"quantize_weight\": false,\"dump_stats_path\": \"$1/$2/$3/inc_output\"}" - elif [[ $model_name_lower =~ ^deepseek ]]; then + elif [[ $model_name_lower =~ ^deepseek && ! $model_name_lower =~ distill ]]; then tmp_config="{\"method\": \"HOOKS\",\"mode\": \"MEASURE\",\"observer\": \"maxabs\",\"allowlist\": {\"types\": [], \"names\": []},\"blocklist\": {\"types\": [], \"names\": [\"lm_head\", \"mlp\\\.gate\\\b\"]},\"quantize_weight\": false,\"dump_stats_path\": \"$1/$2/$3/inc_output\"}" else tmp_config="{\"method\": \"HOOKS\",\"mode\": \"MEASURE\",\"observer\": \"maxabs\",\"allowlist\": {\"types\": [], \"names\": []},\"blocklist\": {\"types\": [], \"names\": []},\"quantize_weight\": false,\"dump_stats_path\": \"$1/$2/$3/inc_output\"}" @@ -63,7 +63,7 @@ create_quant_config() { else tmp_config="{\"mode\": \"QUANTIZE\",\"observer\": \"maxabs\",\"scale_method\": \"maxabs_hw\",\"allowlist\": {\"types\": [],\"names\": []},\"blocklist\": {\"types\": [],\"names\": [\"self_attn\", \"lm_head\"]},\"dump_stats_path\": \"$1/$2/$3/inc_output\"}" fi - elif [[ $model_name_lower =~ ^deepseek ]]; then + elif [[ $model_name_lower =~ ^deepseek && ! $model_name_lower =~ distill ]]; then tmp_config="{\"mode\": \"QUANTIZE\",\"observer\": \"maxabs\",\"scale_method\": \"maxabs_hw\", \"scale_format\": \"scalar\", \"allowlist\": {\"types\": [],\"names\": []},\"blocklist\": {\"types\": [],\"names\": [\"lm_head\", \"mlp\\\.gate\\\b\"]},\"dump_stats_path\": \"$1/$2/$3/inc_output\"}" else tmp_config="{\"mode\": \"QUANTIZE\",\"observer\": \"maxabs\",\"scale_method\": \"maxabs_hw\",\"allowlist\": {\"types\": [],\"names\": []},\"blocklist\": {\"types\": [],\"names\": []},\"dump_stats_path\": \"$1/$2/$3/inc_output\"}" @@ -202,7 +202,7 @@ else fi -if [[ "$model_name_lower" == *"deepseek"* ]]; then +if [[ "$model_name_lower" == *"deepseek"* && "$model_name_lower" != *distill* ]]; then EXTRA_FLAGS_STEP_2+="--block-quant --expert-parallel " EXTRA_ENVS_STEP_2="VLLM_HPU_FORCE_CHANNEL_FP8=0" EXTRA_FLAGS_STEP_3+="--deepseek "