@@ -410,7 +410,7 @@ def __init__(
         q_lora_rank: int,
         kv_lora_rank: int,
         rope_theta: float = 10000,
-        rope_scaling: dict[str, Any] | None = None,
+        rope_parameters: dict[str, Any] | None = None,
         max_position_embeddings: int = 8192,
         cache_config: CacheConfig | None = None,
         quant_config: QuantizationConfig | None = None,
@@ -485,21 +485,21 @@ def __init__(
             quant_config=quant_config,
             prefix=f"{prefix}.o_proj",
         )
-        if rope_scaling:
-            rope_scaling["rope_type"] = "deepseek_yarn"
+        if rope_parameters:
+            rope_parameters["rope_type"] = "deepseek_yarn"

         self.rotary_emb = get_rope(
             qk_rope_head_dim,
             rotary_dim=qk_rope_head_dim,
             max_position=max_position_embeddings,
             base=rope_theta,
-            rope_scaling=rope_scaling,
+            rope_parameters=rope_parameters,
             is_neox_style=False,
         )

-        if rope_scaling:
-            mscale_all_dim = rope_scaling.get("mscale_all_dim", False)
-            scaling_factor = rope_scaling["factor"]
+        if rope_parameters:
+            mscale_all_dim = rope_parameters.get("mscale_all_dim", False)
+            scaling_factor = rope_parameters["factor"]
             mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim))
             self.scaling = self.scaling * mscale * mscale
@@ -904,7 +904,7 @@ def __init__(
         q_lora_rank: int | None,
         kv_lora_rank: int,
         rope_theta: float = 10000,
-        rope_scaling: dict[str, Any] | None = None,
+        rope_parameters: dict[str, Any] | None = None,
         max_position_embeddings: int = 8192,
         cache_config: CacheConfig | None = None,
         quant_config: QuantizationConfig | None = None,
@@ -981,19 +981,19 @@ def __init__(
             prefix=f"{prefix}.o_proj",
         )

-        if rope_scaling:
-            rope_scaling["rope_type"] = "deepseek_yarn"
+        if rope_parameters:
+            rope_parameters["rope_type"] = "deepseek_yarn"
         self.rotary_emb = get_rope(
             qk_rope_head_dim,
             rotary_dim=qk_rope_head_dim,
             max_position=max_position_embeddings,
             base=rope_theta,
-            rope_scaling=rope_scaling,
+            rope_parameters=rope_parameters,
             is_neox_style=False,
         )
-        if rope_scaling:
-            mscale_all_dim = rope_scaling.get("mscale_all_dim", False)
-            scaling_factor = rope_scaling["factor"]
+        if rope_parameters:
+            mscale_all_dim = rope_parameters.get("mscale_all_dim", False)
+            scaling_factor = rope_parameters["factor"]
             mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim))
             self.scaling = self.scaling * mscale * mscale
@@ -1073,8 +1073,6 @@ def __init__(
         parallel_config = vllm_config.parallel_config

         self.hidden_size = config.hidden_size
-        rope_theta = getattr(config, "rope_theta", 10000)
-        rope_scaling = getattr(config, "rope_scaling", None)
         max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
         moe_layer_freq = getattr(config, "moe_layer_freq", 1)
         # DecoderLayers are created with `make_layers` which passes the prefix
@@ -1107,8 +1105,8 @@ def __init__(
             v_head_dim=v_head_dim,
             q_lora_rank=config.q_lora_rank if hasattr(config, "q_lora_rank") else None,
             kv_lora_rank=kv_lora_rank,
-            rope_theta=rope_theta,
-            rope_scaling=rope_scaling,
+            rope_theta=config.rope_parameters["rope_theta"],
+            rope_parameters=config.rope_parameters,
             max_position_embeddings=max_position_embeddings,
             cache_config=cache_config,
             quant_config=quant_config,
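# Hedged illustration (not from the diff): after these hunks the model no longer
# reads separate `rope_theta` / `rope_scaling` attributes off the config; both are
# expected to live in a single `rope_parameters` dict. A config might carry
# something like the following (keys other than "rope_theta" are assumptions):
example_rope_parameters = {
    "rope_theta": 10000.0,          # base frequency, previously config.rope_theta
    "rope_type": "deepseek_yarn",   # set in the attention __init__ before get_rope()
    "factor": 40,                   # YaRN context-extension factor
    "mscale_all_dim": 1.0,
}
# The attention layer is then constructed with
#   rope_theta=config.rope_parameters["rope_theta"],
#   rope_parameters=config.rope_parameters,
# so a config missing "rope_theta" would now raise KeyError rather than falling
# back to the old getattr(config, "rope_theta", 10000) default that was removed.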