@@ -298,6 +298,7 @@ def __init__(
 
         # Model-specific adjustment
         self.model_specific_adjustment()
+        self.check_quantized_moe_compatibility()
 
         # Set the global server_args in the scheduler process
         set_global_server_args_for_scheduler(server_args)
@@ -570,34 +571,34 @@ def model_specific_adjustment(self):
             if not server_args.disable_chunked_prefix_cache:
                 log_info_on_rank0(logger, "Chunked prefix cache is turned on.")
 
-        if self.model_config.hf_config.model_type == "qwen3_vl_moe":
-            if (
-                quantization_config := getattr(
-                    self.model_config.hf_config, "quantization_config", None
-                )
-            ) is not None and "weight_block_size" in quantization_config:
-                weight_block_size_n = quantization_config["weight_block_size"][0]
+    def check_quantized_moe_compatibility(self):
+        if (
+            quantization_config := getattr(
+                self.model_config.hf_config, "quantization_config", None
+            )
+        ) is not None and "weight_block_size" in quantization_config:
+            weight_block_size_n = quantization_config["weight_block_size"][0]
 
-                if self.tp_size % self.moe_ep_size != 0:
-                    raise ValueError(
-                        f"tp_size {self.tp_size} must be divisible by moe_ep_size {self.moe_ep_size}"
-                    )
-                moe_tp_size = self.tp_size // self.moe_ep_size
+            if self.tp_size % self.moe_ep_size != 0:
+                raise ValueError(
+                    f"tp_size {self.tp_size} must be divisible by ep_size {self.moe_ep_size}"
+                )
+            moe_tp_size = self.tp_size // self.moe_ep_size
 
-                moe_intermediate_size = (
-                    self.model_config.hf_text_config.moe_intermediate_size
+            moe_intermediate_size = (
+                self.model_config.hf_text_config.moe_intermediate_size
+            )
+            if moe_intermediate_size % moe_tp_size != 0:
+                raise ValueError(
+                    f"moe_intermediate_size {moe_intermediate_size} must be divisible by moe_tp_size ({moe_tp_size}) which is tp_size ({self.tp_size}) divided by moe_ep_size ({self.moe_ep_size})."
                 )
-                if moe_intermediate_size % moe_tp_size != 0:
-                    raise ValueError(
-                        f"moe_intermediate_size {moe_intermediate_size} must be divisible by moe_tp_size ({moe_tp_size}) which is tp_size ({self.tp_size}) divided by moe_ep_size ({self.moe_ep_size})."
-                    )
 
-                if (moe_intermediate_size // moe_tp_size) % weight_block_size_n != 0:
-                    raise ValueError(
-                        f"For qwen3-vl-fp8 models, please make sure ({moe_intermediate_size=} / {moe_tp_size=}) % {weight_block_size_n=} == 0 "
-                        f"where moe_tp_size is equal to tp_size ({self.tp_size}) divided by moe_ep_size ({self.moe_ep_size}). "
-                        f"You can fix this by setting arguments `--tp-size` and `--ep-size` correctly."
-                    )
+            if (moe_intermediate_size // moe_tp_size) % weight_block_size_n != 0:
+                raise ValueError(
+                    f"For quantized MoE models, please make sure ({moe_intermediate_size=} / {moe_tp_size=}) % {weight_block_size_n=} == 0 "
+                    f"where moe_tp_size is equal to tp_size ({self.tp_size}) divided by ep_size ({self.moe_ep_size}). "
+                    f"You can fix this by setting arguments `--tp` and `--ep` correctly."
+                )
 
     def init_torch_distributed(self):
         logger.info("Init torch distributed begin.")
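For context, the constraint the new `check_quantized_moe_compatibility` method enforces can be reproduced standalone. The sketch below is illustrative only: `moe_shard_is_compatible` and the example sizes are hypothetical and not part of the diff; only `weight_block_size`, `moe_intermediate_size`, `tp_size`, and `moe_ep_size` come from the change above.

```python
# Standalone sketch of the divisibility checks (hypothetical helper, illustrative values).
def moe_shard_is_compatible(
    moe_intermediate_size: int,  # e.g. 1536 for a made-up block-quantized MoE checkpoint
    weight_block_size_n: int,    # first entry of quantization_config["weight_block_size"], e.g. 128
    tp_size: int,
    moe_ep_size: int,
) -> bool:
    # tp_size must split evenly into expert-parallel groups.
    if tp_size % moe_ep_size != 0:
        return False
    moe_tp_size = tp_size // moe_ep_size
    # The intermediate dimension must shard evenly across MoE tensor parallelism.
    if moe_intermediate_size % moe_tp_size != 0:
        return False
    # Each TP shard of the intermediate dimension must hold whole quantization blocks.
    return (moe_intermediate_size // moe_tp_size) % weight_block_size_n == 0


# 1536 / (tp=4 // ep=1) = 384, and 384 % 128 == 0 -> compatible
assert moe_shard_is_compatible(1536, 128, tp_size=4, moe_ep_size=1)
# 1536 / (tp=8 // ep=1) = 192, and 192 % 128 != 0 -> the new check would raise at startup
assert not moe_shard_is_compatible(1536, 128, tp_size=8, moe_ep_size=1)
```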