Skip to content

Commit c2c5855

Browse files
JustinTong0323 and alisonshao
authored and committed
Extend compatibility check for all quantized MoE models (#13465)
Signed-off-by: Xinyuan Tong <[email protected]>
1 parent d8afd20 commit c2c5855

File tree

1 file changed

+25
-24
lines changed

1 file changed

+25
-24
lines changed

python/sglang/srt/model_executor/model_runner.py

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ def __init__(
298298

299299
# Model-specific adjustment
300300
self.model_specific_adjustment()
301+
self.check_quantized_moe_compatibility()
301302

302303
# Set the global server_args in the scheduler process
303304
set_global_server_args_for_scheduler(server_args)
@@ -570,34 +571,34 @@ def model_specific_adjustment(self):
570571
if not server_args.disable_chunked_prefix_cache:
571572
log_info_on_rank0(logger, "Chunked prefix cache is turned on.")
572573

573-
if self.model_config.hf_config.model_type == "qwen3_vl_moe":
574-
if (
575-
quantization_config := getattr(
576-
self.model_config.hf_config, "quantization_config", None
577-
)
578-
) is not None and "weight_block_size" in quantization_config:
579-
weight_block_size_n = quantization_config["weight_block_size"][0]
574+
def check_quantized_moe_compatibility(self):
    """Validate parallelism settings for block-quantized MoE models.

    For models whose ``quantization_config`` declares a ``weight_block_size``
    (e.g. FP8 block-wise quantization), each expert's intermediate dimension
    must shard evenly across the MoE tensor-parallel ranks, and every shard
    must remain a whole multiple of the quantization block size — otherwise
    the quantized weight blocks would be split mid-block at load time.

    Dense (non-MoE) models may also carry such a quantization config but have
    no ``moe_intermediate_size``; they are skipped rather than crashing with
    an AttributeError.

    Raises:
        ValueError: if ``tp_size`` is not divisible by ``moe_ep_size``, if the
            MoE intermediate size does not shard evenly across ``moe_tp_size``
            ranks, or if a shard is not a multiple of the weight block size.
    """
    if (
        quantization_config := getattr(
            self.model_config.hf_config, "quantization_config", None
        )
    ) is not None and "weight_block_size" in quantization_config:
        weight_block_size_n = quantization_config["weight_block_size"][0]

        # Non-MoE models have nothing to validate here; bail out before the
        # tp/ep checks so dense quantized models are unaffected.
        moe_intermediate_size = getattr(
            self.model_config.hf_text_config, "moe_intermediate_size", None
        )
        if moe_intermediate_size is None:
            return

        if self.tp_size % self.moe_ep_size != 0:
            raise ValueError(
                f"tp_size {self.tp_size} must be divisible by ep_size {self.moe_ep_size}"
            )
        moe_tp_size = self.tp_size // self.moe_ep_size

        if moe_intermediate_size % moe_tp_size != 0:
            raise ValueError(
                f"moe_intermediate_size {moe_intermediate_size} must be divisible by moe_tp_size ({moe_tp_size}) which is tp_size ({self.tp_size}) divided by moe_ep_size ({self.moe_ep_size})."
            )

        if (moe_intermediate_size // moe_tp_size) % weight_block_size_n != 0:
            raise ValueError(
                f"For quantized MoE models, please make sure ({moe_intermediate_size=} / {moe_tp_size=}) % {weight_block_size_n=} == 0 "
                f"where moe_tp_size is equal to tp_size ({self.tp_size}) divided by ep_size ({self.moe_ep_size}). "
                f"You can fix this by setting arguments `--tp` and `--ep` correctly."
            )
601602

602603
def init_torch_distributed(self):
603604
logger.info("Init torch distributed begin.")

0 commit comments

Comments
 (0)