Skip to content

Commit f5d3e35

Browse files
Fridge00300INDEX
authored and committed
Add bfloat16 tuned fused moe config for Dpsk-MTP layer on B200 (sgl-project#13455)
1 parent 050338c commit f5d3e35

File tree

2 files changed

+164
-9
lines changed

2 files changed

+164
-9
lines changed
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
{
2+
"1": {
3+
"BLOCK_SIZE_M": 16,
4+
"BLOCK_SIZE_N": 128,
5+
"BLOCK_SIZE_K": 128,
6+
"GROUP_SIZE_M": 32,
7+
"num_warps": 4,
8+
"num_stages": 4
9+
},
10+
"2": {
11+
"BLOCK_SIZE_M": 16,
12+
"BLOCK_SIZE_N": 64,
13+
"BLOCK_SIZE_K": 64,
14+
"GROUP_SIZE_M": 1,
15+
"num_warps": 4,
16+
"num_stages": 5
17+
},
18+
"4": {
19+
"BLOCK_SIZE_M": 16,
20+
"BLOCK_SIZE_N": 64,
21+
"BLOCK_SIZE_K": 128,
22+
"GROUP_SIZE_M": 64,
23+
"num_warps": 4,
24+
"num_stages": 3
25+
},
26+
"8": {
27+
"BLOCK_SIZE_M": 16,
28+
"BLOCK_SIZE_N": 64,
29+
"BLOCK_SIZE_K": 128,
30+
"GROUP_SIZE_M": 1,
31+
"num_warps": 4,
32+
"num_stages": 2
33+
},
34+
"16": {
35+
"BLOCK_SIZE_M": 16,
36+
"BLOCK_SIZE_N": 128,
37+
"BLOCK_SIZE_K": 128,
38+
"GROUP_SIZE_M": 1,
39+
"num_warps": 8,
40+
"num_stages": 3
41+
},
42+
"24": {
43+
"BLOCK_SIZE_M": 16,
44+
"BLOCK_SIZE_N": 64,
45+
"BLOCK_SIZE_K": 128,
46+
"GROUP_SIZE_M": 1,
47+
"num_warps": 4,
48+
"num_stages": 3
49+
},
50+
"32": {
51+
"BLOCK_SIZE_M": 16,
52+
"BLOCK_SIZE_N": 128,
53+
"BLOCK_SIZE_K": 128,
54+
"GROUP_SIZE_M": 1,
55+
"num_warps": 8,
56+
"num_stages": 3
57+
},
58+
"48": {
59+
"BLOCK_SIZE_M": 16,
60+
"BLOCK_SIZE_N": 128,
61+
"BLOCK_SIZE_K": 128,
62+
"GROUP_SIZE_M": 1,
63+
"num_warps": 8,
64+
"num_stages": 3
65+
},
66+
"64": {
67+
"BLOCK_SIZE_M": 16,
68+
"BLOCK_SIZE_N": 64,
69+
"BLOCK_SIZE_K": 128,
70+
"GROUP_SIZE_M": 32,
71+
"num_warps": 4,
72+
"num_stages": 3
73+
},
74+
"96": {
75+
"BLOCK_SIZE_M": 16,
76+
"BLOCK_SIZE_N": 128,
77+
"BLOCK_SIZE_K": 128,
78+
"GROUP_SIZE_M": 32,
79+
"num_warps": 4,
80+
"num_stages": 3
81+
},
82+
"128": {
83+
"BLOCK_SIZE_M": 16,
84+
"BLOCK_SIZE_N": 128,
85+
"BLOCK_SIZE_K": 128,
86+
"GROUP_SIZE_M": 1,
87+
"num_warps": 8,
88+
"num_stages": 3
89+
},
90+
"256": {
91+
"BLOCK_SIZE_M": 16,
92+
"BLOCK_SIZE_N": 128,
93+
"BLOCK_SIZE_K": 128,
94+
"GROUP_SIZE_M": 1,
95+
"num_warps": 8,
96+
"num_stages": 3
97+
},
98+
"512": {
99+
"BLOCK_SIZE_M": 32,
100+
"BLOCK_SIZE_N": 64,
101+
"BLOCK_SIZE_K": 128,
102+
"GROUP_SIZE_M": 32,
103+
"num_warps": 4,
104+
"num_stages": 3
105+
},
106+
"1024": {
107+
"BLOCK_SIZE_M": 64,
108+
"BLOCK_SIZE_N": 128,
109+
"BLOCK_SIZE_K": 64,
110+
"GROUP_SIZE_M": 1,
111+
"num_warps": 4,
112+
"num_stages": 3
113+
},
114+
"1536": {
115+
"BLOCK_SIZE_M": 64,
116+
"BLOCK_SIZE_N": 128,
117+
"BLOCK_SIZE_K": 64,
118+
"GROUP_SIZE_M": 16,
119+
"num_warps": 4,
120+
"num_stages": 3
121+
},
122+
"2048": {
123+
"BLOCK_SIZE_M": 128,
124+
"BLOCK_SIZE_N": 256,
125+
"BLOCK_SIZE_K": 64,
126+
"GROUP_SIZE_M": 1,
127+
"num_warps": 8,
128+
"num_stages": 4
129+
},
130+
"3072": {
131+
"BLOCK_SIZE_M": 128,
132+
"BLOCK_SIZE_N": 256,
133+
"BLOCK_SIZE_K": 64,
134+
"GROUP_SIZE_M": 1,
135+
"num_warps": 8,
136+
"num_stages": 4
137+
},
138+
"4096": {
139+
"BLOCK_SIZE_M": 128,
140+
"BLOCK_SIZE_N": 256,
141+
"BLOCK_SIZE_K": 64,
142+
"GROUP_SIZE_M": 1,
143+
"num_warps": 8,
144+
"num_stages": 4
145+
}
146+
}

python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,24 @@ def get_moe_configs(
114114
# If a configuration has been found, return it
115115
return {int(key): val for key, val in json.load(f).items()}
116116

117-
# If no optimized configuration is available, we will use the default
118-
# configuration
119-
logger.warning(
120-
(
121-
"Using default MoE kernel config. Performance might be sub-optimal! "
122-
"Config file not found at %s, you can create them with https://github.com/sgl-project/sglang/tree/main/benchmark/kernels/fused_moe_triton"
123-
),
124-
config_file_path,
125-
)
117+
# If no optimized configuration is available, we will use the default configuration when down_moe is False
118+
# When down_moe is True, we will try to use the config for down_moe=False
119+
if down_moe:
120+
logger.warning(
121+
(
122+
"Using MoE kernel config with down_moe=False. Performance might be sub-optimal! "
123+
"Config file not found at %s, you can create them with https://github.com/sgl-project/sglang/tree/main/benchmark/kernels/fused_moe_triton"
124+
),
125+
config_file_path,
126+
)
127+
else:
128+
logger.warning(
129+
(
130+
"Using default MoE kernel config. Performance might be sub-optimal! "
131+
"Config file not found at %s, you can create them with https://github.com/sgl-project/sglang/tree/main/benchmark/kernels/fused_moe_triton"
132+
),
133+
config_file_path,
134+
)
126135
return None
127136

128137

0 commit comments

Comments
 (0)