Skip to content

Commit 761fcb0

Browse files
anmyachev and etiotto
authored
Fix performance regression (#5559) (#5567)
Fixes issues #5553 and #5518 --------- (cherry picked from commit 96b9f83) Signed-off-by: Ettore Tiotto <[email protected]> Co-authored-by: Ettore Tiotto <[email protected]>
1 parent f3892f0 commit 761fcb0

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

python/test/unit/language/test_matmul.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,10 +1224,8 @@ def test_mxfp8_mxfp4_matmul(M, N, K, BLOCK_M, BLOCK_N, BLOCK_K, NUM_STAGES, B_TR
12241224
pytest.xfail("None scale has not been tested on XPU backend")
12251225
if not (A_DATA_TYPE == "float8e5" and B_DATA_TYPE == "float4"):
12261226
pytest.xfail(f"(A: {A_DATA_TYPE}, B: {B_DATA_TYPE}) has not been tested on XPU backend")
1227-
if (BLOCK_M, BLOCK_N,
1228-
BLOCK_K) == (128, 256,
1229-
256) and CONST_SCALE and triton.runtime.driver.active.utils.get_device_properties(
1230-
triton.runtime.driver.active.get_current_device())["max_shared_mem"] < 196608:
1227+
if ((BLOCK_M, BLOCK_N, BLOCK_K) == (128, 256, 256) and triton.runtime.driver.active.utils.get_device_properties(
1228+
triton.runtime.driver.active.get_current_device())["max_shared_mem"] < 196608):
12311229
pytest.xfail("XPU: Not enough shared memory")
12321230
if not PACK_B_ALONG_K and B_DATA_TYPE != "float4":
12331231
pytest.xfail("Pack along K can only be False for float4")

third_party/intel/lib/TritonIntelGPUTransforms/RemoveLayoutConversions.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1646,7 +1646,11 @@ void LayoutRematerialization::hoistConvertDotOperand() {
16461646
void LayoutRematerialization::hoistConvertDotOperand(
16471647
ConvertLayoutOp convertOp) {
16481648
auto targetType = convertOp.getType();
1649-
// The pass is targeted to MMA dot operands
1649+
1650+
// The pass is targeted to NVidia.
1651+
auto dotEnc = dyn_cast<DotOperandEncodingAttr>(targetType.getEncoding());
1652+
if (!(dotEnc && isa<NvidiaMmaEncodingAttr>(dotEnc.getParent())))
1653+
return;
16501654

16511655
auto canBePipelined = [&](ConvertLayoutOp convertOp) {
16521656
// FIXME: Check that the parent is a for loop

0 commit comments

Comments
 (0)