Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4d3d6e4
Apply alignment attr for make.buffer.rsrc
Shoreshen Nov 7, 2025
505426b
fix format error
Shoreshen Nov 10, 2025
4245ca5
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Nov 10, 2025
07415ef
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Nov 10, 2025
841bbe5
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Nov 10, 2025
7be66a0
Addr 0 should have max alignment
Shoreshen Nov 10, 2025
5efe07b
Move to amd specific attribute
Shoreshen Nov 11, 2025
9d04645
fix failed test
Shoreshen Nov 11, 2025
5e3479c
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Nov 12, 2025
0da0b42
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Nov 14, 2025
533ca2f
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Nov 14, 2025
7f29a7a
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Nov 17, 2025
ff6c8d7
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Nov 20, 2025
a5a8b66
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Nov 24, 2025
989017e
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Nov 27, 2025
a8792cc
moving to general intrinsic handling in attribute
Shoreshen Nov 27, 2025
19d5915
fix test case
Shoreshen Nov 27, 2025
015e701
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Dec 1, 2025
4aa04b1
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Dec 2, 2025
09d4a9b
Update llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Shoreshen Dec 2, 2025
5af566a
fix format
Shoreshen Dec 2, 2025
677876b
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Dec 4, 2025
96f5892
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Dec 4, 2025
68c8e42
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Dec 5, 2025
b4bdc67
Add comments
Shoreshen Dec 5, 2025
d17d04c
Update llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Shoreshen Dec 5, 2025
7431be9
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Dec 8, 2025
d72fa3a
Merge branch 'main' into add-align-for-make.buffer-intrinsic-v2
Shoreshen Dec 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1603,7 +1603,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
&AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
&AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
&AAAMDGPUClusterDims::ID});
&AAAMDGPUClusterDims::ID, &AAAlign::ID});

AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
Expand Down Expand Up @@ -1661,6 +1661,10 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
if (Ptr) {
A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
}
}
}
}
Expand Down
21 changes: 20 additions & 1 deletion llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5220,6 +5220,13 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
return AlignAA->getKnownAlign().value();
break;
}
case Intrinsic::amdgcn_make_buffer_rsrc: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment here explaining why this target-specific intrinsic is handled in the generic code, and noting the future direction we might take.

const auto *AlignAA = A.getAAFor<AAAlign>(
QueryingAA, IRPosition::value(*II), DepClassTy::NONE);
if (AlignAA)
return AlignAA->getKnownAlign().value();
break;
}
default:
break;
}
Expand Down Expand Up @@ -5543,7 +5550,7 @@ struct AAAlignCallSiteReturned final
const auto *AlignAA =
A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
DepClassTy::REQUIRED);
if (AlignAA && AlignAA->isValidState()) {
if (AlignAA) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure about this change

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @shiltian, if I'm not mistaken, the AAAlign state is considered invalid when the assumed alignment is 1:

  /// See AbstractState::isValidState()
  /// NOTE: For now we simply pretend that the worst possible state is invalid.
  bool isValidState() const override { return Assumed != getWorstState(); }

In this case, if the mask is not valid, then the assumed alignment of the ptrmask should also be 1...

Alignment = std::max(AlignAA->getAssumedAlign(), Alignment);
Valid = true;
}
Expand All @@ -5554,6 +5561,18 @@ struct AAAlignCallSiteReturned final
std::min(this->getAssumedAlign(), Alignment).value());
break;
}
// FIXME: Introduce target-specific sub-attributes and let
// getAAFor<AAAlign> create the appropriate sub-attribute to handle
// target-specific intrinsics.
case Intrinsic::amdgcn_make_buffer_rsrc: {
const auto *AlignAA =
A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
DepClassTy::REQUIRED);
if (AlignAA)
return clampStateAndIndicateChange<StateType>(
this->getState(), AlignAA->getAssumedAlign().value());
break;
}
default:
break;
}
Expand Down
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s

; Input: %x is declared `align 4`, and the load through the fat pointer
; returned by llvm.amdgcn.make.buffer.rsrc is `align 8`.
; Expected (see CHECK lines): both the %x argument and the make.buffer.rsrc
; call result are annotated `align 8`; the load stays `align 8`.
define float @align_back_prop(ptr addrspace(1) align 4 %x) {
; CHECK-LABEL: define float @align_back_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: ret float [[Y]]
;
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
%y = load float, ptr addrspace(7) %fat.ptr, align 8
ret float %y
}

; Input: %x is declared `align 8`, while the load through the fat pointer
; returned by llvm.amdgcn.make.buffer.rsrc is only `align 4`.
; Expected (see CHECK lines): the make.buffer.rsrc call result is annotated
; `align 8` and the load is raised to `align 8`; %x stays `align 8`.
define float @align_foward_prop(ptr addrspace(1) align 8 %x) {
; CHECK-LABEL: define float @align_foward_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: ret float [[Y]]
;
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
%y = load float, ptr addrspace(7) %fat.ptr, align 4
ret float %y
}

; Input: %x is declared `align 4`; the load through the fat pointer returned
; by llvm.amdgcn.make.buffer.rsrc is `align 2`, and a second load directly
; from %x is `align 8`. The function returns the value of the direct load.
; Expected (see CHECK lines): %x, the make.buffer.rsrc call result, and both
; loads are all annotated `align 8`.
define float @align_mix_prop(ptr addrspace(1) align 4 %x) {
; CHECK-LABEL: define float @align_mix_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: [[Z:%.*]] = load float, ptr addrspace(1) [[X]], align 8
; CHECK-NEXT: ret float [[Z]]
;
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
%y = load float, ptr addrspace(7) %fat.ptr, align 2
%z = load float, ptr addrspace(1) %x, align 8
ret float %z
}
4 changes: 2 additions & 2 deletions llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ define amdgpu_kernel void @test_call_untouched_ptr() {

define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer(
; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11:[0-9]+]]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
Expand All @@ -320,7 +320,7 @@ define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {

define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias(
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11]]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]]
; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
Expand Down
Loading