-
Notifications
You must be signed in to change notification settings - Fork 23
Add ROCm support #1576
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Add ROCm support #1576
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
EnzymeJAX Benchmarks
| Benchmark suite | Current: f46684e | Previous: b55ffd4 | Ratio |
|---|---|---|---|
| scatter_sum / JaX / cpu / Primal | 0.0000043525060871616 s | 0.000004338932399696205 s | 1.00 |
| scatter_sum / JaXPipe / cpu / Primal | 0.000004293832019902766 s | 0.000004298453299998073 s | 1.00 |
| scatter_sum / JaX / tpu / Primal | 0.0001354218465974 s | 0.0001558048111997 s | 0.87 |
| scatter_sum / JaXPipe / tpu / Primal | 0.0001336760675068 s | 0.0001522772955002 s | 0.88 |
This comment was automatically generated by workflow using github-action-benchmark.
| } | ||
| }; | ||
|
|
||
| struct GPUShuffleOpToROCDL : public ConvertOpToLLVMPattern<gpu::ShuffleOp> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this can simply be a dup of this
| } // namespace mlir | ||
|
|
||
| // https://rocm.docs.amd.com/projects/HIP/en/docs-6.4.0/reference/hardware_features.html | ||
| struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| } else if (gpuTarget == "rocm") { | ||
| using namespace mlir::gpu::index_lowering; | ||
| PatternBenefit benefit(1); | ||
| PatternBenefit highBenefit(2); | ||
| patterns.add<gpu::index_lowering::OpLowering< | ||
| gpu::ThreadIdOp, ROCDL::ThreadIdXOp, ROCDL::ThreadIdYOp, | ||
| ROCDL::ThreadIdZOp>>(typeConverter, IndexKind::Block, IntrType::Id, | ||
| benefit); | ||
| patterns.add<gpu::index_lowering::OpLowering< | ||
| gpu::BlockDimOp, ROCDL::BlockDimXOp, ROCDL::BlockDimYOp, | ||
| ROCDL::BlockDimZOp>>(typeConverter, IndexKind::Block, IntrType::Dim, | ||
| benefit); | ||
| patterns.add<gpu::index_lowering::OpLowering< | ||
| gpu::BlockIdOp, ROCDL::BlockIdXOp, ROCDL::BlockIdYOp, | ||
| ROCDL::BlockIdZOp>>(typeConverter, IndexKind::Grid, IntrType::Id, | ||
| benefit); | ||
| patterns.add<gpu::index_lowering::OpLowering< | ||
| gpu::GridDimOp, ROCDL::GridDimXOp, ROCDL::GridDimYOp, | ||
| ROCDL::GridDimZOp>>(typeConverter, IndexKind::Grid, IntrType::Dim, | ||
| benefit); | ||
|
|
||
| patterns.add<GPULaneIdOpToROCDL>(typeConverter, benefit); | ||
| patterns.add<GPUShuffleOpToROCDL>(typeConverter, benefit); | ||
| patterns.add<GPUBarrierToROCDL>(typeConverter, benefit); | ||
|
|
||
| populateMathToLLVMConversionPatterns(typeConverter, patterns); | ||
| populateMathToROCDLConversionPatterns(typeConverter, patterns, | ||
| std::nullopt); | ||
|
|
||
| patterns.add<ClusterIdOpToROCDL>(typeConverter, highBenefit); | ||
| patterns.add<ClusterDimOpToROCDL>(typeConverter, highBenefit); | ||
| patterns.add<ClusterBlockIdToBlockIdLowering>(&typeConverter.getContext(), | ||
| highBenefit); | ||
| patterns.add<ClusterDimBlocksToGridDimLowering>( | ||
| &typeConverter.getContext(), highBenefit); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| } else if (gpuTarget == "rocm") { | |
| using namespace mlir::gpu::index_lowering; | |
| PatternBenefit benefit(1); | |
| PatternBenefit highBenefit(2); | |
| patterns.add<gpu::index_lowering::OpLowering< | |
| gpu::ThreadIdOp, ROCDL::ThreadIdXOp, ROCDL::ThreadIdYOp, | |
| ROCDL::ThreadIdZOp>>(typeConverter, IndexKind::Block, IntrType::Id, | |
| benefit); | |
| patterns.add<gpu::index_lowering::OpLowering< | |
| gpu::BlockDimOp, ROCDL::BlockDimXOp, ROCDL::BlockDimYOp, | |
| ROCDL::BlockDimZOp>>(typeConverter, IndexKind::Block, IntrType::Dim, | |
| benefit); | |
| patterns.add<gpu::index_lowering::OpLowering< | |
| gpu::BlockIdOp, ROCDL::BlockIdXOp, ROCDL::BlockIdYOp, | |
| ROCDL::BlockIdZOp>>(typeConverter, IndexKind::Grid, IntrType::Id, | |
| benefit); | |
| patterns.add<gpu::index_lowering::OpLowering< | |
| gpu::GridDimOp, ROCDL::GridDimXOp, ROCDL::GridDimYOp, | |
| ROCDL::GridDimZOp>>(typeConverter, IndexKind::Grid, IntrType::Dim, | |
| benefit); | |
| patterns.add<GPULaneIdOpToROCDL>(typeConverter, benefit); | |
| patterns.add<GPUShuffleOpToROCDL>(typeConverter, benefit); | |
| patterns.add<GPUBarrierToROCDL>(typeConverter, benefit); | |
| populateMathToLLVMConversionPatterns(typeConverter, patterns); | |
| populateMathToROCDLConversionPatterns(typeConverter, patterns, | |
| std::nullopt); | |
| patterns.add<ClusterIdOpToROCDL>(typeConverter, highBenefit); | |
| patterns.add<ClusterDimOpToROCDL>(typeConverter, highBenefit); | |
| patterns.add<ClusterBlockIdToBlockIdLowering>(&typeConverter.getContext(), | |
| highBenefit); | |
| patterns.add<ClusterDimBlocksToGridDimLowering>( | |
| &typeConverter.getContext(), highBenefit); | |
| } else if (gpuTarget == "rocm") { | |
| mlir::populateGpuToROCDLConversionPatterns(typeConverter, patterns, mlir::gpu::amd::Runtime::HIP, amdgpu::Chipset()); |
I think we can figure out how to get the actual chipset details later.
I have not added lit tests yet.