[MLIR][TORCH] Undo indices conversion to i32 for TMTensor ops (#4292)

vivekkhandelwal1 · web-flow · commit bc1dae9dde4b · 2025-08-13T19:43:40.000+05:30
Signed-off-by: Vivek Khandelwal <vivekkhandelwal1424@gmail.com>
Co-authored-by: Kunwar Grover
diff --git a/lib/Conversion/TorchToTMTensor/TorchToTMTensor.cpp b/lib/Conversion/TorchToTMTensor/TorchToTMTensor.cpp
@@ -89,6 +89,7 @@ convertTorchScatterIndexAndSrcToTMScatterIndexAndSrc(PatternRewriter &rewriter,
   // Store location for insertions
   Location loc = src.getLoc();
 
+  Type indicesElemType = getElementTypeOrSelf(indices);
   Value indexSize = getTensorSize(rewriter, loc, indices);
   indexSize = castIntToIndex(rewriter, loc, indexSize);
   SmallVector<Value> indexShape = getTensorSizes(rewriter, loc, indices);
@@ -97,7 +98,7 @@ convertTorchScatterIndexAndSrcToTMScatterIndexAndSrc(PatternRewriter &rewriter,
   // We flatten the `src` values from (i, j, k, ...) -> (i * j * k * ...)
   SmallVector<Value> indSliceShape({indexSize, cstOne});
   Value indSlice =
-      createZeroInitTensor(rewriter, loc, indSliceShape, rewriter.getI32Type());
+      createZeroInitTensor(rewriter, loc, indSliceShape, indicesElemType);
 
   // New output shape will be equal to the product of the dimensions of the
   // updates
@@ -142,13 +143,13 @@ convertTorchScatterIndexAndSrcToTMScatterIndexAndSrc(PatternRewriter &rewriter,
                 SmallVector<Value> yieldVals;
                 for (Value v : indexValues) {
                   Value scalar = castIndexToInt64(b, loc, v);
-                  yieldVals.push_back(b.create<arith::TruncIOp>(
-                      loc, rewriter.getI32Type(), scalar));
+                  yieldVals.push_back(convertScalarToDtype(
+                      rewriter, loc, scalar, indicesElemType));
                 }
                 // Replace the original index with the index specified
                 // by the scatter.
                 yieldVals[dim] = convertScalarToDtype(
-                    rewriter, loc, extractIndexValue, rewriter.getI32Type());
+                    rewriter, loc, extractIndexValue, indicesElemType);
                 yieldVals.push_back(extractSrcValue);
                 b.create<linalg::YieldOp>(loc, yieldVals);
               })
@@ -177,7 +178,7 @@ convertTorchScatterIndexAndSrcToTMScatterIndexAndSrc(PatternRewriter &rewriter,
       rewriter.create<arith::ConstantIndexOp>(loc, indexType.getRank());
   Value flattenedIndices = createZeroInitTensor(
       rewriter, loc, SmallVector<Value>({indexSize, indicesRank}),
-      rewriter.getI32Type());
+      indexType.getElementType());
   SmallVector<Value> scatterInputsVector(flattenedUpdates);
   for (auto const slice : ArrayRef(scatterInputsVector).drop_back()) {
     SmallVector<Value> sizes = getTensorSizes(rewriter, loc, slice);
@@ -540,8 +541,7 @@ class ConvertAtenBincountOp : public OpConversionPattern<AtenBincountOp> {
 
     // Creating a tm_tensor.scatter op with the following mapping:
     // 1.) `input` tensor maps to the indices in scatter op. `input` is
-    // expanded from 1-d to 2-d, and its element type is set to i32 as required
-    // for the scatter op.
+    // expanded from 1-d to 2-d.
     // 2.) `updates` is a 1-d dummy tensor with the size equivalent to the
     // `input`.
     // 3.) `bincount` a 1-d tensor maps to the original in scatter op
@@ -556,12 +556,10 @@ class ConvertAtenBincountOp : public OpConversionPattern<AtenBincountOp> {
     Value expandedInputTensor = rewriter.create<AtenUnsqueezeOp>(
         loc, expandInputType, torchTypeInput, torchCstOne);
 
-    // Converting the input element type to i32.
-    Value indices = convertTensorToDtype(
-        rewriter, loc, expandedInputTensor,
-        mlir::IntegerType::get(context, 32, mlir::IntegerType::Signed));
-    indices = typeConverter->materializeTargetConversion(
-        rewriter, loc, typeConverter->convertType(indices.getType()), indices);
+    Value indices = typeConverter->materializeTargetConversion(
+        rewriter, loc,
+        typeConverter->convertType(expandedInputTensor.getType()),
+        expandedInputTensor);
 
     auto resultType = cast<RankedTensorType>(
         typeConverter->convertType(op->getResult(0).getType()));
@@ -1039,7 +1037,6 @@ class ConvertAtenMaxPool2dWithIndicesBackwardOp
       return failure();
 
     Location loc = op.getLoc();
-    MLIRContext *context = op->getContext();
     Value gradOutput = adaptor.getGradOutput();
     Value input = adaptor.getSelf();
     RankedTensorType gradOutputType =
@@ -1049,12 +1046,7 @@ class ConvertAtenMaxPool2dWithIndicesBackwardOp
     Type inputElemType = inputType.getElementType();
     int64_t tensorOperandRank = inputType.getRank();
 
-    // `TMTensor::ScatterOp` expects indices of element type i32.
-    Value indices = convertTensorToDtype(
-        rewriter, loc, op.getIndices(),
-        mlir::IntegerType::get(context, 32, mlir::IntegerType::Signed));
-    indices = typeConverter->materializeTargetConversion(
-        rewriter, loc, typeConverter->convertType(indices.getType()), indices);
+    Value indices = adaptor.getIndices();
     RankedTensorType indicesType = cast<RankedTensorType>(indices.getType());
     Type indicesElemType = indicesType.getElementType();
 
diff --git a/test/Conversion/TorchToTMTensor/basic.mlir b/test/Conversion/TorchToTMTensor/basic.mlir
@@ -0,0 +1,28 @@
+// RUN: torch-mlir-opt <%s -convert-torch-to-tmtensor -split-input-file -verify-diagnostics | FileCheck %s
+
+// -----
+
+// CHECK-LABEL: @scatter_src_i64_index
+// CHECK: tm_tensor.scatter {dimension_map = array<i64: 0, 1, 2>} unique_indices(false) ins(%{{.*}}, %{{.*}} : tensor<?xf32>, tensor<?x3xi64>) outs(%{{.*}} : tensor<10x8x6xf32>) {
+// CHECK:      ^bb0(%arg3: f32, %arg4: f32):
+// CHECK:        tm_tensor.yield %arg3 : f32
+// CHECK:      } -> tensor<10x8x6xf32>
+func.func @scatter_src_i64_index(%arg0: !torch.vtensor<[10,8,6],f32>, %arg1: !torch.vtensor<[2,4,3],si64>, %arg2: !torch.vtensor<[5,8,6],f32>) -> !torch.vtensor<[10,8,6],f32> {
+  %int0 = torch.constant.int 0
+  %0 = torch.aten.scatter.src %arg0, %int0, %arg1, %arg2 : !torch.vtensor<[10,8,6],f32>, !torch.int, !torch.vtensor<[2,4,3],si64>, !torch.vtensor<[5,8,6],f32> -> !torch.vtensor<[10,8,6],f32>
+  return %0 : !torch.vtensor<[10,8,6],f32>
+}
+
+
+// -----
+
+// CHECK-LABEL: @scatter_src_i32_index
+// CHECK: tm_tensor.scatter {dimension_map = array<i64: 0, 1, 2>} unique_indices(false) ins(%{{.*}}, %{{.*}} : tensor<?xf32>, tensor<?x3xi32>) outs(%{{.*}} : tensor<10x8x6xf32>) {
+// CHECK:      ^bb0(%arg3: f32, %arg4: f32):
+// CHECK:        tm_tensor.yield %arg3 : f32
+// CHECK:      } -> tensor<10x8x6xf32>
+func.func @scatter_src_i32_index(%arg0: !torch.vtensor<[10,8,6],f32>, %arg1: !torch.vtensor<[2,4,3],si32>, %arg2: !torch.vtensor<[5,8,6],f32>) -> !torch.vtensor<[10,8,6],f32> {
+  %int0 = torch.constant.int 0
+  %0 = torch.aten.scatter.src %arg0, %int0, %arg1, %arg2 : !torch.vtensor<[10,8,6],f32>, !torch.int, !torch.vtensor<[2,4,3],si32>, !torch.vtensor<[5,8,6],f32> -> !torch.vtensor<[10,8,6],f32>
+  return %0 : !torch.vtensor<[10,8,6],f32>
+}