Commit 5d0bfd1

[MLIR][SCFToGPU] Guard operands before AffineApplyOp::create to avoid crash (#167959)
This fixes a crash in SCF→GPU when building the per-dim index for a mapped scf.parallel.

**Change:**
- Map step/lb through cloningMap, then run ensureLaunchIndependent.
- If either is still unavailable at launch scope, emit a match failure; otherwise build the affine.apply.

**Why this is correct:**
- Matches how the pass already handles launch bounds; avoids creating an op with invalid operands and replaces a segfault with a clear diagnostic.

**Tests:**
- Added two small regressions that lower to gpu.launch and exercise the affine.apply path.

Fixes: #167654

Signed-off-by: Shashi Shankar <[email protected]>
1 parent 4bb4ad4 · commit 5d0bfd1

2 files changed: +64, -2 lines

mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp

Lines changed: 16 additions & 2 deletions
@@ -453,10 +453,24 @@ static LogicalResult processParallelLoop(
             1, 2,
             rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0) +
                 rewriter.getAffineSymbolExpr(1));
+        // Map through cloningMap first so we use values valid at the launch
+        // scope, then ensure they are launch-independent (or cloned constants).
+        Value mappedStep = cloningMap.lookupOrDefault(step);
+        Value mappedLowerBound = cloningMap.lookupOrDefault(lowerBound);
+
+        mappedStep = ensureLaunchIndependent(mappedStep);
+        mappedLowerBound = ensureLaunchIndependent(mappedLowerBound);
+
+        // If either cannot be made available above the launch, fail gracefully.
+        if (!mappedStep || !mappedLowerBound) {
+          return rewriter.notifyMatchFailure(
+              parallelOp, "lower bound / step must be constant or defined above "
+                          "the gpu.launch");
+        }
+
         newIndex = AffineApplyOp::create(
             rewriter, loc, annotation.getMap().compose(lowerAndStep),
-            ValueRange{operand, ensureLaunchIndependent(step),
-                       ensureLaunchIndependent(lowerBound)});
+            ValueRange{operand, mappedStep, mappedLowerBound});
         // If there was also a bound, insert that, too.
         // TODO: Check that we do not assign bounds twice.
         if (annotation.getBound()) {
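For reference, the per-dim index built by this hunk composes the loop's mapping with the affine map (d0)[s0, s1] -> (d0 * s0 + s1), i.e. hardware id * step + lower bound. A minimal sketch of the resulting IR under an identity mapping (the value names below are hypothetical, not taken from the pass):

// Sketch only: %tid, %step, and %lb stand in for the launch id and the
// guarded mappedStep/mappedLowerBound operands.
%idx = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>(%tid)[%step, %lb]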

mlir/test/Conversion/SCFToGPU/parallel_loop.mlir

Lines changed: 48 additions & 0 deletions
@@ -673,3 +673,51 @@ func.func @nested_parallel_with_side_effect() {
 
 // CHECK: gpu.launch
 // CHECK-NOT: scf.parallel
+
+// -----
+
+func.func @scf2gpu_index_creation_2d() {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c32 = arith.constant 32 : index
+
+  // Single 2-D scf.parallel mapped to block_x and thread_x.
+  // Use both IVs so the conversion must compute indices.
+  scf.parallel (%bx, %tx) = (%c0, %c0) to (%c32, %c32) step (%c1, %c1) {
+    %u = arith.addi %bx, %c0 : index
+    %v = arith.addi %tx, %c0 : index
+  } {
+    mapping = [
+      #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
+      #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
+    ]
+  }
+  return
+}
+
+// CHECK-LABEL: func @scf2gpu_index_creation_2d
+// CHECK: gpu.launch
+// CHECK: %[[IDX:.*]] = affine.apply
+// CHECK: arith.addi %[[IDX]],
+
+// -----
+
+func.func @scf2gpu_index_creation_1d() {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c64 = arith.constant 64 : index
+
+  scf.parallel (%t) = (%c0) to (%c64) step (%c1) {
+    %w = arith.addi %t, %c0 : index
+  } {
+    mapping = [
+      #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
+    ]
+  }
+  return
+}
+
+// CHECK-LABEL: func @scf2gpu_index_creation_1d
+// CHECK: gpu.launch
+// CHECK: %[[IDX:.*]] = affine.apply
+// CHECK: arith.addi %[[IDX]],
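The new cases are driven by the file's existing RUN line; its typical shape for this conversion (the exact pipeline flags in parallel_loop.mlir may differ) is:

// RUN: mlir-opt %s -convert-parallel-loops-to-gpu -split-input-file | FileCheck %s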
