Skip to content

Commit cb37553

Browse files
committed
Address more review
1 parent 05d9db4 commit cb37553

File tree

3 files changed

+129
-100
lines changed

3 files changed

+129
-100
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8005,14 +8005,10 @@ bool VPRecipeBuilder::getScaledReductions(
80058005

80068006
ExtOpTypes[I] = ExtOp->getType();
80078007
ExtKinds[I] = TTI::getPartialReductionExtendKind(Exts[I]);
8008-
// Make sure that the outer extend is either sext or the same kind as the
8009-
// inner extend.
8010-
if (OuterExtKind.has_value()) {
8011-
TTI::PartialReductionExtendKind OuterKind = OuterExtKind.value();
8012-
if (OuterKind != TTI::PartialReductionExtendKind::PR_SignExtend &&
8013-
OuterKind != ExtKinds[I])
8014-
return false;
8015-
}
8008+
// The outer extend kind must be the same as the inner extends, so that
8009+
// they can be folded together.
8010+
if (OuterExtKind.has_value() && OuterExtKind.value() != ExtKinds[I])
8011+
return false;
80168012
}
80178013
return true;
80188014
};

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll

Lines changed: 47 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -2376,8 +2376,8 @@ exit:
23762376
ret i64 %add
23772377
}
23782378

2379-
define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
2380-
; CHECK-INTERLEAVE1-LABEL: define i32 @dotp_sext_mul_zext(
2379+
define i32 @not_dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
2380+
; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_sext_mul_zext(
23812381
; CHECK-INTERLEAVE1-SAME: i64 [[N:%.*]], ptr [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0]] {
23822382
; CHECK-INTERLEAVE1-NEXT: entry:
23832383
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
@@ -2388,29 +2388,30 @@ define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
23882388
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
23892389
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
23902390
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
2391+
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i16>
2392+
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[TMP1]], [[TMP1]]
2393+
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
23912394
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
23922395
; CHECK-INTERLEAVE1: vector.body:
23932396
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2394-
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
2395-
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 2
2396-
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
2397+
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
2398+
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = load i16, ptr [[A]], align 2
2399+
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP4]], i64 0
23972400
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
2398-
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i32>
2399-
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul <8 x i32> [[TMP2]], [[TMP2]]
2400-
; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP3]])
2401-
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2402-
; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = sext <8 x i32> [[TMP4]] to <8 x i64>
2401+
; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i32> [[VEC_PHI]], [[TMP3]]
2402+
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2403+
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = sext <8 x i32> [[TMP6]] to <8 x i64>
24032404
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2404-
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2405-
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2405+
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2406+
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
24062407
; CHECK-INTERLEAVE1: middle.block:
2407-
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PARTIAL_REDUCE]])
2408-
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP5]], i32 7
2408+
; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
2409+
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP7]], i32 7
24092410
; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
24102411
; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
24112412
; CHECK-INTERLEAVE1: scalar.ph:
24122413
;
2413-
; CHECK-INTERLEAVED-LABEL: define i32 @dotp_sext_mul_zext(
2414+
; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_sext_mul_zext(
24142415
; CHECK-INTERLEAVED-SAME: i64 [[N:%.*]], ptr [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0]] {
24152416
; CHECK-INTERLEAVED-NEXT: entry:
24162417
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
@@ -2421,32 +2422,33 @@ define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
24212422
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
24222423
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
24232424
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
2425+
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i16>
2426+
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[TMP1]], [[TMP1]]
2427+
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
24242428
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
24252429
; CHECK-INTERLEAVED: vector.body:
24262430
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2427-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
2428-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE2:%.*]], [[VECTOR_BODY]] ]
2429-
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 2
2430-
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
2431-
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT3]], <8 x i16> poison, <8 x i32> zeroinitializer
2432-
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i32>
2433-
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul <8 x i32> [[TMP2]], [[TMP2]]
2434-
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP3]])
2435-
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE2]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP3]])
2436-
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT4]] to <8 x i32>
2437-
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = sext <8 x i32> [[TMP4]] to <8 x i64>
2431+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
2432+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2433+
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = load i16, ptr [[A]], align 2
2434+
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <8 x i16> poison, i16 [[TMP4]], i64 0
2435+
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT2]], <8 x i16> poison, <8 x i32> zeroinitializer
2436+
; CHECK-INTERLEAVED-NEXT: [[TMP5]] = add <8 x i32> [[VEC_PHI]], [[TMP3]]
2437+
; CHECK-INTERLEAVED-NEXT: [[TMP6]] = add <8 x i32> [[VEC_PHI1]], [[TMP3]]
2438+
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT3]] to <8 x i32>
2439+
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = sext <8 x i32> [[TMP7]] to <8 x i64>
24382440
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2439-
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2440-
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2441+
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2442+
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
24412443
; CHECK-INTERLEAVED: middle.block:
2442-
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[PARTIAL_REDUCE2]], [[PARTIAL_REDUCE]]
2443-
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]])
2444-
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP5]], i32 7
2444+
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP6]], [[TMP5]]
2445+
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX]])
2446+
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP8]], i32 7
24452447
; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
24462448
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
24472449
; CHECK-INTERLEAVED: scalar.ph:
24482450
;
2449-
; CHECK-MAXBW-LABEL: define i32 @dotp_sext_mul_zext(
2451+
; CHECK-MAXBW-LABEL: define i32 @not_dotp_sext_mul_zext(
24502452
; CHECK-MAXBW-SAME: i64 [[N:%.*]], ptr [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0]] {
24512453
; CHECK-MAXBW-NEXT: entry:
24522454
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
@@ -2457,24 +2459,25 @@ define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
24572459
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
24582460
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
24592461
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
2462+
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i16>
2463+
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[TMP1]], [[TMP1]]
2464+
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
24602465
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
24612466
; CHECK-MAXBW: vector.body:
24622467
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2463-
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
2464-
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 2
2465-
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
2468+
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
2469+
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = load i16, ptr [[A]], align 2
2470+
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP4]], i64 0
24662471
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
2467-
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i32>
2468-
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul <8 x i32> [[TMP2]], [[TMP2]]
2469-
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP3]])
2470-
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2471-
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = sext <8 x i32> [[TMP4]] to <8 x i64>
2472+
; CHECK-MAXBW-NEXT: [[TMP5]] = add <8 x i32> [[VEC_PHI]], [[TMP3]]
2473+
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2474+
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = sext <8 x i32> [[TMP6]] to <8 x i64>
24722475
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2473-
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2474-
; CHECK-MAXBW-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2476+
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2477+
; CHECK-MAXBW-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
24752478
; CHECK-MAXBW: middle.block:
2476-
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PARTIAL_REDUCE]])
2477-
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP5]], i32 7
2479+
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
2480+
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP7]], i32 7
24782481
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
24792482
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
24802483
; CHECK-MAXBW: scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll

Lines changed: 78 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -147,59 +147,89 @@ exit:
147147
ret i32 %add
148148
}
149149

150-
define i32 @print_partial_reduction_ext_mul(i64 %n, ptr %a, i8 %b) {
151-
; CHECK: VPlan 'Initial VPlan for VF={8},UF>=1' {
152-
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
153-
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
154-
; CHECK-NEXT: vp<%2> = original trip-count
155-
; CHECK-EMPTY:
156-
; CHECK-NEXT: ir-bb<entry>:
157-
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (1 + %n)
158-
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
159-
; CHECK-EMPTY:
160-
; CHECK-NEXT: vector.ph:
161-
; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
162-
; CHECK-NEXT: Successor(s): vector loop
163-
; CHECK-EMPTY:
164-
; CHECK-NEXT: <x1> vector loop: {
165-
; CHECK-NEXT: vector.body:
166-
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
167-
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> (VF scaled by 1/4)
168-
; CHECK-NEXT: CLONE ir<%load> = load ir<%a>
169-
; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + partial.reduce.add (mul (ir<%b> zext to i32), (ir<%b> zext to i32))
170-
; CHECK-NEXT: WIDEN-CAST ir<%load.ext> = sext ir<%load> to i32
171-
; CHECK-NEXT: WIDEN-CAST ir<%load.ext.ext> = sext ir<%load.ext> to i64
172-
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
173-
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
174-
; CHECK-NEXT: No successors
175-
; CHECK-NEXT: }
176-
; CHECK-NEXT: Successor(s): middle.block
177-
; CHECK-EMPTY:
178-
; CHECK-NEXT: middle.block:
179-
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
180-
; CHECK-NEXT: EMIT vp<%vector.recur.extract> = extract-last-element ir<%load.ext.ext>
181-
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%2>, vp<[[VTC]]>
182-
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
183-
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
150+
define i32 @print_partial_reduction_ext_mul(ptr %a, ptr %b) {
151+
; CHECK: VPlan 'Initial VPlan for VF={8,16},UF>=1' {
152+
; CHECK-NEXT: Live-in vp<[[VF:%.]]> = VF
153+
; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF
154+
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
155+
; CHECK-NEXT: Live-in ir<1024> = original trip-count
156+
; CHECK-EMPTY:
157+
; CHECK-NEXT: ir-bb<entry>:
158+
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
159+
; CHECK-EMPTY:
160+
; CHECK-NEXT: vector.ph:
161+
; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
162+
; CHECK-NEXT: Successor(s): vector loop
184163
; CHECK-EMPTY:
164+
; CHECK-NEXT: <x1> vector loop: {
165+
; CHECK-NEXT: vector.body:
166+
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
167+
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi vp<[[RDX_START]]>, vp<[[REDUCE:%.+]]> (VF scaled by 1/4)
168+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
169+
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]>
170+
; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>
171+
; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]>
172+
; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[STEPS]]>
173+
; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b>
174+
; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]>
175+
; CHECK-NEXT: EXPRESSION vp<[[REDUCE]]> = ir<[[ACC]]> + partial.reduce.add (mul (ir<%load.b> zext to i32), (ir<%load.a> zext to i32))
176+
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
177+
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
178+
; CHECK-NEXT: No successors
179+
; CHECK-NEXT: }
180+
; CHECK-NEXT: Successor(s): middle.block
181+
; CHECK-EMPTY:
182+
; CHECK-NEXT: middle.block:
183+
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, vp<[[REDUCE]]>
184+
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]>
185+
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
186+
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
187+
; CHECK-EMPTY:
188+
; CHECK-NEXT: ir-bb<exit>:
189+
; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RED_RESULT]]> from middle.block)
190+
; CHECK-NEXT: No successors
191+
; CHECK-EMPTY:
192+
; CHECK-NEXT: scalar.ph:
193+
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VEC_TC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
194+
; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RED_RESULT]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
195+
; CHECK-NEXT: Successor(s): ir-bb<for.body>
196+
; CHECK-EMPTY:
197+
; CHECK-NEXT: ir-bb<for.body>:
198+
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
199+
; CHECK-NEXT: IR %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
200+
; CHECK-NEXT: IR %gep.a = getelementptr i8, ptr %a, i64 %iv
201+
; CHECK-NEXT: IR %load.a = load i8, ptr %gep.a, align 1
202+
; CHECK-NEXT: IR %ext.a = zext i8 %load.a to i16
203+
; CHECK-NEXT: IR %gep.b = getelementptr i8, ptr %b, i64 %iv
204+
; CHECK-NEXT: IR %load.b = load i8, ptr %gep.b, align 1
205+
; CHECK-NEXT: IR %ext.b = zext i8 %load.b to i16
206+
; CHECK-NEXT: IR %mul = mul i16 %ext.b, %ext.a
207+
; CHECK-NEXT: IR %mul.ext = zext i16 %mul to i32
208+
; CHECK-NEXT: IR %add = add i32 %mul.ext, %accum
209+
; CHECK-NEXT: IR %iv.next = add i64 %iv, 1
210+
; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
211+
; CHECK-NEXT: No successors
212+
; CHECK-NEXT: }
185213
entry:
186-
br label %loop
214+
br label %for.body
187215

188-
loop:
189-
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
190-
%res1 = phi i64 [ 0, %entry ], [ %load.ext.ext, %loop ]
191-
%res2 = phi i32 [ 0, %entry ], [ %add, %loop ]
192-
%load = load i16, ptr %a, align 2
193-
%iv.next = add i64 %iv, 1
194-
%conv = zext i8 %b to i16
195-
%mul = mul i16 %conv, %conv
216+
for.body: ; preds = %for.body, %entry
217+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
218+
%accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
219+
%gep.a = getelementptr i8, ptr %a, i64 %iv
220+
%load.a = load i8, ptr %gep.a, align 1
221+
%ext.a = zext i8 %load.a to i16
222+
%gep.b = getelementptr i8, ptr %b, i64 %iv
223+
%load.b = load i8, ptr %gep.b, align 1
224+
%ext.b = zext i8 %load.b to i16
225+
%mul = mul i16 %ext.b, %ext.a
196226
%mul.ext = zext i16 %mul to i32
197-
%add = add i32 %res2, %mul.ext
198-
%load.ext = sext i16 %load to i32
199-
%load.ext.ext = sext i32 %load.ext to i64
200-
%exitcond740.not = icmp eq i64 %iv, %n
201-
br i1 %exitcond740.not, label %exit, label %loop
227+
%add = add i32 %mul.ext, %accum
228+
%iv.next = add i64 %iv, 1
229+
%exitcond.not = icmp eq i64 %iv.next, 1024
230+
br i1 %exitcond.not, label %exit, label %for.body
202231

203232
exit:
204233
ret i32 %add
205234
}
235+

0 commit comments

Comments
 (0)