Skip to content

Commit f5ae82f

Browse files
committed
Address more review
1 parent 7c4cc47 commit f5ae82f

File tree

3 files changed

+130
-103
lines changed

3 files changed

+130
-103
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8131,14 +8131,10 @@ bool VPRecipeBuilder::getScaledReductions(
81318131

81328132
ExtOpTypes[I] = ExtOp->getType();
81338133
ExtKinds[I] = TTI::getPartialReductionExtendKind(Exts[I]);
8134-
// Make sure that the outer extend is either sext or the same kind as the
8135-
// inner extend.
8136-
if (OuterExtKind.has_value()) {
8137-
TTI::PartialReductionExtendKind OuterKind = OuterExtKind.value();
8138-
if (OuterKind != TTI::PartialReductionExtendKind::PR_SignExtend &&
8139-
OuterKind != ExtKinds[I])
8140-
return false;
8141-
}
8134+
// The outer extend kind must be the same as the inner extends, so that
8135+
// they can be folded together.
8136+
if (OuterExtKind.has_value() && OuterExtKind.value() != ExtKinds[I])
8137+
return false;
81428138
}
81438139
return true;
81448140
};

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll

Lines changed: 47 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -2142,8 +2142,8 @@ exit:
21422142
ret i64 %add
21432143
}
21442144

2145-
define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
2146-
; CHECK-INTERLEAVE1-LABEL: define i32 @dotp_sext_mul_zext(
2145+
define i32 @not_dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
2146+
; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_sext_mul_zext(
21472147
; CHECK-INTERLEAVE1-SAME: i64 [[N:%.*]], ptr [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0]] {
21482148
; CHECK-INTERLEAVE1-NEXT: entry:
21492149
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
@@ -2154,29 +2154,30 @@ define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
21542154
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
21552155
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
21562156
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
2157+
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i16>
2158+
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[TMP1]], [[TMP1]]
2159+
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
21572160
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
21582161
; CHECK-INTERLEAVE1: vector.body:
21592162
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2160-
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
2161-
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 2
2162-
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
2163+
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
2164+
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = load i16, ptr [[A]], align 2
2165+
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP4]], i64 0
21632166
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
2164-
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i32>
2165-
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul <8 x i32> [[TMP2]], [[TMP2]]
2166-
; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP3]])
2167-
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2168-
; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = sext <8 x i32> [[TMP4]] to <8 x i64>
2167+
; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i32> [[VEC_PHI]], [[TMP3]]
2168+
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2169+
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = sext <8 x i32> [[TMP6]] to <8 x i64>
21692170
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2170-
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2171-
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2171+
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2172+
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
21722173
; CHECK-INTERLEAVE1: middle.block:
2173-
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PARTIAL_REDUCE]])
2174-
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP5]], i32 7
2174+
; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
2175+
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP7]], i32 7
21752176
; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
21762177
; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
21772178
; CHECK-INTERLEAVE1: scalar.ph:
21782179
;
2179-
; CHECK-INTERLEAVED-LABEL: define i32 @dotp_sext_mul_zext(
2180+
; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_sext_mul_zext(
21802181
; CHECK-INTERLEAVED-SAME: i64 [[N:%.*]], ptr [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0]] {
21812182
; CHECK-INTERLEAVED-NEXT: entry:
21822183
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
@@ -2187,32 +2188,33 @@ define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
21872188
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
21882189
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
21892190
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
2191+
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i16>
2192+
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[TMP1]], [[TMP1]]
2193+
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
21902194
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
21912195
; CHECK-INTERLEAVED: vector.body:
21922196
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2193-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
2194-
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE2:%.*]], [[VECTOR_BODY]] ]
2195-
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 2
2196-
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
2197-
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT3]], <8 x i16> poison, <8 x i32> zeroinitializer
2198-
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i32>
2199-
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul <8 x i32> [[TMP2]], [[TMP2]]
2200-
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP3]])
2201-
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE2]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP3]])
2202-
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT4]] to <8 x i32>
2203-
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = sext <8 x i32> [[TMP4]] to <8 x i64>
2197+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
2198+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2199+
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = load i16, ptr [[A]], align 2
2200+
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <8 x i16> poison, i16 [[TMP4]], i64 0
2201+
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT2]], <8 x i16> poison, <8 x i32> zeroinitializer
2202+
; CHECK-INTERLEAVED-NEXT: [[TMP5]] = add <8 x i32> [[VEC_PHI]], [[TMP3]]
2203+
; CHECK-INTERLEAVED-NEXT: [[TMP6]] = add <8 x i32> [[VEC_PHI1]], [[TMP3]]
2204+
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT3]] to <8 x i32>
2205+
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = sext <8 x i32> [[TMP7]] to <8 x i64>
22042206
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2205-
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2206-
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2207+
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2208+
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
22072209
; CHECK-INTERLEAVED: middle.block:
2208-
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[PARTIAL_REDUCE2]], [[PARTIAL_REDUCE]]
2209-
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]])
2210-
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP5]], i32 7
2210+
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP6]], [[TMP5]]
2211+
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX]])
2212+
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP8]], i32 7
22112213
; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
22122214
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
22132215
; CHECK-INTERLEAVED: scalar.ph:
22142216
;
2215-
; CHECK-MAXBW-LABEL: define i32 @dotp_sext_mul_zext(
2217+
; CHECK-MAXBW-LABEL: define i32 @not_dotp_sext_mul_zext(
22162218
; CHECK-MAXBW-SAME: i64 [[N:%.*]], ptr [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0]] {
22172219
; CHECK-MAXBW-NEXT: entry:
22182220
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
@@ -2223,24 +2225,25 @@ define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
22232225
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
22242226
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
22252227
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
2228+
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i16>
2229+
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[TMP1]], [[TMP1]]
2230+
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
22262231
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
22272232
; CHECK-MAXBW: vector.body:
22282233
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2229-
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
2230-
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 2
2231-
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
2234+
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
2235+
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = load i16, ptr [[A]], align 2
2236+
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP4]], i64 0
22322237
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
2233-
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i32>
2234-
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul <8 x i32> [[TMP2]], [[TMP2]]
2235-
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP3]])
2236-
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2237-
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = sext <8 x i32> [[TMP4]] to <8 x i64>
2238+
; CHECK-MAXBW-NEXT: [[TMP5]] = add <8 x i32> [[VEC_PHI]], [[TMP3]]
2239+
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2240+
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = sext <8 x i32> [[TMP6]] to <8 x i64>
22382241
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2239-
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2240-
; CHECK-MAXBW-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2242+
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2243+
; CHECK-MAXBW-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
22412244
; CHECK-MAXBW: middle.block:
2242-
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PARTIAL_REDUCE]])
2243-
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP5]], i32 7
2245+
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
2246+
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP7]], i32 7
22442247
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
22452248
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
22462249
; CHECK-MAXBW: scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll

Lines changed: 79 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -192,66 +192,94 @@ exit:
192192
ret i32 %add
193193
}
194194

195-
196-
!0 = distinct !{!0, !2, !3}
197-
!1 = distinct !{!1, !2, !4}
198-
!2 = !{!"llvm.loop.interleave.count", i32 1}
199-
!3 = !{!"llvm.loop.vectorize.predicate.enable", i1 false}
200-
!4 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
201-
202-
define i32 @print_partial_reduction_ext_mul(i64 %n, ptr %a, i8 %b) {
203-
; CHECK: VPlan 'Initial VPlan for VF={8},UF>=1' {
204-
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
205-
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
206-
; CHECK-NEXT: vp<%2> = original trip-count
195+
define i32 @print_partial_reduction_ext_mul(ptr %a, ptr %b) "target-features"="+neon,+dotprod" {
196+
; CHECK: VPlan 'Initial VPlan for VF={8,16},UF>=1' {
197+
; CHECK-NEXT: Live-in vp<[[VF:%.]]> = VF
198+
; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF
199+
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
200+
; CHECK-NEXT: Live-in ir<1024> = original trip-count
201+
; CHECK-EMPTY:
202+
; CHECK-NEXT: ir-bb<entry>:
203+
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
204+
; CHECK-EMPTY:
205+
; CHECK-NEXT: vector.ph:
206+
; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
207+
; CHECK-NEXT: Successor(s): vector loop
207208
; CHECK-EMPTY:
208-
; CHECK-NEXT: ir-bb<entry>:
209-
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (1 + %n)
210-
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
209+
; CHECK-NEXT: <x1> vector loop: {
210+
; CHECK-NEXT: vector.body:
211+
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
212+
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi vp<[[RDX_START]]>, vp<[[REDUCE:%.+]]> (VF scaled by 1/4)
213+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
214+
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]>
215+
; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>
216+
; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]>
217+
; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[STEPS]]>
218+
; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b>
219+
; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]>
220+
; CHECK-NEXT: EXPRESSION vp<[[REDUCE]]> = ir<[[ACC]]> + partial.reduce.add (mul (ir<%load.b> zext to i32), (ir<%load.a> zext to i32))
221+
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
222+
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
223+
; CHECK-NEXT: No successors
224+
; CHECK-NEXT: }
225+
; CHECK-NEXT: Successor(s): middle.block
211226
; CHECK-EMPTY:
212-
; CHECK-NEXT: vector.ph:
213-
; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
214-
; CHECK-NEXT: Successor(s): vector loop
227+
; CHECK-NEXT: middle.block:
228+
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, vp<[[REDUCE]]>
229+
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]>
230+
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
231+
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
215232
; CHECK-EMPTY:
216-
; CHECK-NEXT: <x1> vector loop: {
217-
; CHECK-NEXT: vector.body:
218-
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
219-
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> (VF scaled by 1/4)
220-
; CHECK-NEXT: CLONE ir<%load> = load ir<%a>
221-
; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + partial.reduce.add (mul (ir<%b> zext to i32), (ir<%b> zext to i32))
222-
; CHECK-NEXT: WIDEN-CAST ir<%load.ext> = sext ir<%load> to i32
223-
; CHECK-NEXT: WIDEN-CAST ir<%load.ext.ext> = sext ir<%load.ext> to i64
224-
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
225-
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
226-
; CHECK-NEXT: No successors
227-
; CHECK-NEXT: }
228-
; CHECK-NEXT: Successor(s): middle.block
233+
; CHECK-NEXT: ir-bb<exit>:
234+
; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RED_RESULT]]> from middle.block)
235+
; CHECK-NEXT: No successors
229236
; CHECK-EMPTY:
230-
; CHECK-NEXT: middle.block:
231-
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
232-
; CHECK-NEXT: EMIT vp<%vector.recur.extract> = extract-last-element ir<%load.ext.ext>
233-
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%2>, vp<[[VTC]]>
234-
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
235-
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
237+
; CHECK-NEXT: scalar.ph:
238+
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VEC_TC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
239+
; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RED_RESULT]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
240+
; CHECK-NEXT: Successor(s): ir-bb<for.body>
236241
; CHECK-EMPTY:
242+
; CHECK-NEXT: ir-bb<for.body>:
243+
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
244+
; CHECK-NEXT: IR %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
245+
; CHECK-NEXT: IR %gep.a = getelementptr i8, ptr %a, i64 %iv
246+
; CHECK-NEXT: IR %load.a = load i8, ptr %gep.a, align 1
247+
; CHECK-NEXT: IR %ext.a = zext i8 %load.a to i16
248+
; CHECK-NEXT: IR %gep.b = getelementptr i8, ptr %b, i64 %iv
249+
; CHECK-NEXT: IR %load.b = load i8, ptr %gep.b, align 1
250+
; CHECK-NEXT: IR %ext.b = zext i8 %load.b to i16
251+
; CHECK-NEXT: IR %mul = mul i16 %ext.b, %ext.a
252+
; CHECK-NEXT: IR %mul.ext = zext i16 %mul to i32
253+
; CHECK-NEXT: IR %add = add i32 %mul.ext, %accum
254+
; CHECK-NEXT: IR %iv.next = add i64 %iv, 1
255+
; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
256+
; CHECK-NEXT: No successors
257+
; CHECK-NEXT: }
237258
entry:
238-
br label %loop
259+
br label %for.body
239260

240-
loop:
241-
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
242-
%res1 = phi i64 [ 0, %entry ], [ %load.ext.ext, %loop ]
243-
%res2 = phi i32 [ 0, %entry ], [ %add, %loop ]
244-
%load = load i16, ptr %a, align 2
245-
%iv.next = add i64 %iv, 1
246-
%conv = zext i8 %b to i16
247-
%mul = mul i16 %conv, %conv
261+
for.body: ; preds = %for.body, %entry
262+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
263+
%accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
264+
%gep.a = getelementptr i8, ptr %a, i64 %iv
265+
%load.a = load i8, ptr %gep.a, align 1
266+
%ext.a = zext i8 %load.a to i16
267+
%gep.b = getelementptr i8, ptr %b, i64 %iv
268+
%load.b = load i8, ptr %gep.b, align 1
269+
%ext.b = zext i8 %load.b to i16
270+
%mul = mul i16 %ext.b, %ext.a
248271
%mul.ext = zext i16 %mul to i32
249-
%add = add i32 %res2, %mul.ext
250-
%load.ext = sext i16 %load to i32
251-
%load.ext.ext = sext i32 %load.ext to i64
252-
%exitcond740.not = icmp eq i64 %iv, %n
253-
br i1 %exitcond740.not, label %exit, label %loop
272+
%add = add i32 %mul.ext, %accum
273+
%iv.next = add i64 %iv, 1
274+
%exitcond.not = icmp eq i64 %iv.next, 1024
275+
br i1 %exitcond.not, label %exit, label %for.body
254276

255277
exit:
256278
ret i32 %add
257279
}
280+
281+
!0 = distinct !{!0, !2, !3}
282+
!1 = distinct !{!1, !2, !4}
283+
!2 = !{!"llvm.loop.interleave.count", i32 1}
284+
!3 = !{!"llvm.loop.vectorize.predicate.enable", i1 false}
285+
!4 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}

0 commit comments

Comments
 (0)