@@ -2142,8 +2142,8 @@ exit:
21422142 ret i64 %add
21432143}
21442144
2145- define i32 @dotp_sext_mul_zext (i64 %n , ptr %a , i8 %b ) {
2146- ; CHECK-INTERLEAVE1-LABEL: define i32 @dotp_sext_mul_zext (
2145+ define i32 @not_dotp_sext_mul_zext (i64 %n , ptr %a , i8 %b ) {
2146+ ; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_sext_mul_zext (
21472147; CHECK-INTERLEAVE1-SAME: i64 [[N:%.*]], ptr [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0]] {
21482148; CHECK-INTERLEAVE1-NEXT: entry:
21492149; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
@@ -2154,29 +2154,30 @@ define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
21542154; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
21552155; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
21562156; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
2157+ ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i16>
2158+ ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[TMP1]], [[TMP1]]
2159+ ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
21572160; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
21582161; CHECK-INTERLEAVE1: vector.body:
21592162; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2160- ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE :%.*]], [[VECTOR_BODY]] ]
2161- ; CHECK-INTERLEAVE1-NEXT: [[TMP1 :%.*]] = load i16, ptr [[A]], align 2
2162- ; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1 ]], i64 0
2163+ ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5 :%.*]], [[VECTOR_BODY]] ]
2164+ ; CHECK-INTERLEAVE1-NEXT: [[TMP4 :%.*]] = load i16, ptr [[A]], align 2
2165+ ; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP4 ]], i64 0
21632166; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
2164- ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i32>
2165- ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul <8 x i32> [[TMP2]], [[TMP2]]
2166- ; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP3]])
2167- ; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2168- ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = sext <8 x i32> [[TMP4]] to <8 x i64>
2167+ ; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i32> [[VEC_PHI]], [[TMP3]]
2168+ ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2169+ ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = sext <8 x i32> [[TMP6]] to <8 x i64>
21692170; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2170- ; CHECK-INTERLEAVE1-NEXT: [[TMP6 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2171- ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2171+ ; CHECK-INTERLEAVE1-NEXT: [[TMP8 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2172+ ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP8 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
21722173; CHECK-INTERLEAVE1: middle.block:
2173- ; CHECK-INTERLEAVE1-NEXT: [[TMP7 :%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PARTIAL_REDUCE ]])
2174- ; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP5 ]], i32 7
2174+ ; CHECK-INTERLEAVE1-NEXT: [[TMP9 :%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5 ]])
2175+ ; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP7 ]], i32 7
21752176; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
21762177; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
21772178; CHECK-INTERLEAVE1: scalar.ph:
21782179;
2179- ; CHECK-INTERLEAVED-LABEL: define i32 @dotp_sext_mul_zext (
2180+ ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_sext_mul_zext (
21802181; CHECK-INTERLEAVED-SAME: i64 [[N:%.*]], ptr [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0]] {
21812182; CHECK-INTERLEAVED-NEXT: entry:
21822183; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
@@ -2187,32 +2188,33 @@ define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
21872188; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
21882189; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
21892190; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
2191+ ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i16>
2192+ ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[TMP1]], [[TMP1]]
2193+ ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
21902194; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
21912195; CHECK-INTERLEAVED: vector.body:
21922196; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2193- ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
2194- ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE2:%.*]], [[VECTOR_BODY]] ]
2195- ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 2
2196- ; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
2197- ; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT3]], <8 x i16> poison, <8 x i32> zeroinitializer
2198- ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i32>
2199- ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul <8 x i32> [[TMP2]], [[TMP2]]
2200- ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP3]])
2201- ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE2]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP3]])
2202- ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT4]] to <8 x i32>
2203- ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = sext <8 x i32> [[TMP4]] to <8 x i64>
2197+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
2198+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2199+ ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = load i16, ptr [[A]], align 2
2200+ ; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <8 x i16> poison, i16 [[TMP4]], i64 0
2201+ ; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT2]], <8 x i16> poison, <8 x i32> zeroinitializer
2202+ ; CHECK-INTERLEAVED-NEXT: [[TMP5]] = add <8 x i32> [[VEC_PHI]], [[TMP3]]
2203+ ; CHECK-INTERLEAVED-NEXT: [[TMP6]] = add <8 x i32> [[VEC_PHI1]], [[TMP3]]
2204+ ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT3]] to <8 x i32>
2205+ ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = sext <8 x i32> [[TMP7]] to <8 x i64>
22042206; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2205- ; CHECK-INTERLEAVED-NEXT: [[TMP6 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2206- ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP6 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2207+ ; CHECK-INTERLEAVED-NEXT: [[TMP9 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2208+ ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP9 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
22072209; CHECK-INTERLEAVED: middle.block:
2208- ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[PARTIAL_REDUCE2 ]], [[PARTIAL_REDUCE ]]
2209- ; CHECK-INTERLEAVED-NEXT: [[TMP7 :%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]])
2210- ; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP5 ]], i32 7
2210+ ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP6 ]], [[TMP5 ]]
2211+ ; CHECK-INTERLEAVED-NEXT: [[TMP10 :%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX]])
2212+ ; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP8 ]], i32 7
22112213; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
22122214; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
22132215; CHECK-INTERLEAVED: scalar.ph:
22142216;
2215- ; CHECK-MAXBW-LABEL: define i32 @dotp_sext_mul_zext (
2217+ ; CHECK-MAXBW-LABEL: define i32 @not_dotp_sext_mul_zext (
22162218; CHECK-MAXBW-SAME: i64 [[N:%.*]], ptr [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0]] {
22172219; CHECK-MAXBW-NEXT: entry:
22182220; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
@@ -2223,24 +2225,25 @@ define i32 @dotp_sext_mul_zext(i64 %n, ptr %a, i8 %b) {
22232225; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
22242226; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
22252227; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
2228+ ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i16>
2229+ ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[TMP1]], [[TMP1]]
2230+ ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
22262231; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
22272232; CHECK-MAXBW: vector.body:
22282233; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2229- ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE :%.*]], [[VECTOR_BODY]] ]
2230- ; CHECK-MAXBW-NEXT: [[TMP1 :%.*]] = load i16, ptr [[A]], align 2
2231- ; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1 ]], i64 0
2234+ ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5 :%.*]], [[VECTOR_BODY]] ]
2235+ ; CHECK-MAXBW-NEXT: [[TMP4 :%.*]] = load i16, ptr [[A]], align 2
2236+ ; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP4 ]], i64 0
22322237; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
2233- ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i32>
2234- ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul <8 x i32> [[TMP2]], [[TMP2]]
2235- ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP3]])
2236- ; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2237- ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = sext <8 x i32> [[TMP4]] to <8 x i64>
2238+ ; CHECK-MAXBW-NEXT: [[TMP5]] = add <8 x i32> [[VEC_PHI]], [[TMP3]]
2239+ ; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
2240+ ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = sext <8 x i32> [[TMP6]] to <8 x i64>
22382241; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2239- ; CHECK-MAXBW-NEXT: [[TMP6 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2240- ; CHECK-MAXBW-NEXT: br i1 [[TMP6 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2242+ ; CHECK-MAXBW-NEXT: [[TMP8 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2243+ ; CHECK-MAXBW-NEXT: br i1 [[TMP8 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
22412244; CHECK-MAXBW: middle.block:
2242- ; CHECK-MAXBW-NEXT: [[TMP7 :%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PARTIAL_REDUCE ]])
2243- ; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP5 ]], i32 7
2245+ ; CHECK-MAXBW-NEXT: [[TMP9 :%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5 ]])
2246+ ; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP7 ]], i32 7
22442247; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
22452248; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
22462249; CHECK-MAXBW: scalar.ph:
0 commit comments