@@ -2721,6 +2721,238 @@ for.body: ; preds = %for.body.lr.ph, %fo
27212721 br i1 %exitcond.not , label %for.cond.for.cond.cleanup_crit_edge , label %for.body , !llvm.loop !8
27222722}
27232723
; Add-reduction over 1024 elements: each step loads one i8 from %a and one i8
; from %b, widens them to i16 with different extends (zext for %a, sext for
; %b), multiplies in i16, zero-extends the product to i32 and adds it to the
; i32 accumulator.
; The expected output for every check prefix keeps the <16 x i16> multiply and
; uses an ordinary <16 x i32> add plus llvm.vector.reduce.add; no
; partial-reduction intrinsic appears in the CHECK lines.
; NOTE(review): presumably the mixed zext/sext inner extends under an outer
; zext are what must keep this from being treated as a dot product (hence the
; "not_dotp" name) - confirm against the vectorizer change this test covers.
2724+ define i32 @not_dotp_zext_mul_different_inner_extends (ptr %a , ptr %b ) {
2725+ ; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_zext_mul_different_inner_extends(
2726+ ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2727+ ; CHECK-INTERLEAVE1-NEXT: entry:
2728+ ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
2729+ ; CHECK-INTERLEAVE1: vector.ph:
2730+ ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
2731+ ; CHECK-INTERLEAVE1: vector.body:
2732+ ; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2733+ ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2734+ ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2735+ ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2736+ ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2737+ ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2738+ ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2739+ ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
2740+ ; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP3]], [[TMP1]]
2741+ ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = zext <16 x i16> [[TMP4]] to <16 x i32>
2742+ ; CHECK-INTERLEAVE1-NEXT: [[TMP6]] = add <16 x i32> [[TMP5]], [[VEC_PHI]]
2743+ ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2744+ ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2745+ ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2746+ ; CHECK-INTERLEAVE1: middle.block:
2747+ ; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]])
2748+ ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]]
2749+ ; CHECK-INTERLEAVE1: for.exit:
2750+ ; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP8]]
2751+ ;
2752+ ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_zext_mul_different_inner_extends(
2753+ ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2754+ ; CHECK-INTERLEAVED-NEXT: entry:
2755+ ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
2756+ ; CHECK-INTERLEAVED: vector.ph:
2757+ ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
2758+ ; CHECK-INTERLEAVED: vector.body:
2759+ ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2760+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
2761+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
2762+ ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2763+ ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
2764+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2765+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
2766+ ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2767+ ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i16>
2768+ ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2769+ ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16
2770+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
2771+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
2772+ ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i16>
2773+ ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i16>
2774+ ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = mul <16 x i16> [[TMP6]], [[TMP2]]
2775+ ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = mul <16 x i16> [[TMP7]], [[TMP3]]
2776+ ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = zext <16 x i16> [[TMP8]] to <16 x i32>
2777+ ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = zext <16 x i16> [[TMP9]] to <16 x i32>
2778+ ; CHECK-INTERLEAVED-NEXT: [[TMP12]] = add <16 x i32> [[TMP10]], [[VEC_PHI]]
2779+ ; CHECK-INTERLEAVED-NEXT: [[TMP13]] = add <16 x i32> [[TMP11]], [[VEC_PHI1]]
2780+ ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
2781+ ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2782+ ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2783+ ; CHECK-INTERLEAVED: middle.block:
2784+ ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP13]], [[TMP12]]
2785+ ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]])
2786+ ; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]]
2787+ ; CHECK-INTERLEAVED: for.exit:
2788+ ; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP15]]
2789+ ;
2790+ ; CHECK-MAXBW-LABEL: define i32 @not_dotp_zext_mul_different_inner_extends(
2791+ ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2792+ ; CHECK-MAXBW-NEXT: entry:
2793+ ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
2794+ ; CHECK-MAXBW: vector.ph:
2795+ ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
2796+ ; CHECK-MAXBW: vector.body:
2797+ ; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2798+ ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2799+ ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2800+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2801+ ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2802+ ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2803+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2804+ ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
2805+ ; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP3]], [[TMP1]]
2806+ ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = zext <16 x i16> [[TMP4]] to <16 x i32>
2807+ ; CHECK-MAXBW-NEXT: [[TMP6]] = add <16 x i32> [[TMP5]], [[VEC_PHI]]
2808+ ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2809+ ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2810+ ; CHECK-MAXBW-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2811+ ; CHECK-MAXBW: middle.block:
2812+ ; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]])
2813+ ; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]]
2814+ ; CHECK-MAXBW: for.exit:
2815+ ; CHECK-MAXBW-NEXT: ret i32 [[TMP8]]
2816+ ;
2817+ entry:
2818+ br label %for.body
2819+
2820+ for.body: ; preds = %for.body, %entry
2821+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ]
2822+ %accum = phi i32 [ 0 , %entry ], [ %add , %for.body ]
2823+ %gep.a = getelementptr i8 , ptr %a , i64 %iv
2824+ %load.a = load i8 , ptr %gep.a , align 1
2825+ %ext.a = zext i8 %load.a to i16 ; inner extend of the %a element: zero-extend
2826+ %gep.b = getelementptr i8 , ptr %b , i64 %iv
2827+ %load.b = load i8 , ptr %gep.b , align 1
2828+ %ext.b = sext i8 %load.b to i16 ; inner extend of the %b element: sign-extend (differs from %ext.a)
2829+ %mul = mul i16 %ext.b , %ext.a ; product is computed in i16, before the outer extend
2830+ %mul.ext = zext i16 %mul to i32 ; outer extend: zero-extend the i16 product to the accumulator type
2831+ %add = add i32 %mul.ext , %accum
2832+ %iv.next = add i64 %iv , 1
2833+ %exitcond.not = icmp eq i64 %iv.next , 1024 ; leave the loop after 1024 iterations
2834+ br i1 %exitcond.not , label %for.exit , label %for.body
2835+
2836+ for.exit: ; preds = %for.body
2837+ ret i32 %add
2838+ }
2839+
; Add-reduction over 1024 elements: each step loads one i8 from %a and one i8
; from %b, widens them to i16 with different extends (zext for %a, sext for
; %b), multiplies in i16, sign-extends the product to i32 and adds it to the
; i32 accumulator.
; The expected output for every check prefix keeps the <16 x i16> multiply and
; uses an ordinary <16 x i32> add plus llvm.vector.reduce.add; no
; partial-reduction intrinsic appears in the CHECK lines.
; NOTE(review): presumably the mixed zext/sext inner extends under an outer
; sext are what must keep this from being treated as a dot product (hence the
; "not_dotp" name) - confirm against the vectorizer change this test covers.
2840+ define i32 @not_dotp_sext_mul_different_inner_extends (ptr %a , ptr %b ) {
2841+ ; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_sext_mul_different_inner_extends(
2842+ ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2843+ ; CHECK-INTERLEAVE1-NEXT: entry:
2844+ ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
2845+ ; CHECK-INTERLEAVE1: vector.ph:
2846+ ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
2847+ ; CHECK-INTERLEAVE1: vector.body:
2848+ ; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2849+ ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2850+ ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2851+ ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2852+ ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2853+ ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2854+ ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2855+ ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
2856+ ; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP3]], [[TMP1]]
2857+ ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = sext <16 x i16> [[TMP4]] to <16 x i32>
2858+ ; CHECK-INTERLEAVE1-NEXT: [[TMP6]] = add <16 x i32> [[TMP5]], [[VEC_PHI]]
2859+ ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2860+ ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2861+ ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2862+ ; CHECK-INTERLEAVE1: middle.block:
2863+ ; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]])
2864+ ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]]
2865+ ; CHECK-INTERLEAVE1: for.exit:
2866+ ; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP8]]
2867+ ;
2868+ ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_sext_mul_different_inner_extends(
2869+ ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2870+ ; CHECK-INTERLEAVED-NEXT: entry:
2871+ ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
2872+ ; CHECK-INTERLEAVED: vector.ph:
2873+ ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
2874+ ; CHECK-INTERLEAVED: vector.body:
2875+ ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2876+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
2877+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
2878+ ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2879+ ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
2880+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2881+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
2882+ ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2883+ ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i16>
2884+ ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2885+ ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16
2886+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
2887+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
2888+ ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i16>
2889+ ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i16>
2890+ ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = mul <16 x i16> [[TMP6]], [[TMP2]]
2891+ ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = mul <16 x i16> [[TMP7]], [[TMP3]]
2892+ ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = sext <16 x i16> [[TMP8]] to <16 x i32>
2893+ ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = sext <16 x i16> [[TMP9]] to <16 x i32>
2894+ ; CHECK-INTERLEAVED-NEXT: [[TMP12]] = add <16 x i32> [[TMP10]], [[VEC_PHI]]
2895+ ; CHECK-INTERLEAVED-NEXT: [[TMP13]] = add <16 x i32> [[TMP11]], [[VEC_PHI1]]
2896+ ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
2897+ ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2898+ ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
2899+ ; CHECK-INTERLEAVED: middle.block:
2900+ ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP13]], [[TMP12]]
2901+ ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]])
2902+ ; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]]
2903+ ; CHECK-INTERLEAVED: for.exit:
2904+ ; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP15]]
2905+ ;
2906+ ; CHECK-MAXBW-LABEL: define i32 @not_dotp_sext_mul_different_inner_extends(
2907+ ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2908+ ; CHECK-MAXBW-NEXT: entry:
2909+ ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
2910+ ; CHECK-MAXBW: vector.ph:
2911+ ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
2912+ ; CHECK-MAXBW: vector.body:
2913+ ; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2914+ ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2915+ ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2916+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2917+ ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2918+ ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2919+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2920+ ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
2921+ ; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP3]], [[TMP1]]
2922+ ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = sext <16 x i16> [[TMP4]] to <16 x i32>
2923+ ; CHECK-MAXBW-NEXT: [[TMP6]] = add <16 x i32> [[TMP5]], [[VEC_PHI]]
2924+ ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2925+ ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2926+ ; CHECK-MAXBW-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2927+ ; CHECK-MAXBW: middle.block:
2928+ ; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]])
2929+ ; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]]
2930+ ; CHECK-MAXBW: for.exit:
2931+ ; CHECK-MAXBW-NEXT: ret i32 [[TMP8]]
2932+ ;
2933+ entry:
2934+ br label %for.body
2935+
2936+ for.body: ; preds = %for.body, %entry
2937+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ]
2938+ %accum = phi i32 [ 0 , %entry ], [ %add , %for.body ]
2939+ %gep.a = getelementptr i8 , ptr %a , i64 %iv
2940+ %load.a = load i8 , ptr %gep.a , align 1
2941+ %ext.a = zext i8 %load.a to i16 ; inner extend of the %a element: zero-extend
2942+ %gep.b = getelementptr i8 , ptr %b , i64 %iv
2943+ %load.b = load i8 , ptr %gep.b , align 1
2944+ %ext.b = sext i8 %load.b to i16 ; inner extend of the %b element: sign-extend (differs from %ext.a)
2945+ %mul = mul i16 %ext.b , %ext.a ; product is computed in i16, before the outer extend
2946+ %mul.ext = sext i16 %mul to i32 ; outer extend: sign-extend the i16 product to the accumulator type
2947+ %add = add i32 %mul.ext , %accum
2948+ %iv.next = add i64 %iv , 1
2949+ %exitcond.not = icmp eq i64 %iv.next , 1024 ; leave the loop after 1024 iterations
2950+ br i1 %exitcond.not , label %for.exit , label %for.body
2951+
2952+ for.exit: ; preds = %for.body
2953+ ret i32 %add
2954+ }
2955+
27242956!7 = distinct !{!7 , !8 , !9 , !10 }
27252957!8 = !{!"llvm.loop.mustprogress" }
27262958!9 = !{!"llvm.loop.vectorize.predicate.enable" , i1 true }
0 commit comments