Skip to content

Commit 5bb0c1d

Browse files
committed
Address review
1 parent cb37553 commit 5bb0c1d

File tree

3 files changed

+280
-99
lines changed

3 files changed

+280
-99
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2721,6 +2721,238 @@ for.body: ; preds = %for.body.lr.ph, %fo
27212721
br i1 %exitcond.not, label %for.cond.for.cond.cleanup_crit_edge, label %for.body, !llvm.loop !8
27222722
}
27232723

2724+
; Scalar reduction loop over 1024 pairs of i8 elements loaded from %a and %b.
; The element from %a is zero-extended to i16 and the element from %b is
; sign-extended to i16, so the two inner extends differ. Their i16 product is
; zero-extended to i32 and added to the i32 accumulator, which is returned.
; The expected output for each prefix below keeps a full-width <16 x i32>
; accumulator updated with an ordinary vector add and reduced once with
; llvm.vector.reduce.add in middle.block; no partial-reduction call is expected.
; NOTE(review): the "not_dotp" name suggests this is a negative test for
; dot-product partial-reduction matching; confirm against the RUN lines, which
; are not visible in this hunk.
define i32 @not_dotp_zext_mul_different_inner_extends(ptr %a, ptr %b) {
2725+
; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_zext_mul_different_inner_extends(
2726+
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2727+
; CHECK-INTERLEAVE1-NEXT: entry:
2728+
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
2729+
; CHECK-INTERLEAVE1: vector.ph:
2730+
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
2731+
; CHECK-INTERLEAVE1: vector.body:
2732+
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2733+
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2734+
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2735+
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2736+
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2737+
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2738+
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2739+
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
2740+
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP3]], [[TMP1]]
2741+
; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = zext <16 x i16> [[TMP4]] to <16 x i32>
2742+
; CHECK-INTERLEAVE1-NEXT: [[TMP6]] = add <16 x i32> [[TMP5]], [[VEC_PHI]]
2743+
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2744+
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2745+
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2746+
; CHECK-INTERLEAVE1: middle.block:
2747+
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]])
2748+
; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]]
2749+
; CHECK-INTERLEAVE1: for.exit:
2750+
; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP8]]
2751+
;
2752+
; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_zext_mul_different_inner_extends(
2753+
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2754+
; CHECK-INTERLEAVED-NEXT: entry:
2755+
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
2756+
; CHECK-INTERLEAVED: vector.ph:
2757+
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
2758+
; CHECK-INTERLEAVED: vector.body:
2759+
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2760+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
2761+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
2762+
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2763+
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
2764+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2765+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
2766+
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2767+
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i16>
2768+
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2769+
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16
2770+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
2771+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
2772+
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i16>
2773+
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i16>
2774+
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = mul <16 x i16> [[TMP6]], [[TMP2]]
2775+
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = mul <16 x i16> [[TMP7]], [[TMP3]]
2776+
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = zext <16 x i16> [[TMP8]] to <16 x i32>
2777+
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = zext <16 x i16> [[TMP9]] to <16 x i32>
2778+
; CHECK-INTERLEAVED-NEXT: [[TMP12]] = add <16 x i32> [[TMP10]], [[VEC_PHI]]
2779+
; CHECK-INTERLEAVED-NEXT: [[TMP13]] = add <16 x i32> [[TMP11]], [[VEC_PHI1]]
2780+
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
2781+
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2782+
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2783+
; CHECK-INTERLEAVED: middle.block:
2784+
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP13]], [[TMP12]]
2785+
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]])
2786+
; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]]
2787+
; CHECK-INTERLEAVED: for.exit:
2788+
; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP15]]
2789+
;
2790+
; CHECK-MAXBW-LABEL: define i32 @not_dotp_zext_mul_different_inner_extends(
2791+
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2792+
; CHECK-MAXBW-NEXT: entry:
2793+
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
2794+
; CHECK-MAXBW: vector.ph:
2795+
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
2796+
; CHECK-MAXBW: vector.body:
2797+
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2798+
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2799+
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2800+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2801+
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2802+
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2803+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2804+
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
2805+
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP3]], [[TMP1]]
2806+
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = zext <16 x i16> [[TMP4]] to <16 x i32>
2807+
; CHECK-MAXBW-NEXT: [[TMP6]] = add <16 x i32> [[TMP5]], [[VEC_PHI]]
2808+
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2809+
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2810+
; CHECK-MAXBW-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2811+
; CHECK-MAXBW: middle.block:
2812+
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]])
2813+
; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]]
2814+
; CHECK-MAXBW: for.exit:
2815+
; CHECK-MAXBW-NEXT: ret i32 [[TMP8]]
2816+
;
2817+
entry:
2818+
br label %for.body
2819+
2820+
for.body: ; preds = %for.body, %entry
2821+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
2822+
%accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
2823+
%gep.a = getelementptr i8, ptr %a, i64 %iv
2824+
%load.a = load i8, ptr %gep.a, align 1
2825+
%ext.a = zext i8 %load.a to i16
2826+
%gep.b = getelementptr i8, ptr %b, i64 %iv
2827+
%load.b = load i8, ptr %gep.b, align 1
2828+
%ext.b = sext i8 %load.b to i16
2829+
%mul = mul i16 %ext.b, %ext.a
2830+
%mul.ext = zext i16 %mul to i32
2831+
%add = add i32 %mul.ext, %accum
2832+
%iv.next = add i64 %iv, 1
2833+
%exitcond.not = icmp eq i64 %iv.next, 1024
2834+
br i1 %exitcond.not, label %for.exit, label %for.body
2835+
2836+
for.exit: ; preds = %for.body
2837+
ret i32 %add
2838+
}
2839+
2840+
; Scalar reduction loop over 1024 pairs of i8 elements loaded from %a and %b.
; The element from %a is zero-extended to i16 and the element from %b is
; sign-extended to i16, so the two inner extends differ. Their i16 product is
; sign-extended to i32 and added to the i32 accumulator, which is returned.
; The expected output for each prefix below keeps a full-width <16 x i32>
; accumulator updated with an ordinary vector add and reduced once with
; llvm.vector.reduce.add in middle.block; no partial-reduction call is expected.
; NOTE(review): the "not_dotp" name suggests this is a negative test for
; dot-product partial-reduction matching; confirm against the RUN lines, which
; are not visible in this hunk.
define i32 @not_dotp_sext_mul_different_inner_extends(ptr %a, ptr %b) {
2841+
; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_sext_mul_different_inner_extends(
2842+
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2843+
; CHECK-INTERLEAVE1-NEXT: entry:
2844+
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
2845+
; CHECK-INTERLEAVE1: vector.ph:
2846+
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
2847+
; CHECK-INTERLEAVE1: vector.body:
2848+
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2849+
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2850+
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2851+
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2852+
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2853+
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2854+
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2855+
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
2856+
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP3]], [[TMP1]]
2857+
; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = sext <16 x i16> [[TMP4]] to <16 x i32>
2858+
; CHECK-INTERLEAVE1-NEXT: [[TMP6]] = add <16 x i32> [[TMP5]], [[VEC_PHI]]
2859+
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2860+
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2861+
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2862+
; CHECK-INTERLEAVE1: middle.block:
2863+
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]])
2864+
; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]]
2865+
; CHECK-INTERLEAVE1: for.exit:
2866+
; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP8]]
2867+
;
2868+
; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_sext_mul_different_inner_extends(
2869+
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2870+
; CHECK-INTERLEAVED-NEXT: entry:
2871+
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
2872+
; CHECK-INTERLEAVED: vector.ph:
2873+
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
2874+
; CHECK-INTERLEAVED: vector.body:
2875+
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2876+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
2877+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
2878+
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2879+
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
2880+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2881+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
2882+
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2883+
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i16>
2884+
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2885+
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16
2886+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
2887+
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
2888+
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i16>
2889+
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i16>
2890+
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = mul <16 x i16> [[TMP6]], [[TMP2]]
2891+
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = mul <16 x i16> [[TMP7]], [[TMP3]]
2892+
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = sext <16 x i16> [[TMP8]] to <16 x i32>
2893+
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = sext <16 x i16> [[TMP9]] to <16 x i32>
2894+
; CHECK-INTERLEAVED-NEXT: [[TMP12]] = add <16 x i32> [[TMP10]], [[VEC_PHI]]
2895+
; CHECK-INTERLEAVED-NEXT: [[TMP13]] = add <16 x i32> [[TMP11]], [[VEC_PHI1]]
2896+
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
2897+
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2898+
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
2899+
; CHECK-INTERLEAVED: middle.block:
2900+
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP13]], [[TMP12]]
2901+
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]])
2902+
; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]]
2903+
; CHECK-INTERLEAVED: for.exit:
2904+
; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP15]]
2905+
;
2906+
; CHECK-MAXBW-LABEL: define i32 @not_dotp_sext_mul_different_inner_extends(
2907+
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
2908+
; CHECK-MAXBW-NEXT: entry:
2909+
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
2910+
; CHECK-MAXBW: vector.ph:
2911+
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
2912+
; CHECK-MAXBW: vector.body:
2913+
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2914+
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2915+
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2916+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
2917+
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
2918+
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
2919+
; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
2920+
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
2921+
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP3]], [[TMP1]]
2922+
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = sext <16 x i16> [[TMP4]] to <16 x i32>
2923+
; CHECK-MAXBW-NEXT: [[TMP6]] = add <16 x i32> [[TMP5]], [[VEC_PHI]]
2924+
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2925+
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2926+
; CHECK-MAXBW-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2927+
; CHECK-MAXBW: middle.block:
2928+
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]])
2929+
; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]]
2930+
; CHECK-MAXBW: for.exit:
2931+
; CHECK-MAXBW-NEXT: ret i32 [[TMP8]]
2932+
;
2933+
entry:
2934+
br label %for.body
2935+
2936+
for.body: ; preds = %for.body, %entry
2937+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
2938+
%accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
2939+
%gep.a = getelementptr i8, ptr %a, i64 %iv
2940+
%load.a = load i8, ptr %gep.a, align 1
2941+
%ext.a = zext i8 %load.a to i16
2942+
%gep.b = getelementptr i8, ptr %b, i64 %iv
2943+
%load.b = load i8, ptr %gep.b, align 1
2944+
%ext.b = sext i8 %load.b to i16
2945+
%mul = mul i16 %ext.b, %ext.a
2946+
%mul.ext = sext i16 %mul to i32
2947+
%add = add i32 %mul.ext, %accum
2948+
%iv.next = add i64 %iv, 1
2949+
%exitcond.not = icmp eq i64 %iv.next, 1024
2950+
br i1 %exitcond.not, label %for.exit, label %for.body
2951+
2952+
for.exit: ; preds = %for.body
2953+
ret i32 %add
2954+
}
2955+
27242956
!7 = distinct !{!7, !8, !9, !10}
27252957
!8 = !{!"llvm.loop.mustprogress"}
27262958
!9 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}

llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -184,32 +184,6 @@ define i32 @print_partial_reduction_ext_mul(ptr %a, ptr %b) {
184184
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]>
185185
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
186186
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
187-
; CHECK-EMPTY:
188-
; CHECK-NEXT: ir-bb<exit>:
189-
; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RED_RESULT]]> from middle.block)
190-
; CHECK-NEXT: No successors
191-
; CHECK-EMPTY:
192-
; CHECK-NEXT: scalar.ph:
193-
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VEC_TC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
194-
; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RED_RESULT]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
195-
; CHECK-NEXT: Successor(s): ir-bb<for.body>
196-
; CHECK-EMPTY:
197-
; CHECK-NEXT: ir-bb<for.body>:
198-
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
199-
; CHECK-NEXT: IR %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
200-
; CHECK-NEXT: IR %gep.a = getelementptr i8, ptr %a, i64 %iv
201-
; CHECK-NEXT: IR %load.a = load i8, ptr %gep.a, align 1
202-
; CHECK-NEXT: IR %ext.a = zext i8 %load.a to i16
203-
; CHECK-NEXT: IR %gep.b = getelementptr i8, ptr %b, i64 %iv
204-
; CHECK-NEXT: IR %load.b = load i8, ptr %gep.b, align 1
205-
; CHECK-NEXT: IR %ext.b = zext i8 %load.b to i16
206-
; CHECK-NEXT: IR %mul = mul i16 %ext.b, %ext.a
207-
; CHECK-NEXT: IR %mul.ext = zext i16 %mul to i32
208-
; CHECK-NEXT: IR %add = add i32 %mul.ext, %accum
209-
; CHECK-NEXT: IR %iv.next = add i64 %iv, 1
210-
; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
211-
; CHECK-NEXT: No successors
212-
; CHECK-NEXT: }
213187
entry:
214188
br label %for.body
215189

0 commit comments

Comments
 (0)