Skip to content
Merged
30 changes: 28 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ def : WriteRes<WriteLDHi, []> { let Latency = 4; }
// Define customized scheduler read/write types specific to the Neoverse N2.

//===----------------------------------------------------------------------===//

// Define generic 0 micro-op types
// N2Write_0c occupies no processor resource (empty unit list), has zero
// latency and issues zero micro-ops. It is the result selected by the
// NeoverseZeroMove case of the N2Write_0or* variants in this file.
def N2Write_0c : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 0;
}

// Define generic 1 micro-op types

def N2Write_1c_1B : SchedWriteRes<[N2UnitB]> { let Latency = 1; }
Expand Down Expand Up @@ -645,6 +652,21 @@ def N2Write_11c_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
let NumMicroOps = 27;
}

//===----------------------------------------------------------------------===//
// Define predicate-controlled types

// 0 or 1 cycle: resolves to N2Write_0c when the NeoverseZeroMove predicate
// matches the instruction, otherwise falls back to N2Write_1c_1I.
// NOTE(review): NeoverseZeroMove is defined outside the visible hunks; confirm
// exactly which operand forms it accepts.
def N2Write_0or1c_1I : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N2Write_0c]>,
SchedVar<NoSchedPred, [N2Write_1c_1I]>]>;

// 0 or 2 cycles: resolves to N2Write_0c when the NeoverseZeroMove predicate
// matches the instruction, otherwise falls back to N2Write_2c_1V.
def N2Write_0or2c_1V : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N2Write_0c]>,
SchedVar<NoSchedPred, [N2Write_2c_1V]>]>;

// 0 or 3 cycles: resolves to N2Write_0c when the NeoverseZeroMove predicate
// matches the instruction, otherwise falls back to N2Write_3c_1M0.
def N2Write_0or3c_1M0 : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N2Write_0c]>,
SchedVar<NoSchedPred, [N2Write_3c_1M0]>]>;

//===----------------------------------------------------------------------===//
// Define types for arithmetic and logical ops with short shifts
def N2Write_Arith : SchedWriteVariant<[
Expand Down Expand Up @@ -680,6 +702,7 @@ def : InstRW<[N2Write_1c_1B_1S], (instrs BL, BLR)>;
// ALU, basic
// ALU, basic, flagset
def : SchedAlias<WriteI, N2Write_1c_1I>;
// Move wide with zero, immediate (MOVZWi / MOVZXi): zero-cost when the
// NeoverseZeroMove predicate matches, otherwise N2Write_1c_1I.
def : InstRW<[N2Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, N2Write_2c_1M>;
Expand All @@ -691,7 +714,8 @@ def : SchedAlias<WriteISReg, N2Write_Arith>;

// Logical, shift, no flagset
def : InstRW<[N2Write_1c_1I],
(instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
(instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
// ORR, shifted register (ORRWrs / ORRXrs) is listed separately from the other
// logical ops so that register moves expressed as ORR can take the zero-cost
// path when NeoverseZeroMove matches; otherwise N2Write_1c_1I applies.
def : InstRW<[N2Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[N2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
Expand Down Expand Up @@ -882,7 +906,7 @@ def : SchedAlias<WriteFImm, N2Write_2c_1V>;
def : InstRW<[N2Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[N2Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
def : InstRW<[N2Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;

// FP transfer, from gen to high half of vec reg
Expand Down Expand Up @@ -1225,6 +1249,8 @@ def : InstRW<[N2Write_3c_1V0], (instrs BFCVT)>;
// ASIMD unzip/zip
// Handled by SchedAlias<WriteV[dq], ...>

// ASIMD move immediate (MOVID, MOVIv2d_ns): zero-cost when the
// NeoverseZeroMove predicate matches, otherwise N2Write_2c_1V.
def : InstRW<[N2Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;

// ASIMD duplicate, gen reg
def : InstRW<[N2Write_3c_1M0], (instregex "^DUPv.+gpr")>;

Expand Down
26 changes: 23 additions & 3 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,22 @@ def N3Write_16c_16V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0,
let NumMicroOps = 16;
}


//===----------------------------------------------------------------------===//
// Define predicate-controlled types

// 0 or 1 cycle: resolves to N3Write_0c when the NeoverseZeroMove predicate
// matches the instruction, otherwise falls back to N3Write_1c_1I.
// NOTE(review): N3Write_0c and NeoverseZeroMove are not defined in the visible
// hunks; presumably they already exist elsewhere -- confirm.
def N3Write_0or1c_1I : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N3Write_0c]>,
SchedVar<NoSchedPred, [N3Write_1c_1I]>]>;

// 0 or 2 cycles: resolves to N3Write_0c when the NeoverseZeroMove predicate
// matches the instruction, otherwise falls back to N3Write_2c_1V.
def N3Write_0or2c_1V : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N3Write_0c]>,
SchedVar<NoSchedPred, [N3Write_2c_1V]>]>;

// 0 or 3 cycles: resolves to N3Write_0c when the NeoverseZeroMove predicate
// matches the instruction, otherwise falls back to N3Write_3c_1M0.
def N3Write_0or3c_1M0 : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N3Write_0c]>,
SchedVar<NoSchedPred, [N3Write_3c_1M0]>]>;

// Miscellaneous
// -----------------------------------------------------------------------------

Expand Down Expand Up @@ -581,6 +597,7 @@ def : InstRW<[N3Write_1c_1B_1S], (instrs BL, BLR)>;
// Conditional compare
// Conditional select
def : SchedAlias<WriteI, N3Write_1c_1I>;
// Move wide with zero, immediate (MOVZWi / MOVZXi): zero-cost when the
// NeoverseZeroMove predicate matches, otherwise N3Write_1c_1I.
def : InstRW<[N3Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, N3Write_2c_1M>;
Expand Down Expand Up @@ -610,7 +627,8 @@ def : InstRW<[N3Write_1c_1I], (instrs GMI, SUBP, SUBPS)>;

// Logical, shift, no flagset
def : InstRW<[N3Write_1c_1I],
(instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
(instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
// ORR, shifted register (ORRWrs / ORRXrs) is listed separately from the other
// logical ops so that register moves expressed as ORR can take the zero-cost
// path when NeoverseZeroMove matches; otherwise N3Write_1c_1I applies.
def : InstRW<[N3Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[N3Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
Expand Down Expand Up @@ -855,10 +873,11 @@ def : SchedAlias<WriteFCvt, N3Write_3c_1V0>;
def : SchedAlias<WriteFImm, N3Write_2c_1V>;

// FP move, register
def : InstRW<[N3Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
def : InstRW<[N3Write_2c_1V], (instrs FMOVHr)>;
// FMOVSr / FMOVDr are given the zero-cost write unconditionally (no
// SchedWriteVariant / predicate is involved here).
// NOTE(review): the N2 and V1 models in this same change keep FMOVSr/FMOVDr on
// a 2-cycle V write; confirm against the Neoverse N3 optimization guide that
// the unconditional zero cost is intended.
def : InstRW<[N3Write_0c], (instrs FMOVSr, FMOVDr)>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[N3Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
def : InstRW<[N3Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[N3Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
Expand Down Expand Up @@ -1186,6 +1205,7 @@ def : InstRW<[N3Write_3c_1V0], (instrs BFCVT)>;
// ASIMD transpose
// ASIMD unzip/zip
// Covered by WriteV[dq]
// ASIMD move immediate (MOVID, MOVIv2d_ns): zero-cost when the
// NeoverseZeroMove predicate matches, otherwise N3Write_2c_1V.
def : InstRW<[N3Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;

// ASIMD duplicate, gen reg
def : InstRW<[N3Write_3c_1M0], (instregex "^DUPv.+gpr")>;
Expand Down
22 changes: 20 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,21 @@ def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
V1UnitV, V1UnitV, V1UnitV,
V1UnitV, V1UnitV, V1UnitV]>;

//===----------------------------------------------------------------------===//
// Define predicate-controlled types

// 0 or 1 cycle: resolves to V1Write_0c_0Z when the NeoverseZeroMove predicate
// matches the instruction, otherwise falls back to V1Write_1c_1I.
// NOTE(review): V1Write_0c_0Z and NeoverseZeroMove are not defined in the
// visible hunks; presumably they already exist elsewhere -- confirm.
def V1Write_0or1c_1I : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
SchedVar<NoSchedPred, [V1Write_1c_1I]>]>;

// 0 or 2 cycles: resolves to V1Write_0c_0Z when the NeoverseZeroMove predicate
// matches the instruction, otherwise falls back to V1Write_2c_1V.
def V1Write_0or2c_1V : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
SchedVar<NoSchedPred, [V1Write_2c_1V]>]>;

// 0 or 3 cycles: resolves to V1Write_0c_0Z when the NeoverseZeroMove predicate
// matches the instruction, otherwise falls back to V1Write_3c_1M0.
def V1Write_0or3c_1M0 : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
SchedVar<NoSchedPred, [V1Write_3c_1M0]>]>;

//===----------------------------------------------------------------------===//
// Define forwarded types

Expand Down Expand Up @@ -603,6 +618,7 @@ def : InstRW<[V1Write_1c_1I_1Flg],
"^(ADC|SBC)S[WX]r$",
"^ANDS[WX]ri$",
"^(AND|BIC)S[WX]rr$")>;
// Move wide with zero, immediate (MOVZWi / MOVZXi): zero-cost when the
// NeoverseZeroMove predicate matches, otherwise V1Write_1c_1I.
def : InstRW<[V1Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, V1Write_2c_1M>;
Expand All @@ -623,7 +639,8 @@ def : InstRW<[V1WriteISRegS],
(instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;

// Logical, shift, no flagset
def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
// ORR, shifted register (ORRWrs / ORRXrs) is listed separately from the other
// logical ops so that register moves expressed as ORR can take the zero-cost
// path when NeoverseZeroMove matches; otherwise V1Write_1c_1I applies.
def : InstRW<[V1Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[V1Write_2c_1M_1Flg], (instregex "^(AND|BIC)S[WX]rs$")>;
Expand Down Expand Up @@ -805,7 +822,7 @@ def : SchedAlias<WriteFImm, V1Write_2c_1V>;
def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
def : InstRW<[V1Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
Expand Down Expand Up @@ -1122,6 +1139,7 @@ def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
// ASIMD transpose
// ASIMD unzip/zip
// Covered by "SchedAlias (WriteV[dq]...)" above
// ASIMD move immediate (MOVID, MOVIv2d_ns): zero-cost when the
// NeoverseZeroMove predicate matches, otherwise V1Write_2c_1V.
def : InstRW<[V1Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;

// ASIMD duplicate, gen reg
def : InstRW<[V1Write_3c_1M0],
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ def : WriteRes<WriteLDHi, []> { let Latency = 4; }
//===----------------------------------------------------------------------===//

// Define generic 0 micro-op types
def V2Write_0c : SchedWriteRes<[]> { let Latency = 0; }
// V2Write_0c occupies no processor resource (empty unit list) and has zero
// latency; NumMicroOps is set to 0 explicitly so it also issues no micro-ops.
// NOTE(review): without the explicit setting NumMicroOps takes the
// SchedWriteRes default -- confirm that default in TargetSchedule.td.
def V2Write_0c : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 0;
}

// Define generic 1 micro-op types

Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ def : WriteRes<WriteLDHi, []> { let Latency = 4; }
//===----------------------------------------------------------------------===//

// Define generic 0 micro-op types
def V3Write_0c : SchedWriteRes<[]> { let Latency = 0; }
// V3Write_0c occupies no processor resource (empty unit list) and has zero
// latency; NumMicroOps is set to 0 explicitly so it also issues no micro-ops.
// NOTE(review): without the explicit setting NumMicroOps takes the
// SchedWriteRes default -- confirm that default in TargetSchedule.td.
def V3Write_0c : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 0;
}

// Define generic 1 micro-op types

Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ def : WriteRes<WriteLDHi, []> { let Latency = 4; }
//===----------------------------------------------------------------------===//

// Define generic 0 micro-op types
def V3AEWrite_0c : SchedWriteRes<[]> { let Latency = 0; }
// V3AEWrite_0c occupies no processor resource (empty unit list) and has zero
// latency; NumMicroOps is set to 0 explicitly so it also issues no micro-ops.
// NOTE(review): without the explicit setting NumMicroOps takes the
// SchedWriteRes default -- confirm that default in TargetSchedule.td.
def V3AEWrite_0c : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 0;
}

// Define generic 1 micro-op types

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/pr164181.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: mov x10, xzr
; CHECK-NEXT: mov w23, wzr
; CHECK-NEXT: mov w30, wzr
; CHECK-NEXT: ldrb w19, [sp, #240]
; CHECK-NEXT: mov w25, wzr
; CHECK-NEXT: mov x24, xzr
; CHECK-NEXT: str w8, [sp, #108] // 4-byte Folded Spill
; CHECK-NEXT: mov x3, x26
; CHECK-NEXT: str w8, [sp, #108] // 4-byte Folded Spill
; CHECK-NEXT: ldrb w19, [sp, #240]
; CHECK-NEXT: ldp x9, x8, [sp, #344]
; CHECK-NEXT: str w12, [sp, #92] // 4-byte Folded Spill
; CHECK-NEXT: mov w12, #1 // =0x1
Expand Down Expand Up @@ -123,8 +123,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: mov x12, #-30 // =0xffffffffffffffe2
; CHECK-NEXT: add x19, x4, w8, sxtw #2
; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: csel x12, x24, x12, lo
; CHECK-NEXT: mov w4, w30
; CHECK-NEXT: csel x12, x24, x12, lo
; CHECK-NEXT: str x12, [sp, #56] // 8-byte Folded Spill
; CHECK-NEXT: b .LBB0_8
; CHECK-NEXT: .p2align 5, , 16
Expand Down Expand Up @@ -341,8 +341,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: mov x24, x27
; CHECK-NEXT: lsl x23, x14, #1
; CHECK-NEXT: mov x27, #-1 // =0xffffffffffffffff
; CHECK-NEXT: madd x14, x14, x3, x11
; CHECK-NEXT: mov w28, w30
; CHECK-NEXT: madd x14, x14, x3, x11
; CHECK-NEXT: mov w3, #-7680 // =0xffffe200
; CHECK-NEXT: b .LBB0_39
; CHECK-NEXT: .p2align 5, , 16
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
Original file line number Diff line number Diff line change
Expand Up @@ -2508,14 +2508,14 @@ drps
# CHECK-NEXT: 1 2 0.50 bics x3, xzr, x3, lsl #1
# CHECK-NEXT: 1 2 0.50 tst w3, w7, lsl #31
# CHECK-NEXT: 1 2 0.50 tst x2, x20, asr #2
# CHECK-NEXT: 1 1 0.25 mov x3, x6
# CHECK-NEXT: 1 1 0.25 mov x3, xzr
# CHECK-NEXT: 1 1 0.25 mov wzr, w2
# CHECK-NEXT: 1 1 0.25 mov w3, w5
# CHECK-NEXT: 0 0 0.00 mov x3, x6
# CHECK-NEXT: 0 0 0.00 mov x3, xzr
# CHECK-NEXT: 0 0 0.00 mov wzr, w2
# CHECK-NEXT: 0 0 0.00 mov w3, w5
# CHECK-NEXT: 1 1 0.25 movz w2, #0, lsl #16
# CHECK-NEXT: 1 1 0.25 mov w2, #-1235
# CHECK-NEXT: 1 1 0.25 mov x2, #5299989643264
# CHECK-NEXT: 1 1 0.25 mov x2, #0
# CHECK-NEXT: 0 0 0.00 mov x2, #0
# CHECK-NEXT: 1 1 0.25 movk w3, #0
# CHECK-NEXT: 1 1 0.25 movz x4, #0, lsl #16
# CHECK-NEXT: 1 1 0.25 movk w5, #0, lsl #16
Expand Down Expand Up @@ -2557,7 +2557,7 @@ drps

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8]
# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 215.50 85.50
# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 515.75 249.75 161.25 161.25 215.50 85.50

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions:
Expand Down Expand Up @@ -3692,14 +3692,14 @@ drps
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - bics x3, xzr, x3, lsl #1
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - tst w3, w7, lsl #31
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - tst x2, x20, asr #2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x3, x6
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x3, xzr
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov wzr, w2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov w3, w5
# CHECK-NEXT: - - - - - - - - - - - - - mov x3, x6
# CHECK-NEXT: - - - - - - - - - - - - - mov x3, xzr
# CHECK-NEXT: - - - - - - - - - - - - - mov wzr, w2
# CHECK-NEXT: - - - - - - - - - - - - - mov w3, w5
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movz w2, #0, lsl #16
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov w2, #-1235
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x2, #5299989643264
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x2, #0
# CHECK-NEXT: - - - - - - - - - - - - - mov x2, #0
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movk w3, #0
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movz x4, #0, lsl #16
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movk w5, #0, lsl #16
Expand Down
Loading