Skip to content
Merged
28 changes: 25 additions & 3 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ def : WriteRes<WriteLDHi, []> { let Latency = 4; }
// Define customized scheduler read/write types specific to the Neoverse N2.

//===----------------------------------------------------------------------===//

// Define generic zero-latency types.
// N2Write_0c has Latency = 0 and an empty processor-resource list, so it
// occupies no execution unit. NumMicroOps is not overridden here, so it keeps
// the SchedWriteRes default rather than being forced to 0.
def N2Write_0c : SchedWriteRes<[]> { let Latency = 0; }

// Define generic 1 micro-op types

def N2Write_1c_1B : SchedWriteRes<[N2UnitB]> { let Latency = 1; }
Expand Down Expand Up @@ -645,6 +649,21 @@ def N2Write_11c_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
let NumMicroOps = 27;
}

//===----------------------------------------------------------------------===//
// Define predicate-controlled types

// Integer write that is modeled either as zero-latency or as a 1-cycle op.
// The variants are listed in priority order: when the NeoverseZeroMove
// predicate (defined outside this section) matches, N2Write_0c is used;
// otherwise the NoSchedPred fallback selects N2Write_1c_1I.
def N2Write_0or1c_1I : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N2Write_0c]>,
SchedVar<NoSchedPred, [N2Write_1c_1I]>]>;

// Vector/FP write that is modeled either as zero-latency or as N2Write_2c_1V.
// NeoverseZeroMove (defined outside this section) selects the zero-latency
// N2Write_0c; every other instance falls through to the NoSchedPred variant.
def N2Write_0or2c_1V : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N2Write_0c]>,
SchedVar<NoSchedPred, [N2Write_2c_1V]>]>;

// Write that is modeled either as zero-latency or as N2Write_3c_1M0.
// NeoverseZeroMove (defined outside this section) selects the zero-latency
// N2Write_0c; every other instance falls through to the NoSchedPred variant.
def N2Write_0or3c_1M0 : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N2Write_0c]>,
SchedVar<NoSchedPred, [N2Write_3c_1M0]>]>;

//===----------------------------------------------------------------------===//
// Define types for arithmetic and logical ops with short shifts
def N2Write_Arith : SchedWriteVariant<[
Expand Down Expand Up @@ -680,6 +699,7 @@ def : InstRW<[N2Write_1c_1B_1S], (instrs BL, BLR)>;
// ALU, basic
// ALU, basic, flagset
def : SchedAlias<WriteI, N2Write_1c_1I>;
// MOVZ (W and X immediate forms) uses the predicate-controlled write instead
// of the plain WriteI alias, so instances matched by NeoverseZeroMove are
// modeled with zero latency and the rest keep the 1-cycle integer cost.
def : InstRW<[N2Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, N2Write_2c_1M>;
Expand All @@ -691,7 +711,8 @@ def : SchedAlias<WriteISReg, N2Write_Arith>;

// Logical, shift, no flagset
def : InstRW<[N2Write_1c_1I],
(instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
(instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
def : InstRW<[N2Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[N2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
Expand Down Expand Up @@ -882,8 +903,7 @@ def : SchedAlias<WriteFImm, N2Write_2c_1V>;
def : InstRW<[N2Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[N2Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;
def : InstRW<[N2Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[N2Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
Expand Down Expand Up @@ -1225,6 +1245,8 @@ def : InstRW<[N2Write_3c_1V0], (instrs BFCVT)>;
// ASIMD unzip/zip
// Handled by SchedAlias<WriteV[dq], ...>

// ASIMD move, immediate (MOVID and MOVIv2d_ns only): zero-latency when the
// NeoverseZeroMove predicate matches, otherwise N2Write_2c_1V.
// NOTE(review): this entry is not part of the unzip/zip group above.
def : InstRW<[N2Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;

// ASIMD duplicate, gen reg
def : InstRW<[N2Write_3c_1M0], (instregex "^DUPv.+gpr")>;

Expand Down
28 changes: 24 additions & 4 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def : WriteRes<WriteHint, []> { let Latency = 1; }

def N3Write_0c : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 0;
let NumMicroOps = 1;
}

def N3Write_4c : SchedWriteRes<[]> {
Expand Down Expand Up @@ -553,6 +553,22 @@ def N3Write_16c_16V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0,
let NumMicroOps = 16;
}


//===----------------------------------------------------------------------===//
// Define predicate-controlled types

// Integer write that is modeled either as zero-latency or as a 1-cycle op.
// The variants are listed in priority order: when the NeoverseZeroMove
// predicate (defined outside this section) matches, N3Write_0c is used;
// otherwise the NoSchedPred fallback selects N3Write_1c_1I.
def N3Write_0or1c_1I : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N3Write_0c]>,
SchedVar<NoSchedPred, [N3Write_1c_1I]>]>;

// Vector/FP write that is modeled either as zero-latency or as N3Write_2c_1V.
// NeoverseZeroMove (defined outside this section) selects the zero-latency
// N3Write_0c; every other instance falls through to the NoSchedPred variant.
def N3Write_0or2c_1V : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N3Write_0c]>,
SchedVar<NoSchedPred, [N3Write_2c_1V]>]>;

// Write that is modeled either as zero-latency or as N3Write_3c_1M0.
// NeoverseZeroMove (defined outside this section) selects the zero-latency
// N3Write_0c; every other instance falls through to the NoSchedPred variant.
def N3Write_0or3c_1M0 : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [N3Write_0c]>,
SchedVar<NoSchedPred, [N3Write_3c_1M0]>]>;

// Miscellaneous
// -----------------------------------------------------------------------------

Expand Down Expand Up @@ -581,6 +597,7 @@ def : InstRW<[N3Write_1c_1B_1S], (instrs BL, BLR)>;
// Conditional compare
// Conditional select
def : SchedAlias<WriteI, N3Write_1c_1I>;
// MOVZ (W and X immediate forms) uses the predicate-controlled write instead
// of the plain WriteI alias: zero-latency when NeoverseZeroMove matches,
// otherwise N3Write_1c_1I.
def : InstRW<[N3Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, N3Write_2c_1M>;
Expand Down Expand Up @@ -610,7 +627,8 @@ def : InstRW<[N3Write_1c_1I], (instrs GMI, SUBP, SUBPS)>;

// Logical, shift, no flagset
def : InstRW<[N3Write_1c_1I],
(instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
(instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
def : InstRW<[N3Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[N3Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
Expand Down Expand Up @@ -855,10 +873,11 @@ def : SchedAlias<WriteFCvt, N3Write_3c_1V0>;
def : SchedAlias<WriteFImm, N3Write_2c_1V>;

// FP move, register
def : InstRW<[N3Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
def : InstRW<[N3Write_2c_1V], (instrs FMOVHr)>;
def : InstRW<[N3Write_0c], (instrs FMOVSr, FMOVDr)>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[N3Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
def : InstRW<[N3Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[N3Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
Expand Down Expand Up @@ -1186,6 +1205,7 @@ def : InstRW<[N3Write_3c_1V0], (instrs BFCVT)>;
// ASIMD transpose
// ASIMD unzip/zip
// Covered by WriteV[dq]
// ASIMD move, immediate (MOVID and MOVIv2d_ns only): zero-latency when the
// NeoverseZeroMove predicate matches, otherwise N3Write_2c_1V.
// NOTE(review): this entry is not one of the WriteV[dq]-covered groups above.
def : InstRW<[N3Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;

// ASIMD duplicate, gen reg
def : InstRW<[N3Write_3c_1M0], (instregex "^DUPv.+gpr")>;
Expand Down
24 changes: 21 additions & 3 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def : WriteRes<WriteHint, []> { let Latency = 1; }
//===----------------------------------------------------------------------===//
// Define generic 0 micro-op types

let Latency = 0, NumMicroOps = 0 in
let Latency = 0, NumMicroOps = 1 in
def V1Write_0c_0Z : SchedWriteRes<[]>;


Expand Down Expand Up @@ -472,6 +472,21 @@ def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
V1UnitV, V1UnitV, V1UnitV,
V1UnitV, V1UnitV, V1UnitV]>;

//===----------------------------------------------------------------------===//
// Define predicate-controlled types

// Integer write that is modeled either as zero-latency or as a 1-cycle op.
// The variants are listed in priority order: when the NeoverseZeroMove
// predicate (defined outside this section) matches, V1Write_0c_0Z is used;
// otherwise the NoSchedPred fallback selects V1Write_1c_1I.
def V1Write_0or1c_1I : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
SchedVar<NoSchedPred, [V1Write_1c_1I]>]>;

// Vector/FP write that is modeled either as zero-latency or as V1Write_2c_1V.
// NeoverseZeroMove (defined outside this section) selects the zero-latency
// V1Write_0c_0Z; every other instance falls through to the NoSchedPred variant.
def V1Write_0or2c_1V : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
SchedVar<NoSchedPred, [V1Write_2c_1V]>]>;

// Write that is modeled either as zero-latency or as V1Write_3c_1M0.
// NeoverseZeroMove (defined outside this section) selects the zero-latency
// V1Write_0c_0Z; every other instance falls through to the NoSchedPred variant.
def V1Write_0or3c_1M0 : SchedWriteVariant<[
SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
SchedVar<NoSchedPred, [V1Write_3c_1M0]>]>;

//===----------------------------------------------------------------------===//
// Define forwarded types

Expand Down Expand Up @@ -603,6 +618,7 @@ def : InstRW<[V1Write_1c_1I_1Flg],
"^(ADC|SBC)S[WX]r$",
"^ANDS[WX]ri$",
"^(AND|BIC)S[WX]rr$")>;
// MOVZ (W and X immediate forms) uses the predicate-controlled write:
// zero-latency when NeoverseZeroMove matches, otherwise V1Write_1c_1I.
def : InstRW<[V1Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, V1Write_2c_1M>;
Expand All @@ -623,7 +639,8 @@ def : InstRW<[V1WriteISRegS],
(instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;

// Logical, shift, no flagset
def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
def : InstRW<[V1Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[V1Write_2c_1M_1Flg], (instregex "^(AND|BIC)S[WX]rs$")>;
Expand Down Expand Up @@ -805,7 +822,7 @@ def : SchedAlias<WriteFImm, V1Write_2c_1V>;
def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
def : InstRW<[V1Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
Expand Down Expand Up @@ -1122,6 +1139,7 @@ def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
// ASIMD transpose
// ASIMD unzip/zip
// Covered by "SchedAlias (WriteV[dq]...)" above
// ASIMD move, immediate (MOVID and MOVIv2d_ns only): zero-latency when the
// NeoverseZeroMove predicate matches, otherwise V1Write_2c_1V.
// NOTE(review): this entry is not one of the SchedAlias-covered groups above.
def : InstRW<[V1Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;

// ASIMD duplicate, gen reg
def : InstRW<[V1Write_3c_1M0],
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
Original file line number Diff line number Diff line change
Expand Up @@ -2086,9 +2086,9 @@ drps
# CHECK-NEXT: 1 3 0.50 fcvtas x27, d28
# CHECK-NEXT: 1 3 0.50 fcvtau w29, d30
# CHECK-NEXT: 1 3 0.50 fcvtau xzr, d0
# CHECK-NEXT: 1 3 3.00 fmov w3, s9
# CHECK-NEXT: 1 2 0.50 fmov w3, s9
# CHECK-NEXT: 1 3 3.00 fmov s9, w3
# CHECK-NEXT: 1 3 3.00 fmov x20, d31
# CHECK-NEXT: 1 2 0.50 fmov x20, d31
# CHECK-NEXT: 1 3 3.00 fmov d1, x15
# CHECK-NEXT: 1 2 0.50 fmov x3, v12.d[1]
# CHECK-NEXT: 2 5 1.00 fmov v1.d[1], x19
Expand Down Expand Up @@ -2508,14 +2508,14 @@ drps
# CHECK-NEXT: 1 2 0.50 bics x3, xzr, x3, lsl #1
# CHECK-NEXT: 1 2 0.50 tst w3, w7, lsl #31
# CHECK-NEXT: 1 2 0.50 tst x2, x20, asr #2
# CHECK-NEXT: 1 1 0.25 mov x3, x6
# CHECK-NEXT: 1 1 0.25 mov x3, xzr
# CHECK-NEXT: 1 1 0.25 mov wzr, w2
# CHECK-NEXT: 1 1 0.25 mov w3, w5
# CHECK-NEXT: 1 0 0.20 mov x3, x6
# CHECK-NEXT: 1 0 0.20 mov x3, xzr
# CHECK-NEXT: 1 0 0.20 mov wzr, w2
# CHECK-NEXT: 1 0 0.20 mov w3, w5
# CHECK-NEXT: 1 1 0.25 movz w2, #0, lsl #16
# CHECK-NEXT: 1 1 0.25 mov w2, #-1235
# CHECK-NEXT: 1 1 0.25 mov x2, #5299989643264
# CHECK-NEXT: 1 1 0.25 mov x2, #0
# CHECK-NEXT: 1 0 0.20 mov x2, #0
# CHECK-NEXT: 1 1 0.25 movk w3, #0
# CHECK-NEXT: 1 1 0.25 movz x4, #0, lsl #16
# CHECK-NEXT: 1 1 0.25 movk w5, #0, lsl #16
Expand Down Expand Up @@ -2557,7 +2557,7 @@ drps

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8]
# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 215.50 85.50
# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 509.75 249.75 161.25 161.25 216.50 86.50

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions:
Expand Down Expand Up @@ -3270,9 +3270,9 @@ drps
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fcvtas x27, d28
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fcvtau w29, d30
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fcvtau xzr, d0
# CHECK-NEXT: - - - - - - - 3.00 - - - - - fmov w3, s9
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov w3, s9
# CHECK-NEXT: - - - - - - - 3.00 - - - - - fmov s9, w3
# CHECK-NEXT: - - - - - - - 3.00 - - - - - fmov x20, d31
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov x20, d31
# CHECK-NEXT: - - - - - - - 3.00 - - - - - fmov d1, x15
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov x3, v12.d[1]
# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 fmov v1.d[1], x19
Expand Down Expand Up @@ -3692,14 +3692,14 @@ drps
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - bics x3, xzr, x3, lsl #1
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - tst w3, w7, lsl #31
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - tst x2, x20, asr #2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x3, x6
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x3, xzr
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov wzr, w2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov w3, w5
# CHECK-NEXT: - - - - - - - - - - - - - mov x3, x6
# CHECK-NEXT: - - - - - - - - - - - - - mov x3, xzr
# CHECK-NEXT: - - - - - - - - - - - - - mov wzr, w2
# CHECK-NEXT: - - - - - - - - - - - - - mov w3, w5
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movz w2, #0, lsl #16
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov w2, #-1235
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x2, #5299989643264
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - mov x2, #0
# CHECK-NEXT: - - - - - - - - - - - - - mov x2, #0
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movk w3, #0
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movz x4, #0, lsl #16
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - - movk w5, #0, lsl #16
Expand Down
Loading
Loading