4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7678,6 +7678,10 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Size = Ty.getSizeInBits();
MachineIRBuilder &B = MIRBuilder;

// Bail out on irregular type sizes (128 bits or wider, or not a multiple of 8).
if (Size >= 128 || Size % 8 != 0)
return UnableToLegalize;

// Count set bits in blocks of 2 bits. The default approach would be
// B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
// We use the following formula instead:
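For reference, a minimal standalone sketch of the pairwise-count step this comment describes, assuming the elided lines use the standard subtraction form `val - ((val >> 1) & 0x55555555)`. Both forms yield identical 2-bit block sums; the subtraction form saves one AND:

```cpp
#include <cassert>
#include <cstdint>

// Masked-add form: each 2-bit block becomes b1 + b0 directly.
uint32_t b2countAdd(uint32_t V) {
  return (V & 0x55555555u) + ((V >> 1) & 0x55555555u);
}

// Subtraction form: each 2-bit block becomes (2*b1 + b0) - b1 == b1 + b0.
// No borrow crosses block boundaries because each block result is at most 2.
uint32_t b2countSub(uint32_t V) {
  return V - ((V >> 1) & 0x55555555u);
}

int main() {
  for (uint32_t V : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu})
    assert(b2countAdd(V) == b2countSub(V));
}
```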
5 changes: 4 additions & 1 deletion llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -238,7 +238,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.clampScalar(0, sXLen, sXLen)
.scalarSameSizeAs(1, 0);
} else {
CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
CTPOPActions.widenScalarToNextPow2(0, /*Min*/ 8)
.clampScalar(0, s8, sXLen)
.scalarSameSizeAs(1, 0)
.lower();
}

getActionDefinitionsBuilder(G_CONSTANT)
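In effect (a sketch of the assumed rule semantics, not the LegalizeRuleSet API): odd-width G_CTPOP sources are first rounded up to the next power-of-two width with a floor of 8 bits, then clamped to [s8, sXLen], so the generic lowering above only ever sees byte-multiple sizes:

```cpp
#include <algorithm>
#include <cassert>

// Assumed net effect of widenScalarToNextPow2(0, /*Min*/ 8) followed by
// clampScalar(0, s8, sXLen): round the bit width up to a power of two
// (at least 8), then clamp into [8, XLen].
unsigned widenForCTPOP(unsigned Bits, unsigned XLen) {
  unsigned Pow2 = 8; // the /*Min*/ 8 floor
  while (Pow2 < Bits)
    Pow2 *= 2;
  return std::clamp(Pow2, 8u, XLen);
}

int main() {
  assert(widenForCTPOP(2, 64) == 8);   // s2  -> s8
  assert(widenForCTPOP(11, 64) == 16); // s11 -> s16
  assert(widenForCTPOP(24, 32) == 32); // s24 -> s32
}
```

This is what keeps the new `Size % 8 != 0` bailout in lowerBitCount from firing on this path: s2 widens to s8 and s11 widens to s16 before lower() runs.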
140 changes: 140 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
@@ -205,3 +205,143 @@ define i24 @bitreverse_i24(i24 %x) {
%rev = call i24 @llvm.bitreverse.i24(i24 %x)
ret i24 %rev
}

define i2 @test_ctpop_i2(i2 %a) {
; RV32-LABEL: test_ctpop_i2:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: andi a0, a0, 3
; RV32-NEXT: srli a1, a0, 1
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: zext.b a1, a0
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: andi a1, a1, 51
; RV32-NEXT: andi a0, a0, 51
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: srli a1, a0, 4
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: andi a0, a0, 15
; RV32-NEXT: li a1, 1
; RV32-NEXT: call __mulsi3
; RV32-NEXT: zext.b a0, a0
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_ctpop_i2:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: andi a0, a0, 3
; RV64-NEXT: srli a1, a0, 1
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: zext.b a1, a0
; RV64-NEXT: srli a1, a1, 2
; RV64-NEXT: andi a1, a1, 51
; RV64-NEXT: andi a0, a0, 51
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: srli a1, a0, 4
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: andi a0, a0, 15
; RV64-NEXT: li a1, 1
; RV64-NEXT: call __muldi3
; RV64-NEXT: zext.b a0, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%1 = call i2 @llvm.ctpop.i2(i2 %a)
ret i2 %1
}

define i11 @test_ctpop_i11(i11 %a) {
; RV32-LABEL: test_ctpop_i11:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: andi a0, a0, 2047
; RV32-NEXT: lui a1, 5
; RV32-NEXT: lui a2, 16
; RV32-NEXT: srli a3, a0, 1
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: and a1, a3, a1
; RV32-NEXT: lui a3, 3
; RV32-NEXT: addi s0, a2, -1
; RV32-NEXT: addi a2, a3, 819
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: and a1, a0, s0
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: lui a2, 1
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: srli a1, a0, 4
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: addi a1, a2, -241
; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: li a1, 257
; RV32-NEXT: call __mulsi3
; RV32-NEXT: and a0, a0, s0
; RV32-NEXT: srli a0, a0, 8
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_ctpop_i11:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: andi a0, a0, 2047
; RV64-NEXT: lui a1, 5
; RV64-NEXT: lui a2, 16
; RV64-NEXT: srli a3, a0, 1
; RV64-NEXT: addi a1, a1, 1365
; RV64-NEXT: and a1, a3, a1
; RV64-NEXT: lui a3, 3
; RV64-NEXT: addi s0, a2, -1
; RV64-NEXT: addi a2, a3, 819
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: and a1, a0, s0
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: srli a1, a1, 2
; RV64-NEXT: and a1, a1, a2
; RV64-NEXT: lui a2, 1
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: srli a1, a0, 4
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: addi a1, a2, -241
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: li a1, 257
; RV64-NEXT: call __muldi3
; RV64-NEXT: and a0, a0, s0
; RV64-NEXT: srli a0, a0, 8
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: .cfi_restore s0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%1 = call i11 @llvm.ctpop.i11(i11 %a)
ret i11 %1
}
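The constants in these expansions follow directly from the widened width. As an illustration (plain arithmetic, not LLVM code), the same SWAR sequence parameterized by type reproduces them: 85/51/15 with a multiply constant of 1 and a shift of 0 for the s8-widened i2 case (hence the `li a1, 1` before the multiply libcall), and 21845/13107/3855 with multiplier 257 and shift 8 for the s16-widened i11 case:

```cpp
#include <cassert>
#include <cstdint>
#include <limits>

// SWAR popcount over an 8- or 16-bit value, using the same constants the
// expansions above materialize.
template <typename T> unsigned popcountSWAR(T V) {
  constexpr T Max = std::numeric_limits<T>::max();
  constexpr T M1 = Max / 3;   // 0x55 (85)   / 0x5555 (21845)
  constexpr T M2 = Max / 5;   // 0x33 (51)   / 0x3333 (13107)
  constexpr T M4 = Max / 17;  // 0x0F (15)   / 0x0F0F (3855)
  constexpr T H  = Max / 255; // 1           / 0x0101 (257)
  V = V - ((V >> 1) & M1);                              // 2-bit block sums
  V = (V & M2) + ((V >> 2) & M2);                       // 4-bit block sums
  V = (V + (V >> 4)) & M4;                              // 8-bit block sums
  return static_cast<T>(V * H) >> (sizeof(T) * 8 - 8);  // fold into top byte
}

int main() {
  assert(popcountSWAR<uint8_t>(0b11) == 2);     // i2 input, both bits set
  assert(popcountSWAR<uint16_t>(0x7FF) == 11);  // i11 input, all bits set
}
```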
112 changes: 112 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
@@ -216,3 +216,115 @@ body: |
PseudoRET implicit $x10

...
---
name: ctpop_i2
body: |
bb.1:
liveins: $x10

; RV64I-LABEL: name: ctpop_i2
; RV64I: liveins: $x10
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 85
; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]]
; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]]
; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]]
; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64)
; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 51
; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]]
; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]]
; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]]
; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64)
; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]]
; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C]]
; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64)
; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
;
; RV64ZBB-LABEL: name: ctpop_i2
; RV64ZBB: liveins: $x10
; RV64ZBB-NEXT: {{ $}}
; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64)
; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64)
; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64)
; RV64ZBB-NEXT: PseudoRET implicit $x10
%1:_(s64) = COPY $x10
%0:_(s2) = G_TRUNC %1(s64)
%2:_(s2) = G_CTPOP %0(s2)
%3:_(s64) = G_ANYEXT %2(s2)
$x10 = COPY %3(s64)
PseudoRET implicit $x10

...
---
name: ctpop_i11
body: |
bb.1:
liveins: $x10

; RV64I-LABEL: name: ctpop_i11
; RV64I: liveins: $x10
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2047
; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 21845
; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]]
; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]]
; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]]
; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64)
; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 13107
; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]]
; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]]
; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]]
; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64)
; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 3855
; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]]
; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 257
; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C8]]
; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[MUL]], [[C4]]
; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[AND6]], [[C9]](s64)
; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
;
; RV64ZBB-LABEL: name: ctpop_i11
; RV64ZBB: liveins: $x10
; RV64ZBB-NEXT: {{ $}}
; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2047
; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64)
; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64)
; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64)
; RV64ZBB-NEXT: PseudoRET implicit $x10
%1:_(s64) = COPY $x10
%0:_(s11) = G_TRUNC %1(s64)
%2:_(s11) = G_CTPOP %0(s11)
%3:_(s64) = G_ANYEXT %2(s11)
$x10 = COPY %3(s64)
PseudoRET implicit $x10

...
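For the Zbb path, the checks above show the legalizer simply masking off the high bits and using the native popcount. A quick standalone check (plain C++, assuming nothing beyond the arithmetic) that masking to the narrow width preserves the count:

```cpp
#include <bitset>
#include <cassert>
#include <cstdint>

// ctpop on an i11 value equals a 64-bit popcount of the value masked to
// its low 11 bits, which is exactly the G_AND + G_CTPOP sequence emitted
// for RV64ZBB above.
int main() {
  for (uint64_t X : {0ull, 0x7FFull, 0xFFFFFFFFFFFFFFFFull, 0x123ull}) {
    unsigned Narrow = std::bitset<11>(X).count();           // ctpop on i11
    unsigned Masked = std::bitset<64>(X & 0x7FF).count();   // mask + ctpop.i64
    assert(Narrow == Masked);
  }
}
```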