
Commit 523bd2d

[GISel][RISCV] Compute CTPOP of small odd-sized integer correctly (#168559)
Fixes the assertion reported in #168523. This patch lifts the small, odd-sized integer to at least 8 bits, ensuring that the subsequent lowering code behaves correctly.
1 parent: 0ae2bcc · commit: 523bd2d
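As a rough, standalone model of the widening the message describes (the function below and its name are illustrative, not LLVM API): the new RISC-V rules ask the legalizer to round a G_CTPOP operand up to the next power of two of at least 8 bits and keep it within XLEN, which matches the i2 -> s8 and i11 -> s16 choices visible in the tests added below.

```cpp
#include <algorithm>
#include <bit>      // std::bit_ceil, C++20
#include <cstdio>

// Illustrative sketch only: the scalar width the new CTPOP rules appear to
// request on a subtarget without Zbb, given the operand width and XLEN.
unsigned legalizedCtpopWidth(unsigned BitWidth, unsigned XLen) {
  unsigned Widened = std::bit_ceil(std::max(BitWidth, 8u)); // widenScalarToNextPow2(0, /*Min*/ 8)
  return std::clamp(Widened, 8u, XLen);                     // clampScalar(0, s8, sXLen)
}

int main() {
  std::printf("%u %u\n",
              legalizedCtpopWidth(2, 64),   // 8: the i2 test below is handled as a byte
              legalizedCtpopWidth(11, 64)); // 16: the i11 test below is handled as a half-word
  return 0;
}
```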

File tree: 4 files changed (+260 -1 lines)


llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 4 additions & 0 deletions
@@ -7698,6 +7698,10 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
   unsigned Size = Ty.getSizeInBits();
   MachineIRBuilder &B = MIRBuilder;

+  // Bail out on irregular type lengths.
+  if (Size > 128 || Size % 8 != 0)
+    return UnableToLegalize;
+
   // Count set bits in blocks of 2 bits. Default approach would be
   // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
   // We use following formula instead:
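The comment at the end of this hunk contrasts two equivalent first steps for the expansion: the "default" per-pair sum and the subtract-based formula the lowering actually uses. A minimal standalone check of that equivalence (plain C++, not LLVM code; masks shortened to one byte):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    // Default approach from the comment: add the even and odd bit lanes.
    uint8_t PairSum = (V & 0x55) + ((V >> 1) & 0x55);
    // Formula the lowering uses instead: one subtraction per pair.
    uint8_t PairSub = uint8_t(V) - ((V >> 1) & 0x55);
    assert(PairSum == PairSub); // both hold the popcount of each 2-bit block
  }
  std::printf("per-pair formulas agree for all 8-bit values\n");
  return 0;
}
```

Either way, the result feeds the later 0x33/0x0F masking and multiply/shift steps, which assume a width that is a whole number of bytes; that is presumably why the new guard bails out on irregular sizes.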

llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp

Lines changed: 4 additions & 1 deletion
@@ -238,7 +238,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
         .clampScalar(0, sXLen, sXLen)
         .scalarSameSizeAs(1, 0);
   } else {
-    CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
+    CTPOPActions.widenScalarToNextPow2(0, /*Min*/ 8)
+        .clampScalar(0, s8, sXLen)
+        .scalarSameSizeAs(1, 0)
+        .lower();
   }

   getActionDefinitionsBuilder(G_CONSTANT)
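For the i11 case these rules end up treating the value as a 16-bit quantity, and the test output below reflects that (masks 21845/13107/3855, multiply by 257, shift right by 8). Here is a self-contained C++ sketch of that same sequence, with my own naming and framing rather than LLVM's:

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of the expansion the i11 test below checks for: mask to the 11 live
// bits, then run the 16-bit mask-and-add popcount and sum the two byte counts.
static unsigned ctpopI11(uint16_t Raw) {
  uint16_t V  = Raw & 0x07FF;                          // andi a0, a0, 2047
  uint16_t B2 = V - ((V >> 1) & 0x5555);               // pairs of bits
  uint16_t B4 = (B2 & 0x3333) + ((B2 >> 2) & 0x3333);  // nibbles
  uint16_t B8 = (B4 + (B4 >> 4)) & 0x0F0F;             // bytes
  return uint16_t(B8 * 0x0101u) >> 8;                  // li a1, 257; mul; srli 8
}

int main() {
  std::printf("%u %u\n", ctpopI11(0x07FF), ctpopI11(0x0401)); // prints: 11 2
  return 0;
}
```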

llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll

Lines changed: 140 additions & 0 deletions
@@ -205,3 +205,143 @@ define i24 @bitreverse_i24(i24 %x) {
   %rev = call i24 @llvm.bitreverse.i24(i24 %x)
   ret i24 %rev
 }
+
+define i2 @test_ctpop_i2(i2 %a) {
+; RV32-LABEL: test_ctpop_i2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    andi a0, a0, 3
+; RV32-NEXT:    srli a1, a0, 1
+; RV32-NEXT:    sub a0, a0, a1
+; RV32-NEXT:    zext.b a1, a0
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    andi a1, a1, 51
+; RV32-NEXT:    andi a0, a0, 51
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    srli a1, a0, 4
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    andi a0, a0, 15
+; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    call __mulsi3
+; RV32-NEXT:    zext.b a0, a0
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_ctpop_i2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    andi a0, a0, 3
+; RV64-NEXT:    srli a1, a0, 1
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    zext.b a1, a0
+; RV64-NEXT:    srli a1, a1, 2
+; RV64-NEXT:    andi a1, a1, 51
+; RV64-NEXT:    andi a0, a0, 51
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    srli a1, a0, 4
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    andi a0, a0, 15
+; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    call __muldi3
+; RV64-NEXT:    zext.b a0, a0
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    .cfi_def_cfa_offset 0
+; RV64-NEXT:    ret
+  %1 = call i2 @llvm.ctpop.i2(i2 %a)
+  ret i2 %1
+}
+
+define i11 @test_ctpop_i11(i11 %a) {
+; RV32-LABEL: test_ctpop_i11:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    andi a0, a0, 2047
+; RV32-NEXT:    lui a1, 5
+; RV32-NEXT:    lui a2, 16
+; RV32-NEXT:    srli a3, a0, 1
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    and a1, a3, a1
+; RV32-NEXT:    lui a3, 3
+; RV32-NEXT:    addi s0, a2, -1
+; RV32-NEXT:    addi a2, a3, 819
+; RV32-NEXT:    sub a0, a0, a1
+; RV32-NEXT:    and a1, a0, s0
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    lui a2, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    srli a1, a0, 4
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    addi a1, a2, -241
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    li a1, 257
+; RV32-NEXT:    call __mulsi3
+; RV32-NEXT:    and a0, a0, s0
+; RV32-NEXT:    srli a0, a0, 8
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    .cfi_restore s0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_ctpop_i11:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    andi a0, a0, 2047
+; RV64-NEXT:    lui a1, 5
+; RV64-NEXT:    lui a2, 16
+; RV64-NEXT:    srli a3, a0, 1
+; RV64-NEXT:    addi a1, a1, 1365
+; RV64-NEXT:    and a1, a3, a1
+; RV64-NEXT:    lui a3, 3
+; RV64-NEXT:    addi s0, a2, -1
+; RV64-NEXT:    addi a2, a3, 819
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    and a1, a0, s0
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    srli a1, a1, 2
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    lui a2, 1
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    srli a1, a0, 4
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    addi a1, a2, -241
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    li a1, 257
+; RV64-NEXT:    call __muldi3
+; RV64-NEXT:    and a0, a0, s0
+; RV64-NEXT:    srli a0, a0, 8
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    .cfi_restore s0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    .cfi_def_cfa_offset 0
+; RV64-NEXT:    ret
+  %1 = call i11 @llvm.ctpop.i11(i11 %a)
+  ret i11 %1
+}

llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir

Lines changed: 112 additions & 0 deletions
@@ -216,3 +216,115 @@ body: |
     PseudoRET implicit $x10

 ...
+---
+name: ctpop_i2
+body: |
+  bb.1:
+    liveins: $x10
+
+    ; RV64I-LABEL: name: ctpop_i2
+    ; RV64I: liveins: $x10
+    ; RV64I-NEXT: {{ $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
+    ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
+    ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 85
+    ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]]
+    ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
+    ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]]
+    ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+    ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]]
+    ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64)
+    ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 51
+    ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]]
+    ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]]
+    ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]]
+    ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64)
+    ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
+    ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
+    ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]]
+    ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C]]
+    ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64)
+    ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
+    ; RV64I-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV64ZBB-LABEL: name: ctpop_i2
+    ; RV64ZBB: liveins: $x10
+    ; RV64ZBB-NEXT: {{ $}}
+    ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+    ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64)
+    ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64)
+    ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64)
+    ; RV64ZBB-NEXT: PseudoRET implicit $x10
+    %1:_(s64) = COPY $x10
+    %0:_(s2) = G_TRUNC %1(s64)
+    %2:_(s2) = G_CTPOP %0(s2)
+    %3:_(s64) = G_ANYEXT %2(s2)
+    $x10 = COPY %3(s64)
+    PseudoRET implicit $x10
+
+...
+---
+name: ctpop_i11
+body: |
+  bb.1:
+    liveins: $x10
+
+    ; RV64I-LABEL: name: ctpop_i11
+    ; RV64I: liveins: $x10
+    ; RV64I-NEXT: {{ $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2047
+    ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
+    ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
+    ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 21845
+    ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]]
+    ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
+    ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]]
+    ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
+    ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]]
+    ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64)
+    ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 13107
+    ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]]
+    ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]]
+    ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]]
+    ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64)
+    ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
+    ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 3855
+    ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]]
+    ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 257
+    ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C8]]
+    ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[MUL]], [[C4]]
+    ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[AND6]], [[C9]](s64)
+    ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
+    ; RV64I-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV64ZBB-LABEL: name: ctpop_i11
+    ; RV64ZBB: liveins: $x10
+    ; RV64ZBB-NEXT: {{ $}}
+    ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2047
+    ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+    ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64)
+    ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64)
+    ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64)
+    ; RV64ZBB-NEXT: PseudoRET implicit $x10
+    %1:_(s64) = COPY $x10
+    %0:_(s11) = G_TRUNC %1(s64)
+    %2:_(s11) = G_CTPOP %0(s11)
+    %3:_(s64) = G_ANYEXT %2(s11)
+    $x10 = COPY %3(s64)
+    PseudoRET implicit $x10
+
+...
