Skip to content

Commit 2f151e7

Browse files
authored
ZJIT: Allow ccalls above 7 arguments (ruby#15312)
ZJIT: Add stack support for CCalls
1 parent 176e384 commit 2f151e7

File tree

5 files changed

+99
-99
lines changed

5 files changed

+99
-99
lines changed

zjit/src/asm/arm64/opnd.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 };
9898
pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 };
9999
pub const X4_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 4 };
100100
pub const X5_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 5 };
101+
pub const X6_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 6 };
102+
pub const X7_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 7 };
101103

102104
// caller-save registers
103105
pub const X9_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 9 };

zjit/src/backend/arm64/mod.rs

Lines changed: 55 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,15 @@ pub const EC: Opnd = Opnd::Reg(X20_REG);
2424
pub const SP: Opnd = Opnd::Reg(X21_REG);
2525

2626
// C argument registers on this platform
27-
pub const C_ARG_OPNDS: [Opnd; 6] = [
27+
pub const C_ARG_OPNDS: [Opnd; 8] = [
2828
Opnd::Reg(X0_REG),
2929
Opnd::Reg(X1_REG),
3030
Opnd::Reg(X2_REG),
3131
Opnd::Reg(X3_REG),
3232
Opnd::Reg(X4_REG),
33-
Opnd::Reg(X5_REG)
33+
Opnd::Reg(X5_REG),
34+
Opnd::Reg(X6_REG),
35+
Opnd::Reg(X7_REG)
3436
];
3537

3638
// C return value register on this platform
@@ -199,6 +201,8 @@ pub const ALLOC_REGS: &[Reg] = &[
199201
X3_REG,
200202
X4_REG,
201203
X5_REG,
204+
X6_REG,
205+
X7_REG,
202206
X11_REG,
203207
X12_REG,
204208
];
@@ -231,7 +235,7 @@ impl Assembler {
231235

232236
/// Get a list of all of the caller-saved registers
233237
pub fn get_caller_save_regs() -> Vec<Reg> {
234-
vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG]
238+
vec![X1_REG, X6_REG, X7_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG]
235239
}
236240

237241
/// How many bytes a call and a [Self::frame_setup] would change native SP
@@ -488,31 +492,13 @@ impl Assembler {
488492
}
489493
*/
490494
Insn::CCall { opnds, .. } => {
491-
assert!(opnds.len() <= C_ARG_OPNDS.len());
492-
493-
// Load each operand into the corresponding argument
494-
// register.
495-
// Note: the iteration order is reversed to avoid corrupting x0,
496-
// which is both the return value and first argument register
497-
if !opnds.is_empty() {
498-
let mut args: Vec<(Opnd, Opnd)> = vec![];
499-
for (idx, opnd) in opnds.iter_mut().enumerate().rev() {
500-
// If the value that we're sending is 0, then we can use
501-
// the zero register, so in this case we'll just send
502-
// a UImm of 0 along as the argument to the move.
503-
let value = match opnd {
504-
Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0),
505-
Opnd::Mem(_) => split_memory_address(asm, *opnd),
506-
_ => *opnd
507-
};
508-
args.push((C_ARG_OPNDS[idx], value));
509-
}
510-
asm.parallel_mov(args);
511-
}
512-
513-
// Now we push the CCall without any arguments so that it
514-
// just performs the call.
515-
*opnds = vec![];
495+
opnds.iter_mut().for_each(|opnd| {
496+
*opnd = match opnd {
497+
Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0),
498+
Opnd::Mem(_) => split_memory_address(asm, *opnd),
499+
_ => *opnd
500+
};
501+
});
516502
asm.push_insn(insn);
517503
},
518504
Insn::Cmp { left, right } => {
@@ -1857,17 +1843,19 @@ mod tests {
18571843

18581844
assert_disasm_snapshot!(cb.disasm(), @r"
18591845
0x0: str x1, [sp, #-0x10]!
1860-
0x4: str x9, [sp, #-0x10]!
1861-
0x8: str x10, [sp, #-0x10]!
1862-
0xc: str x11, [sp, #-0x10]!
1863-
0x10: str x12, [sp, #-0x10]!
1864-
0x14: str x13, [sp, #-0x10]!
1865-
0x18: str x14, [sp, #-0x10]!
1866-
0x1c: str x15, [sp, #-0x10]!
1867-
0x20: mrs x16, nzcv
1868-
0x24: str x16, [sp, #-0x10]!
1846+
0x4: str x6, [sp, #-0x10]!
1847+
0x8: str x7, [sp, #-0x10]!
1848+
0xc: str x9, [sp, #-0x10]!
1849+
0x10: str x10, [sp, #-0x10]!
1850+
0x14: str x11, [sp, #-0x10]!
1851+
0x18: str x12, [sp, #-0x10]!
1852+
0x1c: str x13, [sp, #-0x10]!
1853+
0x20: str x14, [sp, #-0x10]!
1854+
0x24: str x15, [sp, #-0x10]!
1855+
0x28: mrs x16, nzcv
1856+
0x2c: str x16, [sp, #-0x10]!
18691857
");
1870-
assert_snapshot!(cb.hexdump(), @"e10f1ff8e90f1ff8ea0f1ff8eb0f1ff8ec0f1ff8ed0f1ff8ee0f1ff8ef0f1ff810423bd5f00f1ff8");
1858+
assert_snapshot!(cb.hexdump(), @"e10f1ff8e60f1ff8e70f1ff8e90f1ff8ea0f1ff8eb0f1ff8ec0f1ff8ed0f1ff8ee0f1ff8ef0f1ff810423bd5f00f1ff8");
18711859
}
18721860

18731861
#[test]
@@ -1887,9 +1875,11 @@ mod tests {
18871875
0x18: ldr x11, [sp], #0x10
18881876
0x1c: ldr x10, [sp], #0x10
18891877
0x20: ldr x9, [sp], #0x10
1890-
0x24: ldr x1, [sp], #0x10
1878+
0x24: ldr x7, [sp], #0x10
1879+
0x28: ldr x6, [sp], #0x10
1880+
0x2c: ldr x1, [sp], #0x10
18911881
");
1892-
assert_snapshot!(cb.hexdump(), @"10421bd5f00741f8ef0741f8ee0741f8ed0741f8ec0741f8eb0741f8ea0741f8e90741f8e10741f8");
1882+
assert_snapshot!(cb.hexdump(), @"10421bd5f00741f8ef0741f8ee0741f8ed0741f8ec0741f8eb0741f8ea0741f8e90741f8e70741f8e60741f8e10741f8");
18931883
}
18941884

18951885
#[test]
@@ -2658,14 +2648,14 @@ mod tests {
26582648
]);
26592649
asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());
26602650

2661-
assert_disasm_snapshot!(cb.disasm(), @"
2662-
0x0: mov x15, x0
2663-
0x4: mov x0, x1
2664-
0x8: mov x1, x15
2665-
0xc: mov x16, #0
2666-
0x10: blr x16
2651+
assert_disasm_snapshot!(cb.disasm(), @r"
2652+
0x0: mov x15, x1
2653+
0x4: mov x1, x0
2654+
0x8: mov x0, x15
2655+
0xc: mov x16, #0
2656+
0x10: blr x16
26672657
");
2668-
assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae1030faa100080d200023fd6");
2658+
assert_snapshot!(cb.hexdump(), @"ef0301aae10300aae0030faa100080d200023fd6");
26692659
}
26702660

26712661
#[test]
@@ -2681,17 +2671,17 @@ mod tests {
26812671
]);
26822672
asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());
26832673

2684-
assert_disasm_snapshot!(cb.disasm(), @"
2685-
0x0: mov x15, x2
2686-
0x4: mov x2, x3
2687-
0x8: mov x3, x15
2688-
0xc: mov x15, x0
2689-
0x10: mov x0, x1
2690-
0x14: mov x1, x15
2691-
0x18: mov x16, #0
2692-
0x1c: blr x16
2674+
assert_disasm_snapshot!(cb.disasm(), @r"
2675+
0x0: mov x15, x1
2676+
0x4: mov x1, x0
2677+
0x8: mov x0, x15
2678+
0xc: mov x15, x3
2679+
0x10: mov x3, x2
2680+
0x14: mov x2, x15
2681+
0x18: mov x16, #0
2682+
0x1c: blr x16
26932683
");
2694-
assert_snapshot!(cb.hexdump(), @"ef0302aae20303aae3030faaef0300aae00301aae1030faa100080d200023fd6");
2684+
assert_snapshot!(cb.hexdump(), @"ef0301aae10300aae0030faaef0303aae30302aae2030faa100080d200023fd6");
26952685
}
26962686

26972687
#[test]
@@ -2706,15 +2696,15 @@ mod tests {
27062696
]);
27072697
asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());
27082698

2709-
assert_disasm_snapshot!(cb.disasm(), @"
2710-
0x0: mov x15, x0
2711-
0x4: mov x0, x1
2712-
0x8: mov x1, x2
2713-
0xc: mov x2, x15
2714-
0x10: mov x16, #0
2715-
0x14: blr x16
2699+
assert_disasm_snapshot!(cb.disasm(), @r"
2700+
0x0: mov x15, x1
2701+
0x4: mov x1, x2
2702+
0x8: mov x2, x0
2703+
0xc: mov x0, x15
2704+
0x10: mov x16, #0
2705+
0x14: blr x16
27162706
");
2717-
assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae10302aae2030faa100080d200023fd6");
2707+
assert_snapshot!(cb.hexdump(), @"ef0301aae10302aae20300aae0030faa100080d200023fd6");
27182708
}
27192709

27202710
#[test]

zjit/src/backend/lir.rs

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1559,9 +1559,8 @@ impl Assembler
15591559
frame_setup_idxs.push(asm.insns.len());
15601560
}
15611561

1562-
let before_ccall = match (&insn, iterator.peek().map(|(_, insn)| insn)) {
1563-
(Insn::ParallelMov { .. }, Some(Insn::CCall { .. })) |
1564-
(Insn::CCall { .. }, _) if !pool.is_empty() => {
1562+
let before_ccall = match &insn {
1563+
Insn::CCall { .. } if !pool.is_empty() => {
15651564
// If C_RET_REG is in use, move it to another register.
15661565
// This must happen before last-use registers are deallocated.
15671566
if let Some(vreg_idx) = pool.vreg_for(&C_RET_REG) {
@@ -1612,6 +1611,25 @@ impl Assembler
16121611
if cfg!(target_arch = "x86_64") && saved_regs.len() % 2 == 1 {
16131612
asm.cpush(Opnd::Reg(saved_regs.last().unwrap().0));
16141613
}
1614+
if let Insn::ParallelMov { moves } = &insn {
1615+
if moves.len() > C_ARG_OPNDS.len() {
1616+
let difference = moves.len().saturating_sub(C_ARG_OPNDS.len());
1617+
1618+
#[cfg(target_arch = "x86_64")]
1619+
let offset = {
1620+
// double quadword alignment
1621+
((difference + 3) / 4) * 4
1622+
};
1623+
1624+
#[cfg(target_arch = "aarch64")]
1625+
let offset = {
1626+
// quadword alignment
1627+
if difference % 2 == 0 { difference } else { difference + 1 }
1628+
};
1629+
1630+
asm.sub_into(NATIVE_STACK_PTR, (offset * 8).into());
1631+
}
1632+
}
16151633
}
16161634

16171635
// Allocate a register for the output operand if it exists
@@ -1703,7 +1721,23 @@ impl Assembler
17031721
// Push instruction(s)
17041722
let is_ccall = matches!(insn, Insn::CCall { .. });
17051723
match insn {
1706-
Insn::ParallelMov { moves } => {
1724+
Insn::CCall { opnds, fptr, start_marker, end_marker, out } => {
1725+
let mut moves: Vec<(Opnd, Opnd)> = vec![];
1726+
let num_reg_args = opnds.len().min(C_ARG_OPNDS.len());
1727+
let num_stack_args = opnds.len().saturating_sub(C_ARG_OPNDS.len());
1728+
1729+
if num_stack_args > 0 {
1730+
for (i, opnd) in opnds.iter().skip(num_reg_args).enumerate() {
1731+
moves.push((Opnd::mem(64, NATIVE_STACK_PTR, 8 * i as i32), *opnd));
1732+
}
1733+
}
1734+
1735+
if num_reg_args > 0 {
1736+
for (i, opnd) in opnds.iter().take(num_reg_args).enumerate() {
1737+
moves.push((C_ARG_OPNDS[i], *opnd));
1738+
}
1739+
}
1740+
17071741
// For trampolines that use scratch registers, attempt to lower ParallelMov without scratch_reg.
17081742
if let Some(moves) = Self::resolve_parallel_moves(&moves, None) {
17091743
for (dst, src) in moves {
@@ -1713,13 +1747,12 @@ impl Assembler
17131747
// If it needs a scratch_reg, leave it to *_split_with_scratch_regs to handle it.
17141748
asm.push_insn(Insn::ParallelMov { moves });
17151749
}
1716-
}
1717-
Insn::CCall { opnds, fptr, start_marker, end_marker, out } => {
1750+
17181751
// Split start_marker and end_marker here to avoid inserting push/pop between them.
17191752
if let Some(start_marker) = start_marker {
17201753
asm.push_insn(Insn::PosMarker(start_marker));
17211754
}
1722-
asm.push_insn(Insn::CCall { opnds, fptr, start_marker: None, end_marker: None, out });
1755+
asm.push_insn(Insn::CCall { opnds: vec![], fptr, start_marker: None, end_marker: None, out });
17231756
if let Some(end_marker) = end_marker {
17241757
asm.push_insn(Insn::PosMarker(end_marker));
17251758
}

zjit/src/backend/x86_64/mod.rs

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -351,21 +351,7 @@ impl Assembler {
351351
};
352352
asm.push_insn(insn);
353353
},
354-
Insn::CCall { opnds, .. } => {
355-
assert!(opnds.len() <= C_ARG_OPNDS.len());
356-
357-
// Load each operand into the corresponding argument register.
358-
if !opnds.is_empty() {
359-
let mut args: Vec<(Opnd, Opnd)> = vec![];
360-
for (idx, opnd) in opnds.iter_mut().enumerate() {
361-
args.push((C_ARG_OPNDS[idx], *opnd));
362-
}
363-
asm.parallel_mov(args);
364-
}
365-
366-
// Now we push the CCall without any arguments so that it
367-
// just performs the call.
368-
*opnds = vec![];
354+
Insn::CCall { .. } => {
369355
asm.push_insn(insn);
370356
},
371357
Insn::Lea { .. } => {

zjit/src/codegen.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -398,15 +398,12 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
398398
&Insn::Send { cd, blockiseq, state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
399399
&Insn::SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
400400
&Insn::SendWithoutBlock { cd, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason),
401-
// Give up SendWithoutBlockDirect for 6+ args since asm.ccall() doesn't support it.
401+
// Give up SendWithoutBlockDirect for 6+ args since the callee doesn't know how to read arguments from the stack.
402402
Insn::SendWithoutBlockDirect { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self
403403
gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::TooManyArgsForLir),
404404
Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state)),
405405
&Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
406406
&Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason),
407-
// Ensure we have enough room fit ec, self, and arguments
408-
// TODO remove this check when we have stack args (we can use Time.new to test it)
409-
Insn::InvokeBuiltin { bf, state, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return Err(*state),
410407
Insn::InvokeBuiltin { bf, leaf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, *leaf, opnds!(args)),
411408
&Insn::EntryPoint { jit_entry_idx } => no_output!(gen_entry_point(jit, asm, jit_entry_idx)),
412409
Insn::Return { val } => no_output!(gen_return(asm, opnd!(val))),
@@ -453,10 +450,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
453450
&Insn::GuardGreaterEq { left, right, state } => gen_guard_greater_eq(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)),
454451
Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))),
455452
Insn::CCall { cfunc, recv, args, name, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnd!(recv), opnds!(args)),
456-
// Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args).
457-
// There's no test case for this because no core cfuncs have this many parameters. But C extensions could have such methods.
458-
Insn::CCallWithFrame { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() =>
459-
gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs),
460453
Insn::CCallWithFrame { cfunc, recv, name, args, cme, state, blockiseq, .. } =>
461454
gen_ccall_with_frame(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, *blockiseq, &function.frame_state(*state)),
462455
Insn::CCallVariadic { cfunc, recv, name, args, cme, state, blockiseq, return_type: _, elidable: _ } => {
@@ -722,10 +715,6 @@ fn gen_fixnum_bit_check(asm: &mut Assembler, val: Opnd, index: u8) -> Opnd {
722715
}
723716

724717
fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, leaf: bool, args: Vec<Opnd>) -> lir::Opnd {
725-
assert!(bf.argc + 2 <= C_ARG_OPNDS.len() as i32,
726-
"gen_invokebuiltin should not be called for builtin function {} with too many arguments: {}",
727-
unsafe { std::ffi::CStr::from_ptr(bf.name).to_str().unwrap() },
728-
bf.argc);
729718
if leaf {
730719
gen_prepare_leaf_call_with_gc(asm, state);
731720
} else {

0 commit comments

Comments
 (0)