Skip to content

Commit 718fa6c

Browse files
committed
CFI improvements to the AArch64 fiber implementation
Now the fiber implementation on AArch64 authenticates function return addresses and includes the relevant BTI instructions, except on macOS. Also, change the locations of the saved FP and LR registers on the fiber stack to make them compliant with the Procedure Call Standard for the Arm 64-bit Architecture. Copyright (c) 2022, Arm Limited.
1 parent 8238175 commit 718fa6c

File tree

6 files changed

+104
-14
lines changed

6 files changed

+104
-14
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,6 @@ jobs:
211211
gcc: aarch64-linux-gnu-gcc
212212
qemu: qemu-aarch64 -L /usr/aarch64-linux-gnu
213213
qemu_target: aarch64-linux-user
214-
# FIXME(#3183) shouldn't be necessary to specify this
215-
qemu_flags: -cpu max,pauth=off
216214
- os: ubuntu-latest
217215
target: s390x-unknown-linux-gnu
218216
gcc_package: gcc-s390x-linux-gnu
@@ -251,7 +249,7 @@ jobs:
251249
# Configure Cargo for cross compilation and tell it how it can run
252250
# cross executables
253251
upcase=$(echo ${{ matrix.target }} | awk '{ print toupper($0) }' | sed 's/-/_/g')
254-
echo CARGO_TARGET_${upcase}_RUNNER=${{ runner.tool_cache }}/qemu/bin/${{ matrix.qemu }} ${{ matrix.qemu_flags }} >> $GITHUB_ENV
252+
echo CARGO_TARGET_${upcase}_RUNNER=${{ runner.tool_cache }}/qemu/bin/${{ matrix.qemu }} >> $GITHUB_ENV
255253
echo CARGO_TARGET_${upcase}_LINKER=${{ matrix.gcc }} >> $GITHUB_ENV
256254
257255
# QEMU emulation is not always the speediest, so total testing time

cranelift/codegen/src/isa/aarch64/abi.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -623,11 +623,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
623623
}
624624

625625
fn gen_debug_frame_info(
626+
call_conv: isa::CallConv,
626627
flags: &settings::Flags,
627628
_isa_flags: &Vec<settings::Value>,
628629
) -> SmallInstVec<Inst> {
629630
let mut insts = SmallVec::new();
630-
if flags.unwind_info() {
631+
if flags.unwind_info() && call_conv.extends_apple_aarch64() {
631632
insts.push(Inst::Unwind {
632633
inst: UnwindInst::Aarch64SetPointerAuth {
633634
return_addresses: false,

cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ mod tests {
104104
_ => panic!("expected unwind information"),
105105
};
106106

107-
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] })), (4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
107+
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
108108
}
109109

110110
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
@@ -146,7 +146,7 @@ mod tests {
146146

147147
assert_eq!(
148148
format!("{:?}", fde),
149-
"FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] }))] }"
149+
"FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }"
150150
);
151151
}
152152

cranelift/codegen/src/machinst/abi_impl.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ pub trait ABIMachineSpec {
406406
/// Generates extra unwind instructions for a new frame for this
407407
/// architecture, whether the frame has a prologue sequence or not.
408408
fn gen_debug_frame_info(
409+
_call_conv: isa::CallConv,
409410
_flags: &settings::Flags,
410411
_isa_flags: &Vec<settings::Value>,
411412
) -> SmallInstVec<Self::I> {
@@ -1238,7 +1239,9 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
12381239
self.fixed_frame_storage_size,
12391240
);
12401241

1241-
insts.extend(M::gen_debug_frame_info(&self.flags, &self.isa_flags).into_iter());
1242+
insts.extend(
1243+
M::gen_debug_frame_info(self.call_conv, &self.flags, &self.isa_flags).into_iter(),
1244+
);
12421245

12431246
if self.setup_frame {
12441247
// set up frame

crates/cranelift/src/lib.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,23 @@ fn func_signature(
217217
// then we can optimize this function to use the fastest calling
218218
// convention since it's purely an internal implementation detail of
219219
// the module itself.
220-
Some(_idx) if !func.is_escaping() => CallConv::Fast,
220+
Some(_idx) if !func.is_escaping() => {
221+
let on_apple_aarch64 = isa
222+
.triple()
223+
.default_calling_convention()
224+
.unwrap_or(CallingConvention::SystemV)
225+
== CallingConvention::AppleAarch64;
226+
227+
if on_apple_aarch64 {
228+
// FIXME: We need an Apple-specific calling convention, so that
229+
// Cranelift's ABI implementation generates unwinding directives
230+
// about pointer authentication usage, so we can't just use
231+
// `CallConv::Fast`.
232+
CallConv::WasmtimeAppleAarch64
233+
} else {
234+
CallConv::Fast
235+
}
236+
}
221237

222238
// ... otherwise if it's an imported function or if it's a possibly
223239
// exported function then we use the default ABI wasmtime would

crates/fiber/src/arch/aarch64.S

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,58 @@
77
//
88
// Also at this time this file is heavily based off the x86_64 file, so you'll
99
// probably want to read that one as well.
10+
//
11+
// Finally, control flow integrity hardening has been applied to the code using
12+
// the Pointer Authentication (PAuth) and Branch Target Identification (BTI)
13+
// technologies from the Arm instruction set architecture:
14+
// * All callable functions start with either the `BTI c` or `PACIASP`/`PACIBSP`
15+
// instructions
16+
// * Return addresses are signed and authenticated using the stack pointer
17+
value as a modifier (similarly to the salt in an HMAC operation); the
18+
// `DW_CFA_AARCH64_negate_ra_state` DWARF operation (aliased with the
19+
// `.cfi_window_save` assembler directive) informs an unwinder about this
1020

1121
#include "header.h"
1222

23+
#ifndef CFG_TARGET_OS_macos
24+
// We need to tell whatever loads the following code (e.g. the dynamic linker)
25+
// that it is compatible with BTI, so that the corresponding executable memory
26+
// pages have the necessary attribute set (if supported by the environment). To
27+
// this end, we follow the ELF for the Arm® 64-bit Architecture standard, and
28+
// use a special metadata section. Further details are in section 6.2 of the
29+
// specification:
30+
//
31+
// https://github.com/ARM-software/abi-aa/blob/2022Q1/aaelf64/aaelf64.rst#program-property
32+
//
33+
// We also set the PAuth (PAC) property, even though it is optional, for the
34+
// sake of completeness.
35+
.pushsection .note.gnu.property, "a";
36+
.p2align 3;
37+
.word 4;
38+
.word 16;
39+
.word 5;
40+
.asciz "GNU";
41+
.word 0xc0000000; // GNU_PROPERTY_AARCH64_FEATURE_1_AND
42+
.word 4;
43+
.word 3; // GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC
44+
.word 0;
45+
.popsection
46+
#endif
47+
1348
// fn(top_of_stack(%x0): *mut u8)
1449
HIDDEN(wasmtime_fiber_switch)
1550
GLOBL(wasmtime_fiber_switch)
1651
.p2align 2
1752
TYPE(wasmtime_fiber_switch)
1853
FUNCTION(wasmtime_fiber_switch):
54+
.cfi_startproc
55+
#ifndef CFG_TARGET_OS_macos
56+
hint #25 // paciasp
57+
.cfi_window_save
58+
#endif
1959
// Save all callee-saved registers on the stack since we're assuming
2060
// they're clobbered as a result of the stack switch.
21-
stp lr, fp, [sp, -16]!
61+
stp x29, x30, [sp, -16]!
2262
stp x20, x19, [sp, -16]!
2363
stp x22, x21, [sp, -16]!
2464
stp x24, x23, [sp, -16]!
@@ -47,8 +87,13 @@ FUNCTION(wasmtime_fiber_switch):
4787
ldp x24, x23, [sp], 16
4888
ldp x22, x21, [sp], 16
4989
ldp x20, x19, [sp], 16
50-
ldp lr, fp, [sp], 16
90+
ldp x29, x30, [sp], 16
91+
#ifndef CFG_TARGET_OS_macos
92+
hint #29 // autiasp
93+
.cfi_window_save
94+
#endif
5195
ret
96+
.cfi_endproc
5297
SIZE(wasmtime_fiber_switch)
5398

5499
// fn(
@@ -61,15 +106,39 @@ GLOBL(wasmtime_fiber_init)
61106
.p2align 2
62107
TYPE(wasmtime_fiber_init)
63108
FUNCTION(wasmtime_fiber_init):
64-
adr x8, FUNCTION(wasmtime_fiber_start)
65-
stp x0, x8, [x0, -0x28] // x0 => x19, x8 => lr
109+
.cfi_startproc
110+
hint #34 // bti c
111+
// We set up the newly initialized fiber, so that it resumes execution from
112+
// wasmtime_fiber_start(). As a result, we need a signed address of this
113+
// function, so there are 2 requirements:
114+
// * The fiber stack pointer value that is used by the signing operation
115+
// must match the value when the pointer is authenticated inside
116+
// wasmtime_fiber_switch(), otherwise the latter would fault
117+
// * We would like to use an instruction that is executed as a no-op by
118+
// processors that do not support PAuth, so that the code is backward-
119+
// compatible and there is no duplication; `PACIA1716` is a suitable
120+
// one, which has the following operand register conventions:
121+
// * X17 contains the pointer value to sign
122+
// * X16 contains the modifier value
123+
//
124+
// TODO: Use the PACGA instruction to authenticate the saved register state,
125+
// which avoids creating signed pointers to wasmtime_fiber_start(), and
126+
// provides wider coverage.
127+
sub x16, x0, #16
128+
adr x17, FUNCTION(wasmtime_fiber_start)
129+
#ifndef CFG_TARGET_OS_macos
130+
hint #8 // pacia1716
131+
#endif
132+
str x17, [x16, -0x8] // x17 => lr
133+
str x0, [x16, -0x18] // x0 => x19
66134
stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21
67135

68136
// `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for
69137
// the original reserved 16 bytes.
70138
add x8, x0, -0xb0
71139
str x8, [x0, -0x10]
72140
ret
141+
.cfi_endproc
73142
SIZE(wasmtime_fiber_init)
74143

75144
.p2align 2
@@ -86,8 +155,11 @@ FUNCTION(wasmtime_fiber_start):
86155
0x06, /* DW_OP_deref */ \
87156
0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */
88157

89-
.cfi_rel_offset x29, -0x08
90-
.cfi_rel_offset lr, -0x10
158+
.cfi_rel_offset x29, -0x10
159+
#ifndef CFG_TARGET_OS_macos
160+
.cfi_window_save
161+
#endif
162+
.cfi_rel_offset x30, -0x08
91163
.cfi_rel_offset x19, -0x18
92164
.cfi_rel_offset x20, -0x20
93165
.cfi_rel_offset x21, -0x28

0 commit comments

Comments
 (0)