Skip to content

Commit ed9148e

Browse files
committed
amd64: slightly improve TLB flushing
This is still somewhat broken (see #721), since it relies on the flush_tlb function ending up in the big area around the code that is still identity mapped. So, this code should probably move somewhere else. However, this fixes a very significant bug in the prior code, where the flush_tlb() function called on guest function dispatch could return to the wrong address, due to the correct return address having been pushed onto the wrong page due to a stale TLB entry. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com>
1 parent 6fa0f74 commit ed9148e

File tree

7 files changed

+104
-69
lines changed

7 files changed

+104
-69
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
Copyright 2025 The Hyperlight Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
//! This module contains the architecture-specific dispatch function
18+
//! entry sequence
19+
20+
// The extern "C" here is a lie until #![feature(abi_custom)] is stabilised
21+
unsafe extern "C" {
22+
/// There are two reasons that we need an architecture-specific
23+
/// assembly stub currently: (1) to ensure that the fact that the
24+
/// dispatch function never returns but can be executed again is
25+
/// OK (i.e. we must ensure that there is no stack frame
26+
/// teardown); (2) at least presently, to ensure that the TLB is
27+
/// flushed in certain cases.
28+
///
29+
/// # TLB flushing
30+
///
31+
/// The hyperlight host likes to use one partition and reset it in
32+
/// various ways; if that has happened, there might stale TLB
33+
/// entries hanging around from the former user of the
34+
/// partition. Flushing the TLB here is not quite the right thing
35+
/// to do, since incorrectly cached entries could make even this
36+
/// code not exist, but regrettably there is not a simple way for
37+
/// the host to trigger flushing when it ought to happen, so for
38+
/// now this works in practice, since the text segment is always
39+
/// part of the big identity-mapped region at the base of the
40+
/// guest. The stack, however, is not part of the snapshot region
41+
/// which is (in practice) idmapped for the relevant area, so this
42+
/// cannot touch the stack until the flush is done.
43+
///
44+
/// Currently this just always flips CR4.PGE back and forth to
45+
/// trigger a tlb flush. We should use a faster approach where
46+
/// available
47+
///
48+
/// # ABI
49+
///
50+
/// The ZF should be set if a TLB flush is required on this call
51+
/// (e.g. the first call after a snapshot restore)
52+
///
53+
/// The stack pointer should, unusually for amd64, by 16-byte
54+
/// aligned at the beginning of the function---no return address
55+
/// should be pushed.
56+
pub(crate) unsafe fn dispatch_function(must_flush_tlb: u64);
57+
}
58+
core::arch::global_asm!("
59+
.global dispatch_function
60+
dispatch_function:
61+
jnz flush_done
62+
mov rdi, cr4
63+
xor rdi, 0x80
64+
mov cr4, rdi
65+
xor rdi, 0x80
66+
mov cr4, rdi
67+
flush_done:
68+
call {internal_dispatch_function}\n
69+
hlt\n
70+
", internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function);

src/hyperlight_guest_bin/src/arch/amd64/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ limitations under the License.
1515
*/
1616

1717
pub(crate) mod context;
18+
pub(crate) mod dispatch;
1819
pub mod exception;
1920
mod init;
2021
mod layout;

src/hyperlight_guest_bin/src/guest_function/call.rs

Lines changed: 1 addition & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, Functi
2222
use hyperlight_common::flatbuffer_wrappers::function_types::{FunctionCallResult, ParameterType};
2323
use hyperlight_common::flatbuffer_wrappers::guest_error::{ErrorCode, GuestError};
2424
use hyperlight_guest::error::{HyperlightGuestError, Result};
25-
use hyperlight_guest::exit::halt;
2625
use tracing::{Span, instrument};
2726

2827
use crate::{GUEST_HANDLE, REGISTERED_GUEST_FUNCTIONS};
@@ -79,14 +78,8 @@ pub(crate) fn call_guest_function(function_call: FunctionCall) -> Result<Vec<u8>
7978
}
8079
}
8180

82-
// This function is marked as no_mangle/inline to prevent the compiler from inlining it , if its inlined the epilogue will not be called
83-
// and we will leak memory as the epilogue will not be called as halt() is not going to return.
84-
//
85-
// This function may panic, as we have no other ways of dealing with errors at this level
86-
#[unsafe(no_mangle)]
87-
#[inline(never)]
8881
#[instrument(skip_all, parent = Span::current(), level= "Trace")]
89-
fn internal_dispatch_function() {
82+
pub(crate) fn internal_dispatch_function() {
9083
let handle = unsafe { GUEST_HANDLE };
9184

9285
#[cfg(debug_assertions)]
@@ -115,34 +108,3 @@ fn internal_dispatch_function() {
115108
}
116109
}
117110
}
118-
119-
// This is implemented as a separate function to make sure that epilogue in the internal_dispatch_function is called before the halt()
120-
// which if it were included in the internal_dispatch_function cause the epilogue to not be called because the halt() would not return
121-
// when running in the hypervisor.
122-
#[instrument(skip_all, parent = Span::current(), level= "Trace")]
123-
pub(crate) extern "C" fn dispatch_function() {
124-
// The hyperlight host likes to use one partition and reset it in
125-
// various ways; if that has happened, there might stale TLB
126-
// entries hanging around from the former user of the
127-
// partition. Flushing the TLB here is not quite the right thing
128-
// to do, since incorrectly cached entries could make even this
129-
// code not exist, but regrettably there is not a simple way for
130-
// the host to trigger flushing when it ought to happen, so for
131-
// now this works in practice, since the text segment is always
132-
// part of the big identity-mapped region at the base of the
133-
// guest.
134-
crate::paging::flush_tlb();
135-
136-
// Read the current TSC to report it to the host with the spans/events
137-
// This helps calculating the timestamps relative to the guest call
138-
#[cfg(feature = "trace_guest")]
139-
{
140-
let guest_start_tsc = hyperlight_guest_tracing::invariant_tsc::read_tsc();
141-
// Reset the trace state for the new guest function call with the new start TSC
142-
// This clears any existing spans/events from previous calls ensuring a clean state
143-
hyperlight_guest_tracing::new_call(guest_start_tsc);
144-
}
145-
146-
internal_dispatch_function();
147-
halt();
148-
}

src/hyperlight_guest_bin/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ extern crate alloc;
2020

2121
use core::fmt::Write;
2222

23+
use arch::dispatch::dispatch_function;
2324
use buddy_system_allocator::LockedHeap;
24-
use guest_function::call::dispatch_function;
2525
use guest_function::register::GuestFunctionRegister;
2626
use guest_logger::init_logger;
2727
use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode;

src/hyperlight_guest_bin/src/paging.rs

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -144,22 +144,3 @@ pub fn virt_to_phys(gva: vmem::VirtAddr) -> impl Iterator<Item = vmem::Mapping>
144144
pub fn phys_to_virt(gpa: vmem::PhysAddr) -> Option<*mut u8> {
145145
GuestMappingOperations::new().fallible_phys_to_virt(gpa)
146146
}
147-
148-
pub fn flush_tlb() {
149-
// Currently this just always flips CR4.PGE back and forth to
150-
// trigger a tlb flush. We should use a faster approach where
151-
// available
152-
let mut orig_cr4: u64;
153-
unsafe {
154-
asm!("mov {}, cr4", out(reg) orig_cr4);
155-
}
156-
let tmp_cr4: u64 = orig_cr4 ^ (1 << 7); // CR4.PGE
157-
unsafe {
158-
asm!(
159-
"mov cr4, {}",
160-
"mov cr4, {}",
161-
in(reg) tmp_cr4,
162-
in(reg) orig_cr4
163-
);
164-
}
165-
}

src/hyperlight_host/src/hypervisor/hyperlight_vm.rs

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ pub(crate) struct HyperlightVm {
104104

105105
mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot number, region)
106106

107+
pending_tlb_flush: bool,
108+
107109
#[cfg(gdb)]
108110
gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
109111
#[cfg(gdb)]
@@ -421,6 +423,8 @@ impl HyperlightVm {
421423

422424
mmap_regions: Vec::new(),
423425

426+
pending_tlb_flush: false,
427+
424428
#[cfg(gdb)]
425429
gdb_conn,
426430
#[cfg(gdb)]
@@ -625,6 +629,15 @@ impl HyperlightVm {
625629
Ok(self.vm.sregs()?)
626630
}
627631

632+
/// Set the current base page table physical address
633+
pub(crate) fn set_root_pt(&mut self, addr: u64) -> Result<(), AccessPageTableError> {
634+
let mut sregs = self.vm.sregs()?;
635+
sregs.cr3 = addr;
636+
self.vm.set_sregs(&sregs)?;
637+
self.pending_tlb_flush = true;
638+
Ok(())
639+
}
640+
628641
/// Get the current stack top virtual address
629642
pub(crate) fn get_stack_top(&mut self) -> u64 {
630643
self.rsp_gva
@@ -662,17 +675,25 @@ impl HyperlightVm {
662675
let NextAction::Call(dispatch_func_addr) = self.entrypoint else {
663676
return Err(DispatchGuestCallError::Uninitialized);
664677
};
678+
let mut rflags = 1 << 1; // RFLAGS.1 is RES1
679+
if self.pending_tlb_flush {
680+
rflags |= 1 << 6; // set ZF if we need a tlb flush done before anything else executes
681+
self.pending_tlb_flush = false;
682+
}
665683
// set RIP and RSP, reset others
666684
let regs = CommonRegisters {
667685
rip: dispatch_func_addr,
668686
// We usually keep the top of the stack 16-byte
669-
// aligned. However, the ABI requirement is that the stack
670-
// be aligned _before a call instruction_, which means
671-
// that the stack needs to actually be ≡ 8 mod 16 at the
672-
// first instruction (since, on x64, a call instruction
673-
// automatically pushes a return address).
674-
rsp: self.rsp_gva - 8,
675-
rflags: 1 << 1,
687+
// aligned. Since the usual ABI requirement is that the
688+
// stack be aligned _before a call instruction_, one might
689+
// expect that the stack pointer here needs to actually be
690+
// ≡ 8 mod 16 at the first instruction (since, on x64, a
691+
// call instruction automatically pushes a return
692+
// address). However, the x64 entry stub in
693+
// hyperlight_guest::arch::dispatch handles this itself,
694+
// so we do use the aligned address here.
695+
rsp: self.rsp_gva,
696+
rflags,
676697
..Default::default()
677698
};
678699
self.vm

src/hyperlight_host/tests/integration_test.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -704,13 +704,13 @@ fn log_message() {
704704
// - internal_dispatch_function does a log::trace! in debug mode
705705
// - logs from trace level tracing spans created as logs because of the tracing `log` feature
706706
// - 4 from evolve call (hyperlight_main + halt)
707-
// - 16 from guest call (internal_dispatch_function + others)
707+
// - 12 from guest call (internal_dispatch_function + others)
708708
// and are multiplied because we make 6 calls to `log_test_messages`
709709
// NOTE: These numbers need to be updated if log messages or spans are added/removed
710710
let num_fixed_trace_log = if cfg!(debug_assertions) {
711-
(1 + 20) * 6
711+
(1 + 16) * 6
712712
} else {
713-
20 * 6
713+
16 * 6
714714
};
715715

716716
let tests = vec![

0 commit comments

Comments
 (0)