Skip to content

Commit aa53b46

Browse files
committed
vmclock: add snapshot safety features
Add support for `vm_generation_counter` field in VMClock ABI. This field is similar to `disruption_marker` but it's only updated on snapshot loading events (not in live migration). It is meant to provide the guest with snapshot safety notifications. Moreover, add support for the notification capability. This capability require us to send an ACPI notification every time we change the seq_count field to a new even value. This essentially means that we need to send a notification upon resuming from a snapshot just before resuming vCPUs. Signed-off-by: Babis Chalios <bchalios@amazon.es>
1 parent 219a14c commit aa53b46

File tree

4 files changed

+115
-34
lines changed

4 files changed

+115
-34
lines changed

src/vmm/src/device_manager/acpi.rs

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
// SPDX-License-Identifier: Apache-2.0
33

4+
#[cfg(target_arch = "x86_64")]
45
use acpi_tables::{Aml, aml};
56
use vm_memory::GuestMemoryError;
67

@@ -45,11 +46,13 @@ impl ACPIDeviceManager {
4546

4647
#[cfg(target_arch = "x86_64")]
4748
pub fn attach_vmclock(&self, vm: &Vm) -> Result<(), ACPIDeviceError> {
49+
vm.register_irq(&self.vmclock.interrupt_evt, self.vmclock.gsi)?;
4850
self.vmclock.activate(vm.guest_memory())?;
4951
Ok(())
5052
}
5153
}
5254

55+
#[cfg(target_arch = "x86_64")]
5356
impl Aml for ACPIDeviceManager {
5457
fn append_aml_bytes(&self, v: &mut Vec<u8>) -> Result<(), aml::AmlError> {
5558
// AML for [`VmGenId`] device.
@@ -65,30 +68,41 @@ impl Aml for ACPIDeviceManager {
6568
&aml::Name::new("_HID".try_into()?, &"ACPI0013")?,
6669
&aml::Name::new(
6770
"_CRS".try_into()?,
68-
&aml::ResourceTemplate::new(vec![&aml::Interrupt::new(
69-
true,
70-
true,
71-
false,
72-
false,
73-
self.vmgenid.gsi,
74-
)]),
71+
&aml::ResourceTemplate::new(vec![
72+
&aml::Interrupt::new(true, true, false, false, self.vmgenid.gsi),
73+
&aml::Interrupt::new(true, true, false, false, self.vmclock.gsi),
74+
]),
7575
)?,
7676
&aml::Method::new(
7777
"_EVT".try_into()?,
7878
1,
7979
true,
80-
vec![&aml::If::new(
81-
// We know that the maximum IRQ number fits in a u8. We have up to
82-
// 32 IRQs in x86 and up to 128 in
83-
// ARM (look into
84-
// `vmm::crate::arch::layout::GSI_LEGACY_END`)
85-
#[allow(clippy::cast_possible_truncation)]
86-
&aml::Equal::new(&aml::Arg(0), &(self.vmgenid.gsi as u8)),
87-
vec![&aml::Notify::new(
88-
&aml::Path::new("\\_SB_.VGEN")?,
89-
&0x80usize,
90-
)],
91-
)],
80+
vec![
81+
&aml::If::new(
82+
// We know that the maximum IRQ number fits in a u8. We have up to
83+
// 32 IRQs in x86 and up to 128 in
84+
// ARM (look into
85+
// `vmm::crate::arch::layout::GSI_LEGACY_END`)
86+
#[allow(clippy::cast_possible_truncation)]
87+
&aml::Equal::new(&aml::Arg(0), &(self.vmgenid.gsi as u8)),
88+
vec![&aml::Notify::new(
89+
&aml::Path::new("\\_SB_.VGEN")?,
90+
&0x80usize,
91+
)],
92+
),
93+
&aml::If::new(
94+
// We know that the maximum IRQ number fits in a u8. We have up to
95+
// 32 IRQs in x86 and up to 128 in
96+
// ARM (look into
97+
// `vmm::crate::arch::layout::GSI_LEGACY_END`)
98+
#[allow(clippy::cast_possible_truncation)]
99+
&aml::Equal::new(&aml::Arg(0), &(self.vmclock.gsi as u8)),
100+
vec![&aml::Notify::new(
101+
&aml::Path::new("\\_SB_.VCLK")?,
102+
&0x80usize,
103+
)],
104+
),
105+
],
92106
),
93107
],
94108
)

src/vmm/src/device_manager/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,10 @@ impl<'a> Persist<'a> for DeviceManager {
465465
// Restore ACPI devices
466466
let mut acpi_devices = ACPIDeviceManager::restore(constructor_args.vm, &state.acpi_state)?;
467467
acpi_devices.vmgenid.notify_guest()?;
468+
#[cfg(target_arch = "x86_64")]
469+
acpi_devices
470+
.vmclock
471+
.post_load_update(constructor_args.vm.guest_memory());
468472

469473
// Restore PCI devices
470474
let pci_ctor_args = PciDevicesConstructorArgs {

src/vmm/src/device_manager/persist.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,15 @@ impl<'a> Persist<'a> for ACPIDeviceManager {
192192
vmgenid: VmGenId::restore((), &state.vmgenid).unwrap(),
193193
// Safe to unwrap() here, this will never return an error.
194194
#[cfg(target_arch = "x86_64")]
195-
vmclock: VmClock::restore(vm.guest_memory(), &state.vmclock).unwrap(),
195+
vmclock: VmClock::restore((), &state.vmclock).unwrap(),
196196
};
197197

198+
#[cfg(target_arch = "x86_64")]
199+
vm.register_irq(
200+
&acpi_devices.vmclock.interrupt_evt,
201+
acpi_devices.vmclock.gsi,
202+
)?;
203+
198204
acpi_devices.attach_vmgenid(vm)?;
199205
Ok(acpi_devices)
200206
}

src/vmm/src/devices/acpi/vmclock.rs

Lines changed: 71 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,19 @@ use std::mem::offset_of;
66
use std::sync::atomic::{Ordering, fence};
77

88
use acpi_tables::{Aml, aml};
9-
use log::error;
9+
use log::{debug, error};
1010
use serde::{Deserialize, Serialize};
1111
use vm_allocator::AllocPolicy;
1212
use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryError};
13+
use vm_superio::Trigger;
14+
use vmm_sys_util::eventfd::EventFd;
1315

16+
use crate::Vm;
1417
use crate::devices::acpi::generated::vmclock_abi::{
15-
VMCLOCK_COUNTER_INVALID, VMCLOCK_MAGIC, VMCLOCK_STATUS_UNKNOWN, vmclock_abi,
18+
VMCLOCK_COUNTER_INVALID, VMCLOCK_FLAG_NOTIFICATION_PRESENT,
19+
VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT, VMCLOCK_MAGIC, VMCLOCK_STATUS_UNKNOWN, vmclock_abi,
1620
};
21+
use crate::devices::legacy::EventFdTrigger;
1722
use crate::snapshot::Persist;
1823
use crate::vstate::memory::GuestMemoryMmap;
1924
use crate::vstate::resources::ResourceAllocator;
@@ -47,6 +52,10 @@ macro_rules! write_vmclock_field {
4752
pub struct VmClock {
4853
/// Guest address in which we will write the VMclock struct
4954
pub guest_address: GuestAddress,
55+
/// Interrupt line for notifying the device about changes
56+
pub interrupt_evt: EventFdTrigger,
57+
/// GSI number allocated for the device.
58+
pub gsi: u32,
5059
/// The [`VmClock`] state we are exposing to the guest
5160
inner: vmclock_abi,
5261
}
@@ -62,17 +71,33 @@ impl VmClock {
6271
)
6372
.expect("vmclock: could not allocate guest memory for device");
6473

74+
let gsi = resource_allocator
75+
.allocate_gsi_legacy(1)
76+
.inspect_err(|err| error!("vmclock: Could not allocate GSI for VMClock: {err}"))
77+
.unwrap()[0];
78+
79+
let interrupt_evt = EventFdTrigger::new(
80+
EventFd::new(libc::EFD_NONBLOCK)
81+
.inspect_err(|err| {
82+
error!("vmclock: Could not create EventFd for VMClock device: {err}")
83+
})
84+
.unwrap(),
85+
);
86+
6587
let mut inner = vmclock_abi {
6688
magic: VMCLOCK_MAGIC,
6789
size: VMCLOCK_SIZE,
6890
version: 1,
6991
clock_status: VMCLOCK_STATUS_UNKNOWN,
7092
counter_id: VMCLOCK_COUNTER_INVALID,
93+
flags: VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT | VMCLOCK_FLAG_NOTIFICATION_PRESENT,
7194
..Default::default()
7295
};
7396

7497
VmClock {
7598
guest_address: GuestAddress(addr),
99+
interrupt_evt,
100+
gsi,
76101
inner,
77102
}
78103
}
@@ -98,11 +123,23 @@ impl VmClock {
98123
self.inner.disruption_marker.wrapping_add(1)
99124
);
100125

101-
// This fence ensures guest sees the `disruption_marker` update. It is matched to a
102-
// read barrier in the guest.
126+
write_vmclock_field!(
127+
self,
128+
mem,
129+
vm_generation_counter,
130+
self.inner.vm_generation_counter.wrapping_add(1)
131+
);
132+
133+
// This fence ensures guest sees the `disruption_marker` and `vm_generation_counter`
134+
// updates. It is matched to a read barrier in the guest.
103135
fence(Ordering::Release);
104136

105137
write_vmclock_field!(self, mem, seq_count, self.inner.seq_count.wrapping_add(1));
138+
self.interrupt_evt
139+
.trigger()
140+
.inspect_err(|err| error!("vmclock: could not send guest notification: {err}"))
141+
.unwrap();
142+
debug!("vmclock: notifying guest about VMClock updates");
106143
}
107144
}
108145

@@ -113,31 +150,39 @@ impl VmClock {
113150
pub struct VmClockState {
114151
/// Guest address in which we write the [`VmClock`] info
115152
pub guest_address: u64,
153+
/// GSI used for notifying the guest about device changes
154+
pub gsi: u32,
116155
/// Data we expose to the guest
117156
pub inner: vmclock_abi,
118157
}
119158

120159
impl<'a> Persist<'a> for VmClock {
121160
type State = VmClockState;
122-
type ConstructorArgs = &'a GuestMemoryMmap;
161+
type ConstructorArgs = ();
123162
type Error = Infallible;
124163

125164
fn save(&self) -> Self::State {
126165
VmClockState {
127166
guest_address: self.guest_address.0,
167+
gsi: self.gsi,
128168
inner: self.inner,
129169
}
130170
}
131171

132-
fn restore(
133-
constructor_args: Self::ConstructorArgs,
134-
state: &Self::State,
135-
) -> Result<Self, Self::Error> {
172+
fn restore(vm: Self::ConstructorArgs, state: &Self::State) -> Result<Self, Self::Error> {
173+
let interrupt_evt = EventFdTrigger::new(
174+
EventFd::new(libc::EFD_NONBLOCK)
175+
.inspect_err(|err| {
176+
error!("vmclock: Could not create EventFd for VMClock device: {err}")
177+
})
178+
.unwrap(),
179+
);
136180
let mut vmclock = VmClock {
137181
guest_address: GuestAddress(state.guest_address),
182+
interrupt_evt,
183+
gsi: state.gsi,
138184
inner: state.inner,
139185
};
140-
vmclock.post_load_update(constructor_args);
141186
Ok(vmclock)
142187
}
143188
}
@@ -174,14 +219,20 @@ impl Aml for VmClock {
174219
#[cfg(test)]
175220
mod tests {
176221
use vm_memory::{Bytes, GuestAddress};
222+
use vmm_sys_util::tempfile::TempFile;
177223

178-
use crate::arch;
224+
use crate::Vm;
225+
#[cfg(target_arch = "x86_64")]
226+
use crate::arch::x86_64::layout;
227+
use crate::arch::{self, Kvm};
179228
use crate::devices::acpi::generated::vmclock_abi::vmclock_abi;
180229
use crate::devices::acpi::vmclock::{VMCLOCK_SIZE, VmClock};
181-
use crate::snapshot::Persist;
230+
use crate::devices::virtio::test_utils::default_mem;
231+
use crate::snapshot::{Persist, Snapshot};
182232
use crate::test_utils::single_region_mem;
183233
use crate::utils::u64_to_usize;
184234
use crate::vstate::resources::ResourceAllocator;
235+
use crate::vstate::vm::tests::setup_vm_with_memory;
185236

186237
// We are allocating memory from the end of the system memory portion
187238
const VMCLOCK_TEST_GUEST_ADDR: GuestAddress =
@@ -211,15 +262,17 @@ mod tests {
211262
#[test]
212263
fn test_device_save_restore() {
213264
let vmclock = default_vmclock();
265+
// We're using memory inside the system memory portion of the guest RAM. So we need a
266+
// memory region that includes it.
214267
let mem = single_region_mem(
215268
u64_to_usize(arch::SYSTEM_MEM_START) + u64_to_usize(arch::SYSTEM_MEM_SIZE),
216269
);
217270

218271
vmclock.activate(&mem).unwrap();
219-
let guest_data: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
220272

221273
let state = vmclock.save();
222-
let vmclock_new = VmClock::restore(&mem, &state).unwrap();
274+
let mut vmclock_new = VmClock::restore((), &state).unwrap();
275+
vmclock_new.post_load_update(&mem);
223276

224277
let guest_data_new: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
225278
assert_ne!(guest_data_new, vmclock.inner);
@@ -228,5 +281,9 @@ mod tests {
228281
vmclock.inner.disruption_marker + 1,
229282
vmclock_new.inner.disruption_marker
230283
);
284+
assert_eq!(
285+
vmclock.inner.vm_generation_counter + 1,
286+
vmclock_new.inner.vm_generation_counter
287+
);
231288
}
232289
}

0 commit comments

Comments
 (0)