diff --git a/Cargo.lock b/Cargo.lock index 7852fb2bb70..9fa0f705d84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1566,7 +1566,7 @@ dependencies = [ "event-manager", "gdbstub", "gdbstub_arch", - "getrandom 0.3.1", + "getrandom", "itertools 0.14.0", "kvm-bindings", "kvm-ioctls", diff --git a/src/cpu-template-helper/src/utils/mod.rs b/src/cpu-template-helper/src/utils/mod.rs index f23871df1a9..f885f81589c 100644 --- a/src/cpu-template-helper/src/utils/mod.rs +++ b/src/cpu-template-helper/src/utils/mod.rs @@ -13,7 +13,7 @@ use vmm::builder::{StartMicrovmError, build_microvm_for_boot}; use vmm::cpu_config::templates::{CustomCpuTemplate, Numeric}; use vmm::resources::VmResources; use vmm::seccomp::get_empty_filters; -use vmm::vmm_config::instance_info::{InstanceInfo, VmState}; +use vmm::vmm_config::instance_info::{Capabilities, InstanceInfo, VmState}; use vmm::{EventManager, HTTP_MAX_PAYLOAD_SIZE, Vmm}; use vmm_sys_util::tempfile::TempFile; @@ -125,6 +125,7 @@ pub fn build_microvm_from_config( state: VmState::NotStarted, vmm_version: CPU_TEMPLATE_HELPER_VERSION.to_string(), app_name: "cpu-template-helper".to_string(), + capabilities: Capabilities::default(), }; let mut vm_resources = VmResources::from_json(&config, &instance_info, HTTP_MAX_PAYLOAD_SIZE, None) diff --git a/src/firecracker/src/main.rs b/src/firecracker/src/main.rs index 739214999a4..eb8fd798499 100644 --- a/src/firecracker/src/main.rs +++ b/src/firecracker/src/main.rs @@ -29,7 +29,7 @@ use vmm::resources::VmResources; use vmm::seccomp::BpfThreadMap; use vmm::signal_handler::register_signal_handlers; use vmm::snapshot::{SnapshotError, get_format_version}; -use vmm::vmm_config::instance_info::{InstanceInfo, VmState}; +use vmm::vmm_config::instance_info::{Capabilities, InstanceInfo, VmState}; use vmm::vmm_config::metrics::{MetricsConfig, MetricsConfigError, init_metrics}; use vmm::{EventManager, FcExitCode, HTTP_MAX_PAYLOAD_SIZE}; use vmm_sys_util::terminal::Terminal; @@ -347,6 +347,9 @@ fn main_exec() -> 
Result<(), MainError> { state: VmState::NotStarted, vmm_version: FIRECRACKER_VERSION.to_string(), app_name: "Firecracker".to_string(), + capabilities: Capabilities { + snapshot_cancel: true, + }, }; if let Some(metrics_path) = arguments.single_value("metrics-path") { diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index ba2608070c6..a295d130bb5 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -144,6 +144,8 @@ pub enum CreateSnapshotError { SerializeMicrovmState(#[from] crate::snapshot::SnapshotError), /// Cannot perform {0} on the snapshot backing file: {1} SnapshotBackingFile(&'static str, io::Error), + /// Snapshot was cancelled by signal + Cancelled, } /// Snapshot version @@ -161,8 +163,12 @@ pub fn create_snapshot( snapshot_state_to_file(&microvm_state, &params.snapshot_path)?; - vmm.vm - .snapshot_memory_to_file(&params.mem_file_path, params.snapshot_type)?; + vmm.vm.snapshot_memory_to_file( + &params.mem_file_path, + params.snapshot_type, + params.chunk_size_bytes, + params.direct_io, + )?; // We need to mark queues as dirty again for all activated devices.
The reason we // do it here is that we don't mark pages as dirty during runtime diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index fdd0862a9d4..071cfc604e6 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use std::fmt::{self, Debug}; +use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex, MutexGuard}; use serde_json::Value; @@ -21,6 +22,7 @@ use crate::mmds::data_store::{self, Mmds}; use crate::persist::{CreateSnapshotError, RestoreFromSnapshotError, VmInfo}; use crate::resources::VmmConfig; use crate::seccomp::BpfThreadMap; +use crate::signal_handler::SNAPSHOT_CANCELLED; use crate::vmm_config::balloon::{ BalloonConfigError, BalloonDeviceConfig, BalloonStats, BalloonUpdateConfig, BalloonUpdateStatsConfig, @@ -798,6 +800,9 @@ impl RuntimeApiController { /// Pauses the microVM by pausing the vCPUs. pub fn pause(&mut self) -> Result { + // Reset snapshot cancellation flag in anticipation of a snapshot being taken + SNAPSHOT_CANCELLED.store(false, Ordering::SeqCst); + let pause_start_us = get_time_us(ClockType::Monotonic); self.vmm.lock().expect("Poisoned lock").pause_vm()?; diff --git a/src/vmm/src/signal_handler.rs b/src/vmm/src/signal_handler.rs index 0fdfa0d8b97..75d2ae7934c 100644 --- a/src/vmm/src/signal_handler.rs +++ b/src/vmm/src/signal_handler.rs @@ -1,11 +1,17 @@ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 +use std::sync::atomic::{AtomicBool, Ordering}; + use libc::{ - SIGBUS, SIGHUP, SIGILL, SIGPIPE, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ, c_int, c_void, siginfo_t, + SIGBUS, SIGHUP, SIGILL, SIGPIPE, SIGSEGV, SIGSYS, SIGUSR1, SIGXCPU, SIGXFSZ, c_int, c_void, + siginfo_t, }; use log::error; +/// Flag set by SIGUSR1 handler to cancel in-progress snapshot +pub static SNAPSHOT_CANCELLED: AtomicBool = AtomicBool::new(false); + use crate::FcExitCode; use crate::logger::{IncMetric, METRICS, StoreMetric}; use crate::utils::signal::register_signal_handler; @@ -150,10 +156,15 @@ extern "C" fn sigpipe_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_ error!("Received signal {}, code {}.", si_signo, si_code); } +#[inline(always)] +extern "C" fn sigusr1_handler(_num: c_int, _info: *mut siginfo_t, _unused: *mut c_void) { + SNAPSHOT_CANCELLED.store(true, Ordering::SeqCst); +} + /// Registers all the required signal handlers. /// /// Custom handlers are installed for: `SIGBUS`, `SIGSEGV`, `SIGSYS` -/// `SIGXFSZ` `SIGXCPU` `SIGPIPE` `SIGHUP` and `SIGILL`. +/// `SIGXFSZ` `SIGXCPU` `SIGPIPE` `SIGHUP`, `SIGILL`, and `SIGUSR1`. 
pub fn register_signal_handlers() -> vmm_sys_util::errno::Result<()> { // Call to unsafe register_signal_handler which is considered unsafe because it will // register a signal handler which will be called in the current thread and will interrupt @@ -167,5 +178,6 @@ pub fn register_signal_handlers() -> vmm_sys_util::errno::Result<()> { register_signal_handler(SIGPIPE, sigpipe_handler)?; register_signal_handler(SIGHUP, sighup_handler)?; register_signal_handler(SIGILL, sigill_handler)?; + register_signal_handler(SIGUSR1, sigusr1_handler)?; Ok(()) } diff --git a/src/vmm/src/vmm_config/instance_info.rs b/src/vmm/src/vmm_config/instance_info.rs index cd5b44f30ba..f84bb8c36dc 100644 --- a/src/vmm/src/vmm_config/instance_info.rs +++ b/src/vmm/src/vmm_config/instance_info.rs @@ -4,6 +4,13 @@ use std::fmt::{self, Display, Formatter}; use serde::{Serialize, ser}; +/// Capabilities that this Firecracker instance supports. +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)] +pub struct Capabilities { + /// Whether SIGUSR1 can cancel an in-progress snapshot + pub snapshot_cancel: bool, +} + /// Enumerates microVM runtime states. #[derive(Clone, Debug, Default, PartialEq, Eq)] pub enum VmState { @@ -46,4 +53,6 @@ pub struct InstanceInfo { pub vmm_version: String, /// The name of the application that runs the microVM. pub app_name: String, + /// Capabilities supported by this Firecracker instance. + pub capabilities: Capabilities, } diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs index 13a87ba30c4..757a0fd81e2 100644 --- a/src/vmm/src/vmm_config/snapshot.rs +++ b/src/vmm/src/vmm_config/snapshot.rs @@ -45,6 +45,13 @@ pub struct CreateSnapshotParams { pub snapshot_path: PathBuf, /// Path to the file that will contain the guest memory. pub mem_file_path: PathBuf, + /// Chunk size in bytes for full snapshot writes. Defaults to 4MB. + /// Smaller chunks allow more frequent cancellation checks. 
+ #[serde(default)] + pub chunk_size_bytes: Option<usize>, + /// Use O_DIRECT for memory file writes to bypass page cache. + #[serde(default)] + pub direct_io: bool, } /// Allows for changing the mapping between tap devices and host devices diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 9b62152c4b8..57c879e81f5 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -8,6 +8,7 @@ use std::fs::File; use std::io::SeekFrom; use std::ops::Deref; +use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex}; use bitvec::vec::BitVec; @@ -71,6 +72,8 @@ pub enum MemoryError { SeekError(std::io::Error), /// Volatile memory error: {0} VolatileMemoryError(vm_memory::VolatileMemoryError), + /// Snapshot operation was cancelled + Cancelled, } impl From for MemoryError { @@ -603,7 +606,12 @@ where fn mark_dirty(&self, addr: GuestAddress, len: usize); /// Dumps all contents of GuestMemoryMmap to a writer. - fn dump(&self, writer: &mut T) -> Result<(), MemoryError>; + fn dump( + &self, + writer: &mut T, + cancel_flag: &AtomicBool, + chunk_size: Option<usize>, + ) -> Result<(), MemoryError>; /// Dumps all pages of GuestMemoryMmap present in `dirty_bitmap` to a writer. fn dump_dirty( @@ -688,7 +696,15 @@ impl GuestMemoryExtension for GuestMemoryMmap { } /// Dumps all contents of GuestMemoryMmap to a writer.
- fn dump(&self, writer: &mut T) -> Result<(), MemoryError> { + fn dump( + &self, + writer: &mut T, + cancel_flag: &AtomicBool, + chunk_size: Option<usize>, + ) -> Result<(), MemoryError> { + // Write in chunks to allow for cancellation checks (default 4MB) + let chunk_size = chunk_size.unwrap_or(4 * 1024 * 1024); + self.iter() .flat_map(|region| region.slots()) .try_for_each(|(mem_slot, plugged)| { @@ -696,11 +712,20 @@ impl GuestMemoryExtension for GuestMemoryMmap { let ilen = i64::try_from(mem_slot.slice.len()).unwrap(); writer.seek(SeekFrom::Current(ilen)).unwrap(); } else { - writer.write_all_volatile(&mem_slot.slice)?; + let total_len = mem_slot.slice.len(); + (0..total_len).step_by(chunk_size).try_for_each(|offset| { + if cancel_flag.load(Ordering::Relaxed) { + return Err(MemoryError::Cancelled); + } + let chunk_len = std::cmp::min(chunk_size, total_len - offset); + let chunk = mem_slot.slice.subslice(offset, chunk_len)?; + writer + .write_all_volatile(&chunk) + .map_err(|e| MemoryError::WriteMemory(e.into())) + })?; } Ok(()) }) - .map_err(MemoryError::WriteMemory) } /// Dumps all pages of GuestMemoryMmap present in `dirty_bitmap` to a writer. @@ -1129,7 +1154,8 @@ mod tests { // dump the full memory.
let mut memory_file = TempFile::new().unwrap().into_file(); - guest_memory.dump(&mut memory_file).unwrap(); + let cancel_flag = AtomicBool::new(false); + guest_memory.dump(&mut memory_file, &cancel_flag, None).unwrap(); let restored_guest_memory = into_region_ext(snapshot_file(memory_file, memory_state.regions(), false).unwrap()); diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 83e899eff1d..43db9c3d28e 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -8,6 +8,7 @@ use std::collections::HashMap; use std::fs::OpenOptions; use std::io::Write; +use std::os::unix::fs::OpenOptionsExt; use std::path::Path; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Mutex, MutexGuard}; @@ -26,6 +27,7 @@ use vmm_sys_util::eventfd::EventFd; pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState}; use crate::arch::{GSI_MSI_END, host_page_size}; +use crate::signal_handler::SNAPSHOT_CANCELLED; use crate::logger::info; use crate::pci::{DeviceRelocation, DeviceRelocationError, PciDevice}; use crate::persist::CreateSnapshotError; @@ -335,16 +337,20 @@ impl Vm { &self, mem_file_path: &Path, snapshot_type: SnapshotType, + chunk_size: Option<usize>, + direct_io: bool, ) -> Result<(), CreateSnapshotError> { use self::CreateSnapshotError::*; // Need to check this here, as we create the file in the line below let file_existed = mem_file_path.exists(); - let mut file = OpenOptions::new() - .write(true) - .create(true) - .truncate(false) + let mut opts = OpenOptions::new(); + opts.write(true).create(true).truncate(false); + if direct_io { + opts.custom_flags(libc::O_DIRECT); + } + let mut file = opts .open(mem_file_path) .map_err(|err| MemoryBackingFile("open", err))?; @@ -381,7 +387,8 @@ impl Vm { self.guest_memory().dump_dirty(&mut file, &dirty_bitmap)?; } SnapshotType::Full => { - self.guest_memory().dump(&mut file)?; + self.guest_memory() + .dump(&mut file, &SNAPSHOT_CANCELLED, chunk_size)?; self.reset_dirty_bitmap();
self.guest_memory().reset_dirty(); }