From fc294d6c5596e4d79ab00b9145e7f4e5149d8071 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sat, 15 Nov 2025 00:30:51 -0800 Subject: [PATCH 01/18] Expose memory mapping; Add optional memfile dump --- .tool-versions | 1 + src/firecracker/src/api_server/mod.rs | 4 +-- .../src/api_server/request/snapshot.rs | 4 +-- src/firecracker/swagger/firecracker.yaml | 26 +++++++++++++++++++ src/vmm/src/persist.rs | 5 +++- src/vmm/src/rpc_interface.rs | 16 +++++++++--- src/vmm/src/vmm_config/instance_info.rs | 3 +++ src/vmm/src/vmm_config/snapshot.rs | 4 ++- src/vmm/src/vstate/vm.rs | 14 ++++++++++ src/vmm/tests/integration_tests.rs | 2 +- 10 files changed, 68 insertions(+), 11 deletions(-) create mode 100644 .tool-versions diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 00000000000..0aed332b367 --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +rust 1.85.0 diff --git a/src/firecracker/src/api_server/mod.rs b/src/firecracker/src/api_server/mod.rs index 6ac2955af8f..85b6358b871 100644 --- a/src/firecracker/src/api_server/mod.rs +++ b/src/firecracker/src/api_server/mod.rs @@ -274,7 +274,7 @@ mod tests { Box::new(VmmAction::CreateSnapshot(CreateSnapshotParams { snapshot_type: SnapshotType::Diff, snapshot_path: PathBuf::new(), - mem_file_path: PathBuf::new(), + mem_file_path: Some(PathBuf::new()), })), start_time_us, ); @@ -287,7 +287,7 @@ mod tests { Box::new(VmmAction::CreateSnapshot(CreateSnapshotParams { snapshot_type: SnapshotType::Diff, snapshot_path: PathBuf::new(), - mem_file_path: PathBuf::new(), + mem_file_path: Some(PathBuf::new()), })), start_time_us, ); diff --git a/src/firecracker/src/api_server/request/snapshot.rs b/src/firecracker/src/api_server/request/snapshot.rs index 8878c224b5c..448fa95ad48 100644 --- a/src/firecracker/src/api_server/request/snapshot.rs +++ b/src/firecracker/src/api_server/request/snapshot.rs @@ -139,7 +139,7 @@ mod tests { let expected_config = CreateSnapshotParams { snapshot_type: SnapshotType::Diff, snapshot_path: 
PathBuf::from("foo"), - mem_file_path: PathBuf::from("bar"), + mem_file_path: Some(PathBuf::from("bar")), }; assert_eq!( vmm_action_from_request(parse_put_snapshot(&Body::new(body), Some("create")).unwrap()), @@ -153,7 +153,7 @@ mod tests { let expected_config = CreateSnapshotParams { snapshot_type: SnapshotType::Full, snapshot_path: PathBuf::from("foo"), - mem_file_path: PathBuf::from("bar"), + mem_file_path: Some(PathBuf::from("bar")), }; assert_eq!( vmm_action_from_request(parse_put_snapshot(&Body::new(body), Some("create")).unwrap()), diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index 1f2edb714b8..8dffeee9b33 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -990,6 +990,32 @@ definitions: vmm_version: description: MicroVM hypervisor build version. type: string + memory_regions: + type: array + description: The regions of the guest memory. + items: + $ref: "#/definitions/GuestMemoryRegion" + + GuestMemoryRegionMapping: + type: object + description: Describes the region of guest memory that can be used for creating the memfile. + required: + - base_host_virt_addr + - size + - offset + - page_size + properties: + base_host_virt_addr: + type: integer + size: + description: The size of the region in bytes. + type: integer + offset: + description: The offset of the region in bytes. + type: integer + page_size: + description: The page size of the region in pages. 
+ type: integer Logger: type: object diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 16d7ed72537..74c23bb4f75 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -171,7 +171,10 @@ pub fn create_snapshot( snapshot_state_to_file(&microvm_state, &params.snapshot_path)?; - snapshot_memory_to_file(vmm, &params.mem_file_path, params.snapshot_type)?; + // Dump memory to file only if mem_file_path is specified + if let Some(ref mem_file_path) = params.mem_file_path { + snapshot_memory_to_file(vmm, mem_file_path, params.snapshot_type)?; + } Ok(()) } diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 566228fd53a..54dcb983fbe 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -646,9 +646,17 @@ impl RuntimeApiController { GetVmMachineConfig => Ok(VmmData::MachineConfiguration(MachineConfig::from( &self.vm_resources.vm_config, ))), - GetVmInstanceInfo => Ok(VmmData::InstanceInformation( - self.vmm.lock().expect("Poisoned lock").instance_info(), - )), + GetVmInstanceInfo => { + let locked_vmm = self.vmm.lock().expect("Poisoned lock"); + + let mut instance_info = locked_vmm.instance_info(); + + instance_info.memory_regions = locked_vmm + .vm + .guest_memory_mappings(&VmInfo::from(&self.vm_resources)); + + Ok(VmmData::InstanceInformation(instance_info)) + } GetVmmVersion => Ok(VmmData::VmmVersion( self.vmm.lock().expect("Poisoned lock").version(), )), @@ -1150,7 +1158,7 @@ mod tests { CreateSnapshotParams { snapshot_type: SnapshotType::Full, snapshot_path: PathBuf::new(), - mem_file_path: PathBuf::new(), + mem_file_path: Some(PathBuf::new()), }, ))); #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/vmm_config/instance_info.rs b/src/vmm/src/vmm_config/instance_info.rs index 67fd335deaa..52003dd3c89 100644 --- a/src/vmm/src/vmm_config/instance_info.rs +++ b/src/vmm/src/vmm_config/instance_info.rs @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use std::fmt::{self, Display, Formatter}; +use
crate::vstate::vm::GuestMemoryRegionMapping; use serde::{ser, Serialize}; /// Enumerates microVM runtime states. @@ -46,4 +47,6 @@ pub struct InstanceInfo { pub vmm_version: String, /// The name of the application that runs the microVM. pub app_name: String, + /// The regions of the guest memory. + pub memory_regions: Vec<GuestMemoryRegionMapping>, } diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs index e1850b74939..6ea0ee4a92a 100644 --- a/src/vmm/src/vmm_config/snapshot.rs +++ b/src/vmm/src/vmm_config/snapshot.rs @@ -44,7 +44,9 @@ pub struct CreateSnapshotParams { /// Path to the file that will contain the microVM state. pub snapshot_path: PathBuf, /// Path to the file that will contain the guest memory. - pub mem_file_path: PathBuf, + /// If not specified, the memory is not dumped to a file. + #[serde(skip_serializing_if = "Option::is_none")] + pub mem_file_path: Option<PathBuf>, } /// Stores the configuration that will be used for loading a snapshot. diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 0f72abcf68f..fadb6026883 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -27,6 +27,20 @@ use crate::cpu_config::templates::KvmCapability; use crate::utils::u64_to_usize; use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; +/// Describes the region of guest memory that can be used for creating the memfile. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct GuestMemoryRegionMapping { + /// Base host virtual address where the guest memory contents for this region + /// should be copied/populated. + pub base_host_virt_addr: u64, + /// Region size. + pub size: usize, + /// Offset in the backend file/buffer where the region contents are. + pub offset: u64, + /// The configured page size for this memory region. + pub page_size: usize, +} + /// Errors associated with the wrappers over KVM ioctls.
/// Needs `rustfmt::skip` to make multiline comments work #[rustfmt::skip] diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 4312c6345db..2d8a7aed580 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -212,7 +212,7 @@ fn verify_create_snapshot(is_diff: bool) -> (TempFile, TempFile) { let snapshot_params = CreateSnapshotParams { snapshot_type, snapshot_path: snapshot_file.as_path().to_path_buf(), - mem_file_path: memory_file.as_path().to_path_buf(), + mem_file_path: Some(memory_file.as_path().to_path_buf()), }; controller From a3d19a14b05915a41cdad3805462849fb1b3aa77 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sat, 15 Nov 2025 01:05:07 -0800 Subject: [PATCH 02/18] Merge --- .tool-versions | 2 +- Cargo.lock | 33 ++++++++---------------- src/cpu-template-helper/src/utils/mod.rs | 1 + src/firecracker/Cargo.toml | 12 +++++++-- src/vmm/Cargo.toml | 19 +++++++++++--- src/vmm/src/lib.rs | 30 +++++++++++++++++++++ src/vmm/src/rpc_interface.rs | 5 ++-- src/vmm/src/vmm_config/instance_info.rs | 5 ++-- src/vmm/src/vstate/vm.rs | 14 ---------- 9 files changed, 73 insertions(+), 48 deletions(-) diff --git a/.tool-versions b/.tool-versions index 0aed332b367..6e0d11c9ac4 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1 +1 @@ -rust 1.85.0 +rust 1.79.0 diff --git a/Cargo.lock b/Cargo.lock index 9ad999e44d0..d515b776e19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,46 +125,41 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-lc-fips-sys" -version = "0.12.13" +version = "0.13.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf12b67bc9c5168f68655aadb2a12081689a58f1d9b1484705e4d1810ed6e4ac" +checksum = "2608e5a7965cc9d58c56234d346c9c89b824c4c8652b6f047b3bd0a777c0644f" dependencies = [ - "bindgen 0.69.4", + "bindgen 0.69.5", "cc", "cmake", "dunce", "fs_extra", - "libc", - "paste", + "regex", ] [[package]] 
name = "aws-lc-rs" -version = "1.10.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd82dba44d209fddb11c190e0a94b78651f95299598e472215667417a03ff1d" +checksum = "93fcc8f365936c834db5514fc45aee5b1202d677e6b40e48468aaaa8183ca8c7" dependencies = [ "aws-lc-fips-sys", "aws-lc-sys", - "mirai-annotations", - "paste", "untrusted", "zeroize", ] [[package]] name = "aws-lc-sys" -version = "0.22.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7a4168111d7eb622a31b214057b8509c0a7e1794f44c546d742330dc793972" +checksum = "61b1d86e7705efe1be1b569bab41d4fa1e14e220b60a160f78de2db687add079" dependencies = [ - "bindgen 0.69.4", + "bindgen 0.69.5", "cc", "cmake", "dunce", "fs_extra", - "libc", - "paste", ] [[package]] @@ -204,9 +199,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.69.4" +version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ "bitflags 2.6.0", "cexpr", @@ -955,12 +950,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" -[[package]] -name = "mirai-annotations" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" - [[package]] name = "nix" version = "0.27.1" diff --git a/src/cpu-template-helper/src/utils/mod.rs b/src/cpu-template-helper/src/utils/mod.rs index bd570840fc5..f457ca0b872 100644 --- a/src/cpu-template-helper/src/utils/mod.rs +++ b/src/cpu-template-helper/src/utils/mod.rs @@ -125,6 +125,7 @@ pub fn build_microvm_from_config( state: VmState::NotStarted, vmm_version: CPU_TEMPLATE_HELPER_VERSION.to_string(), app_name: 
"cpu-template-helper".to_string(), + memory_regions: None, }; let mut vm_resources = VmResources::from_json(&config, &instance_info, HTTP_MAX_PAYLOAD_SIZE, None) diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index adc0cb1ff83..9395fc508ed 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -35,7 +35,10 @@ vmm-sys-util = { version = "0.12.1", features = ["with-serde"] } [dev-dependencies] cargo_toml = "0.20.5" libc = "0.2.161" -regex = { version = "1.11.1", default-features = false, features = ["std", "unicode-perl"] } +regex = { version = "1.11.1", default-features = false, features = [ + "std", + "unicode-perl", +] } # Dev-Dependencies for uffd examples serde = { version = "1.0.214", features = ["derive"] } @@ -48,7 +51,12 @@ serde = { version = "1.0.214" } serde_json = "1.0.132" [features] -tracing = ["log-instrument", "seccompiler/tracing", "utils/tracing", "vmm/tracing"] +tracing = [ + "log-instrument", + "seccompiler/tracing", + "utils/tracing", + "vmm/tracing", +] gdb = ["vmm/gdb"] [lints] diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index c9a032edb95..6b22376c482 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -9,15 +9,18 @@ license = "Apache-2.0" bench = false [dependencies] -acpi_tables = { path = "../acpi-tables" } -aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] } +acpi_tables = { path = "../acpi-tables" } +aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] } arrayvec = { version = "0.7.6", optional = true } aws-lc-rs = { version = "1.10.0", features = ["bindgen"] } base64 = "0.22.1" bincode = "1.2.1" bitflags = "2.6.0" crc64 = "2.0.0" -derive_more = { version = "1.0.0", default-features = false, features = ["from", "display"] } +derive_more = { version = "1.0.0", default-features = false, features = [ + "from", + "display", +] } displaydoc = "0.2.5" event-manager = "0.4.0" gdbstub = { version = "0.7.3", optional = true } @@ -43,7 
+46,10 @@ userfaultfd = "0.8.1" utils = { path = "../utils" } vhost = { version = "0.13.0", features = ["vhost-user-frontend"] } vm-allocator = "0.1.0" -vm-memory = { version = "0.16.0", features = ["backend-mmap", "backend-bitmap"] } +vm-memory = { version = "0.16.0", features = [ + "backend-mmap", + "backend-bitmap", +] } vm-superio = "0.8.0" vmm-sys-util = { version = "0.12.1", features = ["with-serde"] } zerocopy = { version = "0.8.8" } @@ -80,3 +86,8 @@ harness = false [lints] workspace = true + +[patch.crates-io] +aws-lc-sys = "=0.29.0" +aws-lc-rs = "=1.13.1" +aws-lc-fips-sys = "=0.13.7" diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index c80f004e789..6668e968e0a 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -153,6 +153,7 @@ use crate::vstate::memory::{ use crate::vstate::vcpu::VcpuState; pub use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuEvent, VcpuHandle, VcpuResponse}; pub use crate::vstate::vm::Vm; +use serde::{Deserialize, Serialize}; /// Shorthand type for the EventManager flavour used by Firecracker. pub type EventManager = BaseEventManager>>; @@ -191,6 +192,20 @@ pub enum FcExitCode { ArgParsing = 153, } +/// Describes the region of guest memory that can be used for creating the memfile. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct GuestMemoryRegionMapping { + /// Base host virtual address where the guest memory contents for this region + /// should be copied/populated. + pub base_host_virt_addr: u64, + /// Region size. + pub size: usize, + /// Offset in the backend file/buffer where the region contents are. + pub offset: u64, + /// The configured page size for this memory region. + pub page_size: usize, +} + /// Timeout used in recv_timeout, when waiting for a vcpu response on /// Pause/Resume/Save/Restore. A high enough limit that should not be reached during normal usage, /// used to detect a potential vcpu deadlock. 
@@ -451,6 +466,21 @@ impl Vmm { &self.guest_memory } + pub fn guest_memory_mappings(&self, vm_info: &VmInfo) -> Vec<GuestMemoryRegionMapping> { + let mut offset = 0; + let mut mappings = Vec::new(); + for mem_region in self.guest_memory().iter() { + mappings.push(GuestMemoryRegionMapping { + base_host_virt_addr: mem_region.as_ptr() as u64, + size: mem_region.size(), + offset, + page_size: vm_info.huge_pages.page_size_kib(), + }); + offset += mem_region.size() as u64; + } + mappings + } + /// Sets RDA bit in serial console pub fn emulate_serial_init(&self) -> Result<(), EmulateSerialInitError> { // When restoring from a previously saved state, there is no serial diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 54dcb983fbe..7c729af32c6 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -651,9 +651,8 @@ impl RuntimeApiController { let mut instance_info = locked_vmm.instance_info(); - instance_info.memory_regions = locked_vmm - .vm - .guest_memory_mappings(&VmInfo::from(&self.vm_resources)); + instance_info.memory_regions = + locked_vmm.guest_memory_mappings(&VmInfo::from(&self.vm_resources)); Ok(VmmData::InstanceInformation(instance_info)) } diff --git a/src/vmm/src/vmm_config/instance_info.rs b/src/vmm/src/vmm_config/instance_info.rs index 52003dd3c89..3b281ca1cbe 100644 --- a/src/vmm/src/vmm_config/instance_info.rs +++ b/src/vmm/src/vmm_config/instance_info.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use std::fmt::{self, Display, Formatter}; -use crate::vstate::vm::GuestMemoryRegionMapping; +use crate::GuestMemoryRegionMapping; use serde::{ser, Serialize}; /// Enumerates microVM runtime states. @@ -48,5 +48,6 @@ pub struct InstanceInfo { /// The name of the application that runs the microVM. pub app_name: String, /// The regions of the guest memory.
- pub memory_regions: Vec<GuestMemoryRegionMapping>, + #[serde(skip_serializing_if = "Option::is_none")] + pub memory_regions: Option<Vec<GuestMemoryRegionMapping>>, } diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index fadb6026883..0f72abcf68f 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -27,20 +27,6 @@ use crate::cpu_config::templates::KvmCapability; use crate::utils::u64_to_usize; use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; -/// Describes the region of guest memory that can be used for creating the memfile. -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] -pub struct GuestMemoryRegionMapping { - /// Base host virtual address where the guest memory contents for this region - /// should be copied/populated. - pub base_host_virt_addr: u64, - /// Region size. - pub size: usize, - /// Offset in the backend file/buffer where the region contents are. - pub offset: u64, - /// The configured page size for this memory region. - pub page_size: usize, -} - /// Errors associated with the wrappers over KVM ioctls.
/// Needs `rustfmt::skip` to make multiline comments work #[rustfmt::skip] From b63d1b85b7eb9c3aaa2142151f77a85df08996aa Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sat, 15 Nov 2025 01:15:20 -0800 Subject: [PATCH 03/18] Fix compile errors --- src/firecracker/src/main.rs | 1 + src/vmm/src/lib.rs | 1 + src/vmm/src/rpc_interface.rs | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/firecracker/src/main.rs b/src/firecracker/src/main.rs index 8fb5392afcf..1f0ec961ae8 100644 --- a/src/firecracker/src/main.rs +++ b/src/firecracker/src/main.rs @@ -337,6 +337,7 @@ fn main_exec() -> Result<(), MainError> { state: VmState::NotStarted, vmm_version: FIRECRACKER_VERSION.to_string(), app_name: "Firecracker".to_string(), + memory_regions: None, }; if let Some(metrics_path) = arguments.single_value("metrics-path") { diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 6668e968e0a..3054d9ae30e 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -466,6 +466,7 @@ impl Vmm { &self.guest_memory } + /// Returns the memory mappings for the guest memory. 
pub fn guest_memory_mappings(&self, vm_info: &VmInfo) -> Vec<GuestMemoryRegionMapping> { let mut offset = 0; let mut mappings = Vec::new(); diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 7c729af32c6..5896d33906f 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -652,7 +652,7 @@ impl RuntimeApiController { let mut instance_info = locked_vmm.instance_info(); instance_info.memory_regions = - locked_vmm.guest_memory_mappings(&VmInfo::from(&self.vm_resources)); + Some(locked_vmm.guest_memory_mappings(&VmInfo::from(&self.vm_resources))); Ok(VmmData::InstanceInformation(instance_info)) } From 8bf13f2ee26a7a5c66f586721d9a5fe73e12f26e Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Wed, 3 Dec 2025 14:26:13 -0800 Subject: [PATCH 04/18] Remove required field from spec --- src/firecracker/swagger/firecracker.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index 8dffeee9b33..1400f807e57 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -1219,7 +1219,6 @@ definitions: type: object required: - mem_file_path - - snapshot_path properties: mem_file_path: type: string From 70c0afac87678fcab2c124eb43a31726b81a2e1b Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Fri, 5 Dec 2025 14:36:38 -0800 Subject: [PATCH 05/18] Fix parameter --- src/firecracker/swagger/firecracker.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index 1400f807e57..91b4249abdc 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -1218,7 +1218,7 @@ definitions: SnapshotCreateParams: type: object required: - - mem_file_path + - snapshot_path properties: mem_file_path: type: string From 8155dcdb0c1b837c91913832181d0a9fba4dc794 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Fri, 5
Dec 2025 14:40:58 -0800 Subject: [PATCH 06/18] Fix type --- src/firecracker/swagger/firecracker.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index 91b4249abdc..83790da7ba4 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -994,7 +994,7 @@ definitions: type: array description: The regions of the guest memory. items: - $ref: "#/definitions/GuestMemoryRegion" + $ref: "#/definitions/GuestMemoryRegionMapping" GuestMemoryRegionMapping: type: object @@ -1014,7 +1014,7 @@ definitions: description: The offset of the region in bytes. type: integer page_size: - description: The page size of the region in pages. + description: The page size in bytes. type: integer Logger: From d3dca801b2f041e923a38db9af5b133b177fc078 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Fri, 5 Dec 2025 17:00:16 -0800 Subject: [PATCH 07/18] Add upload script --- .gitignore | 1 + .tool-versions | 1 + Makefile | 12 ++++++++++++ scripts/build.sh | 17 +++++++++++++++++ scripts/upload.sh | 13 +++++++++++++ 5 files changed, 44 insertions(+) create mode 100644 Makefile create mode 100755 scripts/build.sh create mode 100755 scripts/upload.sh diff --git a/.gitignore b/.gitignore index 155e4cbd8a8..f56db437d09 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ test_results/* /resources/linux /resources/x86_64 /resources/aarch64 +.env \ No newline at end of file diff --git a/.tool-versions b/.tool-versions index 6e0d11c9ac4..ff8f8f8c879 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1 +1,2 @@ +gcloud 534.0.0 rust 1.79.0 diff --git a/Makefile b/Makefile new file mode 100644 index 00000000000..1fda2f26881 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +-include .env + +.PHONY: build +build: + ./scripts/build.sh + +.PHONY: upload +upload: + ./scripts/upload.sh $(GCP_PROJECT_ID) + +.PHONY: build-and-upload +make build-and-upload: build 
upload diff --git a/scripts/build.sh b/scripts/build.sh new file mode 100755 index 00000000000..b3b97e31d9d --- /dev/null +++ b/scripts/build.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -euo pipefail + +# The format will be: v.._g — e.g. v1.7.2_g8bb88311 +# Extract full version from src/firecracker/swagger/firecracker.yaml +FC_VERSION=$(python3 -c "import yaml; print(yaml.safe_load(open('src/firecracker/swagger/firecracker.yaml'))['info']['version'])") +commit_hash=$(git rev-parse --short HEAD) +version_name="v${FC_VERSION}_g${commit_hash}" +echo "Version name: $version_name" + +echo "Starting to build Firecracker version: $version_name" +tools/devtool -y build --release + +mkdir -p "./build/fc/${version_name}" +cp ./build/cargo_target/x86_64-unknown-linux-musl/release/firecracker "./build/fc/${version_name}/firecracker" +echo "Finished building Firecracker version: $version_name and copied to ./build/fc/${version_name}/firecracker" diff --git a/scripts/upload.sh b/scripts/upload.sh new file mode 100755 index 00000000000..4227c642593 --- /dev/null +++ b/scripts/upload.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -euo pipefail + +GCP_PROJECT_ID=$1 + +gsutil -h "Cache-Control:no-cache, max-age=0" cp -r "build/fc/*" "gs://${GCP_PROJECT_ID}-fc-versions" +if [ "$GCP_PROJECT_ID" == "e2b-prod" ]; then + # Upload kernel to GCP public builds bucket + gsutil -h "Cache-Control:no-cache, max-age=0" cp -r "build/fc/*" "gs://${GCP_PROJECT_ID}-public-builds/firecrackers/" +fi + +rm -rf build/fc/* From 99e280c50fafd2e0961190549af4b87d099d6af8 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Mon, 8 Dec 2025 16:54:52 -0800 Subject: [PATCH 08/18] Parse without python --- scripts/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build.sh b/scripts/build.sh index b3b97e31d9d..6f459f63e07 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -4,7 +4,7 @@ set -euo pipefail # The format will be: v.._g — e.g. 
v1.7.2_g8bb88311 # Extract full version from src/firecracker/swagger/firecracker.yaml -FC_VERSION=$(python3 -c "import yaml; print(yaml.safe_load(open('src/firecracker/swagger/firecracker.yaml'))['info']['version'])") +FC_VERSION=$(awk '/^info:/{flag=1} flag && /^ version:/{print $2; exit}' src/firecracker/swagger/firecracker.yaml) commit_hash=$(git rev-parse --short HEAD) version_name="v${FC_VERSION}_g${commit_hash}" echo "Version name: $version_name" From aac85412a15d9e5d9d706b226a6912fb0b5766da Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Wed, 10 Dec 2025 14:56:32 -0800 Subject: [PATCH 09/18] Split endpoints --- .../seccomp/aarch64-unknown-linux-musl.json | 4 + .../seccomp/x86_64-unknown-linux-musl.json | 10 +- .../src/api_server/parsed_request.rs | 50 ++++ .../src/api_server/request/memory.rs | 39 +++ src/firecracker/src/api_server/request/mod.rs | 1 + src/firecracker/swagger/firecracker.yaml | 66 ++++- src/vmm/src/rpc_interface.rs | 39 ++- src/vmm/src/vmm_config/instance_info.rs | 17 ++ src/vmm/src/vstate/vm.rs | 152 ++++++++++- tests/framework/http_api.py | 2 + .../integration_tests/functional/test_api.py | 249 ++++++++++++++++++ 11 files changed, 607 insertions(+), 22 deletions(-) create mode 100644 src/firecracker/src/api_server/request/memory.rs diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json index 48d94a0f050..b04d2886a35 100644 --- a/resources/seccomp/aarch64-unknown-linux-musl.json +++ b/resources/seccomp/aarch64-unknown-linux-musl.json @@ -220,6 +220,10 @@ "syscall": "madvise", "comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms." 
}, + { + "syscall": "mincore", + "comment": "Used by get_memory_dirty_bitmap to check if memory pages are resident" + }, { "syscall": "mmap", "comment": "Used by the VirtIO balloon device", diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index 861b69c6b44..dea4b3ed83a 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -216,6 +216,10 @@ "syscall": "madvise", "comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms." }, + { + "syscall": "mincore", + "comment": "Used by get_memory_dirty_bitmap to check if memory pages are resident" + }, { "syscall": "mmap", "comment": "Used by the VirtIO balloon device", @@ -524,8 +528,8 @@ "comment": "sigaltstack is used by Rust stdlib to remove alternative signal stack during thread teardown." 
}, { - "syscall": "getrandom", - "comment": "getrandom is used by `HttpServer` to reinialize `HashMap` after moving to the API thread" + "syscall": "getrandom", + "comment": "getrandom is used by `HttpServer` to reinialize `HashMap` after moving to the API thread" }, { "syscall": "accept4", @@ -1276,4 +1280,4 @@ } ] } -} +} \ No newline at end of file diff --git a/src/firecracker/src/api_server/parsed_request.rs b/src/firecracker/src/api_server/parsed_request.rs index 125463d1d05..00c04a6a34b 100644 --- a/src/firecracker/src/api_server/parsed_request.rs +++ b/src/firecracker/src/api_server/parsed_request.rs @@ -20,6 +20,7 @@ use super::request::logger::parse_put_logger; use super::request::machine_configuration::{ parse_get_machine_config, parse_patch_machine_config, parse_put_machine_config, }; +use super::request::memory::{parse_get_memory, parse_get_memory_mappings}; use super::request::metrics::parse_put_metrics; use super::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds}; use super::request::net::{parse_patch_net, parse_put_net}; @@ -82,6 +83,14 @@ impl TryFrom<&Request> for ParsedRequest { Ok(ParsedRequest::new_sync(VmmAction::GetFullVmConfig)) } (Method::Get, "machine-config", None) => parse_get_machine_config(), + (Method::Get, "memory", None) => match path_tokens.next() { + Some("mappings") => parse_get_memory_mappings(), + None => parse_get_memory(), + _ => Err(RequestError::InvalidPathMethod( + request_uri.to_string(), + Method::Get, + )), + }, (Method::Get, "mmds", None) => parse_get_mmds(), (Method::Get, _, Some(_)) => method_to_error(Method::Get), (Method::Put, "actions", Some(body)) => parse_put_actions(body), @@ -172,6 +181,8 @@ impl ParsedRequest { } VmmData::BalloonStats(stats) => Self::success_response_with_data(stats), VmmData::InstanceInformation(info) => Self::success_response_with_data(info), + VmmData::MemoryMappings(mappings) => Self::success_response_with_data(mappings), + VmmData::Memory(memory) => 
Self::success_response_with_data(memory), VmmData::VmmVersion(version) => Self::success_response_with_data( &serde_json::json!({ "firecracker_version": version.as_str() }), ), @@ -568,6 +579,12 @@ pub mod tests { VmmData::InstanceInformation(info) => { http_response(&serde_json::to_string(info).unwrap(), 200) } + VmmData::MemoryMappings(mappings) => { + http_response(&serde_json::to_string(mappings).unwrap(), 200) + } + VmmData::Memory(memory) => { + http_response(&serde_json::to_string(memory).unwrap(), 200) + } VmmData::VmmVersion(version) => http_response( &serde_json::json!({ "firecracker_version": version.as_str() }).to_string(), 200, @@ -589,6 +606,15 @@ pub mod tests { verify_ok_response_with(VmmData::MachineConfiguration(MachineConfig::default())); verify_ok_response_with(VmmData::MmdsValue(serde_json::from_str("{}").unwrap())); verify_ok_response_with(VmmData::InstanceInformation(InstanceInfo::default())); + verify_ok_response_with(VmmData::MemoryMappings( + vmm::vmm_config::instance_info::MemoryMappingsResponse { mappings: vec![] }, + )); + verify_ok_response_with(VmmData::Memory( + vmm::vmm_config::instance_info::MemoryResponse { + resident: vec![], + empty: vec![], + }, + )); verify_ok_response_with(VmmData::VmmVersion(String::default())); // Error. 
@@ -662,6 +688,30 @@ pub mod tests { ParsedRequest::try_from(&req).unwrap(); } + #[test] + fn test_try_from_get_memory_mappings() { + let (mut sender, receiver) = UnixStream::pair().unwrap(); + let mut connection = HttpConnection::new(receiver); + sender + .write_all(http_request("GET", "/memory/mappings", None).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = connection.pop_parsed_request().unwrap(); + ParsedRequest::try_from(&req).unwrap(); + } + + #[test] + fn test_try_from_get_memory() { + let (mut sender, receiver) = UnixStream::pair().unwrap(); + let mut connection = HttpConnection::new(receiver); + sender + .write_all(http_request("GET", "/memory", None).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = connection.pop_parsed_request().unwrap(); + ParsedRequest::try_from(&req).unwrap(); + } + #[test] fn test_try_from_get_version() { let (mut sender, receiver) = UnixStream::pair().unwrap(); diff --git a/src/firecracker/src/api_server/request/memory.rs b/src/firecracker/src/api_server/request/memory.rs new file mode 100644 index 00000000000..e879d6b3b02 --- /dev/null +++ b/src/firecracker/src/api_server/request/memory.rs @@ -0,0 +1,39 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use vmm::logger::{IncMetric, METRICS}; +use vmm::rpc_interface::VmmAction; + +use super::super::parsed_request::{ParsedRequest, RequestError}; + +pub(crate) fn parse_get_memory_mappings() -> Result { + METRICS.get_api_requests.instance_info_count.inc(); + Ok(ParsedRequest::new_sync(VmmAction::GetMemoryMappings)) +} + +pub(crate) fn parse_get_memory() -> Result { + METRICS.get_api_requests.instance_info_count.inc(); + Ok(ParsedRequest::new_sync(VmmAction::GetMemory)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::api_server::parsed_request::RequestAction; + + #[test] + fn test_parse_get_memory_mappings_request() { + match parse_get_memory_mappings().unwrap().into_parts() { + (RequestAction::Sync(action), _) if *action == VmmAction::GetMemoryMappings => {} + _ => panic!("Test failed."), + } + } + + #[test] + fn test_parse_get_memory_request() { + match parse_get_memory().unwrap().into_parts() { + (RequestAction::Sync(action), _) if *action == VmmAction::GetMemory => {} + _ => panic!("Test failed."), + } + } +} diff --git a/src/firecracker/src/api_server/request/mod.rs b/src/firecracker/src/api_server/request/mod.rs index 0c1622798f4..4442436986c 100644 --- a/src/firecracker/src/api_server/request/mod.rs +++ b/src/firecracker/src/api_server/request/mod.rs @@ -10,6 +10,7 @@ pub mod entropy; pub mod instance_info; pub mod logger; pub mod machine_configuration; +pub mod memory; pub mod metrics; pub mod mmds; pub mod net; diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index 83790da7ba4..4200d4d0fab 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -618,6 +618,35 @@ paths: schema: $ref: "#/definitions/Error" + /memory/mappings: + get: + summary: Gets the memory mappings with skippable pages bitmap. 
+ operationId: getMemoryMappings + responses: + 200: + description: OK + schema: + $ref: "#/definitions/MemoryMappingsResponse" + default: + description: Internal server error + schema: + $ref: "#/definitions/Error" + + /memory: + get: + summary: Gets the memory info (resident and empty pages). + description: Returns an object with resident and empty bitmaps. The resident bitmap marks all pages that are resident. The empty bitmap marks zero pages (subset of resident pages). This is checked at the pageSize of each region. All regions must have the same page size. + operationId: getMemory + responses: + 200: + description: OK + schema: + $ref: "#/definitions/MemoryResponse" + default: + description: Internal server error + schema: + $ref: "#/definitions/Error" + /version: get: summary: Gets the Firecracker version. @@ -990,11 +1019,6 @@ definitions: vmm_version: description: MicroVM hypervisor build version. type: string - memory_regions: - type: array - description: The regions of the guest memory. - items: - $ref: "#/definitions/GuestMemoryRegionMapping" GuestMemoryRegionMapping: type: object @@ -1017,6 +1041,38 @@ definitions: description: The page size in bytes. type: integer + MemoryMappingsResponse: + type: object + description: Response containing memory region mappings. + required: + - mappings + properties: + mappings: + type: array + description: The memory region mappings. + items: + $ref: "#/definitions/GuestMemoryRegionMapping" + + MemoryResponse: + type: object + description: Response containing the memory info (resident and empty pages). + required: + - resident + - empty + properties: + resident: + type: array + description: The resident bitmap as a vector of u64 values. Each bit represents if the page is resident. + items: + type: integer + format: uint64 + empty: + type: array + description: The empty bitmap as a vector of u64 values. Each bit represents if the page is zero (empty). This is a subset of the resident pages. 
+ items: + type: integer + format: uint64 + Logger: type: object description: diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 5896d33906f..60616b09f9a 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -25,8 +25,8 @@ use crate::vmm_config::balloon::{ use crate::vmm_config::boot_source::{BootSourceConfig, BootSourceConfigError}; use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, DriveError}; use crate::vmm_config::entropy::{EntropyDeviceConfig, EntropyDeviceError}; -use crate::vmm_config::instance_info::InstanceInfo; -use crate::vmm_config::machine_config::{MachineConfig, MachineConfigUpdate, VmConfigError}; +use crate::vmm_config::instance_info::{InstanceInfo, MemoryMappingsResponse, MemoryResponse}; +use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError, MachineConfigUpdate}; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::{ @@ -65,6 +65,10 @@ pub enum VmmAction { GetVmMachineConfig, /// Get microVM instance information. GetVmInstanceInfo, + /// Get memory mappings with skippable pages bitmap. + GetMemoryMappings, + /// Get memory info (resident and empty pages). + GetMemory, /// Get microVM version. GetVmmVersion, /// Flush the metrics. This action can only be called after the logger has been configured. @@ -189,6 +193,10 @@ pub enum VmmData { MmdsValue(serde_json::Value), /// The microVM instance information. InstanceInformation(InstanceInfo), + /// Memory mappings with skippable pages bitmap. + MemoryMappings(MemoryMappingsResponse), + /// Memory info (resident and empty pages). + Memory(MemoryResponse), /// The microVM version. 
VmmVersion(String), } @@ -419,6 +427,7 @@ impl<'a> PrebootApiController<'a> { &self.vm_resources.vm_config, ))), GetVmInstanceInfo => Ok(VmmData::InstanceInformation(self.instance_info.clone())), + GetMemoryMappings | GetMemory => Err(VmmActionError::OperationNotSupportedPreBoot), GetVmmVersion => Ok(VmmData::VmmVersion(self.instance_info.vmm_version.clone())), InsertBlockDevice(config) => self.insert_block_device(config), InsertNetworkDevice(config) => self.insert_net_device(config), @@ -648,14 +657,28 @@ impl RuntimeApiController { ))), GetVmInstanceInfo => { let locked_vmm = self.vmm.lock().expect("Poisoned lock"); - - let mut instance_info = locked_vmm.instance_info(); - - instance_info.memory_regions = - Some(locked_vmm.guest_memory_mappings(&VmInfo::from(&self.vm_resources))); - + let instance_info = locked_vmm.instance_info(); Ok(VmmData::InstanceInformation(instance_info)) } + GetMemoryMappings => { + let locked_vmm = self.vmm.lock().expect("Poisoned lock"); + let mappings = locked_vmm + .vm + .guest_memory_mappings(&VmInfo::from(&self.vm_resources)); + + Ok(VmmData::MemoryMappings(MemoryMappingsResponse { mappings })) + } + GetMemory => { + let locked_vmm = self.vmm.lock().expect("Poisoned lock"); + let (resident_bitmap, empty_bitmap) = locked_vmm + .vm + .get_memory_info(&VmInfo::from(&self.vm_resources)) + .map_err(|e| VmmActionError::InternalVmm(VmmError::Vm(e)))?; + Ok(VmmData::Memory(MemoryResponse { + resident: resident_bitmap, + empty: empty_bitmap, + })) + } GetVmmVersion => Ok(VmmData::VmmVersion( self.vmm.lock().expect("Poisoned lock").version(), )), diff --git a/src/vmm/src/vmm_config/instance_info.rs b/src/vmm/src/vmm_config/instance_info.rs index 3b281ca1cbe..b945803cec9 100644 --- a/src/vmm/src/vmm_config/instance_info.rs +++ b/src/vmm/src/vmm_config/instance_info.rs @@ -51,3 +51,20 @@ pub struct InstanceInfo { #[serde(skip_serializing_if = "Option::is_none")] pub memory_regions: Option>, } + +/// Response structure for the memory mappings 
endpoint. +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +pub struct MemoryMappingsResponse { + /// The memory region mappings. + pub mappings: Vec, +} + +/// Response structure for the memory endpoint. +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +pub struct MemoryResponse { + /// The resident bitmap as a vector of u64 values. Each bit represents if the page is resident. + pub resident: Vec, + /// The empty bitmap as a vector of u64 values. Each bit represents if the page is zero (empty). + /// This is a subset of the resident pages. + pub empty: Vec, +} diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 0f72abcf68f..51b37354354 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -8,11 +8,19 @@ #[cfg(target_arch = "x86_64")] use std::fmt; -#[cfg(target_arch = "x86_64")] -use kvm_bindings::{ - kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, CpuId, MsrList, - KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, - KVM_MAX_CPUID_ENTRIES, KVM_PIT_SPEAKER_DUMMY, +use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region}; +use kvm_ioctls::VmFd; +use serde::{Deserialize, Serialize}; +use vmm_sys_util::eventfd::EventFd; + +use crate::arch::host_page_size; +pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState}; +use crate::logger::info; +use crate::persist::{CreateSnapshotError, VmInfo}; +use crate::utils::u64_to_usize; +use crate::vmm_config::snapshot::SnapshotType; +use crate::vstate::memory::{ + Address, GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap, }; use kvm_bindings::{kvm_userspace_memory_region, KVM_API_VERSION, KVM_MEM_LOG_DIRTY_PAGES}; use kvm_ioctls::{Kvm, VmFd}; @@ -296,7 +304,139 @@ impl Vm { }) } - /// Restore the KVM VM state + /// Gets the memory info (resident and empty pages) for all memory regions. + /// Returns two bitmaps: resident (all resident pages) and empty (zero pages, subset of resident). 
+ /// This checks at the pageSize of each region and requires all regions to have the same page size. + pub fn get_memory_info(&self, vm_info: &VmInfo) -> Result<(Vec, Vec), VmError> { + let mappings = self.guest_memory_mappings(vm_info); + + if mappings.is_empty() { + return Ok((Vec::new(), Vec::new())); + } + + // Check that all regions have the same page size + let page_size = mappings[0].page_size; + if mappings.iter().any(|m| m.page_size != page_size) { + return Err(VmError::InvalidMemoryConfiguration( + "All memory regions must have the same page size".to_string(), + )); + } + + // Calculate total number of pages across all regions + let total_pages: usize = mappings.iter().map(|m| m.size / page_size).sum(); + let bitmap_size = total_pages.div_ceil(64); + let mut resident_bitmap = vec![0u64; bitmap_size]; + let mut empty_bitmap = vec![0u64; bitmap_size]; + + let mut global_page_idx = 0; + + // SAFETY: We're reading from valid memory regions that we own + unsafe { + // Pre-allocate zero buffer once per page size (reused for all pages) + // This is the most important optimization - avoids repeated allocations + let zero_buf = vec![0u8; page_size]; + + for mapping in &mappings { + // Find the memory region that matches this mapping + let mem_region = self + .guest_memory() + .iter() + .find(|region| region.as_ptr() as u64 == mapping.base_host_virt_addr) + .expect("Memory region not found for mapping"); + + let region_ptr = mem_region.as_ptr(); + let region_size = mem_region.size(); + let num_pages = region_size / page_size; + + // Use mincore on the entire region to check residency + let sys_page_size = host_page_size(); + let mincore_pages = region_size.div_ceil(sys_page_size); + let mut mincore_vec = vec![0u8; mincore_pages]; + + let mincore_result = libc::mincore( + region_ptr.cast::(), + region_size, + mincore_vec.as_mut_ptr(), + ); + + // Check each page + for page_idx in 0..num_pages { + let page_offset = page_idx * page_size; + let page_ptr = 
region_ptr.add(page_offset); + + // Check if page is resident using mincore + let is_resident = if mincore_result == 0 { + let page_mincore_start = page_offset / sys_page_size; + let page_mincore_count = page_size.div_ceil(sys_page_size); + if page_mincore_start + page_mincore_count <= mincore_vec.len() { + // Page is resident if any 4KB sub-page is resident (check LSB only) + mincore_vec[page_mincore_start..page_mincore_start + page_mincore_count] + .iter() + .any(|&v| (v & 0x1) != 0) + } else { + false + } + } else { + // If mincore failed, assume resident (conservative approach) + true + }; + + let bitmap_idx = global_page_idx / 64; + let bit_idx = global_page_idx % 64; + + if is_resident { + // Set bit in resident bitmap + if bitmap_idx < resident_bitmap.len() { + resident_bitmap[bitmap_idx] |= 1u64 << bit_idx; + } + + // Check if page is zero (empty) + let is_zero = libc::memcmp( + page_ptr.cast::(), + zero_buf.as_ptr().cast::(), + page_size, + ) == 0; + + // Set bit in empty bitmap if page is zero + if is_zero && bitmap_idx < empty_bitmap.len() { + empty_bitmap[bitmap_idx] |= 1u64 << bit_idx; + } + } + + global_page_idx += 1; + } + } + } + + Ok((resident_bitmap, empty_bitmap)) + } + + /// Resets the KVM dirty bitmap for each of the guest's memory regions. + pub fn reset_dirty_bitmap(&self) { + self.guest_memory() + .iter() + .zip(0u32..) + .for_each(|(region, slot)| { + let _ = self.fd().get_dirty_log(slot, u64_to_usize(region.len())); + }); + } + + /// Retrieves the KVM dirty bitmap for each of the guest's memory regions. + pub fn get_dirty_bitmap(&self) -> Result { + let mut bitmap: DirtyBitmap = HashMap::new(); + self.guest_memory() + .iter() + .zip(0u32..) + .try_for_each(|(region, slot)| { + self.fd() + .get_dirty_log(slot, u64_to_usize(region.len())) + .map(|bitmap_region| _ = bitmap.insert(slot, bitmap_region)) + })?; + Ok(bitmap) + } + + /// Takes a snapshot of the virtual machine running inside the given [`Vmm`] and saves it to + /// `mem_file_path`. 
/// /// # Errors /// diff --git a/tests/framework/http_api.py b/tests/framework/http_api.py index a1ee37174b0..1442a253b25 100644 --- a/tests/framework/http_api.py +++ b/tests/framework/http_api.py @@ -123,3 +123,5 @@ def __init__(self, api_usocket_full_name): self.snapshot_load = Resource(self, "/snapshot/load") self.cpu_config = Resource(self, "/cpu-config") self.entropy = Resource(self, "/entropy") + self.memory_mappings = Resource(self, "/memory/mappings") + self.memory = Resource(self, "/memory") diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index 5aebe7b5265..a03e8020098 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -18,6 +18,7 @@ import host_tools.network as net_tools from framework import utils_cpuid from framework.utils import get_firecracker_version_from_toml, is_io_uring_supported +from framework.microvm import HugePagesConfig MEM_LIMIT = 1000000000 @@ -1389,3 +1390,251 @@ def test_negative_snapshot_load_api(microvm_factory): # The snapshot/memory files above don't exist, but the request is otherwise syntactically valid. # In this case, Firecracker exits. 
vm.mark_killed() + + +def test_memory_mappings_pre_boot(uvm_plain): + """Test that memory mappings endpoint is not available before boot.""" + test_microvm = uvm_plain + test_microvm.spawn() + test_microvm.basic_config() + + # Use session directly since get() asserts on 200 + url = test_microvm.api.endpoint + "/memory/mappings" + res = test_microvm.api.session.get(url) + assert res.status_code == 400 + assert NOT_SUPPORTED_BEFORE_START in res.json()["fault_message"] + + +def test_memory_pre_boot(uvm_plain): + """Test that memory endpoint is not available before boot.""" + test_microvm = uvm_plain + test_microvm.spawn() + test_microvm.basic_config() + + # Use session directly since get() asserts on 200 + url = test_microvm.api.endpoint + "/memory" + res = test_microvm.api.session.get(url) + assert res.status_code == 400 + assert NOT_SUPPORTED_BEFORE_START in res.json()["fault_message"] + + +def test_memory_mappings_post_boot(uvm_plain): + """Test that memory mappings endpoint works after boot with hugepages.""" + test_microvm = uvm_plain + test_microvm.spawn() + test_microvm.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB) + test_microvm.start() + + response = test_microvm.api.memory_mappings.get() + assert response.status_code == 200 + + data = response.json() + assert isinstance(data, dict) + assert "mappings" in data + mappings = data["mappings"] + assert isinstance(mappings, list) + assert len(mappings) > 0 + + # Verify structure of each mapping + for mapping in mappings: + assert "base_host_virt_addr" in mapping + assert "size" in mapping + assert "offset" in mapping + assert "page_size" in mapping + assert isinstance(mapping["base_host_virt_addr"], int) + assert isinstance(mapping["size"], int) + assert isinstance(mapping["offset"], int) + assert isinstance(mapping["page_size"], int) + assert mapping["size"] > 0 + # Verify page size is 2MB (2097152 bytes) for hugepages + assert mapping["page_size"] == 2 * 1024 * 1024 + + +def 
test_memory_post_boot(uvm_plain): + """Test that memory endpoint works after boot with hugepages.""" + test_microvm = uvm_plain + test_microvm.spawn() + test_microvm.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB) + test_microvm.start() + + # Get memory mappings to determine page size and total memory + mappings_response = test_microvm.api.memory_mappings.get() + assert mappings_response.status_code == 200 + mappings_data = mappings_response.json() + assert isinstance(mappings_data, dict) + assert "mappings" in mappings_data + mappings = mappings_data["mappings"] + assert len(mappings) > 0 + + # All regions should have the same page size (2MB for hugepages) + page_size = mappings[0]["page_size"] + assert page_size == 2 * 1024 * 1024, "Expected 2MB page size for hugepages" + + # Verify all regions have the same page size + for mapping in mappings: + assert ( + mapping["page_size"] == page_size + ), "All regions must have the same page size" + + total_memory_size = sum(mapping["size"] for mapping in mappings) + total_pages = total_memory_size // page_size + expected_bitmap_size = (total_pages + 63) // 64 # ceil(total_pages / 64) + + # Get memory info + response = test_microvm.api.memory.get() + assert response.status_code == 200 + + data = response.json() + assert isinstance(data, dict) + assert "resident" in data + assert "empty" in data + resident_bitmap = data["resident"] + empty_bitmap = data["empty"] + assert isinstance(resident_bitmap, list) + assert isinstance(empty_bitmap, list) + assert len(resident_bitmap) == expected_bitmap_size + assert len(empty_bitmap) == expected_bitmap_size + + # Verify all values are valid u64 integers + for value in resident_bitmap: + assert isinstance(value, int) + assert value >= 0 + assert value <= 0xFFFFFFFFFFFFFFFF # Max u64 value + + for value in empty_bitmap: + assert isinstance(value, int) + assert value >= 0 + assert value <= 0xFFFFFFFFFFFFFFFF # Max u64 value + + # After boot, there should be at least one resident 
page + has_resident_page = any(value != 0 for value in resident_bitmap) + assert has_resident_page, "Expected at least one resident page after VM boot" + + # Empty pages should be a subset of resident pages + # (empty_bitmap & resident_bitmap) == empty_bitmap + for i in range(len(empty_bitmap)): + assert (empty_bitmap[i] & resident_bitmap[i]) == empty_bitmap[ + i + ], "Empty pages must be a subset of resident pages" + + +@pytest.mark.nonci +def test_memory_benchmark(microvm_factory, guest_kernel_linux_6_1, rootfs): + """Benchmark the memory endpoint performance (resident + zero page checking).""" + test_microvm = microvm_factory.build(guest_kernel_linux_6_1, rootfs) + test_microvm.spawn() + + # Use larger memory size for benchmarking + # Check available hugepages and use a size that fits (need at least some headroom) + # Default to 256MB if we can't determine, or use available - 64MB headroom + try: + with open("/sys/kernel/mm/hugepages/hugepages-2048kB/free_hugepages", "r") as f: + free_hugepages = int(f.read().strip()) + # Each hugepage is 2MB, reserve 32 pages (64MB) for system + available_mib = max(128, (free_hugepages - 32) * 2) + mem_size_mib = min(1024, available_mib) # Cap at 1GB for proper benchmark + except (FileNotFoundError, ValueError, OSError): + # Fallback to 256MB if we can't read hugepage info + mem_size_mib = 256 + test_microvm.basic_config( + mem_size_mib=mem_size_mib, huge_pages=HugePagesConfig.HUGETLBFS_2MB + ) + # Add network interface for SSH access + test_microvm.add_net_iface() + test_microvm.start() + + # Get memory mappings to determine actual memory size + mappings_response = test_microvm.api.memory_mappings.get() + assert mappings_response.status_code == 200 + mappings_data = mappings_response.json() + mappings = mappings_data["mappings"] + + # Calculate total memory size + total_memory_bytes = sum(mapping["size"] for mapping in mappings) + total_memory_mib = total_memory_bytes / (1024 * 1024) + page_size = mappings[0]["page_size"] + + 
# Ensure memory is resident by writing zeros to it via guest + # This will fault in the pages and make them resident + # Using tmpfs (/dev/shm) ensures the memory is actually resident + # Allocate a reasonable portion (e.g., 256MB) to avoid freezing the sandbox + fault_memory_mib = min(256, int(total_memory_mib * 0.25)) # 25% or max 256MB + test_microvm.ssh.run( + "dd if=/dev/zero of=/dev/shm/zero_mem bs=1M count={} 2>/dev/null || true".format( + fault_memory_mib + ) + ) + + # Give the system a moment to fault in pages + time.sleep(0.1) + + # Benchmark the /memory endpoint call + start_time = time.perf_counter() + response = test_microvm.api.memory.get() + end_time = time.perf_counter() + + assert response.status_code == 200 + data = response.json() + assert "resident" in data + assert "empty" in data + + # Verify the response is valid + resident_bitmap = data["resident"] + empty_bitmap = data["empty"] + + # Calculate expected bitmap size + page_size = mappings[0]["page_size"] + total_pages = total_memory_bytes // page_size + expected_bitmap_size = (total_pages + 63) // 64 + + assert len(resident_bitmap) == expected_bitmap_size + assert len(empty_bitmap) == expected_bitmap_size + + # Count actual resident pages (faulted-in memory) + resident_page_count = 0 + for bitmap_value in resident_bitmap: + # Count set bits in each u64 value + resident_page_count += bin(bitmap_value).count("1") + + # Calculate resident memory size (actual memory that was checked) + resident_memory_bytes = resident_page_count * page_size + resident_memory_mib = resident_memory_bytes / (1024 * 1024) + + # Calculate elapsed time and throughput based on actual resident memory + elapsed_seconds = end_time - start_time + + if resident_memory_bytes > 0: + throughput_mib_per_sec = resident_memory_mib / elapsed_seconds + time_per_mb_ms = (elapsed_seconds * 1000) / resident_memory_mib + else: + throughput_mib_per_sec = 0 + time_per_mb_ms = 0 + + # Count empty pages + empty_page_count = 0 + for 
bitmap_value in empty_bitmap: + empty_page_count += bin(bitmap_value).count("1") + + # Print benchmark results + print(f"\n{'='*60}") + print(f"Memory Benchmark Results") + print(f"{'='*60}") + print( + f"Total Memory: {total_memory_mib:.2f} MiB ({total_memory_bytes / (1024**3):.3f} GB)" + ) + print( + f"Resident Pages: {resident_page_count} / {total_pages} ({resident_page_count * 100 / total_pages:.1f}%)" + ) + print( + f"Resident Memory: {resident_memory_mib:.2f} MiB ({resident_memory_bytes / (1024**3):.3f} GB)" + ) + print( + f"Empty Pages: {empty_page_count} / {resident_page_count} ({empty_page_count * 100 / resident_page_count if resident_page_count > 0 else 0:.1f}% of resident)" + ) + print(f"Elapsed Time: {elapsed_seconds*1000:.2f} ms") + print(f"Throughput (resident): {throughput_mib_per_sec:.2f} MiB/s") + print(f"Time per MB (resident): {time_per_mb_ms:.3f} ms/MB") + print(f"{'='*60}\n") + + # Verify at least some pages are resident + assert resident_page_count > 0, "Expected at least one resident page" From 512442204e4ad85af5cef0a4ec7fa83a72d0d6da Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Thu, 18 Dec 2025 06:13:10 -0800 Subject: [PATCH 10/18] Fix build --- src/vmm/src/rpc_interface.rs | 11 +-- src/vmm/src/vstate/vm.rs | 155 ++++++++++++++++++++--------------- 2 files changed, 95 insertions(+), 71 deletions(-) diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 60616b09f9a..3eb6044d430 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -26,7 +26,7 @@ use crate::vmm_config::boot_source::{BootSourceConfig, BootSourceConfigError}; use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, DriveError}; use crate::vmm_config::entropy::{EntropyDeviceConfig, EntropyDeviceError}; use crate::vmm_config::instance_info::{InstanceInfo, MemoryMappingsResponse, MemoryResponse}; -use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError, MachineConfigUpdate}; +use 
crate::vmm_config::machine_config::{MachineConfig, MachineConfigUpdate, VmConfigError}; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::{ @@ -662,9 +662,7 @@ impl RuntimeApiController { } GetMemoryMappings => { let locked_vmm = self.vmm.lock().expect("Poisoned lock"); - let mappings = locked_vmm - .vm - .guest_memory_mappings(&VmInfo::from(&self.vm_resources)); + let mappings = locked_vmm.guest_memory_mappings(&VmInfo::from(&self.vm_resources)); Ok(VmmData::MemoryMappings(MemoryMappingsResponse { mappings })) } @@ -672,7 +670,10 @@ impl RuntimeApiController { let locked_vmm = self.vmm.lock().expect("Poisoned lock"); let (resident_bitmap, empty_bitmap) = locked_vmm .vm - .get_memory_info(&VmInfo::from(&self.vm_resources)) + .get_memory_info( + &locked_vmm.guest_memory(), + &VmInfo::from(&self.vm_resources), + ) .map_err(|e| VmmActionError::InternalVmm(VmmError::Vm(e)))?; Ok(VmmData::Memory(MemoryResponse { resident: resident_bitmap, diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 51b37354354..099e133928f 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -8,21 +8,12 @@ #[cfg(target_arch = "x86_64")] use std::fmt; -use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region}; -use kvm_ioctls::VmFd; -use serde::{Deserialize, Serialize}; -use vmm_sys_util::eventfd::EventFd; - -use crate::arch::host_page_size; -pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState}; -use crate::logger::info; -use crate::persist::{CreateSnapshotError, VmInfo}; -use crate::utils::u64_to_usize; -use crate::vmm_config::snapshot::SnapshotType; -use crate::vstate::memory::{ - Address, GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap, +use kvm_bindings::{ + kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, kvm_userspace_memory_region, + CpuId, MsrList, KVM_API_VERSION, 
KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, + KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_MAX_CPUID_ENTRIES, KVM_MEM_LOG_DIRTY_PAGES, + KVM_PIT_SPEAKER_DUMMY, }; -use kvm_bindings::{kvm_userspace_memory_region, KVM_API_VERSION, KVM_MEM_LOG_DIRTY_PAGES}; use kvm_ioctls::{Kvm, VmFd}; use serde::{Deserialize, Serialize}; @@ -31,9 +22,16 @@ use crate::arch::aarch64::gic::GICDevice; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::gic::GicState; use crate::cpu_config::templates::KvmCapability; -#[cfg(target_arch = "x86_64")] -use crate::utils::u64_to_usize; +use crate::persist::VmInfo; +use crate::utils::{get_page_size, u64_to_usize}; use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; +use crate::GuestMemoryRegionMapping; + +/// Get the host page size in bytes. +/// This should always succeed on a valid system. +fn host_page_size() -> usize { + get_page_size().expect("Failed to get system page size") +} /// Errors associated with the wrappers over KVM ioctls. /// Needs `rustfmt::skip` to make multiline comments work @@ -85,6 +83,8 @@ pub enum VmError { #[cfg(target_arch = "aarch64")] /// Failed to restore the VM's GIC state: {0} RestoreGic(crate::arch::aarch64::gic::GicError), + /// Invalid memory configuration: {0} + InvalidMemoryConfiguration(String), } /// Error type for [`Vm::restore_state`] @@ -265,50 +265,35 @@ impl Vm { pub fn fd(&self) -> &VmFd { &self.fd } -} - -#[cfg(target_arch = "aarch64")] -impl Vm { - const DEFAULT_CAPABILITIES: [u32; 7] = [ - kvm_bindings::KVM_CAP_IOEVENTFD, - kvm_bindings::KVM_CAP_IRQFD, - kvm_bindings::KVM_CAP_USER_MEMORY, - kvm_bindings::KVM_CAP_ARM_PSCI_0_2, - kvm_bindings::KVM_CAP_DEVICE_CTRL, - kvm_bindings::KVM_CAP_MP_STATE, - kvm_bindings::KVM_CAP_ONE_REG, - ]; - /// Creates the GIC (Global Interrupt Controller). 
- pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), VmError> { - self.irqchip_handle = Some( - crate::arch::aarch64::gic::create_gic(&self.fd, vcpu_count.into(), None) - .map_err(VmError::VmCreateGIC)?, - ); - Ok(()) - } - - /// Gets a reference to the irqchip of the VM. - pub fn get_irqchip(&self) -> &GICDevice { - self.irqchip_handle.as_ref().expect("IRQ chip not set") - } - - /// Saves and returns the Kvm Vm state. - pub fn save_state(&self, mpidrs: &[u64]) -> Result { - Ok(VmState { - gic: self - .get_irqchip() - .save_device(mpidrs) - .map_err(VmError::SaveGic)?, - kvm_cap_modifiers: self.kvm_cap_modifiers.clone(), - }) + /// Returns the memory mappings for the guest memory. + pub fn guest_memory_mappings( + guest_memory: &GuestMemoryMmap, + vm_info: &VmInfo, + ) -> Vec { + let mut offset = 0; + let mut mappings = Vec::new(); + for mem_region in guest_memory.iter() { + mappings.push(GuestMemoryRegionMapping { + base_host_virt_addr: mem_region.as_ptr() as u64, + size: mem_region.size(), + offset, + page_size: vm_info.huge_pages.page_size_kib(), + }); + offset += mem_region.size() as u64; + } + mappings } /// Gets the memory info (resident and empty pages) for all memory regions. /// Returns two bitmaps: resident (all resident pages) and empty (zero pages, subset of resident). /// This checks at the pageSize of each region and requires all regions to have the same page size. 
- pub fn get_memory_info(&self, vm_info: &VmInfo) -> Result<(Vec, Vec), VmError> { - let mappings = self.guest_memory_mappings(vm_info); + pub fn get_memory_info( + &self, + guest_memory: &GuestMemoryMmap, + vm_info: &VmInfo, + ) -> Result<(Vec, Vec), VmError> { + let mappings = Self::guest_memory_mappings(guest_memory, vm_info); if mappings.is_empty() { return Ok((Vec::new(), Vec::new())); @@ -338,8 +323,7 @@ impl Vm { for mapping in &mappings { // Find the memory region that matches this mapping - let mem_region = self - .guest_memory() + let mem_region = guest_memory .iter() .find(|region| region.as_ptr() as u64 == mapping.base_host_virt_addr) .expect("Memory region not found for mapping"); @@ -410,21 +394,60 @@ impl Vm { Ok((resident_bitmap, empty_bitmap)) } +} + +#[cfg(target_arch = "aarch64")] +impl Vm { + const DEFAULT_CAPABILITIES: [u32; 7] = [ + kvm_bindings::KVM_CAP_IOEVENTFD, + kvm_bindings::KVM_CAP_IRQFD, + kvm_bindings::KVM_CAP_USER_MEMORY, + kvm_bindings::KVM_CAP_ARM_PSCI_0_2, + kvm_bindings::KVM_CAP_DEVICE_CTRL, + kvm_bindings::KVM_CAP_MP_STATE, + kvm_bindings::KVM_CAP_ONE_REG, + ]; + + /// Creates the GIC (Global Interrupt Controller). + pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), VmError> { + self.irqchip_handle = Some( + crate::arch::aarch64::gic::create_gic(&self.fd, vcpu_count.into(), None) + .map_err(VmError::VmCreateGIC)?, + ); + Ok(()) + } + + /// Gets a reference to the irqchip of the VM. + pub fn get_irqchip(&self) -> &GICDevice { + self.irqchip_handle.as_ref().expect("IRQ chip not set") + } + + /// Saves and returns the Kvm Vm state. + pub fn save_state(&self, mpidrs: &[u64]) -> Result { + Ok(VmState { + gic: self + .get_irqchip() + .save_device(mpidrs) + .map_err(VmError::SaveGic)?, + kvm_cap_modifiers: self.kvm_cap_modifiers.clone(), + }) + } /// Resets the KVM dirty bitmap for each of the guest's memory regions. - pub fn reset_dirty_bitmap(&self) { - self.guest_memory() - .iter() - .zip(0u32..) 
- .for_each(|(region, slot)| { - let _ = self.fd().get_dirty_log(slot, u64_to_usize(region.len())); - }); + pub fn reset_dirty_bitmap(&self, guest_memory: &GuestMemoryMmap) { + guest_memory.iter().zip(0u32..).for_each(|(region, slot)| { + let _ = self.fd().get_dirty_log(slot, u64_to_usize(region.len())); + }); } /// Retrieves the KVM dirty bitmap for each of the guest's memory regions. - pub fn get_dirty_bitmap(&self) -> Result { - let mut bitmap: DirtyBitmap = HashMap::new(); - self.guest_memory() + pub fn get_dirty_bitmap( + &self, + guest_memory: &GuestMemoryMmap, + ) -> Result { + use std::collections::HashMap; + let mut bitmap: crate::DirtyBitmap = HashMap::new(); + guest_memory .iter() .zip(0u32..) .try_for_each(|(region, slot)| { From 85abaa221eba858813eb64e266caced3575f4b26 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sun, 21 Dec 2025 02:34:41 -0800 Subject: [PATCH 11/18] Rollback deps --- Cargo.lock | 33 ++++++++++++++++++++++----------- src/vmm/Cargo.toml | 5 ----- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d515b776e19..9ad999e44d0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,41 +125,46 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-lc-fips-sys" -version = "0.13.7" +version = "0.12.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2608e5a7965cc9d58c56234d346c9c89b824c4c8652b6f047b3bd0a777c0644f" +checksum = "bf12b67bc9c5168f68655aadb2a12081689a58f1d9b1484705e4d1810ed6e4ac" dependencies = [ - "bindgen 0.69.5", + "bindgen 0.69.4", "cc", "cmake", "dunce", "fs_extra", - "regex", + "libc", + "paste", ] [[package]] name = "aws-lc-rs" -version = "1.13.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fcc8f365936c834db5514fc45aee5b1202d677e6b40e48468aaaa8183ca8c7" +checksum = "cdd82dba44d209fddb11c190e0a94b78651f95299598e472215667417a03ff1d" dependencies = [ 
"aws-lc-fips-sys", "aws-lc-sys", + "mirai-annotations", + "paste", "untrusted", "zeroize", ] [[package]] name = "aws-lc-sys" -version = "0.29.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61b1d86e7705efe1be1b569bab41d4fa1e14e220b60a160f78de2db687add079" +checksum = "df7a4168111d7eb622a31b214057b8509c0a7e1794f44c546d742330dc793972" dependencies = [ - "bindgen 0.69.5", + "bindgen 0.69.4", "cc", "cmake", "dunce", "fs_extra", + "libc", + "paste", ] [[package]] @@ -199,9 +204,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.69.5" +version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" dependencies = [ "bitflags 2.6.0", "cexpr", @@ -950,6 +955,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +[[package]] +name = "mirai-annotations" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" + [[package]] name = "nix" version = "0.27.1" diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 6b22376c482..782280d1a79 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -86,8 +86,3 @@ harness = false [lints] workspace = true - -[patch.crates-io] -aws-lc-sys = "=0.29.0" -aws-lc-rs = "=1.13.1" -aws-lc-fips-sys = "=0.13.7" From 2d550e6efe8cffa05bbb0aa2917e22d014f0bfa5 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sun, 21 Dec 2025 08:46:35 -0800 Subject: [PATCH 12/18] Fix build script --- scripts/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build.sh b/scripts/build.sh index 6f459f63e07..472cda0bf3b 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ 
-5,7 +5,7 @@ set -euo pipefail # The format will be: v.._g — e.g. v1.7.2_g8bb88311 # Extract full version from src/firecracker/swagger/firecracker.yaml FC_VERSION=$(awk '/^info:/{flag=1} flag && /^ version:/{print $2; exit}' src/firecracker/swagger/firecracker.yaml) -commit_hash=$(git rev-parse --short HEAD) +commit_hash=$(git rev-parse --short=8 HEAD) version_name="v${FC_VERSION}_g${commit_hash}" echo "Version name: $version_name" From 2757ca039eb87793bd3756aed6e5a2265f3b76a1 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sun, 21 Dec 2025 08:58:35 -0800 Subject: [PATCH 13/18] Switch to 7 hash chars everywhere --- scripts/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build.sh b/scripts/build.sh index 472cda0bf3b..4d8d64c918e 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -5,7 +5,7 @@ set -euo pipefail # The format will be: v.._g — e.g. v1.7.2_g8bb88311 # Extract full version from src/firecracker/swagger/firecracker.yaml FC_VERSION=$(awk '/^info:/{flag=1} flag && /^ version:/{print $2; exit}' src/firecracker/swagger/firecracker.yaml) -commit_hash=$(git rev-parse --short=8 HEAD) +commit_hash=$(git rev-parse --short=7 HEAD) version_name="v${FC_VERSION}_g${commit_hash}" echo "Version name: $version_name" From 6c05dd52688398847ba8def797275f6fd9211db4 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sun, 21 Dec 2025 09:53:18 -0800 Subject: [PATCH 14/18] Remove g --- scripts/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build.sh b/scripts/build.sh index 4d8d64c918e..9f3bce8a384 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -6,7 +6,7 @@ set -euo pipefail # Extract full version from src/firecracker/swagger/firecracker.yaml FC_VERSION=$(awk '/^info:/{flag=1} flag && /^ version:/{print $2; exit}' src/firecracker/swagger/firecracker.yaml) commit_hash=$(git rev-parse --short=7 HEAD) -version_name="v${FC_VERSION}_g${commit_hash}" +version_name="v${FC_VERSION}_${commit_hash}" echo 
"Version name: $version_name" echo "Starting to build Firecracker version: $version_name" From fb257a102b1191151e483f61165e7c5ea723dc56 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Mon, 22 Dec 2025 01:53:58 -0800 Subject: [PATCH 15/18] Cleanup --- Makefile | 2 +- scripts/build.sh | 2 +- src/vmm/src/vstate/vm.rs | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 1fda2f26881..e96fce61f33 100644 --- a/Makefile +++ b/Makefile @@ -9,4 +9,4 @@ upload: ./scripts/upload.sh $(GCP_PROJECT_ID) .PHONY: build-and-upload -make build-and-upload: build upload +build-and-upload: build upload diff --git a/scripts/build.sh b/scripts/build.sh index 9f3bce8a384..6b831c9b99f 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -2,7 +2,7 @@ set -euo pipefail -# The format will be: v.._g — e.g. v1.7.2_g8bb88311 +# The format will be: v.._ — e.g. v1.7.2_8bb88311 # Extract full version from src/firecracker/swagger/firecracker.yaml FC_VERSION=$(awk '/^info:/{flag=1} flag && /^ version:/{print $2; exit}' src/firecracker/swagger/firecracker.yaml) commit_hash=$(git rev-parse --short=7 HEAD) diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 099e133928f..d776b44689b 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -321,6 +321,8 @@ impl Vm { // This is the most important optimization - avoids repeated allocations let zero_buf = vec![0u8; page_size]; + let sys_page_size = host_page_size(); + for mapping in &mappings { // Find the memory region that matches this mapping let mem_region = guest_memory @@ -333,7 +335,6 @@ impl Vm { let num_pages = region_size / page_size; // Use mincore on the entire region to check residency - let sys_page_size = host_page_size(); let mincore_pages = region_size.div_ceil(sys_page_size); let mut mincore_vec = vec![0u8; mincore_pages]; From 7eaafb6ebafa617d182169b0b0d40004972a6af9 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 18 Feb 2026 15:13:08 +0100 Subject: 
[PATCH 16/18] api: implement API for dirty memory Implement API /memory/dirty which returns a bitmap tracking dirty guest memory. The bitmap is structured as a vector of u64, so its length is: total_number_of_pages.div_ceil(64). Pages are ordered in the order of pages as reported by /memory/mappings. Signed-off-by: Babis Chalios --- .../seccomp/x86_64-unknown-linux-musl.json | 4 + .../src/api_server/parsed_request.rs | 10 +- .../src/api_server/request/memory.rs | 13 ++ src/vmm/src/lib.rs | 65 ++++++++++ src/vmm/src/rpc_interface.rs | 37 +++++- src/vmm/src/utils/mod.rs | 2 + src/vmm/src/utils/pagemap.rs | 111 ++++++++++++++++++ src/vmm/src/vmm_config/instance_info.rs | 8 ++ 8 files changed, 247 insertions(+), 3 deletions(-) create mode 100644 src/vmm/src/utils/pagemap.rs diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index dea4b3ed83a..455572fa7b8 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -220,6 +220,10 @@ "syscall": "mincore", "comment": "Used by get_memory_dirty_bitmap to check if memory pages are resident" }, + { + "syscall": "pread64", + "comment": "Used by get_dirty_memory to read pagemap entries" + }, { "syscall": "mmap", "comment": "Used by the VirtIO balloon device", diff --git a/src/firecracker/src/api_server/parsed_request.rs b/src/firecracker/src/api_server/parsed_request.rs index 00c04a6a34b..ef629a398a4 100644 --- a/src/firecracker/src/api_server/parsed_request.rs +++ b/src/firecracker/src/api_server/parsed_request.rs @@ -20,7 +20,7 @@ use super::request::logger::parse_put_logger; use super::request::machine_configuration::{ parse_get_machine_config, parse_patch_machine_config, parse_put_machine_config, }; -use super::request::memory::{parse_get_memory, parse_get_memory_mappings}; +use super::request::memory::{parse_get_memory, parse_get_memory_dirty, parse_get_memory_mappings}; use 
super::request::metrics::parse_put_metrics; use super::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds}; use super::request::net::{parse_patch_net, parse_put_net}; @@ -85,6 +85,7 @@ impl TryFrom<&Request> for ParsedRequest { (Method::Get, "machine-config", None) => parse_get_machine_config(), (Method::Get, "memory", None) => match path_tokens.next() { Some("mappings") => parse_get_memory_mappings(), + Some("dirty") => parse_get_memory_dirty(), None => parse_get_memory(), _ => Err(RequestError::InvalidPathMethod( request_uri.to_string(), @@ -183,6 +184,7 @@ impl ParsedRequest { VmmData::InstanceInformation(info) => Self::success_response_with_data(info), VmmData::MemoryMappings(mappings) => Self::success_response_with_data(mappings), VmmData::Memory(memory) => Self::success_response_with_data(memory), + VmmData::MemoryDirty(dirty) => Self::success_response_with_data(dirty), VmmData::VmmVersion(version) => Self::success_response_with_data( &serde_json::json!({ "firecracker_version": version.as_str() }), ), @@ -585,6 +587,9 @@ pub mod tests { VmmData::Memory(memory) => { http_response(&serde_json::to_string(memory).unwrap(), 200) } + VmmData::MemoryDirty(dirty) => { + http_response(&serde_json::to_string(dirty).unwrap(), 200) + } VmmData::VmmVersion(version) => http_response( &serde_json::json!({ "firecracker_version": version.as_str() }).to_string(), 200, @@ -615,6 +620,9 @@ pub mod tests { empty: vec![], }, )); + verify_ok_response_with(VmmData::MemoryDirty( + vmm::vmm_config::instance_info::MemoryDirty { bitmap: vec![] }, + )); verify_ok_response_with(VmmData::VmmVersion(String::default())); // Error. 
diff --git a/src/firecracker/src/api_server/request/memory.rs b/src/firecracker/src/api_server/request/memory.rs index e879d6b3b02..54df9f3d2a7 100644 --- a/src/firecracker/src/api_server/request/memory.rs +++ b/src/firecracker/src/api_server/request/memory.rs @@ -16,6 +16,11 @@ pub(crate) fn parse_get_memory() -> Result { Ok(ParsedRequest::new_sync(VmmAction::GetMemory)) } +pub(crate) fn parse_get_memory_dirty() -> Result { + METRICS.get_api_requests.instance_info_count.inc(); + Ok(ParsedRequest::new_sync(VmmAction::GetMemoryDirty)) +} + #[cfg(test)] mod tests { use super::*; @@ -36,4 +41,12 @@ mod tests { _ => panic!("Test failed."), } } + + #[test] + fn test_parse_get_memory_dirty_request() { + match parse_get_memory_dirty().unwrap().into_parts() { + (RequestAction::Sync(action), _) if *action == VmmAction::GetMemoryDirty => {} + _ => panic!("Test failed."), + } + } } diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 3054d9ae30e..4d8a43c60f4 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -276,6 +276,8 @@ pub enum VmmError { VmmObserverTeardown(vmm_sys_util::errno::Error), /// VMGenID error: {0} VMGenID(#[from] VmGenIdError), + /// Pagemap error: {0} + Pagemap(#[from] utils::pagemap::PagemapError), } /// Shorthand type for KVM dirty page bitmap. @@ -482,6 +484,69 @@ impl Vmm { mappings } + /// Get dirty pages bitmap for guest memory. + /// + /// Returns a bitmap where each bit represents whether a guest page has been written to + /// (i.e., present in RAM and not write-protected via userfaultfd). Pages are ordered + /// following the order of memory regions as returned by `guest_memory_mappings`. 
+ pub fn get_dirty_memory(&self, page_size: usize) -> Result, VmmError> { + let pagemap = utils::pagemap::PagemapReader::new(page_size)?; + let mut dirty_bitmap = vec![]; + + let sys_page_size = utils::get_page_size().expect("Failed to get system page size"); + + for region in self.guest_memory().iter() { + let base_addr = region.as_ptr() as usize; + let len = region.size(); + let nr_pages = len / page_size; + + // Use mincore to get resident pages at guest page size granularity + let mincore_n = len.div_ceil(sys_page_size); + let mut mincore_vec = vec![0u8; mincore_n]; + + // SAFETY: base_addr points to a valid guest memory region we own. + let mincore_result = unsafe { + libc::mincore( + base_addr as *mut libc::c_void, + len, + mincore_vec.as_mut_ptr(), + ) + }; + + // Build dirty bitmap: check pagemap only for pages that mincore reports resident. + let mut slot_bitmap = vec![0u64; nr_pages.div_ceil(64)]; + for page_idx in 0..nr_pages { + let page_offset = page_idx * page_size; + + let is_resident = if mincore_result == 0 { + let start = page_offset / sys_page_size; + let count = page_size.div_ceil(sys_page_size); + if start + count <= mincore_vec.len() { + mincore_vec[start..start + count] + .iter() + .any(|&v| (v & 0x1) != 0) + } else { + false + } + } else { + // If mincore failed, assume resident (conservative) + true + }; + + if is_resident { + let virt_addr = base_addr + page_offset; + if pagemap.is_page_dirty(virt_addr)? 
{ + slot_bitmap[page_idx / 64] |= 1u64 << (page_idx % 64); + } + } + } + + dirty_bitmap.extend_from_slice(&slot_bitmap); + } + + Ok(dirty_bitmap) + } + /// Sets RDA bit in serial console pub fn emulate_serial_init(&self) -> Result<(), EmulateSerialInitError> { // When restoring from a previously saved state, there is no serial diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 3eb6044d430..42270c89161 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -25,7 +25,9 @@ use crate::vmm_config::balloon::{ use crate::vmm_config::boot_source::{BootSourceConfig, BootSourceConfigError}; use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, DriveError}; use crate::vmm_config::entropy::{EntropyDeviceConfig, EntropyDeviceError}; -use crate::vmm_config::instance_info::{InstanceInfo, MemoryMappingsResponse, MemoryResponse}; +use crate::vmm_config::instance_info::{ + InstanceInfo, MemoryDirty, MemoryMappingsResponse, MemoryResponse, VmState, +}; use crate::vmm_config::machine_config::{MachineConfig, MachineConfigUpdate, VmConfigError}; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; @@ -69,6 +71,8 @@ pub enum VmmAction { GetMemoryMappings, /// Get memory info (resident and empty pages). GetMemory, + /// Get guest memory dirty pages information + GetMemoryDirty, /// Get microVM version. GetVmmVersion, /// Flush the metrics. This action can only be called after the logger has been configured. @@ -168,6 +172,8 @@ pub enum VmmActionError { OperationNotSupportedPostBoot, /// The requested operation is not supported before starting the microVM. OperationNotSupportedPreBoot, + /// The requested operation is not supported while the microVM is running. 
+ OperationNotSupportedWhileRunning, /// Start microvm error: {0} StartMicrovm(#[from] StartMicrovmError), /// Vsock config error: {0} @@ -197,6 +203,8 @@ pub enum VmmData { MemoryMappings(MemoryMappingsResponse), /// Memory info (resident and empty pages). Memory(MemoryResponse), + /// The guest memory dirty pages information + MemoryDirty(MemoryDirty), /// The microVM version. VmmVersion(String), } @@ -427,7 +435,9 @@ impl<'a> PrebootApiController<'a> { &self.vm_resources.vm_config, ))), GetVmInstanceInfo => Ok(VmmData::InstanceInformation(self.instance_info.clone())), - GetMemoryMappings | GetMemory => Err(VmmActionError::OperationNotSupportedPreBoot), + GetMemoryMappings | GetMemory | GetMemoryDirty => { + Err(VmmActionError::OperationNotSupportedPreBoot) + } GetVmmVersion => Ok(VmmData::VmmVersion(self.instance_info.vmm_version.clone())), InsertBlockDevice(config) => self.insert_block_device(config), InsertNetworkDevice(config) => self.insert_net_device(config), @@ -680,6 +690,7 @@ impl RuntimeApiController { empty: empty_bitmap, })) } + GetMemoryDirty => self.get_dirty_memory_info(), GetVmmVersion => Ok(VmmData::VmmVersion( self.vmm.lock().expect("Poisoned lock").version(), )), @@ -779,6 +790,28 @@ impl RuntimeApiController { .map_err(VmmActionError::InternalVmm) } + /// Get dirty pages information for guest memory + fn get_dirty_memory_info(&self) -> Result { + let start_us = get_time_us(ClockType::Monotonic); + let vmm = self.vmm.lock().expect("Poisoned lock"); + + // Dirty page tracking via pagemap requires the VM to be paused so that guest + // pages are not modified while we are reading the pagemap. 
+ if vmm.instance_info.state != VmState::Paused { + return Err(VmmActionError::OperationNotSupportedWhileRunning); + } + + let page_size = self.vm_resources.vm_config.huge_pages.page_size_kib(); + let bitmap = vmm + .get_dirty_memory(page_size) + .map_err(VmmActionError::InternalVmm)?; + + let elapsed_time_us = get_time_us(ClockType::Monotonic) - start_us; + info!("'get dirty memory' VMM action took {elapsed_time_us} us."); + + Ok(VmmData::MemoryDirty(MemoryDirty { bitmap })) + } + fn create_snapshot( &mut self, create_params: &CreateSnapshotParams, diff --git a/src/vmm/src/utils/mod.rs b/src/vmm/src/utils/mod.rs index a0ee2e90b6b..762aaa6bffb 100644 --- a/src/vmm/src/utils/mod.rs +++ b/src/vmm/src/utils/mod.rs @@ -9,6 +9,8 @@ pub mod net; pub mod signal; /// Module with state machine pub mod sm; +/// Module with pagemap utilities +pub mod pagemap; use std::num::Wrapping; use std::result::Result; diff --git a/src/vmm/src/utils/pagemap.rs b/src/vmm/src/utils/pagemap.rs new file mode 100644 index 00000000000..7cf626cb89c --- /dev/null +++ b/src/vmm/src/utils/pagemap.rs @@ -0,0 +1,111 @@ +//! Utilities for reading /proc/self/pagemap to track dirty pages. + +#![allow(clippy::cast_possible_wrap)] + +use std::fs::File; +use std::os::unix::io::AsRawFd; + +use crate::utils::get_page_size; + +const PAGEMAP_ENTRY_SIZE: usize = 8; + +/// Errors related to pagemap operations +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum PagemapError { + /// Failed to open /proc/self/pagemap: {0} + OpenPagemap(#[source] std::io::Error), + /// Failed to read pagemap entry: {0} + ReadEntry(#[source] std::io::Error), +} + +/// Represents a single entry in /proc/pid/pagemap. 
+/// +/// Each virtual page has an 8-byte entry with the following layout: +/// - Bits 0-54: Page frame number (PFN) if present +/// - Bit 55: Page is soft-dirty (written to since last clear) +/// - Bit 56: Page is exclusively mapped +/// - Bit 57: Page is write-protected via userfaultfd +/// - Bit 58: Unused +/// - Bit 59-60: Unused +/// - Bit 61: Page is file-page or shared-anon +/// - Bit 62: Page is swapped +/// - Bit 63: Page is present in RAM +#[derive(Debug, Clone, Copy)] +pub struct PagemapEntry { + raw: u64, +} + +impl PagemapEntry { + /// Create a PagemapEntry from bytes (native-endian, as read from the pagemap file) + pub fn from_bytes(bytes: [u8; 8]) -> Self { + Self { + raw: u64::from_ne_bytes(bytes), + } + } + + /// Check if page is write-protected via userfaultfd + pub fn is_write_protected(&self) -> bool { + (self.raw & (1u64 << 57)) != 0 + } + + /// Check if page is present in RAM (bit 63) + pub fn is_present(&self) -> bool { + (self.raw & (1u64 << 63)) != 0 + } +} + +/// Reader for /proc/self/pagemap +#[derive(Debug)] +pub struct PagemapReader { + pagemap_fd: File, +} + +impl PagemapReader { + /// Create a new PagemapReader + pub fn new(_page_size: usize) -> Result { + let pagemap_fd = File::open("/proc/self/pagemap").map_err(PagemapError::OpenPagemap)?; + + Ok(Self { pagemap_fd }) + } + + /// Check if a single page is dirty (write-protected bit cleared). + /// + /// Checks the first host page (4K) of the guest page at the given address. + /// For huge pages, all host pages within the huge page typically have the same + /// dirty status, so sampling the first is sufficient. + /// + /// # Arguments + /// * `virt_addr` - Virtual address of the page to check + /// + /// # Returns + /// True if the page is present and write-protected bit is cleared (dirty).
+ pub fn is_page_dirty(&self, virt_addr: usize) -> Result { + // Pagemap always uses host (4K) page size + let host_page_size = get_page_size().expect("Failed to get system page size"); + + // Calculate offset for this virtual page (using host page size) + let host_vpn = virt_addr / host_page_size; + let offset = (host_vpn * PAGEMAP_ENTRY_SIZE) as i64; + + let mut entry_bytes = [0u8; 8]; + + // SAFETY: pread is safe as long as the fd is valid and the buffer is properly sized + let ret = unsafe { + libc::pread( + self.pagemap_fd.as_raw_fd(), + entry_bytes.as_mut_ptr().cast(), + PAGEMAP_ENTRY_SIZE, + offset, + ) + }; + + if ret != PAGEMAP_ENTRY_SIZE as isize { + return Err(PagemapError::ReadEntry(std::io::Error::last_os_error())); + } + + let entry = PagemapEntry::from_bytes(entry_bytes); + + // Page must be present and the write_protected bit cleared (indicating it was written to) + Ok(entry.is_present() && !entry.is_write_protected()) + } +} diff --git a/src/vmm/src/vmm_config/instance_info.rs b/src/vmm/src/vmm_config/instance_info.rs index b945803cec9..c77c8d90cee 100644 --- a/src/vmm/src/vmm_config/instance_info.rs +++ b/src/vmm/src/vmm_config/instance_info.rs @@ -68,3 +68,11 @@ pub struct MemoryResponse { /// This is a subset of the resident pages. pub empty: Vec, } + +/// Information about dirty guest memory pages +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)] +pub struct MemoryDirty { + /// Bitmap for dirty pages. The bitmap is encoded as a vector of u64 values. + /// Each bit represents whether a page has been written since the last snapshot. + pub bitmap: Vec, +} From ac4bc0ef40442cb4f33e08c84876951984bd150f Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 18 Feb 2026 15:19:05 +0100 Subject: [PATCH 17/18] feat: enable write-protection on guest memory UFFD provides an API to enable write-protection for memory ranges tracked by a userfault file descriptor. 
Detailed information can be found here: https://docs.kernel.org/admin-guide/mm/userfaultfd.html. To use the feature, users need to register the memory region with UFFDIO_REGISTER_MODE_WP. Then, users need to explicitly enable write protection for sub-ranges of the registered region. Writes in pages within write-protected memory ranges can be handled in one of two ways. In synchronous mode, writes in a protected page will cause the kernel to send a write protection event over the userfaultfd. In asynchronous mode, the kernel will automatically handle writes to protected pages by clearing the write-protection bit. Userspace can later observe the write protection bit by looking into the corresponding entry of /proc/pid/pagemap. This commit unconditionally enables write protection for guest memory using the asynchronous mode. !NOTE!: asynchronous write protection requires (host) kernel version 6.7 or later. Signed-off-by: Babis Chalios --- Cargo.lock | 29 ++++++++++++++++++++++++++--- src/vmm/Cargo.toml | 6 +++++- src/vmm/src/lib.rs | 4 ++++ src/vmm/src/persist.rs | 26 +++++++++++++++++++++++--- 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9ad999e44d0..cd65527cfa1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -622,7 +622,7 @@ dependencies = [ "serde_json", "thiserror", "timerfd", - "userfaultfd", + "userfaultfd 0.8.1", "utils", "vmm", "vmm-sys-util", @@ -1456,7 +1456,20 @@ dependencies = [ "libc", "nix 0.27.1", "thiserror", - "userfaultfd-sys", + "userfaultfd-sys 0.5.0", +] + +[[package]] +name = "userfaultfd" +version = "0.9.0" +source = "git+https://github.com/e2b-dev/userfaultfd-rs?branch=feat_write_protection#9f4f7b42adbb9bea59016f4af248ed547cf160f0" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "libc", + "nix 0.27.1", + "thiserror", + "userfaultfd-sys 0.6.0", ] [[package]] @@ -1470,6 +1483,16 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "userfaultfd-sys" +version = "0.6.0" +source =
"git+https://github.com/e2b-dev/userfaultfd-rs?branch=feat_write_protection#9f4f7b42adbb9bea59016f4af248ed547cf160f0" +dependencies = [ + "bindgen 0.69.4", + "cc", + "cfg-if", +] + [[package]] name = "utf8parse" version = "0.2.2" @@ -1602,7 +1625,7 @@ dependencies = [ "slab", "thiserror", "timerfd", - "userfaultfd", + "userfaultfd 0.9.0", "utils", "vhost", "vm-allocator", diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 782280d1a79..afef9deac59 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -42,7 +42,11 @@ serde_json = "1.0.132" slab = "0.4.7" thiserror = "1.0.67" timerfd = "1.5.0" -userfaultfd = "0.8.1" +userfaultfd = { git = "https://github.com/e2b-dev/userfaultfd-rs", branch = "feat_write_protection", features = [ + "linux5_7", + "linux5_13", + "linux6_7" +] } utils = { path = "../utils" } vhost = { version = "0.13.0", features = ["vhost-user-frontend"] } vm-allocator = "0.1.0" diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 4d8a43c60f4..96288903af3 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -513,6 +513,10 @@ impl Vmm { ) }; + // TODO: if we don't support UFFD/async WP, we can completely skip this bit, as the + // UFFD handler already tracks dirty pages through the WriteProtected events. For the + // time being, we always do. + // // Build dirty bitmap: check pagemap only for pages that mincore reports resident. 
let mut slot_bitmap = vec![0u64; nr_pages.div_ceil(64)]; for page_idx in 0..nr_pages { diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 74c23bb4f75..358050da7e8 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -14,7 +14,7 @@ use std::sync::{Arc, Mutex}; use seccompiler::BpfThreadMap; use semver::Version; use serde::{Deserialize, Serialize}; -use userfaultfd::{FeatureFlags, Uffd, UffdBuilder}; +use userfaultfd::{FeatureFlags, RegisterMode, Uffd, UffdBuilder}; use vmm_sys_util::sock_ctrl_msg::ScmSocket; #[cfg(target_arch = "aarch64")] @@ -536,6 +536,8 @@ pub enum GuestMemoryFromUffdError { Create(userfaultfd::Error), /// Failed to register memory address range with the userfaultfd object: {0} Register(userfaultfd::Error), + /// Failed to enable write protection on memory address range with the userfaultfd object: {0} + WriteProtect(userfaultfd::Error), /// Failed to connect to UDS Unix stream: {0} Connect(#[from] std::io::Error), /// Failed to sends file descriptor: {0} @@ -560,6 +562,10 @@ fn guest_memory_from_uffd( uffd_builder.require_features(FeatureFlags::EVENT_REMOVE); } + uffd_builder.require_features( + FeatureFlags::MISSING_HUGETLBFS | FeatureFlags::WP_ASYNC, + ); + let uffd = uffd_builder .close_on_exec(true) .non_blocking(true) @@ -568,8 +574,22 @@ fn guest_memory_from_uffd( .map_err(GuestMemoryFromUffdError::Create)?; for mem_region in guest_memory.iter() { - uffd.register(mem_region.as_ptr().cast(), mem_region.size() as _) - .map_err(GuestMemoryFromUffdError::Register)?; + uffd.register_with_mode( + mem_region.as_ptr().cast(), + mem_region.size() as _, + RegisterMode::MISSING | RegisterMode::WRITE_PROTECT, + ) + .map_err(GuestMemoryFromUffdError::Register)?; + + // If memory is backed by huge pages, we can immediately write protect it. 
+ // Otherwise (memory is backed by anonymous memory), write protecting here + // won't have any effect, as the write-protection bit for a page will be + // wiped when the first page fault occurs. These cases need to be handled + // directly from the UFFD handler. + if huge_pages.is_hugetlbfs() { + uffd.write_protect(mem_region.as_ptr().cast(), mem_region.size() as _) + .map_err(GuestMemoryFromUffdError::WriteProtect)?; + } } send_uffd_handshake(mem_uds_path, &backend_mappings, &uffd)?; From 30cbb076d0f362bcc1e2898afbcde835681f116b Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Fri, 13 Feb 2026 16:00:06 -0800 Subject: [PATCH 18/18] ci: remove dependency changes test This is an optional test on the Firecracker side and most of the times it's ignored (when valid dependency changes happen). Having this fail blocks our fc-versions releases. Signed-off-by: Babis Chalios --- .../workflows/dependency_modification_check.yml | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 .github/workflows/dependency_modification_check.yml diff --git a/.github/workflows/dependency_modification_check.yml b/.github/workflows/dependency_modification_check.yml deleted file mode 100644 index ac6537af102..00000000000 --- a/.github/workflows/dependency_modification_check.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: Check no dependencies were modified - -on: pull_request - -jobs: - dependency_changed_check: - runs-on: ubuntu-latest - steps: - - name: "Checkout repository" - uses: actions/checkout@v3 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: "Check Cargo.lock not in changeset" - run: | - git fetch origin - git diff origin/$GITHUB_BASE_REF.. --name-only| ( ! grep "Cargo.lock")