From 4d3fa5c529653cdbe5448b4cd6b314dc59c24376 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 20 May 2026 16:05:37 -0700 Subject: [PATCH 01/10] Add openvmm as vmm option for wslc vms --- CMakeLists.txt | 46 +- cgmanifest.json | 11 + msipackage/CMakeLists.txt | 7 + msipackage/package.wix.in | 1 + packages.config | 2 +- src/linux/init/WSLCInit.cpp | 90 +- src/shared/inc/lxinitshared.h | 24 + src/windows/common/CMakeLists.txt | 15 + src/windows/common/ConsommeNetworking.cpp | 38 + src/windows/common/ConsommeNetworking.h | 41 + src/windows/common/WSLCUserSettings.cpp | 8 + src/windows/common/WSLCUserSettings.h | 2 + src/windows/service/exe/CMakeLists.txt | 20 + src/windows/service/exe/HcsVirtualMachine.cpp | 27 +- src/windows/service/exe/HcsVirtualMachine.h | 3 + .../service/exe/OpenVmmVirtualMachine.cpp | 881 ++++++++++++++++++ .../service/exe/OpenVmmVirtualMachine.h | 163 ++++ src/windows/service/exe/TtrpcClient.cpp | 445 +++++++++ src/windows/service/exe/TtrpcClient.h | 154 +++ .../service/exe/TtrpcEnvelopeCodec.cpp | 204 ++++ src/windows/service/exe/TtrpcEnvelopeCodec.h | 59 ++ .../service/exe/WSLCSessionManager.cpp | 37 +- src/windows/service/inc/wslc.idl | 17 +- src/windows/wslc/CMakeLists.txt | 4 + src/windows/wslcsession/DockerHTTPClient.cpp | 10 +- src/windows/wslcsession/DockerHTTPClient.h | 19 +- src/windows/wslcsession/WSLCSession.cpp | 22 +- .../wslcsession/WSLCVirtualMachine.cpp | 59 +- src/windows/wslcsession/WSLCVirtualMachine.h | 7 +- 29 files changed, 2388 insertions(+), 28 deletions(-) create mode 100644 src/windows/common/ConsommeNetworking.cpp create mode 100644 src/windows/common/ConsommeNetworking.h create mode 100644 src/windows/service/exe/OpenVmmVirtualMachine.cpp create mode 100644 src/windows/service/exe/OpenVmmVirtualMachine.h create mode 100644 src/windows/service/exe/TtrpcClient.cpp create mode 100644 src/windows/service/exe/TtrpcClient.h create mode 100644 src/windows/service/exe/TtrpcEnvelopeCodec.cpp create mode 100644 
src/windows/service/exe/TtrpcEnvelopeCodec.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d6e9c1c96..05f519d2eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${TARGET_PLATFORM}) # BUNDLE_ONLY mode: skip all source builds; only configure the msixbundle target and nuspec files. # Used by the package pipeline stage to create the bundle from pre-built per-platform msix files. option(BUNDLE_ONLY "Only configure the msixbundle target, skip all source builds" OFF) +option(INCLUDE_OPENVMM "Include OpenVMM build integrations" OFF) if (BUNDLE_ONLY) if (NOT PACKAGE_VERSION) message(FATAL_ERROR "PACKAGE_VERSION is required for BUNDLE_ONLY mode") @@ -63,7 +64,8 @@ include(FetchContent) set(FETCHCONTENT_BASE_DIR ${CMAKE_BINARY_DIR}/_deps/${TARGET_PLATFORM}) -# N.B. Changes to any of the FetchContent dependencies below (GSL, nlohmannjson) must be reflected in cgmanifest.json +# N.B. Changes to any of the FetchContent dependencies below (GSL, nlohmannjson, yaml-cpp, boost, protobuf) +# must be reflected in cgmanifest.json FetchContent_Declare(GSL URL https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.tar.gz @@ -104,6 +106,48 @@ FetchContent_Declare( FetchContent_MakeAvailable(boost_headers) +set(protobuf_BUILD_TESTS OFF CACHE BOOL "" FORCE) +set(protobuf_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) +set(protobuf_INSTALL OFF CACHE BOOL "" FORCE) + +if (INCLUDE_OPENVMM) + FetchContent_Declare(protobuf + URL https://github.com/protocolbuffers/protobuf/releases/download/v34.1/protobuf-34.1.tar.gz + URL_HASH SHA256=e4e6ff10760cf747a2decd1867741f561b216bd60cc4038c87564713a6da1848) + + FetchContent_MakeAvailable(protobuf) + include(${protobuf_SOURCE_DIR}/cmake/protobuf-generate.cmake) +endif() + +# Adds protobuf-generated C++ sources from VMService.proto to the given TARGET. +# The target must already exist. 
This function handles code generation, include +# directories, library linking, and MSVC warning suppression for generated code. +function(wsl_add_openvmm_proto TARGET) + set(_proto_file ${WSL_DEVICE_HOST_SOURCE_DIR}/proto/VMService.proto) + set(_proto_out_dir ${CMAKE_CURRENT_BINARY_DIR}/generated) + + if (NOT EXISTS ${_proto_file}) + message(FATAL_ERROR "Expected DeviceHost proto file was not found: ${_proto_file}") + endif() + + protobuf_generate( + TARGET ${TARGET} + PROTOS ${_proto_file} + IMPORT_DIRS ${WSL_DEVICE_HOST_SOURCE_DIR}/proto ${protobuf_SOURCE_DIR}/src + PROTOC_OUT_DIR ${_proto_out_dir} + ) + + target_include_directories(${TARGET} PRIVATE ${_proto_out_dir}) + target_link_libraries(${TARGET} protobuf::libprotobuf) + + if (MSVC) + set_source_files_properties( + ${_proto_out_dir}/VMService.pb.cc + TARGET_DIRECTORY ${TARGET} + PROPERTIES COMPILE_OPTIONS "/wd4267;/wd4244;/wd4018") + endif() +endfunction() + # Import modules list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") diff --git a/cgmanifest.json b/cgmanifest.json index f81e699f31..3b14dfb3fa 100644 --- a/cgmanifest.json +++ b/cgmanifest.json @@ -84,6 +84,17 @@ "hash": "sha256:42f6e95cad6ec532fd372391373363b62a14af6d771056dbfc86160e6dfff7aa" } } + }, + { + "component": { + "type": "other", + "other": { + "name": "protobuf", + "version": "34.1", + "downloadUrl": "https://github.com/protocolbuffers/protobuf/releases/download/v34.1/protobuf-34.1.tar.gz", + "hash": "sha256:e4e6ff10760cf747a2decd1867741f561b216bd60cc4038c87564713a6da1848" + } + } } ] } \ No newline at end of file diff --git a/msipackage/CMakeLists.txt b/msipackage/CMakeLists.txt index 99586c9727..5583fad210 100644 --- a/msipackage/CMakeLists.txt +++ b/msipackage/CMakeLists.txt @@ -27,6 +27,13 @@ foreach(binary ${WINDOWS_BINARIES}) list(APPEND BINARIES_DEPENDENCIES "${PACKAGE_INPUT_DIR}/${binary}") endforeach() +if (INCLUDE_OPENVMM) + set(WSL_DEVICE_HOST_BINARIES wsldevicehost.dll;openvmm.exe) + foreach(binary 
${WSL_DEVICE_HOST_BINARIES}) + list(APPEND BINARIES_DEPENDENCIES "${WSL_DEVICE_HOST_SOURCE_DIR}/bin/${TARGET_PLATFORM}/${binary}") + endforeach() +endif() + set(LINUX_BINARIES init;initrd.img) foreach(binary ${LINUX_BINARIES}) list(APPEND BINARIES_DEPENDENCIES "${BIN}/${binary}") diff --git a/msipackage/package.wix.in b/msipackage/package.wix.in index 2584248b68..28cba85b07 100644 --- a/msipackage/package.wix.in +++ b/msipackage/package.wix.in @@ -241,6 +241,7 @@ + diff --git a/packages.config b/packages.config index 5094472ed2..2acea0ff50 100644 --- a/packages.config +++ b/packages.config @@ -19,7 +19,7 @@ - + diff --git a/src/linux/init/WSLCInit.cpp b/src/linux/init/WSLCInit.cpp index 5def741aac..6da1474f9f 100644 --- a/src/linux/init/WSLCInit.cpp +++ b/src/linux/init/WSLCInit.cpp @@ -670,6 +670,94 @@ void HandleMessageImpl( Transaction.SendResultMessage(result < 0 ? errno : 0); } +void HandleMessageImpl( + wsl::shared::SocketChannel& Channel, + wsl::shared::Transaction& Transaction, + const WSLC_CONFIGURE_NETWORKING& Message, + const gsl::span& Buffer) +{ + int result = -EINVAL; + auto sendResult = wil::scope_exit([&]() { Transaction.SendResultMessage(result); }); + + const auto* iface = wsl::shared::string::FromSpan(Buffer, Message.InterfaceOffset); + const auto* address = wsl::shared::string::FromSpan(Buffer, Message.AddressOffset); + const auto* gateway = wsl::shared::string::FromSpan(Buffer, Message.GatewayOffset); + const auto* dnsServer = wsl::shared::string::FromSpan(Buffer, Message.DnsServerOffset); + + THROW_ERRNO_IF(EINVAL, iface == nullptr || address == nullptr || gateway == nullptr || dnsServer == nullptr); + + // Bring up the interface and configure the static address, route, and DNS. + auto configCmd = std::format( + "ip link set {} up && ip addr add {} dev {} && ip route add default via {}", + iface, address, iface, gateway); + + // Use a pipe to detect child completion. 
The child inherits the write end; + // when it exits (via execl or _exit), the write end is closed and read() + // returns 0. This avoids racing with the WSLC_WATCH_PROCESSES handler's + // waitpid(-1) which may reap the child before we can. + int pipeFds[2]{}; + THROW_LAST_ERROR_IF(pipe2(pipeFds, O_CLOEXEC) < 0); + wil::unique_fd pipeRead{pipeFds[0]}; + wil::unique_fd pipeWrite{pipeFds[1]}; + + int childPid = UtilCreateChildProcess("ConfigureNetworking", [&configCmd, &pipeWrite]() { + // Clear CLOEXEC on the write end so it stays open across execl. + // When the shell exits, the fd is closed and the parent's read returns. + fcntl(pipeWrite.get(), F_SETFD, 0); + execl("/bin/sh", "/bin/sh", "-c", configCmd.c_str(), nullptr); + LOG_ERROR("execl(/bin/sh) failed, {}", errno); + }); + + // Close the write end in the parent — only the child holds it now. + pipeWrite.reset(); + + if (childPid < 0) + { + result = -errno; + return; + } + + // Wait for the child to exit by reading from the pipe. When the child + // (and the shell it exec'd) exits, all write ends are closed and read + // returns 0. + char dummy{}; + TEMP_FAILURE_RETRY(read(pipeRead.get(), &dummy, sizeof(dummy))); + + // Try to reap the child. If WSLC_WATCH_PROCESSES already reaped it, we + // get ECHILD which is fine — the pipe close confirms the child exited. + int status = -1; + if (TEMP_FAILURE_RETRY(waitpid(childPid, &status, 0)) < 0) + { + if (errno == ECHILD) + { + // Child was already reaped by the WatchProcesses handler. + // The pipe confirmed it exited, so treat as success. + status = 0; + } + else + { + result = -errno; + return; + } + } + + result = UtilProcessChildExitCode(status, "ConfigureNetworking"); + if (result != 0) + { + return; + } + + // Write DNS configuration. 
+ auto resolv = std::format("nameserver {}\n", dnsServer); + if (WriteToFile("/etc/resolv.conf", resolv.c_str()) < 0) + { + result = -errno; + return; + } + + result = 0; +} + void HandleMessageImpl(wsl::shared::SocketChannel& Channel, wsl::shared::Transaction& Transaction, const WSLC_UNMOUNT&, const gsl::span& Buffer) { auto* path = wsl::shared::string::FromMessageBuffer(Buffer); @@ -831,7 +919,7 @@ void ProcessMessage(wsl::shared::SocketChannel& Channel, wsl::shared::Transactio { try { - HandleMessage( + HandleMessage( Channel, Transaction, Type, Buffer); } catch (...) diff --git a/src/shared/inc/lxinitshared.h b/src/shared/inc/lxinitshared.h index eef6fc8a95..d47d11595f 100644 --- a/src/shared/inc/lxinitshared.h +++ b/src/shared/inc/lxinitshared.h @@ -403,6 +403,7 @@ typedef enum _LX_MESSAGE_TYPE LxMessageWSLCWatchProcesses, LxMessageWSLCProcessExited, LxMessageWSLCUnixConnect, + LxMessageWSLCConfigureNetworking, } LX_MESSAGE_TYPE, *PLX_MESSAGE_TYPE; @@ -513,6 +514,7 @@ inline auto ToString(LX_MESSAGE_TYPE messageType) X(LxMessageWSLCWatchProcesses) X(LxMessageWSLCProcessExited) X(LxMessageWSLCUnixConnect) + X(LxMessageWSLCConfigureNetworking) default: return ""; @@ -1836,6 +1838,28 @@ struct WSLC_UNIX_CONNECT PRETTY_PRINT(FIELD(Header), STRING_FIELD(PathOffset)); }; +struct WSLC_CONFIGURE_NETWORKING +{ + static inline auto Type = LxMessageWSLCConfigureNetworking; + using TResponse = RESULT_MESSAGE; + + DECLARE_MESSAGE_CTOR(WSLC_CONFIGURE_NETWORKING); + + MESSAGE_HEADER Header; + unsigned int InterfaceOffset{}; + unsigned int AddressOffset{}; // e.g., "10.0.0.2/24" + unsigned int GatewayOffset{}; // e.g., "10.0.0.1" + unsigned int DnsServerOffset{}; // e.g., "10.0.0.1" + char Buffer[]; + + PRETTY_PRINT( + FIELD(Header), + STRING_FIELD(InterfaceOffset), + STRING_FIELD(AddressOffset), + STRING_FIELD(GatewayOffset), + STRING_FIELD(DnsServerOffset)); +}; + typedef struct _LX_MINI_INIT_IMPORT_RESULT { static inline auto Type = LxMiniInitMessageImportResult; diff 
--git a/src/windows/common/CMakeLists.txt b/src/windows/common/CMakeLists.txt index 9551fcbaf0..10f68da681 100644 --- a/src/windows/common/CMakeLists.txt +++ b/src/windows/common/CMakeLists.txt @@ -25,6 +25,7 @@ set(SOURCES LxssMessagePort.cpp LxssSecurity.cpp LxssServerPort.cpp + ConsommeNetworking.cpp NatNetworking.cpp notifications.cpp Redirector.cpp @@ -100,6 +101,7 @@ set(HEADERS hcs_schema.h helpers.hpp hvsocket.hpp + ConsommeNetworking.h INetworkingEngine.h interop.hpp HandleIO.h @@ -147,6 +149,19 @@ target_precompile_headers(common PRIVATE precomp.h) set_target_properties(common PROPERTIES FOLDER windows) target_include_directories(common PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/../service/mc/${TARGET_PLATFORM}/${CMAKE_BUILD_TYPE}) +# ATL headers (atlsafe.h) are needed by precomp.h but not automatically included +# by CMake. Derive the path from the MSVC toolset directory. +get_filename_component(_MSVC_TOOLS_DIR "${CMAKE_LINKER}" DIRECTORY) # .../bin/Hostx64/x64 +get_filename_component(_MSVC_TOOLS_DIR "${_MSVC_TOOLS_DIR}" DIRECTORY) # .../bin/Hostx64 +get_filename_component(_MSVC_TOOLS_DIR "${_MSVC_TOOLS_DIR}" DIRECTORY) # .../bin +get_filename_component(_MSVC_TOOLS_DIR "${_MSVC_TOOLS_DIR}" DIRECTORY) # .../MSVC/ +set(_ATL_INCLUDE_DIR "${_MSVC_TOOLS_DIR}/atlmfc/include") +if(EXISTS "${_ATL_INCLUDE_DIR}") + target_include_directories(common PRIVATE "${_ATL_INCLUDE_DIR}") +else() + message(WARNING "ATL include directory not found: ${_ATL_INCLUDE_DIR}") +endif() + # WSLCUserSettings.cpp uses yaml-cpp headers. set_source_files_properties(WSLCUserSettings.cpp PROPERTIES INCLUDE_DIRECTORIES "${yaml-cpp_SOURCE_DIR}/include" diff --git a/src/windows/common/ConsommeNetworking.cpp b/src/windows/common/ConsommeNetworking.cpp new file mode 100644 index 0000000000..6383fc2181 --- /dev/null +++ b/src/windows/common/ConsommeNetworking.cpp @@ -0,0 +1,38 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. 
+ +#include "precomp.h" +#include "ConsommeNetworking.h" + +using wsl::core::ConsommeNetworking; + +ConsommeNetworking::ConsommeNetworking(bool enableLocalhostRelay) : m_enableLocalhostRelay(enableLocalhostRelay) +{ +} + +void ConsommeNetworking::Initialize() +{ + // No host-side initialization needed. Consomme configures NAT, DHCP, and + // DNS inside the VMM process at VM boot time. + WSL_LOG("ConsommeNetworking::Initialize"); +} + +void ConsommeNetworking::TraceLoggingRundown() noexcept +{ + WSL_LOG( + "ConsommeNetworking::TraceLoggingRundown", + TraceLoggingValue("Consomme", "NetworkingMode"), + TraceLoggingValue(m_enableLocalhostRelay, "LocalhostRelay")); +} + +void ConsommeNetworking::FillInitialConfiguration(LX_MINI_INIT_NETWORKING_CONFIGURATION& message) +{ + message.NetworkingMode = LxMiniInitNetworkingModeNat; + message.EnableDhcpClient = true; + message.DisableIpv6 = false; + message.PortTrackerType = m_enableLocalhostRelay ? LxMiniInitPortTrackerTypeRelay : LxMiniInitPortTrackerTypeNone; +} + +void ConsommeNetworking::StartPortTracker(wil::unique_socket&& socket) +{ + WSL_LOG("ConsommeNetworking::StartPortTracker", TraceLoggingValue("no-op", "Status")); +} diff --git a/src/windows/common/ConsommeNetworking.h b/src/windows/common/ConsommeNetworking.h new file mode 100644 index 0000000000..a09915998b --- /dev/null +++ b/src/windows/common/ConsommeNetworking.h @@ -0,0 +1,41 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. + +#pragma once + +#include "INetworkingEngine.h" + +namespace wsl::core { + +// Default network parameters for consomme's built-in NAT/DHCP. +// These match the consomme backend defaults in OpenVMM. +constexpr auto c_consommeGuestIp = "10.0.0.2"; +constexpr auto c_consommeGatewayIp = "10.0.0.1"; +constexpr auto c_consommeSubnetMask = "24"; +constexpr auto c_consommeInterface = "eth0"; + +// Networking engine for OpenVMM's built-in consomme NAT backend. 
+// +// Unlike NatNetworking and VirtioNetworking which manage the guest's network +// configuration from the host via GNS, consomme handles NAT, DHCP, and DNS +// entirely within the VMM process. +class ConsommeNetworking final : public INetworkingEngine +{ +public: + explicit ConsommeNetworking(bool enableLocalhostRelay); + ~ConsommeNetworking() override = default; + + ConsommeNetworking(const ConsommeNetworking&) = delete; + ConsommeNetworking(ConsommeNetworking&&) = delete; + ConsommeNetworking& operator=(const ConsommeNetworking&) = delete; + ConsommeNetworking& operator=(ConsommeNetworking&&) = delete; + + void Initialize() override; + void TraceLoggingRundown() noexcept override; + void FillInitialConfiguration(LX_MINI_INIT_NETWORKING_CONFIGURATION& message) override; + void StartPortTracker(wil::unique_socket&& socket) override; + +private: + bool m_enableLocalhostRelay{}; +}; + +} // namespace wsl::core diff --git a/src/windows/common/WSLCUserSettings.cpp b/src/windows/common/WSLCUserSettings.cpp index ecefba832e..1bd8cb6cf3 100644 --- a/src/windows/common/WSLCUserSettings.cpp +++ b/src/windows/common/WSLCUserSettings.cpp @@ -47,6 +47,9 @@ static constexpr std::string_view s_DefaultSettingsTemplate = " # Maximum disk image size (e.g. 
500GB default: 1TB)\n" " # maxStorageSize: default\n" "\n" + " # Use OpenVMM as the virtual machine backend (experimental, default: false)\n" + " # openVmm: false\n" + "\n" "# Credential storage backend: \"wincred\" or \"file\" (default: wincred)\n" "# credentialStore: wincred\n"; @@ -116,6 +119,11 @@ namespace details { return value; } + WSLC_VALIDATE_SETTING(SessionOpenVmm) + { + return value; + } + WSLC_VALIDATE_SETTING(CredentialStore) { if (value == "wincred") diff --git a/src/windows/common/WSLCUserSettings.h b/src/windows/common/WSLCUserSettings.h index b7b9549db8..445c458576 100644 --- a/src/windows/common/WSLCUserSettings.h +++ b/src/windows/common/WSLCUserSettings.h @@ -41,6 +41,7 @@ enum class Setting : size_t SessionNetworkingMode, SessionHostFileShareMode, SessionDnsTunneling, + SessionOpenVmm, CredentialStore, Max @@ -88,6 +89,7 @@ namespace details { DEFINE_SETTING_MAPPING(SessionNetworkingMode, std::string, WSLCNetworkingMode, WSLCNetworkingModeVirtioProxy, "session.networkingMode") DEFINE_SETTING_MAPPING(SessionHostFileShareMode, std::string, HostFileShareMode, HostFileShareMode::VirtioFs, "session.hostFileShareMode") DEFINE_SETTING_MAPPING(SessionDnsTunneling, bool, bool, true, "session.dnsTunneling") + DEFINE_SETTING_MAPPING(SessionOpenVmm, bool, bool, false, "session.openVmm") DEFINE_SETTING_MAPPING(CredentialStore, std::string, CredentialStoreType, CredentialStoreType::WinCred, "credentialStore") #undef DEFINE_SETTING_MAPPING diff --git a/src/windows/service/exe/CMakeLists.txt b/src/windows/service/exe/CMakeLists.txt index c7b4ebb0dd..757dfe2daa 100644 --- a/src/windows/service/exe/CMakeLists.txt +++ b/src/windows/service/exe/CMakeLists.txt @@ -57,8 +57,28 @@ set(HEADERS WSLCSessionManagerFactory.h WSLCPluginNotifier.h) +if (INCLUDE_OPENVMM) + list(APPEND SOURCES + OpenVmmVirtualMachine.cpp + TtrpcEnvelopeCodec.cpp + TtrpcClient.cpp) + list(APPEND HEADERS + OpenVmmVirtualMachine.h + TtrpcEnvelopeCodec.h + TtrpcClient.h) +endif() + 
add_executable(wslservice ${SOURCES} ${HEADERS}) add_dependencies(wslservice wslserviceidl wslservicemc) +target_compile_definitions(wslservice PRIVATE WSL_INCLUDE_OPENVMM=$) + +if (INCLUDE_OPENVMM) + wsl_add_openvmm_proto(wslservice) + + if (MSVC) + set_source_files_properties(TtrpcClient.cpp PROPERTIES COMPILE_OPTIONS "/wd4267;/wd4244;/wd4018") + endif() +endif() add_compile_definitions(__WRL_CLASSIC_COM__) add_compile_definitions(__WRL_DISABLE_STATIC_INITIALIZE__) add_compile_definitions(USE_COM_CONTEXT_DEF=1) diff --git a/src/windows/service/exe/HcsVirtualMachine.cpp b/src/windows/service/exe/HcsVirtualMachine.cpp index 8939867023..36ba3a4452 100644 --- a/src/windows/service/exe/HcsVirtualMachine.cpp +++ b/src/windows/service/exe/HcsVirtualMachine.cpp @@ -289,6 +289,9 @@ HcsVirtualMachine::HcsVirtualMachine(_In_ const WSLCSessionSettings* Settings) // Create a listening socket for mini_init to connect to once the VM is running. m_listenSocket = wsl::windows::common::hvsocket::Listen(m_vmId, LX_INIT_UTILITY_VM_INIT_PORT); + // Create a listening socket for crash dump collection from the guest. 
+ m_crashDumpListenSocket = wsl::windows::common::hvsocket::Listen(m_vmId, LX_INIT_UTILITY_VM_CRASH_DUMP_PORT); + // Start the virtual machine hcs::StartComputeSystem(m_computeSystem.get(), json.c_str()); @@ -789,4 +792,26 @@ void HcsVirtualMachine::FreeLun(ULONG Lun) THROW_HR_IF(E_INVALIDARG, !m_lunBitmap[Lun]); m_lunBitmap[Lun] = false; -} \ No newline at end of file +} + +HRESULT HcsVirtualMachine::ConnectToVsockPort(_In_ ULONG Port, _Out_ HANDLE* Socket) +try +{ + auto socket = wsl::windows::common::hvsocket::Connect(m_vmId, Port); + *Socket = reinterpret_cast(socket.release()); + return S_OK; +} +CATCH_RETURN() + +HRESULT HcsVirtualMachine::AcceptCrashDumpConnection(_Out_ HANDLE* Socket) +try +{ + auto socket = wsl::windows::common::hvsocket::CancellableAccept( + m_crashDumpListenSocket.get(), INFINITE, m_vmExitEvent.get()); + + THROW_HR_IF(E_ABORT, !socket.has_value()); + + *Socket = reinterpret_cast(socket->release()); + return S_OK; +} +CATCH_RETURN() \ No newline at end of file diff --git a/src/windows/service/exe/HcsVirtualMachine.h b/src/windows/service/exe/HcsVirtualMachine.h index a0c25e635c..f6d085044d 100644 --- a/src/windows/service/exe/HcsVirtualMachine.h +++ b/src/windows/service/exe/HcsVirtualMachine.h @@ -45,6 +45,8 @@ class HcsVirtualMachine IFACEMETHOD(AddShare)(_In_ LPCWSTR WindowsPath, _In_ BOOL ReadOnly, _Out_ GUID* ShareId) override; IFACEMETHOD(RemoveShare)(_In_ REFGUID ShareId) override; IFACEMETHOD(GetTerminationEvent)(_Out_ HANDLE* Event) override; + IFACEMETHOD(ConnectToVsockPort)(_In_ ULONG Port, _Out_ HANDLE* Socket) override; + IFACEMETHOD(AcceptCrashDumpConnection)(_Out_ HANDLE* Socket) override; private: struct DiskInfo @@ -80,6 +82,7 @@ class HcsVirtualMachine WSLCNetworkingMode m_networkingMode{}; wil::unique_socket m_listenSocket; + wil::unique_socket m_crashDumpListenSocket; std::shared_ptr m_dmesgCollector; std::shared_ptr m_guestDeviceManager; std::optional m_natConfig; diff --git 
a/src/windows/service/exe/OpenVmmVirtualMachine.cpp b/src/windows/service/exe/OpenVmmVirtualMachine.cpp new file mode 100644 index 0000000000..12f8bd5c3e --- /dev/null +++ b/src/windows/service/exe/OpenVmmVirtualMachine.cpp @@ -0,0 +1,881 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. + +/*++ + +Module Name: + + OpenVmmVirtualMachine.cpp + +Abstract: + + Implementation of IWSLCVirtualMachine using OpenVMM as the VMM backend. + + Spawns openvmm.exe in ttrpc orchestration mode and configures the VM via + vmservice RPCs (CreateVM, ResumeVM, ModifyResource, etc.). + + Current limitations: + - AddShare/RemoveShare require vmservice.proto extensions. + - GPU passthrough is not supported. + +--*/ + +#include "precomp.h" + +#include "OpenVmmVirtualMachine.h" +#include +#include +#include +#include "wslutil.h" +#include "lxinitshared.h" +#include "ConsommeNetworking.h" + +using namespace wsl::windows::common; +using wsl::windows::service::wslc::OpenVmmVirtualMachine; +using wsl::windows::service::wslc::TtrpcClient; +namespace wslutil = wsl::windows::common::wslutil; + +OpenVmmVirtualMachine::OpenVmmVirtualMachine(_In_ const WSLCSessionSettings* Settings) +{ + THROW_HR_IF(E_POINTER, Settings == nullptr); + + std::lock_guard lock(m_lock); + + THROW_IF_FAILED(CoCreateGuid(&m_vmId)); + m_vmIdString = wsl::shared::string::GuidToString(m_vmId, wsl::shared::string::GuidToStringFlags::Uppercase); + m_featureFlags = Settings->FeatureFlags; + + // Disable features not yet supported by the OpenVMM backend. + WI_ClearFlag(m_featureFlags, WslcFeatureFlagsGPU); + WI_ClearFlag(m_featureFlags, WslcFeatureFlagsVirtioFs); + + m_networkingMode = Settings->NetworkingMode; + m_bootTimeoutMs = Settings->BootTimeoutMs; + m_cpuCount = Settings->CpuCount; + m_memoryMb = Settings->MemoryMb; + + // Resolve paths for kernel, initrd, and root VHD. 
+ auto basePath = wslutil::GetBasePath(); + +#ifdef WSL_KERNEL_PATH + m_kernelPath = std::filesystem::path(WSL_KERNEL_PATH); +#else + m_kernelPath = basePath / L"tools" / L"vmlinux"; + if (!std::filesystem::exists(m_kernelPath)) + { + // Fall back to the standard kernel name if vmlinux is not found. + m_kernelPath = basePath / L"tools" / LXSS_VM_MODE_KERNEL_NAME; + } +#endif + + m_initrdPath = basePath / L"tools" / LXSS_VM_MODE_INITRD_NAME; + +#ifdef WSL_KERNEL_MODULES_PATH + m_modulesVhdPath = std::filesystem::path(TEXT(WSL_KERNEL_MODULES_PATH)); +#else + m_modulesVhdPath = basePath / L"tools" / L"modules.vhd"; +#endif + + if (Settings->RootVhdOverride != nullptr) + { + m_rootVhdPath = Settings->RootVhdOverride; + } + else + { +#ifdef WSL_SYSTEM_DISTRO_PATH + m_rootVhdPath = TEXT(WSL_SYSTEM_DISTRO_PATH); +#else + m_rootVhdPath = std::filesystem::path(wslutil::GetMsiPackagePath().value()) / L"system.vhd"; +#endif + } + + // Locate openvmm.exe. Expect it alongside the WSL binaries. + m_openvmmPath = basePath / L"openvmm.exe"; + THROW_HR_IF_MSG( + HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND), + !std::filesystem::exists(m_openvmmPath), + "openvmm.exe not found at: %ls", + m_openvmmPath.c_str()); + + // Pre-create the container storage VHDX so it's ready for hot-attach. + // WSLCSession::ConfigureStorage will attach, format, and mount it later. 
+ if (Settings->StoragePath != nullptr) + { + std::filesystem::path storagePath{Settings->StoragePath}; + m_storageVhdPath = storagePath / L"storage.vhdx"; + + std::filesystem::create_directories(storagePath); + if (!std::filesystem::exists(m_storageVhdPath)) + { + VIRTUAL_STORAGE_TYPE storageType{VIRTUAL_STORAGE_TYPE_DEVICE_VHDX, VIRTUAL_STORAGE_TYPE_VENDOR_MICROSOFT}; + CREATE_VIRTUAL_DISK_PARAMETERS createParams{}; + createParams.Version = CREATE_VIRTUAL_DISK_VERSION_2; + createParams.Version2.MaximumSize = Settings->MaximumStorageSizeMb * 1024ULL * 1024ULL; + wil::unique_hfile diskHandle; + THROW_IF_WIN32_ERROR_MSG(CreateVirtualDisk( + &storageType, m_storageVhdPath.c_str(), VIRTUAL_DISK_ACCESS_NONE, + nullptr, CREATE_VIRTUAL_DISK_FLAG_NONE, 0, &createParams, nullptr, &diskHandle), + "Failed to create storage VHDX: %ls", m_storageVhdPath.c_str()); + } + } + + // Build kernel command line matching HcsVirtualMachine's format. + m_kernelCmdLine = L"initrd=\\" LXSS_VM_MODE_INITRD_NAME L" " TEXT(WSLC_ROOT_INIT_ENV) L"=1 panic=-1"; + m_kernelCmdLine += std::format(L" nr_cpus={}", Settings->CpuCount); + + // Append common WSL kernel parameters (timesync, printk, page reporting). + helpers::AppendCommonKernelCommandLine(m_kernelCmdLine, c_pageReportingOrder); + + // Setup dmesg collector with optional DmesgOutput handle, matching HcsVirtualMachine. + // The DmesgCollector creates named pipes that we pass to OpenVMM via serial and + // virtio console configs to capture kernel output. + wil::unique_handle dmesgOutputHandle; + if (Settings->DmesgOutput.Handle.File != nullptr && Settings->DmesgOutput.Handle.File != INVALID_HANDLE_VALUE) + { + dmesgOutputHandle.reset(wslutil::DuplicateHandle(wslutil::FromCOMInputHandle(Settings->DmesgOutput), GENERIC_WRITE | SYNCHRONIZE)); + } + + // REVIEW: Can we always enable earlycon? 
+ m_dmesgCollector = DmesgCollector::Create( + m_vmId, m_vmExitEvent, true, false, L"", true /* earlycon */, std::move(dmesgOutputHandle)); + + // Earlycon captures kernel output via COM1 before the hvc0 driver loads. + m_kernelCmdLine += L" earlycon=uart8250,io,0x3f8,115200"; + + m_kernelCmdLine += L" console=hvc0 debug"; + + // Set up vsock bridge path for HvSocket emulation. + // OpenVMM uses a Unix domain socket for the hybrid_vsock bridge. + // The hybrid_vsock bridge appends "_" (e.g. "_50000") to this path, + // and Unix domain sockets have a 108-byte path limit on Windows. + // The SYSTEM profile temp path is too long, so use a short fixed directory. + auto vsockDir = std::filesystem::path(c_vsockBridgeDir); + std::filesystem::create_directories(vsockDir); + // Use first 8 chars of the GUID to keep it short but unique. + m_vsockPath = vsockDir / std::format(L"vm-{:.8}", m_vmIdString); + + // Set up the ttrpc socket path for runtime VM management. + m_ttrpcSocketPath = vsockDir / std::format(L"vm-{:.8}.ttrpc", m_vmIdString); + DeleteFileW(m_ttrpcSocketPath.c_str()); + + // Setup boot VHDs — use the same pattern as HcsVirtualMachine. 
+ auto attachBootDisk = [&](PCWSTR path) { + const ULONG lun = AllocateLun(); + DiskInfo disk{path, true}; + m_attachedDisks.emplace(lun, std::move(disk)); + }; + + attachBootDisk(m_rootVhdPath.c_str()); + attachBootDisk(m_modulesVhdPath.c_str()); + + auto cleanupOnFailure = wil::scope_exit([this]() { + m_vmExitEvent.SetEvent(); + + if (m_ttrpcClient) + { + m_ttrpcClient->Disconnect(); + m_ttrpcClient.reset(); + } + + if (m_processHandle) + { + TerminateProcess(m_processHandle.get(), 1); + } + + if (m_processWatchThread.joinable()) + { + m_processWatchThread.join(); + } + + if (m_initListenSocket != INVALID_SOCKET) + { + closesocket(m_initListenSocket); + m_initListenSocket = INVALID_SOCKET; + } + DeleteFileW(m_initListenPath.c_str()); + + if (m_crashDumpListenSocket != INVALID_SOCKET) + { + closesocket(m_crashDumpListenSocket); + m_crashDumpListenSocket = INVALID_SOCKET; + } + DeleteFileW(m_crashDumpListenPath.c_str()); + + try + { + if (!m_ttrpcSocketPath.empty()) + { + std::filesystem::remove(m_ttrpcSocketPath); + } + } + CATCH_LOG() + }); + + // Create Unix domain socket listeners for the hybrid_vsock bridge BEFORE + // launching openvmm. The guest connects to vsock ports immediately on boot, + // and OpenVMM's hybrid_vsock bridge relays connections to the host. + // + // The bridge uses the HvSocket GUID template to construct the path: + // port 50000 (0xC350) becomes GUID 0000c350-facb-11e6-bd58-64006a7986d3, + // and the bridge looks for _ on the host. + std::tie(m_initListenSocket, m_initListenPath) = + CreateVsockListener(LX_INIT_UTILITY_VM_INIT_PORT); + + std::tie(m_crashDumpListenSocket, m_crashDumpListenPath) = + CreateVsockListener(LX_INIT_UTILITY_VM_CRASH_DUMP_PORT); + + // Launch the openvmm process. 
+ LaunchOpenVmm(); + + cleanupOnFailure.release(); +} + +std::pair OpenVmmVirtualMachine::CreateVsockListener(ULONG port) +{ + auto portHex = std::format(L"{:08x}", port); + auto listenPath = std::format(L"{}_{}-facb-11e6-bd58-64006a7986d3", m_vsockPath.wstring(), portHex); + DeleteFileW(listenPath.c_str()); + + SOCKET listenSocket = ::socket(AF_UNIX, SOCK_STREAM, 0); + THROW_LAST_ERROR_IF(listenSocket == INVALID_SOCKET); + auto closeOnFailure = wil::scope_exit([&] { closesocket(listenSocket); }); + + sockaddr_un addr{}; + addr.sun_family = AF_UNIX; + auto narrowPath = wsl::shared::string::WideToMultiByte(listenPath); + THROW_HR_IF_MSG(E_INVALIDARG, narrowPath.size() >= sizeof(addr.sun_path), + "vsock bridge path too long: %hs", narrowPath.c_str()); + memcpy(addr.sun_path, narrowPath.c_str(), narrowPath.size() + 1); + + THROW_LAST_ERROR_IF(bind(listenSocket, reinterpret_cast(&addr), sizeof(addr)) == SOCKET_ERROR); + THROW_LAST_ERROR_IF(listen(listenSocket, 1) == SOCKET_ERROR); + + WSL_LOG("OpenVmmVsockListenerReady", + TraceLoggingValue(listenPath.c_str(), "ListenPath"), + TraceLoggingValue(port, "Port")); + + closeOnFailure.release(); + return {listenSocket, std::move(listenPath)}; +} + +std::wstring OpenVmmVirtualMachine::BuildCommandLine() const +{ + std::wstring cmd = std::format(L"\"{}\"", m_openvmmPath.wstring()); + cmd += std::format(L" --ttrpc \"{}\"", m_ttrpcSocketPath.wstring()); + + return cmd; +} + +TtrpcClient::VmConfig OpenVmmVirtualMachine::BuildVmConfig() const +{ + TtrpcClient::VmConfig config; + + config.KernelPath = wsl::shared::string::WideToMultiByte(m_kernelPath.wstring()); + config.InitrdPath = wsl::shared::string::WideToMultiByte(m_initrdPath.wstring()); + + // Kernel command line — the server prepends "panic=-1 debug pci=off console=ttyS0 " + // automatically via HyperVGen2LinuxDirect chipset type. + config.KernelCmdLine = wsl::shared::string::WideToMultiByte(m_kernelCmdLine); + + // Ensure 2MB granularity. 
Cap at 4GB because OpenVMM on WHP allocates guest RAM upfront. + constexpr ULONG c_maxMemoryMb = 4096; + config.MemoryMb = std::min(m_memoryMb, c_maxMemoryMb) & ~0x1; + + config.ProcessorCount = m_cpuCount; + + // HvSocket bridge via vsock path (for the guest init connection). + config.HvSocketPath = wsl::shared::string::WideToMultiByte(m_vsockPath.wstring()); + + // Boot disks: root VHD (LUN 0) and modules VHD (LUN 1), both read-only. + for (const auto& [lun, disk] : m_attachedDisks) + { + config.ScsiDisks.push_back({ + .Controller = 0, + .Lun = lun, + .HostPath = wsl::shared::string::WideToMultiByte(disk.Path), + .ReadOnly = disk.ReadOnly, + }); + } + + if (m_networkingMode == WSLCNetworkingModeConsomme) + { + // Generate a deterministic NIC instance ID from the VM ID so it's + // stable across restarts but unique per VM. + GUID nicGuid = m_vmId; + nicGuid.Data1 ^= c_nicGuidXorMask; + + config.Nic = TtrpcClient::VmConfig::ConsommeNic{ + .NicId = wsl::shared::string::GuidToString(nicGuid), + .MacAddress = c_defaultConsommeMacAddress, + }; + } + + // COM1 (port 0) — earlycon output before hvc0 loads. + config.SerialPorts.push_back({ + .Port = 0, + .SocketPath = wsl::shared::string::WideToMultiByte(m_dmesgCollector->EarlyConsoleName()), + }); + + // Virtio console (/dev/hvc0) — primary console after boot. + config.VirtioConsolePath = wsl::shared::string::WideToMultiByte(m_dmesgCollector->VirtioConsoleName()); + + return config; +} + +void OpenVmmVirtualMachine::LaunchOpenVmm() +{ + auto cmd = BuildCommandLine(); + + WSL_LOG("LaunchOpenVmm", TraceLoggingValue(cmd.c_str(), "cmd")); + + SubProcess process(m_openvmmPath.c_str(), cmd.c_str()); + + // Redirect stdout and stderr to a log file for diagnostics. 
+ SECURITY_ATTRIBUTES sa{sizeof(sa), nullptr, TRUE}; + auto logPath = m_vsockPath.wstring() + L".log"; + wil::unique_hfile logFile{CreateFileW( + logPath.c_str(), GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, &sa, + CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr)}; + + process.SetStdHandles(nullptr, logFile.get(), logFile.get()); + + // Start the process. The returned handle is the process handle. + m_processHandle = process.Start(); + + // Kill-on-close job object ensures the child is terminated if the service + // exits without running our destructor. + m_jobObject.reset(CreateJobObjectW(nullptr, nullptr)); + THROW_LAST_ERROR_IF(!m_jobObject); + + JOBOBJECT_EXTENDED_LIMIT_INFORMATION jobLimits{}; + jobLimits.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE; + THROW_IF_WIN32_BOOL_FALSE(SetInformationJobObject( + m_jobObject.get(), JobObjectExtendedLimitInformation, &jobLimits, sizeof(jobLimits))); + THROW_IF_WIN32_BOOL_FALSE(AssignProcessToJobObject(m_jobObject.get(), m_processHandle.get())); + + logFile.reset(); + + // Monitor the openvmm process and signal m_vmExitEvent on exit. 
+ m_processWatchThread = std::thread(&OpenVmmVirtualMachine::WatchProcessExit, this); + + m_ttrpcClient = std::make_unique(); + THROW_IF_FAILED_MSG( + m_ttrpcClient->Connect(m_ttrpcSocketPath.wstring(), TtrpcClient::c_defaultTimeoutMs), + "Failed to connect to OpenVMM ttrpc server"); + + auto vmConfig = BuildVmConfig(); + THROW_IF_FAILED_MSG( + m_ttrpcClient->CreateVm(vmConfig), + "Failed to create VM via ttrpc CreateVM"); + + THROW_IF_FAILED_MSG( + m_ttrpcClient->ResumeVm(), + "Failed to resume VM via ttrpc ResumeVM"); + +} + +void OpenVmmVirtualMachine::WatchProcessExit() +{ + WaitForSingleObject(m_processHandle.get(), INFINITE); + + DWORD exitCode = 0; + GetExitCodeProcess(m_processHandle.get(), &exitCode); + + WSL_LOG( + "OpenVmmProcessExited", + TraceLoggingValue(exitCode, "ExitCode"), + TraceLoggingValue(m_vmIdString.c_str(), "VmId")); + + m_vmExitEvent.SetEvent(); +} + +OpenVmmVirtualMachine::~OpenVmmVirtualMachine() +{ + WSL_LOG("OpenVmmTerminateVmStart", TraceLoggingValue(m_vmIdString.c_str(), "VmId")); + + // Signal termination to any pending operations. + m_vmExitEvent.SetEvent(); + + // TeardownVM releases all VM resources and unblocks WaitVM. + if (m_ttrpcClient) + { + LOG_IF_FAILED(m_ttrpcClient->TeardownVm()); + m_ttrpcClient->Disconnect(); + m_ttrpcClient.reset(); + } + + // Wait up to 5 seconds for graceful exit, then force-terminate. + if (m_processHandle) + { + if (WaitForSingleObject(m_processHandle.get(), c_processTerminationTimeoutMs) == WAIT_TIMEOUT) + { + WSL_LOG("OpenVmmForceTerminate", TraceLoggingValue(m_vmIdString.c_str(), "VmId")); + TerminateProcess(m_processHandle.get(), 1); + } + } + + if (m_processWatchThread.joinable()) + { + m_processWatchThread.join(); + } + + // Join relay threads. 
+ if (m_initRelayThread.joinable()) + { + m_initRelayThread.join(); + } + + if (m_crashDumpRelayThread.joinable()) + { + m_crashDumpRelayThread.join(); + } + + { + std::lock_guard lock(m_relayLock); + for (auto& t : m_relayThreads) + { + if (t.joinable()) + { + t.join(); + } + } + m_relayThreads.clear(); + } + + if (m_initListenSocket != INVALID_SOCKET) + { + closesocket(m_initListenSocket); + m_initListenSocket = INVALID_SOCKET; + } + DeleteFileW(m_initListenPath.c_str()); + + if (m_crashDumpListenSocket != INVALID_SOCKET) + { + closesocket(m_crashDumpListenSocket); + m_crashDumpListenSocket = INVALID_SOCKET; + } + DeleteFileW(m_crashDumpListenPath.c_str()); + + try + { + if (std::filesystem::exists(m_vsockPath)) + { + std::filesystem::remove(m_vsockPath); + } + if (std::filesystem::exists(m_ttrpcSocketPath)) + { + std::filesystem::remove(m_ttrpcSocketPath); + } + } + CATCH_LOG() +} + +bool OpenVmmVirtualMachine::FeatureEnabled(WSLCFeatureFlags Value) const +{ + return static_cast(m_featureFlags) & static_cast(Value); +} + +HRESULT OpenVmmVirtualMachine::GetId(_Out_ GUID* VmId) +try +{ + *VmId = m_vmId; + return S_OK; +} +CATCH_RETURN() + +// Bidirectional relay between an AF_UNIX socket and a TCP socket. +// Runs until either socket closes or exitEvent is signaled. +// Takes ownership of both sockets. +constexpr size_t c_relayBufferSize = 65536; + +static void RelaySocketData(SOCKET unixSock, SOCKET tcpSock, HANDLE exitEvent) +{ + auto cleanup = wil::scope_exit([&] { + closesocket(unixSock); + closesocket(tcpSock); + }); + + // Use WSA events to wait efficiently instead of polling with select(). + // WSAEventSelect puts sockets in non-blocking mode; we only recv when + // data is available and handle WSAEWOULDBLOCK on sends. 
+ wil::unique_event unixEvent(wil::EventOptions::ManualReset); + wil::unique_event tcpEvent(wil::EventOptions::ManualReset); + + if (WSAEventSelect(unixSock, unixEvent.get(), FD_READ | FD_CLOSE) == SOCKET_ERROR || + WSAEventSelect(tcpSock, tcpEvent.get(), FD_READ | FD_CLOSE) == SOCKET_ERROR) + { + return; + } + + char buffer[c_relayBufferSize]; + HANDLE waitHandles[] = {exitEvent, unixEvent.get(), tcpEvent.get()}; + + // Relay data from one socket to another. Returns false if the relay should stop. + auto relayData = [&](SOCKET from, SOCKET to, HANDLE event) -> bool { + WSANETWORKEVENTS netEvents{}; + if (WSAEnumNetworkEvents(from, event, &netEvents) != 0) + { + return true; + } + + if (netEvents.lNetworkEvents & FD_READ) + { + for (;;) + { + int bytes = recv(from, buffer, sizeof(buffer), 0); + if (bytes == SOCKET_ERROR) + { + if (WSAGetLastError() == WSAEWOULDBLOCK) + { + break; // No more data available + } + return false; + } + if (bytes == 0) + { + return false; + } + + int sent = 0; + while (sent < bytes) + { + int n = send(to, buffer + sent, bytes - sent, 0); + if (n == SOCKET_ERROR) + { + if (WSAGetLastError() == WSAEWOULDBLOCK) + { + // Wait briefly for the send buffer to drain. + fd_set writeSet; + FD_ZERO(&writeSet); + FD_SET(to, &writeSet); + timeval tv{1, 0}; + if (select(0, nullptr, &writeSet, nullptr, &tv) <= 0) + { + return false; + } + continue; + } + return false; + } + if (n == 0) + { + return false; + } + sent += n; + } + } + } + + if (netEvents.lNetworkEvents & FD_CLOSE) + { + return false; + } + + return true; + }; + + while (true) + { + auto waitResult = WaitForMultipleObjects(ARRAYSIZE(waitHandles), waitHandles, FALSE, INFINITE); + if (waitResult == WAIT_OBJECT_0 || waitResult == WAIT_FAILED) + { + break; + } + + // Always check both directions — multiple events may be signaled. 
+ if (!relayData(unixSock, tcpSock, unixEvent.get()) || + !relayData(tcpSock, unixSock, tcpEvent.get())) + { + break; + } + } +}
+
+// Creates a TCP loopback socket pair and starts a relay thread between an
+// AF_UNIX socket and the TCP server socket. Returns the TCP client socket
+// (which supports overlapped I/O) and the relay thread.
+//
+// Ownership of unixSock passes to this function on entry: it is closed here if
+// setup fails, and by the relay thread (together with the TCP server socket)
+// once that thread is running. Callers must not close unixSock after the call,
+// whether it returns or throws.
+//
+// Throws on any Winsock failure, if the accepted loopback connection is not the
+// one created here, or if the relay thread cannot be started.
+static std::pair<wil::unique_socket, std::thread> CreateRelayedSocket(
+    _In_ SOCKET unixSock, _In_ HANDLE exitEvent)
+{
+    // Callers pass unixSock unguarded, so every failure path below must close it.
+    auto closeUnix = wil::scope_exit([&] { closesocket(unixSock); });
+
+    // Create a TCP loopback listener on an ephemeral port.
+    SOCKET tcpListener = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+    THROW_LAST_ERROR_IF(tcpListener == INVALID_SOCKET);
+    auto closeListener = wil::scope_exit([&] { closesocket(tcpListener); });
+
+    sockaddr_in loopback{};
+    loopback.sin_family = AF_INET;
+    loopback.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+    loopback.sin_port = 0;
+    THROW_LAST_ERROR_IF(bind(tcpListener, reinterpret_cast<sockaddr*>(&loopback), sizeof(loopback)) == SOCKET_ERROR);
+    THROW_LAST_ERROR_IF(listen(tcpListener, 1) == SOCKET_ERROR);
+
+    // Get the port that was assigned.
+    sockaddr_in boundAddr{};
+    int addrLen = sizeof(boundAddr);
+    THROW_LAST_ERROR_IF(getsockname(tcpListener, reinterpret_cast<sockaddr*>(&boundAddr), &addrLen) == SOCKET_ERROR);
+
+    // Connect a TCP client to the listener.
+    SOCKET tcpClient = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+    THROW_LAST_ERROR_IF(tcpClient == INVALID_SOCKET);
+    auto closeClient = wil::scope_exit([&] { closesocket(tcpClient); });
+
+    THROW_LAST_ERROR_IF(connect(tcpClient, reinterpret_cast<sockaddr*>(&boundAddr), sizeof(boundAddr)) == SOCKET_ERROR);
+
+    // Remember which local address/port our client ended up with so the
+    // accepted connection can be checked against it below.
+    sockaddr_in clientAddr{};
+    int clientAddrLen = sizeof(clientAddr);
+    THROW_LAST_ERROR_IF(getsockname(tcpClient, reinterpret_cast<sockaddr*>(&clientAddr), &clientAddrLen) == SOCKET_ERROR);
+
+    // Accept the server-side connection.
+    sockaddr_in peerAddr{};
+    int peerAddrLen = sizeof(peerAddr);
+    SOCKET tcpServer = accept(tcpListener, reinterpret_cast<sockaddr*>(&peerAddr), &peerAddrLen);
+    THROW_LAST_ERROR_IF(tcpServer == INVALID_SOCKET);
+    auto closeServer = wil::scope_exit([&] { closesocket(tcpServer); });
+
+    // The listener is no longer needed; close it right away.
+    closeListener.reset();
+
+    // Any local process can connect to a loopback listener. If something else
+    // got queued ahead of tcpClient, refuse to bridge it to the guest channel.
+    THROW_HR_IF(
+        E_ACCESSDENIED,
+        peerAddr.sin_port != clientAddr.sin_port || peerAddr.sin_addr.s_addr != clientAddr.sin_addr.s_addr);
+
+    // Start a relay thread that takes ownership of unixSock and tcpServer.
+    // std::thread's constructor can throw, so the guards are only released
+    // once the thread actually exists.
+    auto relayThread = std::thread(RelaySocketData, unixSock, tcpServer, exitEvent);
+    closeUnix.release();
+    closeServer.release();
+
+    // Return the TCP client socket — this supports overlapped I/O.
+    closeClient.release();
+    return {wil::unique_socket(tcpClient), std::move(relayThread)};
+}
+
+HRESULT OpenVmmVirtualMachine::AcceptConnection(_Out_ HANDLE* Socket) +try +{ + THROW_HR_IF(E_UNEXPECTED, m_initListenSocket == INVALID_SOCKET); + + WSL_LOG("OpenVmmAcceptConnection", + TraceLoggingValue(m_initListenPath.c_str(), "ListenPath"), + TraceLoggingValue(m_vmIdString.c_str(), "VmId")); + + wil::unique_event acceptEvent(wil::EventOptions::ManualReset); + WSAEventSelect(m_initListenSocket, acceptEvent.get(), FD_ACCEPT); + + HANDLE waitHandles[] = { acceptEvent.get(), m_vmExitEvent.get() }; + auto waitResult = WaitForMultipleObjects(ARRAYSIZE(waitHandles), waitHandles, FALSE, m_bootTimeoutMs); + THROW_HR_IF(E_ABORT, waitResult != WAIT_OBJECT_0); + + SOCKET unixSock = accept(m_initListenSocket, nullptr, nullptr); + THROW_LAST_ERROR_IF(unixSock == INVALID_SOCKET); + + closesocket(m_initListenSocket); + m_initListenSocket = INVALID_SOCKET; + DeleteFileW(m_initListenPath.c_str()); + + // Bridge AF_UNIX to TCP loopback for overlapped I/O support. + auto [tcpSocket, relayThread] = CreateRelayedSocket(unixSock, m_vmExitEvent.get()); + m_initRelayThread = std::move(relayThread); + *Socket = reinterpret_cast(tcpSocket.release()); + return S_OK; +} +CATCH_RETURN() + +HRESULT OpenVmmVirtualMachine::ConfigureNetworking(_In_ HANDLE GnsSocket, _In_opt_ HANDLE* DnsSocket) +try +{ + std::lock_guard lock(m_lock); + + // Consomme networking is configured server-side via NICConfig.
+ WI_ASSERT(m_networkingMode == WSLCNetworkingModeConsomme); + THROW_HR_IF(E_INVALIDARG, m_networkingMode != WSLCNetworkingModeConsomme); + return S_OK; +} +CATCH_RETURN() + +HRESULT OpenVmmVirtualMachine::AttachDisk(_In_ LPCWSTR Path, _In_ BOOL ReadOnly, _Out_ ULONG* Lun) +try +{ + RETURN_HR_IF(E_POINTER, Path == nullptr || Lun == nullptr); + + std::lock_guard lock(m_lock); + + THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), + "ttrpc client not connected for disk hot-add"); + + DiskInfo disk{Path, ReadOnly != FALSE}; + const ULONG allocatedLun = AllocateLun(); + + auto cleanup = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { + FreeLun(allocatedLun); + }); + + auto hostPath = wsl::shared::string::WideToMultiByte(Path); + THROW_IF_FAILED(m_ttrpcClient->AttachScsiDisk(0, allocatedLun, hostPath, ReadOnly != FALSE)); + + m_attachedDisks.emplace(allocatedLun, std::move(disk)); + cleanup.release(); + + *Lun = allocatedLun; + return S_OK; +} +CATCH_RETURN() + +HRESULT OpenVmmVirtualMachine::DetachDisk(_In_ ULONG Lun) +try +{ + std::lock_guard lock(m_lock); + + auto it = m_attachedDisks.find(Lun); + RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_NOT_FOUND), it == m_attachedDisks.end()); + + THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), + "ttrpc client not connected for disk hot-remove"); + + THROW_IF_FAILED(m_ttrpcClient->DetachScsiDisk(0, Lun)); + + FreeLun(Lun); + m_attachedDisks.erase(it); + + return S_OK; +} +CATCH_RETURN() + +HRESULT OpenVmmVirtualMachine::AddShare(_In_ LPCWSTR WindowsPath, _In_ BOOL ReadOnly, _Out_ GUID* ShareId) +try +{ + RETURN_HR_IF(E_POINTER, WindowsPath == nullptr || ShareId == nullptr); + + std::lock_guard lock(m_lock); + + // TODO: Requires vmservice.proto extension for Plan9/VirtioFS in ModifyResourceRequest. 
+ + WSL_LOG( + "OpenVmmAddShare", + TraceLoggingValue(m_vmIdString.c_str(), "VmId"), + TraceLoggingValue(WindowsPath, "WindowsPath"), + TraceLoggingValue(ReadOnly, "ReadOnly"), + TraceLoggingValue("NOT_IMPLEMENTED", "Status")); + + return E_NOTIMPL; +} +CATCH_RETURN() + +HRESULT OpenVmmVirtualMachine::RemoveShare(_In_ REFGUID ShareId) +try +{ + std::lock_guard lock(m_lock); + + // TODO: Requires vmservice.proto extension. See AddShare. + + WSL_LOG( + "OpenVmmRemoveShare", + TraceLoggingValue(m_vmIdString.c_str(), "VmId"), + TraceLoggingValue("NOT_IMPLEMENTED", "Status")); + + return E_NOTIMPL; +} +CATCH_RETURN() + +HRESULT OpenVmmVirtualMachine::GetTerminationEvent(_Out_ HANDLE* Event) +try +{ + *Event = wslutil::DuplicateHandle(m_vmExitEvent.get()); + return S_OK; +} +CATCH_RETURN() + +HRESULT OpenVmmVirtualMachine::ConnectToVsockPort(_In_ ULONG Port, _Out_ HANDLE* Socket) +try +{ + WSL_LOG("OpenVmmConnectToVsockPort", + TraceLoggingValue(Port, "Port"), + TraceLoggingValue(m_vsockPath.c_str(), "BridgePath"), + TraceLoggingValue(m_vmIdString.c_str(), "VmId")); + + SOCKET unixSock = ::socket(AF_UNIX, SOCK_STREAM, 0); + THROW_LAST_ERROR_IF(unixSock == INVALID_SOCKET); + auto closeUnix = wil::scope_exit([&] { closesocket(unixSock); }); + + sockaddr_un addr{}; + addr.sun_family = AF_UNIX; + auto narrowPath = wsl::shared::string::WideToMultiByte(m_vsockPath.wstring()); + THROW_HR_IF_MSG(E_INVALIDARG, narrowPath.size() >= sizeof(addr.sun_path), + "vsock bridge path too long: %hs", narrowPath.c_str()); + memcpy(addr.sun_path, narrowPath.c_str(), narrowPath.size() + 1); + + THROW_LAST_ERROR_IF(connect(unixSock, reinterpret_cast(&addr), sizeof(addr)) == SOCKET_ERROR); + + auto connectMsg = std::format("CONNECT {}\n", Port); + int sent = send(unixSock, connectMsg.c_str(), static_cast(connectMsg.size()), 0); + THROW_LAST_ERROR_IF(sent == SOCKET_ERROR); + THROW_HR_IF(E_FAIL, sent != static_cast(connectMsg.size())); + + char response[64]{}; + int totalRead = 0; + while 
(totalRead < static_cast(sizeof(response) - 1)) + { + int n = recv(unixSock, response + totalRead, 1, 0); + THROW_LAST_ERROR_IF(n == SOCKET_ERROR); + THROW_HR_IF_MSG(E_FAIL, n == 0, "vsock bridge closed during CONNECT handshake"); + totalRead += n; + if (response[totalRead - 1] == '\n') + { + break; + } + } + response[totalRead] = '\0'; + + THROW_HR_IF_MSG(E_FAIL, strncmp(response, "OK ", 3) != 0, + "vsock bridge CONNECT failed: %hs", response); + + WSL_LOG("OpenVmmConnectToVsockPortOK", + TraceLoggingValue(Port, "Port"), + TraceLoggingValue(response, "Response")); + + // Bridge AF_UNIX to TCP loopback for overlapped I/O support. + closeUnix.release(); + auto [tcpSocket, relayThread] = CreateRelayedSocket(unixSock, m_vmExitEvent.get()); + { + std::lock_guard lock(m_relayLock); + m_relayThreads.emplace_back(std::move(relayThread)); + } + *Socket = reinterpret_cast(tcpSocket.release()); + return S_OK; +} +CATCH_RETURN()
+
+// Blocks until the guest connects to the crash dump listener or the VM exit
+// event is signaled. On a guest connection, *Socket receives a TCP loopback
+// socket that is relayed to the accepted AF_UNIX connection.
+//
+// Returns E_UNEXPECTED if the crash dump listener does not exist, E_ABORT if
+// the wait ends for any reason other than an incoming connection, or the
+// failure reported by the underlying socket calls.
+//
+// The listener is left open, so this method may be called again to collect a
+// later crash dump.
+HRESULT OpenVmmVirtualMachine::AcceptCrashDumpConnection(_Out_ HANDLE* Socket)
+try
+{
+    THROW_HR_IF(E_UNEXPECTED, m_crashDumpListenSocket == INVALID_SOCKET);
+
+    WSL_LOG("OpenVmmAcceptCrashDumpConnection",
+        TraceLoggingValue(m_crashDumpListenPath.c_str(), "ListenPath"),
+        TraceLoggingValue(m_vmIdString.c_str(), "VmId"));
+
+    // If the event registration fails nothing would ever signal acceptEvent and
+    // the INFINITE wait below would only end on VM exit, so fail fast instead.
+    wil::unique_event acceptEvent(wil::EventOptions::ManualReset);
+    THROW_LAST_ERROR_IF(WSAEventSelect(m_crashDumpListenSocket, acceptEvent.get(), FD_ACCEPT) == SOCKET_ERROR);
+
+    HANDLE waitHandles[] = { acceptEvent.get(), m_vmExitEvent.get() };
+    auto waitResult = WaitForMultipleObjects(ARRAYSIZE(waitHandles), waitHandles, FALSE, INFINITE);
+    THROW_HR_IF(E_ABORT, waitResult != WAIT_OBJECT_0);
+
+    SOCKET unixSock = accept(m_crashDumpListenSocket, nullptr, nullptr);
+    THROW_LAST_ERROR_IF(unixSock == INVALID_SOCKET);
+
+    // Bridge AF_UNIX to TCP loopback for overlapped I/O support.
+    auto [tcpSocket, relayThread] = CreateRelayedSocket(unixSock, m_vmExitEvent.get());
+
+    // Move-assigning onto a std::thread that is still joinable calls
+    // std::terminate. A relay from an earlier call is therefore parked in
+    // m_relayThreads, which the destructor joins, before the slot is reused.
+    if (m_crashDumpRelayThread.joinable())
+    {
+        std::lock_guard lock(m_relayLock);
+        m_relayThreads.emplace_back(std::move(m_crashDumpRelayThread));
+    }
+
+    m_crashDumpRelayThread = std::move(relayThread);
+    *Socket = reinterpret_cast<HANDLE>(tcpSocket.release());
+    return S_OK;
+}
+CATCH_RETURN()
+
+ULONG OpenVmmVirtualMachine::AllocateLun() +{ + for (ULONG index = 0; index < gsl::narrow_cast(m_lunBitmap.size()); index += 1) + { + if (!m_lunBitmap[index]) + { + m_lunBitmap[index] = true; + return index; + } + } + + THROW_HR(WSL_E_TOO_MANY_DISKS_ATTACHED); +} + +void OpenVmmVirtualMachine::FreeLun(ULONG Lun) +{ + THROW_HR_IF(E_BOUNDS, Lun >= m_lunBitmap.size()); + THROW_HR_IF(E_INVALIDARG, !m_lunBitmap[Lun]); + + m_lunBitmap[Lun] = false; +} diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.h b/src/windows/service/exe/OpenVmmVirtualMachine.h new file mode 100644 index 0000000000..c45adedced --- /dev/null +++ b/src/windows/service/exe/OpenVmmVirtualMachine.h @@ -0,0 +1,163 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. + +/*++ + +Module Name: + + OpenVmmVirtualMachine.h + +Abstract: + + Implementation of IWSLCVirtualMachine using OpenVMM as the VMM backend. + + This class spawns openvmm.exe as a child process, configures the VM via + ttrpc RPCs (vmservice.proto), and implements the same IWSLCVirtualMachine + interface as HcsVirtualMachine so the rest of WSLC can work unchanged.
+ +--*/ + +#pragma once + +#include "wslc.h" +#include "INetworkingEngine.h" +#include "TtrpcClient.h" +#include "Dmesg.h" +#include +#include +#include + +#define MAX_VHD_COUNT 254 + +namespace wsl::windows::service::wslc { + +class OpenVmmVirtualMachine + : public Microsoft::WRL::RuntimeClass, IWSLCVirtualMachine, IFastRundown> +{ +public: + OpenVmmVirtualMachine(_In_ const WSLCSessionSettings* Settings); + ~OpenVmmVirtualMachine(); + + // IWSLCVirtualMachine implementation + IFACEMETHOD(GetId)(_Out_ GUID* VmId) override; + IFACEMETHOD(AcceptConnection)(_Out_ HANDLE* Socket) override; + IFACEMETHOD(ConfigureNetworking)(_In_ HANDLE GnsSocket, _In_opt_ HANDLE* DnsSocket) override; + IFACEMETHOD(AttachDisk)(_In_ LPCWSTR Path, _In_ BOOL ReadOnly, _Out_ ULONG* Lun) override; + IFACEMETHOD(DetachDisk)(_In_ ULONG Lun) override; + IFACEMETHOD(AddShare)(_In_ LPCWSTR WindowsPath, _In_ BOOL ReadOnly, _Out_ GUID* ShareId) override; + IFACEMETHOD(RemoveShare)(_In_ REFGUID ShareId) override; + IFACEMETHOD(GetTerminationEvent)(_Out_ HANDLE* Event) override; + IFACEMETHOD(ConnectToVsockPort)(_In_ ULONG Port, _Out_ HANDLE* Socket) override; + IFACEMETHOD(AcceptCrashDumpConnection)(_Out_ HANDLE* Socket) override; + +private: + struct DiskInfo + { + std::wstring Path; + bool ReadOnly = false; + }; + + bool FeatureEnabled(WSLCFeatureFlags Value) const; + + // Build the openvmm.exe command line (ttrpc-only in orchestration mode). + std::wstring BuildCommandLine() const; + + // Build a ttrpc CreateVM configuration from stored VM settings. + TtrpcClient::VmConfig BuildVmConfig() const; + + // Create a Unix domain socket listener for the hybrid_vsock bridge at the given port. + // Returns the listening socket and the filesystem path for cleanup. + std::pair CreateVsockListener(ULONG port); + + // Spawn the openvmm.exe process, connect ttrpc, and create+resume the VM. + void LaunchOpenVmm(); + + // Monitor the openvmm process and signal m_vmExitEvent on exit. 
+ void WatchProcessExit(); + + ULONG AllocateLun(); + void FreeLun(ULONG Lun); + + // Timeout for waiting for the openvmm process to exit gracefully before force-terminating. + static constexpr DWORD c_processTerminationTimeoutMs = 5000; + + // Directory for vsock bridge and ttrpc socket files. + // Must be short — hybrid_vsock appends port GUIDs and Unix sockets have a 108-byte path limit. + static constexpr wchar_t c_vsockBridgeDir[] = L"C:\\ProgramData\\wslc"; + + // Page reporting order (128KB) passed to the kernel command line, matching modern Windows builds. + static constexpr ULONG c_pageReportingOrder = 5; + + // XOR mask applied to VM GUID to derive a deterministic NIC instance GUID. + static constexpr uint32_t c_nicGuidXorMask = 0x4E494300; // "NIC\0" + + // Default Hyper-V MAC address prefix for the Consomme NIC. + static constexpr char c_defaultConsommeMacAddress[] = "00-15-5D-00-00-01"; + + std::recursive_mutex m_lock; + + GUID m_vmId{}; + std::wstring m_vmIdString; + + WSLCFeatureFlags m_featureFlags{}; + WSLCNetworkingMode m_networkingMode{}; + ULONG m_bootTimeoutMs{}; + + // OpenVMM process handle and management. + wil::unique_handle m_processHandle; + wil::unique_handle m_jobObject; + std::thread m_processWatchThread; + + // Paths for VM boot configuration. + std::filesystem::path m_kernelPath; + std::filesystem::path m_initrdPath; + std::filesystem::path m_rootVhdPath; + std::filesystem::path m_modulesVhdPath; + std::filesystem::path m_openvmmPath; + + // Storage VHD for container data (pre-attached at boot). + std::filesystem::path m_storageVhdPath; + + // Vsock bridge path for HvSocket emulation. + std::filesystem::path m_vsockPath; + + // VM settings preserved from constructor for command line building. + ULONG m_cpuCount{}; + ULONG m_memoryMb{}; + std::wstring m_kernelCmdLine; + + // Pre-created Unix domain socket listener for the init connection. + // Must be listening BEFORE the VM boots so the guest can connect. 
+ SOCKET m_initListenSocket = INVALID_SOCKET; + std::wstring m_initListenPath; + + // Pre-created Unix domain socket listener for crash dump collection. + // Uses the hybrid_vsock bridge to receive crash dump connections from the guest. + SOCKET m_crashDumpListenSocket = INVALID_SOCKET; + std::wstring m_crashDumpListenPath; + + wil::unique_event m_vmExitEvent{wil::EventOptions::ManualReset}; + + // Relay threads bridging AF_UNIX sockets to TCP loopback for overlapped I/O. + std::thread m_initRelayThread; + std::thread m_crashDumpRelayThread; + std::mutex m_relayLock; + std::vector m_relayThreads; + + std::map m_attachedDisks; + std::bitset m_lunBitmap; + + // Shares: key is ShareId, value is Windows path. + std::map m_shares; + + // Networking engine (ConsommeNetworking for the OpenVMM backend). + std::unique_ptr m_networkEngine; + + // ttrpc client for runtime VM management (disk hot-add/remove etc.). + std::filesystem::path m_ttrpcSocketPath; + std::unique_ptr m_ttrpcClient; + + // Dmesg collector for early boot and virtio serial console output. + std::shared_ptr m_dmesgCollector; +}; + +} // namespace wsl::windows::service::wslc diff --git a/src/windows/service/exe/TtrpcClient.cpp b/src/windows/service/exe/TtrpcClient.cpp new file mode 100644 index 0000000000..044366e9cb --- /dev/null +++ b/src/windows/service/exe/TtrpcClient.cpp @@ -0,0 +1,445 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. + +/*++ + +Module Name: + + TtrpcClient.cpp + +Abstract: + + Minimal ttrpc client for communicating with OpenVMM's vmservice. + + This implementation uses generated protobuf types from VMService.proto for + vmservice payloads, while keeping a generic ttrpc transport layer that can + send any protobuf request/response pair. 
+ + Wire format reference: openvmm/support/mesh/mesh_rpc/src/message.rs + +--*/ + +#include "precomp.h" + +#include "TtrpcClient.h" +#include "TtrpcEnvelopeCodec.h" + +#include + +#include "VMService.pb.h" +#include "google/protobuf/empty.pb.h" +#include "stringshared.h" + +using namespace wsl::windows::service::wslc; +using wsl::windows::service::wslc::detail::TtrpcEnvelopeCodec; + +namespace +{ +HRESULT DeserializeMessage(const std::vector& bytes, google::protobuf::Message* message) +{ + RETURN_HR_IF(E_POINTER, message == nullptr); + + if (bytes.empty()) + { + message->Clear(); + return S_OK; + } + + RETURN_HR_IF_MSG( + E_FAIL, + !message->ParseFromArray(bytes.data(), static_cast(bytes.size())), + "ttrpc: failed to parse protobuf response payload"); + + return S_OK; +} + +HRESULT SerializeMessage(const google::protobuf::Message& message, std::vector& bytes) +{ + std::string serialized; + RETURN_HR_IF_MSG(E_FAIL, !message.SerializeToString(&serialized), "ttrpc: failed to serialize protobuf request payload"); + + bytes.assign(serialized.begin(), serialized.end()); + return S_OK; +} + +HRESULT GrpcStatusToHresult(int32_t statusCode) +{ + // gRPC status codes: https://grpc.io/docs/guides/status-codes/ + constexpr int32_t c_grpcInvalidArgument = 3; + constexpr int32_t c_grpcNotFound = 5; + constexpr int32_t c_grpcResourceExhausted = 8; + constexpr int32_t c_grpcUnimplemented = 12; + + switch (statusCode) + { + case c_grpcInvalidArgument: + return E_INVALIDARG; + case c_grpcNotFound: + return HRESULT_FROM_WIN32(ERROR_NOT_FOUND); + case c_grpcResourceExhausted: + return HRESULT_FROM_WIN32(ERROR_NO_SYSTEM_RESOURCES); + case c_grpcUnimplemented: + return E_NOTIMPL; + default: + return E_FAIL; + } +} +} // namespace + +TtrpcClient::TtrpcClient() = default; + +TtrpcClient::~TtrpcClient() +{ + Disconnect(); +} + +HRESULT TtrpcClient::Connect(const std::wstring& socketPath, DWORD timeoutMs) +try +{ + std::lock_guard lock(m_lock); + + if (m_socket != INVALID_SOCKET) + { + 
return S_OK; + } + + auto narrowPath = wsl::shared::string::WideToMultiByte(socketPath); + + sockaddr_un addr{}; + addr.sun_family = AF_UNIX; + THROW_HR_IF_MSG( + E_INVALIDARG, + narrowPath.size() >= sizeof(addr.sun_path), + "ttrpc socket path too long: %hs", + narrowPath.c_str()); + memcpy(addr.sun_path, narrowPath.c_str(), narrowPath.size() + 1); + + constexpr DWORD c_initialBackoffMs = 100; + constexpr DWORD c_maxBackoffMs = 2000; + DWORD elapsed = 0; + DWORD backoff = c_initialBackoffMs; + + while (elapsed < timeoutMs) + { + SOCKET sock = ::socket(AF_UNIX, SOCK_STREAM, 0); + THROW_LAST_ERROR_IF(sock == INVALID_SOCKET); + + if (::connect(sock, reinterpret_cast(&addr), sizeof(addr)) == 0) + { + m_socket = sock; + m_nextStreamId = 1; + + // Set socket timeouts to prevent blocking forever if OpenVMM hangs. + setsockopt(m_socket, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&c_socketTimeoutMs), sizeof(c_socketTimeoutMs)); + setsockopt(m_socket, SOL_SOCKET, SO_SNDTIMEO, reinterpret_cast(&c_socketTimeoutMs), sizeof(c_socketTimeoutMs)); + + WSL_LOG( + "TtrpcClientConnected", + TraceLoggingValue(narrowPath.c_str(), "SocketPath"), + TraceLoggingValue(elapsed, "ElapsedMs")); + + return S_OK; + } + + closesocket(sock); + + DWORD sleepTime = std::min(backoff, timeoutMs - elapsed); + Sleep(sleepTime); + elapsed += sleepTime; + backoff = std::min(backoff * 2, c_maxBackoffMs); + } + + WSL_LOG( + "TtrpcClientConnectTimeout", + TraceLoggingValue(narrowPath.c_str(), "SocketPath"), + TraceLoggingValue(timeoutMs, "TimeoutMs")); + + return HRESULT_FROM_WIN32(ERROR_TIMEOUT); +} +CATCH_RETURN() + +void TtrpcClient::Disconnect() +{ + std::lock_guard lock(m_lock); + + if (m_socket != INVALID_SOCKET) + { + closesocket(m_socket); + m_socket = INVALID_SOCKET; + } +} + +bool TtrpcClient::IsConnected() const +{ + return m_socket != INVALID_SOCKET; +} + +HRESULT TtrpcClient::Call( + const std::string& service, + const std::string& method, + const google::protobuf::Message& request, + 
google::protobuf::Message* response) +{ + std::vector requestPayload; + RETURN_IF_FAILED(SerializeMessage(request, requestPayload)); + + std::vector responsePayload; + RETURN_IF_FAILED(SendRequest(service, method, requestPayload, &responsePayload)); + + if (response != nullptr) + { + RETURN_IF_FAILED(DeserializeMessage(responsePayload, response)); + } + + return S_OK; +} + +HRESULT TtrpcClient::AttachScsiDisk( + uint32_t controller, uint32_t lun, const std::string& hostPath, bool readOnly) +try +{ + WSL_LOG( + "TtrpcAttachScsiDisk", + TraceLoggingValue(controller, "Controller"), + TraceLoggingValue(lun, "Lun"), + TraceLoggingValue(hostPath.c_str(), "HostPath"), + TraceLoggingValue(readOnly, "ReadOnly")); + + vmservice::ModifyResourceRequest request; + request.set_type(vmservice::ADD); + + auto* scsiDisk = request.mutable_scsi_disk(); + scsiDisk->set_controller(controller); + scsiDisk->set_lun(lun); + scsiDisk->set_host_path(hostPath); + scsiDisk->set_type(vmservice::SCSI_DISK_TYPE_VHDX); + scsiDisk->set_read_only(readOnly); + + google::protobuf::Empty response; + return Call(c_serviceName, c_modifyResourceMethod, request, &response); +} +CATCH_RETURN() + +HRESULT TtrpcClient::DetachScsiDisk(uint32_t controller, uint32_t lun) +try +{ + WSL_LOG( + "TtrpcDetachScsiDisk", + TraceLoggingValue(controller, "Controller"), + TraceLoggingValue(lun, "Lun")); + + vmservice::ModifyResourceRequest request; + request.set_type(vmservice::REMOVE); + + auto* scsiDisk = request.mutable_scsi_disk(); + scsiDisk->set_controller(controller); + scsiDisk->set_lun(lun); + + google::protobuf::Empty response; + return Call(c_serviceName, c_modifyResourceMethod, request, &response); +} +CATCH_RETURN() + +HRESULT TtrpcClient::CreateVm(const VmConfig& config) +try +{ + WSL_LOG( + "TtrpcCreateVm", + TraceLoggingValue(config.KernelPath.c_str(), "KernelPath"), + TraceLoggingValue(config.MemoryMb, "MemoryMb"), + TraceLoggingValue(config.ProcessorCount, "ProcessorCount"), + 
TraceLoggingValue(static_cast(config.ScsiDisks.size()), "DiskCount"), + TraceLoggingValue(config.HvSocketPath.c_str(), "HvSocketPath")); + + vmservice::CreateVMRequest request; + auto* vmConfig = request.mutable_config(); + + vmConfig->mutable_memory_config()->set_memory_mb(config.MemoryMb); + vmConfig->mutable_processor_config()->set_processor_count(config.ProcessorCount); + + for (const auto& disk : config.ScsiDisks) + { + auto* scsiDisk = vmConfig->mutable_devices_config()->add_scsi_disks(); + scsiDisk->set_controller(disk.Controller); + scsiDisk->set_lun(disk.Lun); + scsiDisk->set_host_path(disk.HostPath); + scsiDisk->set_type(vmservice::SCSI_DISK_TYPE_VHDX); + scsiDisk->set_read_only(disk.ReadOnly); + } + + if (config.Nic.has_value()) + { + auto* nicConfig = vmConfig->mutable_devices_config()->add_nic_config(); + nicConfig->set_nic_id(config.Nic->NicId); + nicConfig->set_mac_address(config.Nic->MacAddress); + // Empty CIDR uses OpenVMM's default subnet. + nicConfig->mutable_consomme()->set_cidr(""); + } + + if (!config.VirtioConsolePath.empty()) + { + auto* virtioConsole = vmConfig->mutable_devices_config()->mutable_virtio_console(); + virtioConsole->set_socket_path(config.VirtioConsolePath); + virtioConsole->set_connect(true); + } + + for (const auto& serialPort : config.SerialPorts) + { + auto* portConfig = vmConfig->mutable_serial_config()->add_ports(); + portConfig->set_port(serialPort.Port); + portConfig->set_socket_path(serialPort.SocketPath); + portConfig->set_connect(true); + } + + auto* directBoot = vmConfig->mutable_direct_boot(); + directBoot->set_kernel_path(config.KernelPath); + directBoot->set_initrd_path(config.InitrdPath); + directBoot->set_kernel_cmdline(config.KernelCmdLine); + + vmConfig->mutable_hvsocket_config()->set_path(config.HvSocketPath); + + google::protobuf::Empty response; + return Call(c_serviceName, c_createVmMethod, request, &response); +} +CATCH_RETURN() + +HRESULT TtrpcClient::ResumeVm() +try +{ + WSL_LOG("TtrpcResumeVm"); + + 
google::protobuf::Empty request; + google::protobuf::Empty response; + return Call(c_serviceName, c_resumeVmMethod, request, &response); +} +CATCH_RETURN() + +HRESULT TtrpcClient::WaitVm() +try +{ + WSL_LOG("TtrpcWaitVm"); + + google::protobuf::Empty request; + google::protobuf::Empty response; + return Call(c_serviceName, c_waitVmMethod, request, &response); +} +CATCH_RETURN() + +HRESULT TtrpcClient::TeardownVm() +try +{ + WSL_LOG("TtrpcTeardownVm"); + + google::protobuf::Empty request; + google::protobuf::Empty response; + return Call(c_serviceName, c_teardownVmMethod, request, &response); +} +CATCH_RETURN() + +HRESULT TtrpcClient::SendRequest( + const std::string& service, + const std::string& method, + const std::vector& payload, + std::vector* responsePayload) +{ + std::lock_guard lock(m_lock); + + RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_NOT_CONNECTED), m_socket == INVALID_SOCKET); + + auto ttrpcPayload = TtrpcEnvelopeCodec::EncodeRequestEnvelope(service, method, payload); + + detail::TtrpcMessageHeader header{}; + TtrpcEnvelopeCodec::WriteBigEndian32(header.Length, static_cast(ttrpcPayload.size())); + TtrpcEnvelopeCodec::WriteBigEndian32(header.StreamId, m_nextStreamId); + header.MessageType = TtrpcEnvelopeCodec::c_messageTypeRequest; + header.Flags = 0; + + uint32_t expectedStreamId = m_nextStreamId; + m_nextStreamId += 2; + + RETURN_IF_FAILED(SendAll(&header, sizeof(header))); + RETURN_IF_FAILED(SendAll(ttrpcPayload.data(), ttrpcPayload.size())); + + detail::TtrpcMessageHeader responseHeader{}; + RETURN_IF_FAILED(RecvAll(&responseHeader, sizeof(responseHeader))); + + RETURN_HR_IF_MSG( + E_FAIL, + responseHeader.MessageType != TtrpcEnvelopeCodec::c_messageTypeResponse, + "ttrpc: expected response (type 2), got type %d", + responseHeader.MessageType); + + uint32_t responseStreamId = TtrpcEnvelopeCodec::ReadBigEndian32(responseHeader.StreamId); + RETURN_HR_IF_MSG( + E_FAIL, + responseStreamId != expectedStreamId, + "ttrpc: stream ID mismatch: expected %u, got 
%u", + expectedStreamId, + responseStreamId); + + uint32_t responseLength = TtrpcEnvelopeCodec::ReadBigEndian32(responseHeader.Length); + RETURN_HR_IF_MSG( + E_FAIL, + responseLength > TtrpcEnvelopeCodec::c_maxMessageBytes, + "ttrpc: response too large: %u bytes", + responseLength); + + std::vector responseData(responseLength); + if (responseLength > 0) + { + RETURN_IF_FAILED(RecvAll(responseData.data(), responseLength)); + } + + TtrpcEnvelopeCodec::DecodedResponse decodedResponse; + RETURN_IF_FAILED(TtrpcEnvelopeCodec::DecodeResponseEnvelope(responseData, decodedResponse)); + + if (decodedResponse.HasStatus && decodedResponse.StatusCode != 0) + { + WSL_LOG( + "TtrpcRequestFailed", + TraceLoggingValue(decodedResponse.StatusCode, "GrpcCode"), + TraceLoggingValue(decodedResponse.StatusMessage.c_str(), "Message")); + + return GrpcStatusToHresult(decodedResponse.StatusCode); + } + + if (responsePayload != nullptr) + { + *responsePayload = std::move(decodedResponse.Payload); + } + + return S_OK; +} + +HRESULT TtrpcClient::SendAll(const void* data, size_t length) +{ + const auto* ptr = static_cast(data); + size_t remaining = length; + + while (remaining > 0) + { + int sent = ::send(m_socket, ptr, static_cast(remaining), 0); + RETURN_LAST_ERROR_IF(sent == SOCKET_ERROR); + RETURN_HR_IF(E_FAIL, sent == 0); + ptr += sent; + remaining -= sent; + } + + return S_OK; +} + +HRESULT TtrpcClient::RecvAll(void* data, size_t length) +{ + auto* ptr = static_cast(data); + size_t remaining = length; + + while (remaining > 0) + { + int received = ::recv(m_socket, ptr, static_cast(remaining), 0); + RETURN_LAST_ERROR_IF(received == SOCKET_ERROR); + RETURN_HR_IF_MSG(E_FAIL, received == 0, "ttrpc: connection closed unexpectedly"); + ptr += received; + remaining -= received; + } + + return S_OK; +} diff --git a/src/windows/service/exe/TtrpcClient.h b/src/windows/service/exe/TtrpcClient.h new file mode 100644 index 0000000000..4629f921c8 --- /dev/null +++ 
b/src/windows/service/exe/TtrpcClient.h @@ -0,0 +1,154 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. + +/*++ + +Module Name: + + TtrpcClient.h + +Abstract: + + Minimal ttrpc client for communicating with OpenVMM's vmservice. + + Implements the ttrpc wire protocol and uses protobuf payloads generated + from VMService.proto for vmservice RPCs. + + The ttrpc protocol uses a 10-byte header (big-endian length, stream ID, + type, flags) followed by a protobuf-encoded Request/Response payload. + + See: openvmm/support/mesh/mesh_rpc/src/message.rs for the wire format. + +--*/ + +#pragma once + +#include +#include +#include +#include +#include + +namespace google::protobuf { +class Message; +} + +namespace wsl::windows::service::wslc { + +class TtrpcClient +{ +public: + TtrpcClient(); + ~TtrpcClient(); + + NON_COPYABLE(TtrpcClient); + NON_MOVABLE(TtrpcClient); + + // Default timeout for Connect() retries and socket I/O operations. + static constexpr DWORD c_defaultTimeoutMs = 30000; + + // Connect to the ttrpc Unix domain socket at the given path. + // Retries with backoff until the connection succeeds or timeoutMs expires. + HRESULT Connect(const std::wstring& socketPath, DWORD timeoutMs = c_defaultTimeoutMs); + + // Disconnect from the ttrpc server. + void Disconnect(); + + // Returns true if the client is connected. + bool IsConnected() const; + + // Generic ttrpc call using protobuf request/response messages. + // If response is null, any successful payload is ignored. + HRESULT Call(const std::string& service, + const std::string& method, + const google::protobuf::Message& request, + google::protobuf::Message* response = nullptr); + + // SCSI disk hot-add: ModifyResource(ADD, SCSIDisk { controller, lun, hostPath, VHDX, readOnly }). + HRESULT AttachScsiDisk(uint32_t controller, uint32_t lun, + const std::string& hostPath, bool readOnly); + + // SCSI disk hot-remove: ModifyResource(REMOVE, SCSIDisk { controller, lun }). 
+ HRESULT DetachScsiDisk(uint32_t controller, uint32_t lun); + + // VM configuration for CreateVm. + struct VmConfig + { + std::string KernelPath; + std::string InitrdPath; + std::string KernelCmdLine; + uint64_t MemoryMb{}; + uint32_t ProcessorCount{}; + std::string HvSocketPath; + + struct ScsiDisk + { + uint32_t Controller; + uint32_t Lun; + std::string HostPath; + bool ReadOnly; + }; + std::vector ScsiDisks; + + // NIC with consomme backend (self-contained NAT + DHCP). + struct ConsommeNic + { + std::string NicId; // GUID string + std::string MacAddress; // "12-34-56-78-9A-BC" + }; + std::optional Nic; + + // Serial ports (16550 UART COM ports, e.g. earlycon on port 0). + struct SerialPort + { + uint32_t Port; // 0-3 (COM1-COM4) + std::string SocketPath; // Named pipe or Unix domain socket path + }; + std::vector SerialPorts; + + // Virtio console device (/dev/hvc0 in the guest). + // Path to a named pipe or Unix domain socket for the console backend. + std::string VirtioConsolePath; + }; + + // CreateVM: configure and create the VM (left in paused state). + HRESULT CreateVm(const VmConfig& config); + + // ResumeVM: start a paused VM. + HRESULT ResumeVm(); + + // WaitVM: blocks until the VM halts or is torn down. + HRESULT WaitVm(); + + // TeardownVM: release all VM resources and unblock the WaitVM call. + HRESULT TeardownVm(); + +private: + // ttrpc service and method names (from vmservice.proto). + static constexpr char c_serviceName[] = "vmservice.VM"; + static constexpr char c_createVmMethod[] = "CreateVM"; + static constexpr char c_resumeVmMethod[] = "ResumeVM"; + static constexpr char c_waitVmMethod[] = "WaitVM"; + static constexpr char c_teardownVmMethod[] = "TeardownVM"; + static constexpr char c_modifyResourceMethod[] = "ModifyResource"; + + // Send a ttrpc request payload and wait for the response payload. + // Returns S_OK on success, or an HRESULT error if the server returned a + // status error or there was a communication failure. 
+ HRESULT SendRequest(const std::string& service, + const std::string& method, + const std::vector& payload, + std::vector* responsePayload); + + // Socket send/recv timeout to prevent indefinite blocking. + static constexpr DWORD c_socketTimeoutMs = 30000; + + // Low-level socket I/O. + HRESULT SendAll(const void* data, size_t length); + HRESULT RecvAll(void* data, size_t length); + + std::recursive_mutex m_lock; + SOCKET m_socket = INVALID_SOCKET; + uint32_t m_nextStreamId = 1; // ttrpc client stream IDs must be odd +}; + +} // namespace wsl::windows::service::wslc diff --git a/src/windows/service/exe/TtrpcEnvelopeCodec.cpp b/src/windows/service/exe/TtrpcEnvelopeCodec.cpp new file mode 100644 index 0000000000..3a14a19872 --- /dev/null +++ b/src/windows/service/exe/TtrpcEnvelopeCodec.cpp @@ -0,0 +1,204 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. + +#include "precomp.h" + +#include "TtrpcEnvelopeCodec.h" + +using namespace wsl::windows::service::wslc::detail; + +void TtrpcEnvelopeCodec::WriteBigEndian32(uint8_t* dest, uint32_t value) +{ + dest[0] = static_cast((value >> 24) & 0xFF); + dest[1] = static_cast((value >> 16) & 0xFF); + dest[2] = static_cast((value >> 8) & 0xFF); + dest[3] = static_cast(value & 0xFF); +} + +uint32_t TtrpcEnvelopeCodec::ReadBigEndian32(const uint8_t* src) +{ + return (static_cast(src[0]) << 24) | + (static_cast(src[1]) << 16) | + (static_cast(src[2]) << 8) | + static_cast(src[3]); +} + +HRESULT TtrpcEnvelopeCodec::ReadVarint(const uint8_t*& ptr, const uint8_t* end, uint64_t& value) +{ + value = 0; + int shift = 0; + constexpr int c_maxVarintBytes = 10; // 64-bit varint is at most 10 bytes + int bytesRead = 0; + + while (ptr < end) + { + RETURN_HR_IF_MSG(E_FAIL, bytesRead >= c_maxVarintBytes, "ttrpc: varint too large"); + + const uint8_t byte = *ptr++; + bytesRead++; + + value |= static_cast(byte & 0x7F) << shift; + + if ((byte & 0x80) == 0) + { + return S_OK; + } + + shift += 7; + } + + return E_FAIL; +} + +void 
TtrpcEnvelopeCodec::EncodeVarint(uint64_t value, std::vector& buf) +{ + do + { + uint8_t byte = static_cast(value & 0x7F); + value >>= 7; + if (value != 0) + { + byte |= 0x80; + } + buf.push_back(byte); + } while (value != 0); +} + +void TtrpcEnvelopeCodec::EncodeTag(uint32_t field, uint32_t wireType, std::vector& buf) +{ + EncodeVarint((static_cast(field) << 3) | wireType, buf); +} + +void TtrpcEnvelopeCodec::EncodeStringField(uint32_t field, const std::string& value, std::vector& buf) +{ + if (value.empty()) + { + return; + } + + EncodeTag(field, c_wireTypeLengthDelimited, buf); + EncodeVarint(value.size(), buf); + buf.insert(buf.end(), value.begin(), value.end()); +} + +void TtrpcEnvelopeCodec::EncodeBytesField(uint32_t field, const std::vector& value, std::vector& buf) +{ + if (value.empty()) + { + return; + } + + EncodeTag(field, c_wireTypeLengthDelimited, buf); + EncodeVarint(value.size(), buf); + buf.insert(buf.end(), value.begin(), value.end()); +} + +std::vector TtrpcEnvelopeCodec::EncodeRequestEnvelope( + const std::string& service, + const std::string& method, + const std::vector& payload) +{ + std::vector buf; + EncodeStringField(1, service, buf); + EncodeStringField(2, method, buf); + EncodeBytesField(3, payload, buf); + return buf; +} + +HRESULT TtrpcEnvelopeCodec::DecodeResponseEnvelope( + const std::vector& responseData, + DecodedResponse& decoded) +{ + decoded = {}; + + const uint8_t* ptr = responseData.data(); + const uint8_t* end = ptr + responseData.size(); + + while (ptr < end) + { + uint64_t tag = 0; + RETURN_IF_FAILED(ReadVarint(ptr, end, tag)); + + const uint32_t fieldNumber = static_cast(tag >> 3); + const uint32_t wireType = static_cast(tag & 0x7); + + if (wireType == c_wireTypeVarint) + { + uint64_t ignored = 0; + RETURN_IF_FAILED(ReadVarint(ptr, end, ignored)); + continue; + } + + if (wireType != c_wireTypeLengthDelimited) + { + return E_FAIL; + } + + uint64_t length = 0; + RETURN_IF_FAILED(ReadVarint(ptr, end, length)); + + 
RETURN_HR_IF_MSG(E_FAIL, length > static_cast(end - ptr), "ttrpc: response truncated"); + + if (fieldNumber == 1) + { + decoded.HasStatus = true; + const uint8_t* statusEnd = ptr + length; + + while (ptr < statusEnd) + { + uint64_t innerTag = 0; + RETURN_IF_FAILED(ReadVarint(ptr, statusEnd, innerTag)); + + const uint32_t innerField = static_cast(innerTag >> 3); + const uint32_t innerWire = static_cast(innerTag & 0x7); + + if (innerWire == c_wireTypeVarint) + { + uint64_t value = 0; + RETURN_IF_FAILED(ReadVarint(ptr, statusEnd, value)); + + if (innerField == 1) + { + decoded.StatusCode = static_cast(value); + } + } + else if (innerWire == c_wireTypeLengthDelimited) + { + uint64_t innerLength = 0; + RETURN_IF_FAILED(ReadVarint(ptr, statusEnd, innerLength)); + + RETURN_HR_IF_MSG(E_FAIL, innerLength > static_cast(statusEnd - ptr), "ttrpc: status payload truncated"); + + if (innerField == 2) + { + decoded.StatusMessage.assign(reinterpret_cast(ptr), static_cast(innerLength)); + } + + ptr += innerLength; + } + else + { + ptr = statusEnd; + } + } + + continue; + } + + if (fieldNumber == 2) + { + RETURN_HR_IF_MSG( + E_FAIL, + length > c_maxMessageBytes, + "ttrpc: response payload too large: %llu bytes", + static_cast(length)); + + decoded.Payload.assign(ptr, ptr + length); + ptr += length; + continue; + } + + ptr += length; + } + + return S_OK; +} diff --git a/src/windows/service/exe/TtrpcEnvelopeCodec.h b/src/windows/service/exe/TtrpcEnvelopeCodec.h new file mode 100644 index 0000000000..7439d13004 --- /dev/null +++ b/src/windows/service/exe/TtrpcEnvelopeCodec.h @@ -0,0 +1,59 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. 
+ +#pragma once + +#include +#include +#include + +namespace wsl::windows::service::wslc::detail { + +#pragma pack(push, 1) +struct TtrpcMessageHeader +{ + uint8_t Length[4]; // big-endian uint32 + uint8_t StreamId[4]; // big-endian uint32 + uint8_t MessageType; + uint8_t Flags; +}; +#pragma pack(pop) + +static_assert(sizeof(TtrpcMessageHeader) == 10, "ttrpc MessageHeader must be 10 bytes"); + +class TtrpcEnvelopeCodec +{ +public: + static constexpr uint8_t c_messageTypeRequest = 1; + static constexpr uint8_t c_messageTypeResponse = 2; + static constexpr uint32_t c_maxMessageBytes = 4 * 1024 * 1024; + + struct DecodedResponse + { + bool HasStatus = false; + int32_t StatusCode = 0; + std::string StatusMessage; + std::vector Payload; + }; + + static void WriteBigEndian32(uint8_t* dest, uint32_t value); + static uint32_t ReadBigEndian32(const uint8_t* src); + + static std::vector EncodeRequestEnvelope(const std::string& service, + const std::string& method, + const std::vector& payload); + + static HRESULT DecodeResponseEnvelope(const std::vector& responseData, + DecodedResponse& decoded); + +private: + static constexpr uint32_t c_wireTypeVarint = 0; + static constexpr uint32_t c_wireTypeLengthDelimited = 2; + + static HRESULT ReadVarint(const uint8_t*& ptr, const uint8_t* end, uint64_t& value); + static void EncodeVarint(uint64_t value, std::vector& buf); + static void EncodeTag(uint32_t field, uint32_t wireType, std::vector& buf); + static void EncodeStringField(uint32_t field, const std::string& value, std::vector& buf); + static void EncodeBytesField(uint32_t field, const std::vector& value, std::vector& buf); +}; + +} // namespace wsl::windows::service::wslc::detail diff --git a/src/windows/service/exe/WSLCSessionManager.cpp b/src/windows/service/exe/WSLCSessionManager.cpp index fc35438823..4dbc711fce 100644 --- a/src/windows/service/exe/WSLCSessionManager.cpp +++ b/src/windows/service/exe/WSLCSessionManager.cpp @@ -29,6 +29,9 @@ Module Name: #include 
"WSLCSessionManager.h" #include "HcsVirtualMachine.h" +#if WSL_INCLUDE_OPENVMM +#include "OpenVmmVirtualMachine.h" +#endif #include "WSLCUserSettings.h" #include "WSLCSessionDefaults.h" #include "WSLCPluginNotifier.h" @@ -42,6 +45,9 @@ extern wsl::windows::service::PluginManager g_pluginManager; using wsl::windows::common::COMServiceExecutionContext; using wsl::windows::service::wslc::CallingProcessTokenInfo; using wsl::windows::service::wslc::HcsVirtualMachine; +#if WSL_INCLUDE_OPENVMM +using wsl::windows::service::wslc::OpenVmmVirtualMachine; +#endif using wsl::windows::service::wslc::WSLCPluginNotifier; using wsl::windows::service::wslc::WSLCSessionManagerImpl; namespace wslutil = wsl::windows::common::wslutil; @@ -262,7 +268,36 @@ void WSLCSessionManagerImpl::CreateSession(const WSLCSessionSettings* Settings, g_pluginManager, sessionId, creatorPid, std::wstring(resolvedDisplayName), wil::shared_handle(sharedToken), std::vector(storedSid)); // Create the VM in the SYSTEM service (privileged). - auto vm = Microsoft::WRL::Make(Settings); + // Determine VMM backend based on user settings: OpenVMM (experimental) or HCS (default). + Microsoft::WRL::ComPtr vm; + +#if WSL_INCLUDE_OPENVMM + const bool useOpenVmm = SessionSettings::LoadUserSettings(userToken.get()).Get(); + + // For OpenVMM, disable unsupported features before creating the VM and session. + // The copy must outlive all uses of Settings below (CreateSessionSettings, etc.). + WSLCSessionSettings openVmmSettings; + if (useOpenVmm) + { + openVmmSettings = *Settings; + WI_ClearFlag(openVmmSettings.FeatureFlags, WslcFeatureFlagsGPU); + WI_ClearFlag(openVmmSettings.FeatureFlags, WslcFeatureFlagsVirtioFs); + WI_ClearFlag(openVmmSettings.FeatureFlags, WslcFeatureFlagsDnsTunneling); + + // OpenVMM provides networking via its built-in consomme backend. + // Use ConsommeNetworking mode so the session process skips GNS but + // still configures the networking engine and port relay. 
+ openVmmSettings.NetworkingMode = WSLCNetworkingModeConsomme; + + Settings = &openVmmSettings; + + vm = Microsoft::WRL::Make(Settings); + } +#endif + if (!vm) + { + vm = Microsoft::WRL::Make(Settings); + } // Launch per-user COM server factory and add it to our job object for crash cleanup. auto factory = wslutil::CreateComServerAsUser(__uuidof(WSLCSessionFactory), userToken.get()); diff --git a/src/windows/service/inc/wslc.idl b/src/windows/service/inc/wslc.idl index 67bd148378..e048767f9b 100644 --- a/src/windows/service/inc/wslc.idl +++ b/src/windows/service/inc/wslc.idl @@ -422,7 +422,8 @@ typedef enum _WSLCNetworkingMode { WSLCNetworkingModeNone, WSLCNetworkingModeNAT, - WSLCNetworkingModeVirtioProxy + WSLCNetworkingModeVirtioProxy, + WSLCNetworkingModeConsomme } WSLCNetworkingMode; typedef enum _WSLCFeatureFlags @@ -481,6 +482,20 @@ interface IWSLCVirtualMachine : IUnknown // Returns an event that is signaled when the VM exits (graceful or forced). HRESULT GetTerminationEvent([out, system_handle(sh_event)] HANDLE* Event); + + // Connects to a vsock port in the VM. Returns a socket handle that + // supports overlapped I/O (suitable for use with SocketChannel). + // For HCS VMs, this uses hvsocket. + // For OpenVMM VMs, this uses the hybrid_vsock Unix domain socket bridge + // with a TCP loopback relay to provide overlapped I/O support. + HRESULT ConnectToVsockPort([in] ULONG Port, [out, system_handle(sh_socket)] HANDLE* Socket); + + // Accepts a crash dump connection from the VM. Blocks until a crash dump + // connection arrives or the VM exits. Returns E_ABORT if the VM exits + // before a connection is received. + // For HCS VMs, this uses an HV socket listener on the crash dump port. + // For OpenVMM VMs, this uses the hybrid_vsock Unix domain socket bridge. 
+ HRESULT AcceptCrashDumpConnection([out, system_handle(sh_socket)] HANDLE* Socket); } typedef enum _WSLCSessionStorageFlags diff --git a/src/windows/wslc/CMakeLists.txt b/src/windows/wslc/CMakeLists.txt index d4a4ba6236..283d72ff24 100644 --- a/src/windows/wslc/CMakeLists.txt +++ b/src/windows/wslc/CMakeLists.txt @@ -11,6 +11,10 @@ file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS ${SOURCE_PATTERNS}) add_library(wslclib OBJECT ${SOURCES} ${HEADERS}) target_include_directories(wslclib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${WSLC_SUBDIR_PATHS}) +if (INCLUDE_OPENVMM) + wsl_add_openvmm_proto(wslclib) +endif() + target_link_libraries(wslclib ${COMMON_LINK_LIBRARIES} yaml-cpp diff --git a/src/windows/wslcsession/DockerHTTPClient.cpp b/src/windows/wslcsession/DockerHTTPClient.cpp index e2e2c4385d..32c075f760 100644 --- a/src/windows/wslcsession/DockerHTTPClient.cpp +++ b/src/windows/wslcsession/DockerHTTPClient.cpp @@ -155,8 +155,8 @@ std::string DockerHTTPClient::URL::Escape(const std::string& Value) return wsl::shared::string::WideToMultiByte(escaped.c_str()); } -DockerHTTPClient::DockerHTTPClient(wsl::shared::SocketChannel&& Channel, HANDLE exitingEvent, GUID VmId, ULONG ConnectTimeoutMs) : - m_exitingEvent(exitingEvent), m_channel(std::move(Channel)), m_vmId(VmId), m_connectTimeoutMs(ConnectTimeoutMs) +DockerHTTPClient::DockerHTTPClient(wsl::shared::SocketChannel&& Channel, HANDLE exitingEvent, IWSLCVirtualMachine* Vm, ULONG ConnectTimeoutMs) : + m_exitingEvent(exitingEvent), m_channel(std::move(Channel)), m_vm(Vm), m_connectTimeoutMs(ConnectTimeoutMs) { } @@ -607,9 +607,11 @@ wil::unique_socket DockerHTTPClient::ConnectSocket() THROW_HR_IF_MSG(E_FAIL, response.Pid <= 0, "fork() returned %i", response.Pid); - // Connect the new hvsocket. + // Connect the new socket via the VM interface. 
+ wil::unique_socket connSocket; + THROW_IF_FAILED(m_vm->ConnectToVsockPort(response.Port, reinterpret_cast(&connSocket))); wsl::shared::SocketChannel newChannel{ - wsl::windows::common::hvsocket::Connect(m_vmId, response.Port, m_exitingEvent, m_connectTimeoutMs), "DockerClient", {m_exitingEvent}}; + std::move(connSocket), "DockerClient", {m_exitingEvent}}; lock.reset(); // Connect that socket to the docker unix socket. diff --git a/src/windows/wslcsession/DockerHTTPClient.h b/src/windows/wslcsession/DockerHTTPClient.h index 2ad348dae2..572ed8c3dd 100644 --- a/src/windows/wslcsession/DockerHTTPClient.h +++ b/src/windows/wslcsession/DockerHTTPClient.h @@ -109,8 +109,19 @@ class DockerHTTPClient HTTPRequestContext(wil::unique_socket&& Socket) : stream(context) { - boost::asio::generic::stream_protocol hv_proto(AF_HYPERV, SOCK_STREAM); - stream.assign(hv_proto, Socket.release()); + // Detect the socket's address family to create the correct protocol descriptor. + // HCS returns AF_HYPERV sockets; OpenVMM returns AF_INET (TCP loopback relay). + WSAPROTOCOL_INFOW protocolInfo{}; + int infoLen = sizeof(protocolInfo); + int family = AF_INET; // Default to TCP/IPv4. + if (getsockopt(Socket.get(), SOL_SOCKET, SO_PROTOCOL_INFOW, + reinterpret_cast(&protocolInfo), &infoLen) == 0) + { + family = protocolInfo.iAddressFamily; + } + + boost::asio::generic::stream_protocol proto(family, SOCK_STREAM); + stream.assign(proto, Socket.release()); } boost::asio::io_context context; @@ -119,7 +130,7 @@ class DockerHTTPClient using HTTPResponse = boost::beast::http::message; - DockerHTTPClient(wsl::shared::SocketChannel&& Channel, HANDLE ExitingEvent, GUID VmId, ULONG ConnectTimeoutMs); + DockerHTTPClient(wsl::shared::SocketChannel&& Channel, HANDLE ExitingEvent, IWSLCVirtualMachine* Vm, ULONG ConnectTimeoutMs); // Container management. 
std::vector ListContainers( @@ -282,7 +293,7 @@ class DockerHTTPClient } ULONG m_connectTimeoutMs{}; - GUID m_vmId; + wil::com_ptr m_vm; shared::SocketChannel m_channel; HANDLE m_exitingEvent; wil::srwlock m_lock; diff --git a/src/windows/wslcsession/WSLCSession.cpp b/src/windows/wslcsession/WSLCSession.cpp index f9ca9ffd65..97de532e6b 100644 --- a/src/windows/wslcsession/WSLCSession.cpp +++ b/src/windows/wslcsession/WSLCSession.cpp @@ -304,7 +304,7 @@ try auto [_, __, channel] = m_virtualMachine->Fork(WSLC_FORK::Thread); - m_dockerClient.emplace(std::move(channel), m_virtualMachine->TerminatingEvent(), m_virtualMachine->VmId(), 10 * 1000); + m_dockerClient.emplace(std::move(channel), m_virtualMachine->TerminatingEvent(), m_virtualMachine->Vm(), 10 * 1000); // Start the event tracker. m_eventTracker.emplace(m_dockerClient.value(), *this, m_ioRelay); @@ -401,7 +401,25 @@ void WSLCSession::ConfigureStorage(const WSLCSessionInitSettings& Settings, PSID } // Mount the device to /root. - m_virtualMachine->Mount(diskDevice.c_str(), c_containerdStorage, "ext4", "", 0); + // If the mount fails (e.g., unformatted disk from pre-attach), format and retry. + auto mountResult = wil::ResultFromException([&]() { + m_virtualMachine->Mount(diskDevice.c_str(), c_containerdStorage, "ext4", "", 0); + }); + + if (FAILED(mountResult) && !vhdCreated) + { + // The disk exists but may be unformatted (pre-attached by OpenVMM backend). + WSL_LOG("StorageMountFailedFormatting", + TraceLoggingValue(diskDevice.c_str(), "Device"), + TraceLoggingValue(mountResult, "MountResult")); + + m_virtualMachine->Ext4Format(diskDevice); + m_virtualMachine->Mount(diskDevice.c_str(), c_containerdStorage, "ext4", "", 0); + } + else + { + THROW_IF_FAILED(mountResult); + } // Configure swap on a separate ephemeral VHD. 
if (Settings.SwapSizeMb > 0) diff --git a/src/windows/wslcsession/WSLCVirtualMachine.cpp b/src/windows/wslcsession/WSLCVirtualMachine.cpp index c6416e5a4a..aadbe65b39 100644 --- a/src/windows/wslcsession/WSLCVirtualMachine.cpp +++ b/src/windows/wslcsession/WSLCVirtualMachine.cpp @@ -21,6 +21,7 @@ Module Name: #include "ServiceProcessLauncher.h" #include "wslutil.h" #include "lxinitshared.h" +#include "ConsommeNetworking.h" using namespace wsl::windows::common; using wsl::windows::service::wslc::TypedHandle; @@ -264,10 +265,10 @@ void WSLCVirtualMachine::Initialize() THROW_IF_FAILED(m_vm->GetId(&m_vmId)); // Start crash dump collection thread. - auto crashDumpSocket = hvsocket::Listen(m_vmId, LX_INIT_UTILITY_VM_CRASH_DUMP_PORT); - THROW_LAST_ERROR_IF(!crashDumpSocket); - - m_crashDumpThread = std::thread{[this, socket = std::move(crashDumpSocket)]() mutable { CollectCrashDumps(std::move(socket)); }}; + // The VM backend handles the listen socket creation (HV socket for HCS, + // Unix domain socket for OpenVMM). AcceptCrashDumpConnection blocks until + // a connection arrives or the VM exits. + m_crashDumpThread = std::thread{[this]() { CollectCrashDumps(); }}; // Establish a socket channel with mini_init in the VM. wil::unique_socket socket; @@ -339,6 +340,39 @@ void WSLCVirtualMachine::ConfigureNetworking() return; } + if (m_networkingMode == WSLCNetworkingModeConsomme) + { + // Consomme networking: no GNS daemon needed. The VMM provides NAT, + // DHCP, and DNS directly via the virtio-net device. + // + // Send a message to mini_init to configure the guest's network + // interface statically. This must happen before containerd/dockerd + // start, as they cache DNS from /etc/resolv.conf at launch. 
+ auto address = std::format("{}/{}", wsl::core::c_consommeGuestIp, wsl::core::c_consommeSubnetMask); + + wsl::shared::MessageWriter netMessage; + netMessage.WriteString(netMessage->InterfaceOffset, wsl::core::c_consommeInterface); + netMessage.WriteString(netMessage->AddressOffset, address); + netMessage.WriteString(netMessage->GatewayOffset, wsl::core::c_consommeGatewayIp); + netMessage.WriteString(netMessage->DnsServerOffset, wsl::core::c_consommeGatewayIp); + + const auto& response = m_initChannel.Transaction(netMessage.Span()); + THROW_HR_IF_MSG(E_FAIL, response.Result != 0, "Consomme guest network setup failed: %d", response.Result); + + WSL_LOG("ConsommeConfigureGuestNetwork", TraceLoggingValue(response.Result, "Result")); + + // No ConfigureNetworking COM call needed — ConsommeNetworking is + // initialized eagerly in the OpenVmmVirtualMachine constructor + // (the system_handle IDL attribute can't marshal INVALID_HANDLE_VALUE). + + // Skip LaunchPortRelay — the relay uses wslrelay.exe which connects + // via hvsocket (m_vmId), and OpenVMM/WHP VMs are not registered with + // the HvSocket driver. Port forwarding for OpenVMM will need a + // different mechanism (e.g. consomme's own NAT port forwarding). + // TODO: Implement port forwarding for OpenVMM consomme backend. + return; + } + // Launch /gns with auto-allocated file descriptors for the GNS channel (and DNS channel if enabled). 
std::vector fds; fds.emplace_back(WSLCProcessFd{.Fd = -1, .Type = WSLCFdType::WSLCFdTypeDefault}); @@ -564,7 +598,8 @@ std::tuple WSLCVirtualMachine::For THROW_HR_IF_MSG(E_FAIL, pid <= 0, "fork() returned %i", pid); - auto socket = wsl::windows::common::hvsocket::Connect(m_vmId, port, m_vmTerminatingEvent.get(), m_initChannelTimeout); + wil::unique_socket socket; + THROW_IF_FAILED(m_vm->ConnectToVsockPort(port, reinterpret_cast(&socket))); return std::make_tuple( pid, ptyMaster, wsl::shared::SocketChannel{std::move(socket), std::to_string(pid), std::vector(Channel.GetExitEvents())}); @@ -580,7 +615,7 @@ WSLCVirtualMachine::ConnectedSocket WSLCVirtualMachine::ConnectSocket(wsl::share const auto& response = transaction.Receive(); ConnectedSocket socket; - socket.Socket = wsl::windows::common::hvsocket::Connect(m_vmId, response.Result, m_vmTerminatingEvent.get(), m_initChannelTimeout); + THROW_IF_FAILED(m_vm->ConnectToVsockPort(response.Result, reinterpret_cast(&socket.Socket))); // If the FD was unspecified, read the Linux file descriptor from the guest. if (Fd == -1) @@ -1210,7 +1245,7 @@ wil::unique_socket WSLCVirtualMachine::ConnectUnixSocket(const char* Path) return channel.Release(); } -void WSLCVirtualMachine::CollectCrashDumps(wil::unique_socket&& listenSocket) +void WSLCVirtualMachine::CollectCrashDumps() { // No impersonation needed - the session process already runs as the user. wslutil::SetThreadDescription(L"CrashDumpCollection"); @@ -1221,18 +1256,20 @@ void WSLCVirtualMachine::CollectCrashDumps(wil::unique_socket&& listenSocket) { try { - auto socket = hvsocket::CancellableAccept(listenSocket.get(), INFINITE, m_vmTerminatingEvent.get()); - if (!socket) + wil::unique_socket socket; + HRESULT hr = m_vm->AcceptCrashDumpConnection(reinterpret_cast(&socket)); + if (hr == E_ABORT) { // VM is exiting. 
break; } + THROW_IF_FAILED(hr); constexpr DWORD timeout = 30 * 1000; - THROW_LAST_ERROR_IF(setsockopt(socket->get(), SOL_SOCKET, SO_RCVTIMEO, (const char*)&timeout, sizeof(timeout)) == SOCKET_ERROR); + THROW_LAST_ERROR_IF(setsockopt(socket.get(), SOL_SOCKET, SO_RCVTIMEO, (const char*)&timeout, sizeof(timeout)) == SOCKET_ERROR); auto channel = wsl::shared::SocketChannel{ - std::move(socket.value()), "crash_dump", {m_vmTerminatingEvent.get(), m_sessionTerminatingEvent}}; + std::move(socket), "crash_dump", {m_vmTerminatingEvent.get(), m_sessionTerminatingEvent}}; auto transaction = channel.ReceiveTransaction(); gsl::span responseSpan; diff --git a/src/windows/wslcsession/WSLCVirtualMachine.h b/src/windows/wslcsession/WSLCVirtualMachine.h index 2b47cfb98f..0b788d0378 100644 --- a/src/windows/wslcsession/WSLCVirtualMachine.h +++ b/src/windows/wslcsession/WSLCVirtualMachine.h @@ -167,6 +167,11 @@ class WSLCVirtualMachine return m_vmId; } + IWSLCVirtualMachine* Vm() const + { + return m_vm.get(); + } + bool FeatureEnabled(WSLCFeatureFlags Flag) const; private: @@ -197,7 +202,7 @@ class WSLCVirtualMachine void WatchForExitedProcesses(wsl::shared::SocketChannel& Channel); - void CollectCrashDumps(wil::unique_socket&& listenSocket); + void CollectCrashDumps(); struct AttachedDisk { From b2bad508fa479007bd9c9652019d2edb553d7b74 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Thu, 21 May 2026 13:09:46 -0700 Subject: [PATCH 02/10] Get rid of TCP relay sockets --- src/shared/inc/SocketChannel.h | 244 +++++++++++++++++- .../service/exe/OpenVmmVirtualMachine.cpp | 204 +-------------- .../service/exe/OpenVmmVirtualMachine.h | 6 - src/windows/service/inc/wslc.idl | 9 +- src/windows/wslcsession/DockerHTTPClient.h | 2 +- .../wslcsession/WSLCVirtualMachine.cpp | 24 +- 6 files changed, 280 insertions(+), 209 deletions(-) diff --git a/src/shared/inc/SocketChannel.h b/src/shared/inc/SocketChannel.h index b202217367..4a66f61d15 100644 --- a/src/shared/inc/SocketChannel.h +++ 
b/src/shared/inc/SocketChannel.h @@ -110,6 +110,7 @@ class SocketChannel #ifdef WIN32 m_exitEvents = std::move(other.m_exitEvents); + m_blockingIO = other.m_blockingIO; #endif m_ignore_sequence = other.m_ignore_sequence; m_sent_non_transaction_messages = other.m_sent_non_transaction_messages; @@ -126,7 +127,8 @@ class SocketChannel #ifdef WIN32 SocketChannel(TSocket&& socket, std::string&& name, std::vector&& exitEvents) : - m_socket(std::move(socket)), m_exitEvents(std::move(exitEvents)), m_name(std::move(name)) + m_socket(std::move(socket)), m_exitEvents(std::move(exitEvents)), m_name(std::move(name)), + m_blockingIO(IsNonOverlappedSocket(m_socket.get())) { } @@ -142,6 +144,11 @@ class SocketChannel return m_exitEvents; } + bool IsBlockingIO() const + { + return m_blockingIO; + } + #endif template @@ -183,10 +190,17 @@ class SocketChannel #ifdef WIN32 - auto io = CreateIO(); - io.AddHandle(std::make_unique(m_socket.get(), span)); + if (m_blockingIO) + { + BlockingSend(span, timeout); + } + else + { + auto io = CreateIO(); + io.AddHandle(std::make_unique(m_socket.get(), span)); - io.Run(TimeoutToMilliseconds(timeout)); + io.Run(TimeoutToMilliseconds(timeout)); + } WSL_LOG( "SentMessage", @@ -631,8 +645,27 @@ class SocketChannel return std::chrono::milliseconds{timeout}; } + // Returns true if the socket does not support overlapped I/O (e.g. AF_UNIX on Windows). + static bool IsNonOverlappedSocket(SOCKET s) + { + WSAPROTOCOL_INFOW protocolInfo{}; + int infoLen = sizeof(protocolInfo); + if (getsockopt(s, SOL_SOCKET, SO_PROTOCOL_INFOW, + reinterpret_cast(&protocolInfo), &infoLen) == 0) + { + return protocolInfo.iAddressFamily == AF_UNIX; + } + + return false; + } + gsl::span ReceiveImpl(TTimeout timeout) { + if (m_blockingIO) + { + return BlockingReceive(timeout); + } + auto io = CreateIO(); gsl::span message; @@ -645,6 +678,208 @@ class SocketChannel return message; } + // Blocking send for sockets that do not support overlapped I/O (e.g. AF_UNIX on Windows). 
+ // Handles WSAEWOULDBLOCK gracefully since the socket may be in non-blocking mode + // if a concurrent BlockingReceive is active (WSAEventSelect sets non-blocking). + void BlockingSend(gsl::span span, TTimeout timeout) + { + size_t offset = 0; + while (offset < span.size()) + { + int sent = ::send( + m_socket.get(), + reinterpret_cast(span.data() + offset), + static_cast(span.size() - offset), + 0); + + if (sent == SOCKET_ERROR) + { + if (WSAGetLastError() == WSAEWOULDBLOCK) + { + // Socket is temporarily in non-blocking mode. Wait for writability. + fd_set writeSet; + FD_ZERO(&writeSet); + FD_SET(m_socket.get(), &writeSet); + timeval tv{1, 0}; + if (select(0, nullptr, &writeSet, nullptr, &tv) > 0) + { + continue; + } + + THROW_HR_MSG(E_FAIL, "BlockingSend timed out waiting for writability on channel: %hs", m_name.c_str()); + } + + THROW_LAST_ERROR_MSG("BlockingSend failed on channel: %hs", m_name.c_str()); + } + + THROW_HR_IF_MSG(E_UNEXPECTED, sent == 0, "Socket closed during BlockingSend on channel: %hs", m_name.c_str()); + + offset += sent; + } + } + + // Blocking receive for sockets that do not support overlapped I/O (e.g. AF_UNIX on Windows). + // Uses WSAEventSelect + WaitForMultipleObjects to integrate exit event cancellation + // with non-blocking recv() on the data socket. + gsl::span BlockingReceive(TTimeout timeout) + { + // Set up a WSA event to detect when data is available or the socket closes. + wil::unique_event socketEvent(wil::EventOptions::ManualReset); + THROW_LAST_ERROR_IF( + WSAEventSelect(m_socket.get(), socketEvent.get(), FD_READ | FD_CLOSE) == SOCKET_ERROR); + + // Restore the socket to blocking mode on exit (WSAEventSelect sets non-blocking). + auto restoreBlocking = wil::scope_exit([&] { + WSAEventSelect(m_socket.get(), nullptr, 0); + u_long nonBlocking = 0; + ioctlsocket(m_socket.get(), FIONBIO, &nonBlocking); + }); + + // Build wait handle array: exit events first, then socket event. 
+ std::vector waitHandles; + waitHandles.reserve(m_exitEvents.size() + 1); + for (const auto event : m_exitEvents) + { + waitHandles.push_back(event); + } + const DWORD socketEventIndex = static_cast(waitHandles.size()); + waitHandles.push_back(socketEvent.get()); + + auto messageSize = sizeof(MESSAGE_HEADER); + if (m_buffer.size() < messageSize) + { + m_buffer.resize(messageSize); + } + + size_t bytesNeeded = sizeof(MESSAGE_HEADER); + size_t currentOffset = 0; + bool readingHeader = true; + + for (;;) + { + // Try to read data that may already be buffered before waiting. + // This is critical because WSAEventSelect only signals on state + // transitions; data that arrived before the call would be missed + // if we waited first. + while (bytesNeeded > 0) + { + int received = ::recv( + m_socket.get(), + reinterpret_cast(m_buffer.data() + currentOffset), + static_cast(bytesNeeded), + 0); + + if (received == SOCKET_ERROR) + { + auto error = WSAGetLastError(); + if (error == WSAEWOULDBLOCK) + { + break; // No more data available, need to wait. + } + + if (error == WSAECONNABORTED || error == WSAECONNRESET) + { + return {}; // Clean close. + } + + THROW_WIN32(error); + } + + if (received == 0) + { + // Socket closed. + THROW_HR_IF_MSG( + E_UNEXPECTED, + currentOffset > 0, + "Socket closed mid-message during BlockingReceive. Offset: %zu, Remaining: %zu, channel: %hs", + currentOffset, + bytesNeeded, + m_name.c_str()); + + return {}; + } + + currentOffset += received; + bytesNeeded -= received; + } + + // When the header is fully read, parse the message size and set up for the body. 
+ if (readingHeader && bytesNeeded == 0) + { + messageSize = gslhelpers::get_struct( + gsl::make_span(m_buffer.data(), sizeof(MESSAGE_HEADER)))->MessageSize; + + THROW_HR_IF_MSG(E_UNEXPECTED, messageSize < sizeof(MESSAGE_HEADER), + "Unexpected message size: %zu on channel: %hs", messageSize, m_name.c_str()); + THROW_HR_IF_MSG(E_UNEXPECTED, messageSize > 4 * 1024 * 1024, + "Message size too large: %zu on channel: %hs", messageSize, m_name.c_str()); + + if (messageSize > sizeof(MESSAGE_HEADER)) + { + if (m_buffer.size() < messageSize) + { + m_buffer.resize(messageSize); + } + + readingHeader = false; + bytesNeeded = messageSize - sizeof(MESSAGE_HEADER); + continue; // Try to read body data immediately. + } + } + + // Message complete. + if (bytesNeeded == 0) + { + break; + } + + // No data available (WSAEWOULDBLOCK). Wait for data or exit event. + // WSAEnumNetworkEvents atomically resets the event — never call + // ResetEvent manually, as that can clear a legitimate signal. + DWORD waitTimeout = (timeout == INFINITE) ? INFINITE : timeout; + auto waitResult = WaitForMultipleObjects( + static_cast(waitHandles.size()), waitHandles.data(), FALSE, waitTimeout); + + if (waitResult == WAIT_TIMEOUT) + { + THROW_HR_MSG( + HCS_E_CONNECTION_TIMEOUT, + "BlockingReceive timeout on channel: %hs", + m_name.c_str()); + } + + // An exit event was signaled. + if (waitResult >= WAIT_OBJECT_0 && waitResult < WAIT_OBJECT_0 + socketEventIndex) + { + THROW_HR_MSG(E_ABORT, "Exit event signaled during BlockingReceive on channel: %hs", m_name.c_str()); + } + + THROW_HR_IF(E_UNEXPECTED, waitResult < WAIT_OBJECT_0 || waitResult > WAIT_OBJECT_0 + socketEventIndex); + + // Reset the event and check what happened. 
+ WSANETWORKEVENTS netEvents{}; + THROW_LAST_ERROR_IF( + WSAEnumNetworkEvents(m_socket.get(), socketEvent.get(), &netEvents) != 0); + + if (netEvents.lNetworkEvents & FD_CLOSE) + { + THROW_HR_IF_MSG( + E_UNEXPECTED, + currentOffset > 0, + "Socket closed mid-message during BlockingReceive. Offset: %zu, Remaining: %zu, channel: %hs", + currentOffset, + bytesNeeded, + m_name.c_str()); + + return {}; + } + + // FD_READ signaled — loop back to recv. + } + + return gsl::make_span(m_buffer.data(), messageSize); + } + #else gsl::span ReceiveImpl(TTimeout timeout) @@ -723,6 +958,7 @@ class SocketChannel #ifdef WIN32 std::vector m_exitEvents; + bool m_blockingIO = false; #endif uint32_t m_sent_non_transaction_messages = 0; diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.cpp b/src/windows/service/exe/OpenVmmVirtualMachine.cpp index 12f8bd5c3e..b5d322fe0c 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.cpp +++ b/src/windows/service/exe/OpenVmmVirtualMachine.cpp @@ -418,29 +418,6 @@ OpenVmmVirtualMachine::~OpenVmmVirtualMachine() m_processWatchThread.join(); } - // Join relay threads. - if (m_initRelayThread.joinable()) - { - m_initRelayThread.join(); - } - - if (m_crashDumpRelayThread.joinable()) - { - m_crashDumpRelayThread.join(); - } - - { - std::lock_guard lock(m_relayLock); - for (auto& t : m_relayThreads) - { - if (t.joinable()) - { - t.join(); - } - } - m_relayThreads.clear(); - } - if (m_initListenSocket != INVALID_SOCKET) { closesocket(m_initListenSocket); @@ -482,160 +459,6 @@ try } CATCH_RETURN() -// Bidirectional relay between an AF_UNIX socket and a TCP socket. -// Runs until either socket closes or exitEvent is signaled. -// Takes ownership of both sockets. 
-constexpr size_t c_relayBufferSize = 65536; - -static void RelaySocketData(SOCKET unixSock, SOCKET tcpSock, HANDLE exitEvent) -{ - auto cleanup = wil::scope_exit([&] { - closesocket(unixSock); - closesocket(tcpSock); - }); - - // Use WSA events to wait efficiently instead of polling with select(). - // WSAEventSelect puts sockets in non-blocking mode; we only recv when - // data is available and handle WSAEWOULDBLOCK on sends. - wil::unique_event unixEvent(wil::EventOptions::ManualReset); - wil::unique_event tcpEvent(wil::EventOptions::ManualReset); - - if (WSAEventSelect(unixSock, unixEvent.get(), FD_READ | FD_CLOSE) == SOCKET_ERROR || - WSAEventSelect(tcpSock, tcpEvent.get(), FD_READ | FD_CLOSE) == SOCKET_ERROR) - { - return; - } - - char buffer[c_relayBufferSize]; - HANDLE waitHandles[] = {exitEvent, unixEvent.get(), tcpEvent.get()}; - - // Relay data from one socket to another. Returns false if the relay should stop. - auto relayData = [&](SOCKET from, SOCKET to, HANDLE event) -> bool { - WSANETWORKEVENTS netEvents{}; - if (WSAEnumNetworkEvents(from, event, &netEvents) != 0) - { - return true; - } - - if (netEvents.lNetworkEvents & FD_READ) - { - for (;;) - { - int bytes = recv(from, buffer, sizeof(buffer), 0); - if (bytes == SOCKET_ERROR) - { - if (WSAGetLastError() == WSAEWOULDBLOCK) - { - break; // No more data available - } - return false; - } - if (bytes == 0) - { - return false; - } - - int sent = 0; - while (sent < bytes) - { - int n = send(to, buffer + sent, bytes - sent, 0); - if (n == SOCKET_ERROR) - { - if (WSAGetLastError() == WSAEWOULDBLOCK) - { - // Wait briefly for the send buffer to drain. 
- fd_set writeSet; - FD_ZERO(&writeSet); - FD_SET(to, &writeSet); - timeval tv{1, 0}; - if (select(0, nullptr, &writeSet, nullptr, &tv) <= 0) - { - return false; - } - continue; - } - return false; - } - if (n == 0) - { - return false; - } - sent += n; - } - } - } - - if (netEvents.lNetworkEvents & FD_CLOSE) - { - return false; - } - - return true; - }; - - while (true) - { - auto waitResult = WaitForMultipleObjects(ARRAYSIZE(waitHandles), waitHandles, FALSE, INFINITE); - if (waitResult == WAIT_OBJECT_0 || waitResult == WAIT_FAILED) - { - break; - } - - // Always check both directions — multiple events may be signaled. - if (!relayData(unixSock, tcpSock, unixEvent.get()) || - !relayData(tcpSock, unixSock, tcpEvent.get())) - { - break; - } - } -} - -// Creates a TCP loopback socket pair and starts a relay thread between an -// AF_UNIX socket and the TCP server socket. Returns the TCP client socket -// (which supports overlapped I/O) and the relay thread. The relay thread -// takes ownership of both the Unix socket and the TCP server socket. -static std::pair CreateRelayedSocket( - _In_ SOCKET unixSock, _In_ HANDLE exitEvent) -{ - // Create a TCP loopback listener on an ephemeral port. - SOCKET tcpListener = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - THROW_LAST_ERROR_IF(tcpListener == INVALID_SOCKET); - auto closeListener = wil::scope_exit([&] { closesocket(tcpListener); }); - - sockaddr_in loopback{}; - loopback.sin_family = AF_INET; - loopback.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - loopback.sin_port = 0; - THROW_LAST_ERROR_IF(bind(tcpListener, reinterpret_cast(&loopback), sizeof(loopback)) == SOCKET_ERROR); - THROW_LAST_ERROR_IF(listen(tcpListener, 1) == SOCKET_ERROR); - - // Get the port that was assigned. - sockaddr_in boundAddr{}; - int addrLen = sizeof(boundAddr); - THROW_LAST_ERROR_IF(getsockname(tcpListener, reinterpret_cast(&boundAddr), &addrLen) == SOCKET_ERROR); - - // Connect a TCP client to the listener. 
- SOCKET tcpClient = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - THROW_LAST_ERROR_IF(tcpClient == INVALID_SOCKET); - auto closeClient = wil::scope_exit([&] { closesocket(tcpClient); }); - - THROW_LAST_ERROR_IF(connect(tcpClient, reinterpret_cast(&boundAddr), sizeof(boundAddr)) == SOCKET_ERROR); - - // Accept the server-side connection. - SOCKET tcpServer = accept(tcpListener, nullptr, nullptr); - THROW_LAST_ERROR_IF(tcpServer == INVALID_SOCKET); - - closeListener.release(); - closesocket(tcpListener); - - // Start a relay thread that takes ownership of unixSock and tcpServer. - auto relayThread = std::thread(RelaySocketData, unixSock, tcpServer, exitEvent); - - // Return the TCP client socket — this supports overlapped I/O. - closeClient.release(); - return {wil::unique_socket(tcpClient), std::move(relayThread)}; -} - HRESULT OpenVmmVirtualMachine::AcceptConnection(_Out_ HANDLE* Socket) try { @@ -659,10 +482,10 @@ try m_initListenSocket = INVALID_SOCKET; DeleteFileW(m_initListenPath.c_str()); - // Bridge AF_UNIX to TCP loopback for overlapped I/O support. - auto [tcpSocket, relayThread] = CreateRelayedSocket(unixSock, m_vmExitEvent.get()); - m_initRelayThread = std::move(relayThread); - *Socket = reinterpret_cast(tcpSocket.release()); + // Return the AF_UNIX socket directly. Callers that wrap it in a + // SocketChannel should use blocking I/O mode since AF_UNIX on Windows + // does not support overlapped I/O. + *Socket = reinterpret_cast(unixSock); return S_OK; } CATCH_RETURN() @@ -819,14 +642,11 @@ try TraceLoggingValue(Port, "Port"), TraceLoggingValue(response, "Response")); - // Bridge AF_UNIX to TCP loopback for overlapped I/O support. + // Return the AF_UNIX socket directly. Callers that wrap it in a + // SocketChannel should use blocking I/O mode since AF_UNIX on Windows + // does not support overlapped I/O. 
closeUnix.release(); - auto [tcpSocket, relayThread] = CreateRelayedSocket(unixSock, m_vmExitEvent.get()); - { - std::lock_guard lock(m_relayLock); - m_relayThreads.emplace_back(std::move(relayThread)); - } - *Socket = reinterpret_cast(tcpSocket.release()); + *Socket = reinterpret_cast(unixSock); return S_OK; } CATCH_RETURN() @@ -850,10 +670,10 @@ try SOCKET unixSock = accept(m_crashDumpListenSocket, nullptr, nullptr); THROW_LAST_ERROR_IF(unixSock == INVALID_SOCKET); - // Bridge AF_UNIX to TCP loopback for overlapped I/O support. - auto [tcpSocket, relayThread] = CreateRelayedSocket(unixSock, m_vmExitEvent.get()); - m_crashDumpRelayThread = std::move(relayThread); - *Socket = reinterpret_cast(tcpSocket.release()); + // Return the AF_UNIX socket directly. Callers that wrap it in a + // SocketChannel should use blocking I/O mode since AF_UNIX on Windows + // does not support overlapped I/O. + *Socket = reinterpret_cast(unixSock); return S_OK; } CATCH_RETURN() diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.h b/src/windows/service/exe/OpenVmmVirtualMachine.h index c45adedced..7796fad37b 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.h +++ b/src/windows/service/exe/OpenVmmVirtualMachine.h @@ -137,12 +137,6 @@ class OpenVmmVirtualMachine wil::unique_event m_vmExitEvent{wil::EventOptions::ManualReset}; - // Relay threads bridging AF_UNIX sockets to TCP loopback for overlapped I/O. - std::thread m_initRelayThread; - std::thread m_crashDumpRelayThread; - std::mutex m_relayLock; - std::vector m_relayThreads; - std::map m_attachedDisks; std::bitset m_lunBitmap; diff --git a/src/windows/service/inc/wslc.idl b/src/windows/service/inc/wslc.idl index e048767f9b..f4b13be970 100644 --- a/src/windows/service/inc/wslc.idl +++ b/src/windows/service/inc/wslc.idl @@ -483,11 +483,10 @@ interface IWSLCVirtualMachine : IUnknown // Returns an event that is signaled when the VM exits (graceful or forced). 
HRESULT GetTerminationEvent([out, system_handle(sh_event)] HANDLE* Event); - // Connects to a vsock port in the VM. Returns a socket handle that - // supports overlapped I/O (suitable for use with SocketChannel). - // For HCS VMs, this uses hvsocket. - // For OpenVMM VMs, this uses the hybrid_vsock Unix domain socket bridge - // with a TCP loopback relay to provide overlapped I/O support. + // Connects to a vsock port in the VM. Returns a socket handle. + // For HCS VMs, this uses hvsocket (supports overlapped I/O). + // For OpenVMM VMs, this uses the hybrid_vsock Unix domain socket bridge. + // AF_UNIX sockets do not support overlapped I/O, so callers must use blocking I/O on them. HRESULT ConnectToVsockPort([in] ULONG Port, [out, system_handle(sh_socket)] HANDLE* Socket); // Accepts a crash dump connection from the VM. Blocks until a crash dump diff --git a/src/windows/wslcsession/DockerHTTPClient.h b/src/windows/wslcsession/DockerHTTPClient.h index 572ed8c3dd..b7c4e1fe9e 100644 --- a/src/windows/wslcsession/DockerHTTPClient.h +++ b/src/windows/wslcsession/DockerHTTPClient.h @@ -110,7 +110,7 @@ class DockerHTTPClient HTTPRequestContext(wil::unique_socket&& Socket) : stream(context) { // Detect the socket's address family to create the correct protocol descriptor. - // HCS returns AF_HYPERV sockets; OpenVMM returns AF_INET (TCP loopback relay). + // HCS returns AF_HYPERV sockets; OpenVMM returns AF_UNIX sockets. WSAPROTOCOL_INFOW protocolInfo{}; int infoLen = sizeof(protocolInfo); int family = AF_INET; // Default to TCP/IPv4. 
diff --git a/src/windows/wslcsession/WSLCVirtualMachine.cpp b/src/windows/wslcsession/WSLCVirtualMachine.cpp index aadbe65b39..e3d96527fc 100644 --- a/src/windows/wslcsession/WSLCVirtualMachine.cpp +++ b/src/windows/wslcsession/WSLCVirtualMachine.cpp @@ -1318,7 +1318,29 @@ void WSLCVirtualMachine::CollectCrashDumps() THROW_LAST_ERROR_IF(!file); transaction.SendResultMessage(0); - relay::InterruptableRelay(reinterpret_cast(channel.Socket()), file.get(), nullptr); + + // InterruptableRelay uses overlapped I/O which is not supported on AF_UNIX. + // Use a simple blocking recv-to-write loop for sockets in blocking I/O mode. + if (channel.IsBlockingIO()) + { + constexpr size_t bufferSize = 65536; + std::vector buf(bufferSize); + for (;;) + { + int bytesRead = ::recv(channel.Socket(), buf.data(), static_cast(buf.size()), 0); + if (bytesRead <= 0) + { + break; + } + + DWORD bytesWritten{}; + THROW_IF_WIN32_BOOL_FALSE(WriteFile(file.get(), buf.data(), static_cast(bytesRead), &bytesWritten, nullptr)); + } + } + else + { + relay::InterruptableRelay(reinterpret_cast(channel.Socket()), file.get(), nullptr); + } } CATCH_LOG() } From 6a9a01fb273a66cd8abc4fce4e08e29e763908d4 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Thu, 21 May 2026 14:40:56 -0700 Subject: [PATCH 03/10] logging fix --- .../service/exe/OpenVmmVirtualMachine.cpp | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.cpp b/src/windows/service/exe/OpenVmmVirtualMachine.cpp index b5d322fe0c..7dd5d93289 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.cpp +++ b/src/windows/service/exe/OpenVmmVirtualMachine.cpp @@ -329,6 +329,25 @@ void OpenVmmVirtualMachine::LaunchOpenVmm() SubProcess process(m_openvmmPath.c_str(), cmd.c_str()); + // Set OPENVMM_LOG so the openvmm tracing subscriber emits detailed logs. 
+ // Without this, only INFO-level messages appear (the default), which omits + // most operational output from VM creation and runtime. + // The variable is set in the current process environment and inherited by the + // child; restore it after Start() to avoid polluting the service environment. + wil::unique_hlocal_string previousLog; + DWORD prevLen = GetEnvironmentVariableW(L"OPENVMM_LOG", nullptr, 0); + if (prevLen > 0) + { + previousLog.reset(static_cast(LocalAlloc(LMEM_FIXED, prevLen * sizeof(WCHAR)))); + THROW_IF_NULL_ALLOC(previousLog.get()); + GetEnvironmentVariableW(L"OPENVMM_LOG", previousLog.get(), prevLen); + } + + SetEnvironmentVariableW(L"OPENVMM_LOG", L"info,openvmm=debug"); + auto restoreEnv = wil::scope_exit([&] { + SetEnvironmentVariableW(L"OPENVMM_LOG", previousLog.get()); + }); + // Redirect stdout and stderr to a log file for diagnostics. SECURITY_ATTRIBUTES sa{sizeof(sa), nullptr, TRUE}; auto logPath = m_vsockPath.wstring() + L".log"; @@ -336,7 +355,17 @@ void OpenVmmVirtualMachine::LaunchOpenVmm() logPath.c_str(), GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, &sa, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr)}; - process.SetStdHandles(nullptr, logFile.get(), logFile.get()); + // Duplicate the log file handle for stderr so that stdout and stderr are + // independent. OpenVMM closes stdout after startup (pal::close_stdout), + // and if both handles share the same value that also invalidates stderr, + // silencing all tracing output. + wil::unique_hfile logFileForStderr; + THROW_IF_WIN32_BOOL_FALSE(DuplicateHandle( + GetCurrentProcess(), logFile.get(), + GetCurrentProcess(), logFileForStderr.put(), + 0, TRUE, DUPLICATE_SAME_ACCESS)); + + process.SetStdHandles(nullptr, logFile.get(), logFileForStderr.get()); // Start the process. The returned handle is the process handle. 
m_processHandle = process.Start(); From 0f890429fb20e19ed9fd198103ec21f7d14abcb3 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 22 May 2026 15:45:48 -0700 Subject: [PATCH 04/10] bump nuget package --- packages.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages.config b/packages.config index 2acea0ff50..e28ae3bcac 100644 --- a/packages.config +++ b/packages.config @@ -19,7 +19,7 @@ - + From 74b18c9942e5060f4a89b5e5ad977057f00cb3fe Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 27 May 2026 11:28:49 -0700 Subject: [PATCH 05/10] implement add share and improve local development story --- CMakeLists.txt | 38 +++ UserConfig.cmake.sample | 7 + .../devicehost/proto/VMService.proto | 312 ++++++++++++++++++ .../service/exe/OpenVmmVirtualMachine.cpp | 31 +- src/windows/service/exe/TtrpcClient.cpp | 38 +++ src/windows/service/exe/TtrpcClient.h | 6 + 6 files changed, 426 insertions(+), 6 deletions(-) create mode 100644 local_overrides/devicehost/proto/VMService.proto diff --git a/CMakeLists.txt b/CMakeLists.txt index 05f519d2eb..6153863ff0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,6 +171,7 @@ find_nuget_package(Microsoft.RemoteDesktop.Client.MSRDC.SessionHost MSRDC /build find_nuget_package(Microsoft.Taef TAEF /) find_nuget_package(Microsoft.Windows.ImplementationLibrary WIL /) find_nuget_package(Microsoft.WSL.DeviceHost WSL_DEVICE_HOST /build/native) +set(WSL_DEVICE_HOST_NUGET_DIR "${WSL_DEVICE_HOST_SOURCE_DIR}") find_nuget_package(Microsoft.WSL.Kernel KERNEL /build/native) find_nuget_package(Microsoft.WSL.bsdtar BSDTARD /build/native/bin) find_nuget_package(Microsoft.WSL.LinuxSdk LINUXSDK /) @@ -213,6 +214,43 @@ if (EXISTS "${CMAKE_CURRENT_LIST_DIR}/UserConfig.cmake") find_package(USER REQUIRED PATHS ${CMAKE_CURRENT_LIST_DIR}) endif() +# --- Local OpenVMM binary override --- +# Set OPENVMM_BUILD_DIR to use a locally built openvmm.exe instead of the version +# from the Microsoft.WSL.DeviceHost NuGet package. 
This is useful when iterating + # on changes that span both the OpenVMM and WSL/WSLC projects. +# +# Usage: +# cmake . -DINCLUDE_OPENVMM=ON -DOPENVMM_BUILD_DIR="d:/src/openvmm/target/release" +# +# Or set in UserConfig.cmake: +# set(OPENVMM_BUILD_DIR "d:/src/openvmm/target/release") + +set(OPENVMM_BUILD_DIR "" CACHE PATH "Path to directory containing a locally built openvmm.exe (overrides NuGet package)") + +if (OPENVMM_BUILD_DIR) + if (NOT INCLUDE_OPENVMM) + message(WARNING "OPENVMM_BUILD_DIR is set but INCLUDE_OPENVMM is OFF. " + "Pass -DINCLUDE_OPENVMM=ON to use the local openvmm build.") + elseif (EXISTS "${OPENVMM_BUILD_DIR}/openvmm.exe") + message(STATUS "Using local openvmm build: ${OPENVMM_BUILD_DIR}/openvmm.exe") + file(MAKE_DIRECTORY "${WSL_DEVICE_HOST_SOURCE_DIR}/bin/${TARGET_PLATFORM}") + file(CREATE_LINK "${OPENVMM_BUILD_DIR}/openvmm.exe" + "${WSL_DEVICE_HOST_SOURCE_DIR}/bin/${TARGET_PLATFORM}/openvmm.exe" COPY_ON_ERROR) + + # When WSL_DEVICE_HOST_SOURCE_DIR was overridden (e.g. to local_overrides/), + # the proto file from the NuGet package is no longer reachable. Copy it over. + if (NOT EXISTS "${WSL_DEVICE_HOST_SOURCE_DIR}/proto/VMService.proto" + AND EXISTS "${WSL_DEVICE_HOST_NUGET_DIR}/proto/VMService.proto") + file(MAKE_DIRECTORY "${WSL_DEVICE_HOST_SOURCE_DIR}/proto") + file(CREATE_LINK "${WSL_DEVICE_HOST_NUGET_DIR}/proto/VMService.proto" + "${WSL_DEVICE_HOST_SOURCE_DIR}/proto/VMService.proto" COPY_ON_ERROR) + endif() + else() + message(WARNING "OPENVMM_BUILD_DIR is set but openvmm.exe was not found at '${OPENVMM_BUILD_DIR}'. 
" + "Build it first: cargo build -p openvmm") + endif() +endif() + # Optional target configuration if (NOT DEFINED WSL_BUILD_WSL_SETTINGS) diff --git a/UserConfig.cmake.sample b/UserConfig.cmake.sample index f47ed59e65..6aeb93dc05 100644 --- a/UserConfig.cmake.sample +++ b/UserConfig.cmake.sample @@ -42,6 +42,13 @@ endif() # # Uncomment to generate a "thin" MSI package which builds and installs faster # set(WSL_BUILD_THIN_PACKAGE true) +# # Uncomment to use a locally built openvmm.exe instead of the NuGet package version. +# # Requires INCLUDE_OPENVMM=ON (pass -DINCLUDE_OPENVMM=ON when generating). +# # Point to the cargo output directory containing openvmm.exe: +# # Debug: set(OPENVMM_BUILD_DIR "d:/src/openvmm/target/debug") +# # Release: set(OPENVMM_BUILD_DIR "d:/src/openvmm/target/release") +# set(OPENVMM_BUILD_DIR "d:/src/openvmm/target/release") + # # Uncomment to install the package as part of the build # set(WSL_POST_BUILD_COMMAND "powershell;-ExecutionPolicy;Bypass;-NoProfile;-NonInteractive;./tools/deploy/deploy-to-host.ps1") diff --git a/local_overrides/devicehost/proto/VMService.proto b/local_overrides/devicehost/proto/VMService.proto new file mode 100644 index 0000000000..efbefb085a --- /dev/null +++ b/local_overrides/devicehost/proto/VMService.proto @@ -0,0 +1,312 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +syntax = 'proto3'; + +package vmservice; +option go_package = "vmservice"; + +import "google/protobuf/empty.proto"; +import "google/protobuf/struct.proto"; + +service VM { + // CreateVM will create the virtual machine with the configuration in the + // CreateVMRequest. The virtual machine will be in a paused state power wise + // after CreateVM. ResumeVM can be called to transition the VM into a running state. + rpc CreateVM(CreateVMRequest) returns (google.protobuf.Empty); + + // TeardownVM will release all associated resources from the VM and unblock the WaitVM call. 
+ rpc TeardownVM(google.protobuf.Empty) returns (google.protobuf.Empty); + + // PauseVM will, if the virtual machine power state is in a running state, transition + // the state to paused. This is the same state power wise that the VM should be in after + // an initial CreateVM call. + rpc PauseVM(google.protobuf.Empty) returns (google.protobuf.Empty); + + // ResumeVM is used to transition a vm to a running state. This can be used to resume a VM that + // has had PauseVM called on it, or to start a VM that was created with CreateVM. + rpc ResumeVM(google.protobuf.Empty) returns (google.protobuf.Empty); + + // WaitVM will block until the VM is either in a halted state or has had all of its resources freed + // via TeardownVM. + rpc WaitVM(google.protobuf.Empty) returns (google.protobuf.Empty); + + // CapabilitiesVM will return what capabilities the virtstack supports. This includes + // what guest operating systems are supported, what resources are supported, and if hot + // add/hot remove of a resource is supported. + rpc CapabilitiesVM(google.protobuf.Empty) returns (CapabilitiesVMResponse); + + // PropertiesVM will take in a list of properties that the virtstack will return + // statistics for (memory, processors). + rpc PropertiesVM(PropertiesVMRequest) returns (PropertiesVMResponse); + + // ModifyResource is a generic call to modify (add/remove/update) resources for a VM. + // This includes things such as block devices, network adapters, and pci devices. + rpc ModifyResource(ModifyResourceRequest) returns (google.protobuf.Empty); + + // Quit will shut down the process hosting the ttrpc server. + rpc Quit(google.protobuf.Empty) returns (google.protobuf.Empty); +} + +// +// VM lifecycle request/response +// +message DirectBoot { + string kernel_path = 1; + string initrd_path = 2; + string kernel_cmdline = 3; +} + +message UEFI { + string firmware_path = 1; + string device_path = 2; + // Optional data to include for uefi boot. 
For Linux this could be used as the kernel + // commandline. + string optional_data = 3; +} + +message MemoryConfig { + uint64 memory_mb = 1; + bool allow_overcommit = 2; + bool deferred_commit = 3; + bool hot_hint = 4; + bool cold_hint = 5; + bool cold_discard_hint = 6; + uint64 low_mmio_gap_in_mb = 7; + uint64 high_mmio_base_in_mb = 8; + uint64 high_mmio_gap_in_mb = 9; +} + +message ProcessorConfig { + uint32 processor_count = 1; + uint32 processor_weight = 2; + uint32 processor_limit = 3; +} + +message DevicesConfig { + repeated SCSIDisk scsi_disks = 1; + repeated VPMEMDisk vpmem_disks = 2; + repeated NICConfig nic_config = 3; + // When we know what information we need to assign a pci device on Linux, + // have a oneof here named PCIDevice with WindowsPCIDevice and LinuxPCIDevice + // housed. + repeated WindowsPCIDevice windows_device = 4; + repeated VirtioFSConfig virtiofs_config = 5; + VirtioConsoleConfig virtio_console = 6; +} + +message VirtioConsoleConfig { + // Path to a named pipe or Unix domain socket for the console backend. + string socket_path = 1; + // When true, connect to an existing pipe/socket at socket_path as a client + // instead of creating a new server listener. Used when the host has already + // created the socket/pipe. + bool connect = 2; +} + +message VMConfig { + MemoryConfig memory_config = 1; + ProcessorConfig processor_config = 2; + DevicesConfig devices_config = 3; + SerialConfig serial_config = 4; + oneof BootConfig { + DirectBoot direct_boot = 5; + UEFI uefi = 6; + } + WindowsOptions windows_options = 7; + // Optional k:v extra data. Up to the virtstack for how to interpret this. + map extra_data = 8; + HVSocketConfig hvsocket_config = 9; +} + +// WindowsOptions contains virtual machine configurations that are only present on a Windows host. +message WindowsOptions { + uint64 cpu_group_id = 1; +} + +message SerialConfig { + message Config { + uint32 port = 1; + // Uds to relay serial console output to. 
+ string socket_path = 2; + // When true, connect to an existing pipe/socket as a client instead of + // creating a new server listener. + bool connect = 3; + } + repeated Config ports = 3; +} + +message HVSocketConfig { + string path = 1; +} + +message CreateVMRequest { + VMConfig config = 1; + // Optional ID to be used by the VM service in log messages. It's up to the + // server/virtstack to make use of this field. Useful for debugging to be able to + // correlate events in the virtstack for a given vm that the client launched. + string log_id = 2; +} + +message MemoryStats { + uint64 working_set_bytes = 1; + uint64 available_memory = 2; + uint64 reserved_memory = 3; + uint64 assigned_memory = 4; +} + +message ProcessorStats { + uint64 total_runtime_ns = 1; +} + +message PropertiesVMRequest { + enum PropertiesType { + Memory = 0; + Processor = 1; + } + repeated PropertiesType types = 1; +} + +message PropertiesVMResponse { + MemoryStats memory_stats = 1; + ProcessorStats processor_stats = 2; +} + +message CapabilitiesVMResponse { + enum Resource { + Vpmem = 0; + Scsi = 1; + Vpci = 2; + Plan9 = 3; + VMNic = 4; + Memory = 5; + Processor = 6; + } + + message SupportedResource { + bool Add = 1; + bool Remove = 2; + bool Update = 3; + Resource resource = 4; + } + + enum SupportedGuestOS { + Windows = 0; + Linux = 1; + } + repeated SupportedResource supported_resources = 1; + repeated SupportedGuestOS supported_guest_os = 2; +} + +// +// Modify existing VM request/response +// +enum ModifyType { + ADD = 0; + REMOVE = 1; + UPDATE = 2; +} + +enum DiskType { + SCSI_DISK_TYPE_VHD1 = 0; + SCSI_DISK_TYPE_VHDX = 1; + SCSI_DISK_TYPE_PHYSICAL = 2; +} + +message SCSIDisk { + uint32 controller = 1; + uint32 lun = 2; + string host_path = 3; + DiskType type = 4; + bool read_only = 5; +} + +message VPMEMDisk { + string host_path = 1; + DiskType type = 2; + bool read_only = 3; +} + +message NICConfig { + string nic_id = 1; // GUID + string mac_address = 3; // 12-34-56-78-9A-BC + 
string legacy_switch_id = 4; // GUID, used only with legacy_port_id below + // Optional friendly name for the adapter. Might be useful to show up in logs. + string nic_name = 5; + oneof backend { + string legacy_port_id = 2; // legacy, GUID, Windows only + DioBackend dio = 6; + TapBackend tap = 7; + ConsommeBackend consomme = 8; + } +} + +message DioBackend { + string switch_id = 1; // GUID + string port_id = 2; // GUID +} + +message TapBackend { + string name = 1; +} + +enum IpProtocol { + TCP = 0; + UDP = 1; +} + +message PortConfig { + // The host port to listen on. + uint32 host_port = 1; + // The guest port to forward to. + uint32 guest_port = 2; + // The protocol to forward. + IpProtocol protocol = 3; +} + +message ConsommeBackend { + // Optional CIDR for the guest network (e.g. "10.0.0.0/24"). + // If empty, a default is used. + string cidr = 1; + // Ports to forward from the host into the guest. + // Used during initial creation and for dynamic add/remove via ModifyResource. + repeated PortConfig ports = 2; +} + +message WindowsPCIDevice { + // e.g. 
PCIP\\VEN_10DE&DEV_13F2&SUBSYS_115E10DE&REV_A1\\6&17F903&0&00400000 + string instance_id = 1; +} + +message VirtioFSConfig { + string tag = 1; + string root_path = 2; +} + +message ModifyMemoryRequest { + uint64 memory_mb = 1; +} + +message ModifyProcessorRequest { + // Index of the processor to add/remove + uint32 processor_index = 1; +} + +message ModifyProcessorConfigRequest { + uint32 processor_weight = 1; + uint32 processor_limit = 2; +} + +message ModifyResourceRequest { + ModifyType type = 1; + oneof resource { + ModifyProcessorRequest processor = 2; + ModifyProcessorConfigRequest processor_config = 3; + ModifyMemoryRequest memory = 4; + SCSIDisk scsi_disk = 5; + VPMEMDisk vpmem_disk = 6; + NICConfig nic_config = 7; + WindowsPCIDevice windows_device = 8; + VirtioFSConfig virtiofs = 9; + } +} diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.cpp b/src/windows/service/exe/OpenVmmVirtualMachine.cpp index 7dd5d93289..7cb2b8dbba 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.cpp +++ b/src/windows/service/exe/OpenVmmVirtualMachine.cpp @@ -586,16 +586,26 @@ try std::lock_guard lock(m_lock); - // TODO: Requires vmservice.proto extension for Plan9/VirtioFS in ModifyResourceRequest. 
+ THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), + "ttrpc client not connected for share add"); + + GUID shareIdLocal; + THROW_IF_FAILED(CoCreateGuid(&shareIdLocal)); + auto shareTag = wsl::shared::string::GuidToString(shareIdLocal, wsl::shared::string::None); + auto hostPath = wsl::shared::string::WideToMultiByte(WindowsPath); WSL_LOG( "OpenVmmAddShare", TraceLoggingValue(m_vmIdString.c_str(), "VmId"), TraceLoggingValue(WindowsPath, "WindowsPath"), TraceLoggingValue(ReadOnly, "ReadOnly"), - TraceLoggingValue("NOT_IMPLEMENTED", "Status")); + TraceLoggingValue(shareTag.c_str(), "Tag")); + + THROW_IF_FAILED(m_ttrpcClient->AddShare(shareTag, hostPath)); - return E_NOTIMPL; + m_shares.emplace(shareIdLocal, WindowsPath); + *ShareId = shareIdLocal; + return S_OK; } CATCH_RETURN() @@ -604,14 +614,23 @@ try { std::lock_guard lock(m_lock); - // TODO: Requires vmservice.proto extension. See AddShare. + auto it = m_shares.find(ShareId); + RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_NOT_FOUND), it == m_shares.end()); + + THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), + "ttrpc client not connected for share remove"); + + auto shareTag = wsl::shared::string::GuidToString(it->first, wsl::shared::string::None); WSL_LOG( "OpenVmmRemoveShare", TraceLoggingValue(m_vmIdString.c_str(), "VmId"), - TraceLoggingValue("NOT_IMPLEMENTED", "Status")); + TraceLoggingValue(shareTag.c_str(), "Tag")); - return E_NOTIMPL; + THROW_IF_FAILED(m_ttrpcClient->RemoveShare(shareTag)); + + m_shares.erase(it); + return S_OK; } CATCH_RETURN() diff --git a/src/windows/service/exe/TtrpcClient.cpp b/src/windows/service/exe/TtrpcClient.cpp index 044366e9cb..f6f0ca2cb9 100644 --- a/src/windows/service/exe/TtrpcClient.cpp +++ b/src/windows/service/exe/TtrpcClient.cpp @@ -239,6 +239,44 @@ try } CATCH_RETURN() +HRESULT TtrpcClient::AddShare(const std::string& tag, const std::string& rootPath) +try +{ + WSL_LOG( + "TtrpcAddShare", + TraceLoggingValue(tag.c_str(), "Tag"), 
+ TraceLoggingValue(rootPath.c_str(), "RootPath")); + + vmservice::ModifyResourceRequest request; + request.set_type(vmservice::ADD); + + auto* virtiofs = request.mutable_virtiofs(); + virtiofs->set_tag(tag); + virtiofs->set_root_path(rootPath); + + google::protobuf::Empty response; + return Call(c_serviceName, c_modifyResourceMethod, request, &response); +} +CATCH_RETURN() + +HRESULT TtrpcClient::RemoveShare(const std::string& tag) +try +{ + WSL_LOG( + "TtrpcRemoveShare", + TraceLoggingValue(tag.c_str(), "Tag")); + + vmservice::ModifyResourceRequest request; + request.set_type(vmservice::REMOVE); + + auto* virtiofs = request.mutable_virtiofs(); + virtiofs->set_tag(tag); + + google::protobuf::Empty response; + return Call(c_serviceName, c_modifyResourceMethod, request, &response); +} +CATCH_RETURN() + HRESULT TtrpcClient::CreateVm(const VmConfig& config) try { diff --git a/src/windows/service/exe/TtrpcClient.h b/src/windows/service/exe/TtrpcClient.h index 4629f921c8..790bc34019 100644 --- a/src/windows/service/exe/TtrpcClient.h +++ b/src/windows/service/exe/TtrpcClient.h @@ -70,6 +70,12 @@ class TtrpcClient // SCSI disk hot-remove: ModifyResource(REMOVE, SCSIDisk { controller, lun }). HRESULT DetachScsiDisk(uint32_t controller, uint32_t lun); + // VirtioFS share hot-add: ModifyResource(ADD, VirtioFSConfig { tag, root_path }). + HRESULT AddShare(const std::string& tag, const std::string& rootPath); + + // VirtioFS share hot-remove: ModifyResource(REMOVE, VirtioFSConfig { tag }). + HRESULT RemoveShare(const std::string& tag); + // VM configuration for CreateVm. 
struct VmConfig { From 73eab49c377f44a0b662b441f8dba67624ab88cd Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Thu, 28 May 2026 15:27:36 -0700 Subject: [PATCH 06/10] test fixes --- .../devicehost/proto/VMService.proto | 4 + src/windows/service/exe/HcsVirtualMachine.cpp | 18 ++- src/windows/service/exe/HcsVirtualMachine.h | 2 + .../service/exe/OpenVmmVirtualMachine.cpp | 134 ++++++++++++++---- .../service/exe/OpenVmmVirtualMachine.h | 12 ++ src/windows/service/exe/TtrpcClient.cpp | 90 +++++++++++- src/windows/service/exe/TtrpcClient.h | 17 ++- .../service/exe/WSLCSessionManager.cpp | 2 +- src/windows/service/inc/wslc.idl | 8 ++ src/windows/wslcsession/WSLCSession.cpp | 21 ++- .../wslcsession/WSLCVirtualMachine.cpp | 61 +++++++- src/windows/wslcsession/WSLCVirtualMachine.h | 1 + test/windows/Common.cpp | 7 +- 13 files changed, 334 insertions(+), 43 deletions(-) diff --git a/local_overrides/devicehost/proto/VMService.proto b/local_overrides/devicehost/proto/VMService.proto index efbefb085a..3b5560571a 100644 --- a/local_overrides/devicehost/proto/VMService.proto +++ b/local_overrides/devicehost/proto/VMService.proto @@ -262,6 +262,9 @@ message PortConfig { uint32 guest_port = 2; // The protocol to forward. IpProtocol protocol = 3; + // The host address to bind to (e.g. "127.0.0.1" or "::1"). + // If empty, defaults to 0.0.0.0. 
+ string host_address = 4; } message ConsommeBackend { @@ -281,6 +284,7 @@ message WindowsPCIDevice { message VirtioFSConfig { string tag = 1; string root_path = 2; + bool read_only = 3; } message ModifyMemoryRequest { diff --git a/src/windows/service/exe/HcsVirtualMachine.cpp b/src/windows/service/exe/HcsVirtualMachine.cpp index 36ba3a4452..1fb070e3d8 100644 --- a/src/windows/service/exe/HcsVirtualMachine.cpp +++ b/src/windows/service/exe/HcsVirtualMachine.cpp @@ -814,4 +814,20 @@ try *Socket = reinterpret_cast(socket->release()); return S_OK; } -CATCH_RETURN() \ No newline at end of file +CATCH_RETURN() + +HRESULT HcsVirtualMachine::MapPort(_In_ int Family, _In_ unsigned short HostPort, _In_ unsigned short GuestPort) +{ + UNREFERENCED_PARAMETER(Family); + UNREFERENCED_PARAMETER(HostPort); + UNREFERENCED_PARAMETER(GuestPort); + return E_NOTIMPL; +} + +HRESULT HcsVirtualMachine::UnmapPort(_In_ int Family, _In_ unsigned short HostPort, _In_ unsigned short GuestPort) +{ + UNREFERENCED_PARAMETER(Family); + UNREFERENCED_PARAMETER(HostPort); + UNREFERENCED_PARAMETER(GuestPort); + return E_NOTIMPL; +} \ No newline at end of file diff --git a/src/windows/service/exe/HcsVirtualMachine.h b/src/windows/service/exe/HcsVirtualMachine.h index f6d085044d..222b8a1042 100644 --- a/src/windows/service/exe/HcsVirtualMachine.h +++ b/src/windows/service/exe/HcsVirtualMachine.h @@ -47,6 +47,8 @@ class HcsVirtualMachine IFACEMETHOD(GetTerminationEvent)(_Out_ HANDLE* Event) override; IFACEMETHOD(ConnectToVsockPort)(_In_ ULONG Port, _Out_ HANDLE* Socket) override; IFACEMETHOD(AcceptCrashDumpConnection)(_Out_ HANDLE* Socket) override; + IFACEMETHOD(MapPort)(_In_ int Family, _In_ unsigned short HostPort, _In_ unsigned short GuestPort) override; + IFACEMETHOD(UnmapPort)(_In_ int Family, _In_ unsigned short HostPort, _In_ unsigned short GuestPort) override; private: struct DiskInfo diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.cpp 
b/src/windows/service/exe/OpenVmmVirtualMachine.cpp index 7cb2b8dbba..3fc22c1fc3 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.cpp +++ b/src/windows/service/exe/OpenVmmVirtualMachine.cpp @@ -46,13 +46,18 @@ OpenVmmVirtualMachine::OpenVmmVirtualMachine(_In_ const WSLCSessionSettings* Set // Disable features not yet supported by the OpenVMM backend. WI_ClearFlag(m_featureFlags, WslcFeatureFlagsGPU); - WI_ClearFlag(m_featureFlags, WslcFeatureFlagsVirtioFs); m_networkingMode = Settings->NetworkingMode; m_bootTimeoutMs = Settings->BootTimeoutMs; m_cpuCount = Settings->CpuCount; m_memoryMb = Settings->MemoryMb; + // Configure termination callback + if (Settings->TerminationCallback) + { + m_terminationCallback = Settings->TerminationCallback; + } + // Resolve paths for kernel, initrd, and root VHD. auto basePath = wslutil::GetBasePath(); @@ -134,12 +139,14 @@ OpenVmmVirtualMachine::OpenVmmVirtualMachine(_In_ const WSLCSessionSettings* Set dmesgOutputHandle.reset(wslutil::DuplicateHandle(wslutil::FromCOMInputHandle(Settings->DmesgOutput), GENERIC_WRITE | SYNCHRONIZE)); } - // REVIEW: Can we always enable earlycon? m_dmesgCollector = DmesgCollector::Create( - m_vmId, m_vmExitEvent, true, false, L"", true /* earlycon */, std::move(dmesgOutputHandle)); + m_vmId, m_vmExitEvent, true, false, L"", FeatureEnabled(WslcFeatureFlagsEarlyBootDmesg), std::move(dmesgOutputHandle)); - // Earlycon captures kernel output via COM1 before the hvc0 driver loads. - m_kernelCmdLine += L" earlycon=uart8250,io,0x3f8,115200"; + if (FeatureEnabled(WslcFeatureFlagsEarlyBootDmesg)) + { + // Earlycon captures kernel output via COM1 before the hvc0 driver loads. + m_kernelCmdLine += L" earlycon=uart8250,io,0x3f8,115200"; + } m_kernelCmdLine += L" console=hvc0 debug"; @@ -310,10 +317,13 @@ TtrpcClient::VmConfig OpenVmmVirtualMachine::BuildVmConfig() const } // COM1 (port 0) — earlycon output before hvc0 loads. 
- config.SerialPorts.push_back({ - .Port = 0, - .SocketPath = wsl::shared::string::WideToMultiByte(m_dmesgCollector->EarlyConsoleName()), - }); + if (FeatureEnabled(WslcFeatureFlagsEarlyBootDmesg)) + { + config.SerialPorts.push_back({ + .Port = 0, + .SocketPath = wsl::shared::string::WideToMultiByte(m_dmesgCollector->EarlyConsoleName()), + }); + } // Virtio console (/dev/hvc0) — primary console after boot. config.VirtioConsolePath = wsl::shared::string::WideToMultiByte(m_dmesgCollector->VirtioConsoleName()); @@ -415,6 +425,13 @@ void OpenVmmVirtualMachine::WatchProcessExit() TraceLoggingValue(m_vmIdString.c_str(), "VmId")); m_vmExitEvent.SetEvent(); + + if (m_terminationCallback) + { + auto reason = (exitCode == 0) ? WSLCVirtualMachineTerminationReasonShutdown : WSLCVirtualMachineTerminationReasonCrashed; + auto details = std::format(L"openvmm process exited with code {}", exitCode); + LOG_IF_FAILED(m_terminationCallback->OnTermination(reason, details.c_str())); + } } OpenVmmVirtualMachine::~OpenVmmVirtualMachine() @@ -424,15 +441,12 @@ OpenVmmVirtualMachine::~OpenVmmVirtualMachine() // Signal termination to any pending operations. m_vmExitEvent.SetEvent(); - // TeardownVM releases all VM resources and unblocks WaitVM. if (m_ttrpcClient) { - LOG_IF_FAILED(m_ttrpcClient->TeardownVm()); - m_ttrpcClient->Disconnect(); - m_ttrpcClient.reset(); + LOG_IF_FAILED(m_ttrpcClient->QuitVm()); } - // Wait up to 5 seconds for graceful exit, then force-terminate. + // Wait for graceful exit, then force-terminate. if (m_processHandle) { if (WaitForSingleObject(m_processHandle.get(), c_processTerminationTimeoutMs) == WAIT_TIMEOUT) @@ -442,6 +456,9 @@ OpenVmmVirtualMachine::~OpenVmmVirtualMachine() } } + // Clean up the ttrpc client now that the process has exited. 
+ m_ttrpcClient.reset(); + if (m_processWatchThread.joinable()) { m_processWatchThread.join(); @@ -461,18 +478,11 @@ OpenVmmVirtualMachine::~OpenVmmVirtualMachine() } DeleteFileW(m_crashDumpListenPath.c_str()); - try - { - if (std::filesystem::exists(m_vsockPath)) - { - std::filesystem::remove(m_vsockPath); - } - if (std::filesystem::exists(m_ttrpcSocketPath)) - { - std::filesystem::remove(m_ttrpcSocketPath); - } - } - CATCH_LOG() + // Best-effort cleanup of socket files. Use DeleteFileW instead of + // std::filesystem to avoid exceptions — the files may still be held + // briefly by the OS after force-terminating the openvmm process. + DeleteFileW(m_vsockPath.c_str()); + DeleteFileW(m_ttrpcSocketPath.c_str()); } bool OpenVmmVirtualMachine::FeatureEnabled(WSLCFeatureFlags Value) const @@ -535,6 +545,8 @@ HRESULT OpenVmmVirtualMachine::AttachDisk(_In_ LPCWSTR Path, _In_ BOOL ReadOnly, try { RETURN_HR_IF(E_POINTER, Path == nullptr || Lun == nullptr); + THROW_HR_IF_MSG( + HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND), !std::filesystem::exists(Path), "Disk path does not exist: '%ls'", Path); std::lock_guard lock(m_lock); @@ -583,6 +595,8 @@ HRESULT OpenVmmVirtualMachine::AddShare(_In_ LPCWSTR WindowsPath, _In_ BOOL Read try { RETURN_HR_IF(E_POINTER, WindowsPath == nullptr || ShareId == nullptr); + THROW_HR_IF_MSG( + HRESULT_FROM_WIN32(ERROR_PATH_NOT_FOUND), !std::filesystem::is_directory(WindowsPath), "Path is not a directory: '%ls'", WindowsPath); std::lock_guard lock(m_lock); @@ -601,7 +615,7 @@ try TraceLoggingValue(ReadOnly, "ReadOnly"), TraceLoggingValue(shareTag.c_str(), "Tag")); - THROW_IF_FAILED(m_ttrpcClient->AddShare(shareTag, hostPath)); + THROW_IF_FAILED(m_ttrpcClient->AddShare(shareTag, hostPath, ReadOnly)); m_shares.emplace(shareIdLocal, WindowsPath); *ShareId = shareIdLocal; @@ -726,6 +740,72 @@ try } CATCH_RETURN() +HRESULT OpenVmmVirtualMachine::MapPort(_In_ int Family, _In_ unsigned short HostPort, _In_ unsigned short GuestPort) +try +{ + std::lock_guard 
lock(m_lock); + + auto key = std::make_tuple(Family, HostPort, GuestPort); + if (m_boundPorts.contains(key)) + { + return HRESULT_FROM_WIN32(ERROR_ALREADY_EXISTS); + } + + // Mirror the wslrelay localhost relay limit (see localhost.cpp): the relay's + // AcceptThread uses WaitForMultipleObjects, which supports at most + // MAXIMUM_WAIT_OBJECTS (64) handles, with one reserved for the exit event. + // Reject the mapping if adding it would exceed the limit. + constexpr size_t c_maxPorts = MAXIMUM_WAIT_OBJECTS - 1; + if (m_boundPorts.size() >= c_maxPorts) + { + return HRESULT_FROM_WIN32(ERROR_TOO_MANY_OPEN_FILES); + } + + THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), + "ttrpc client not connected for port bind"); + + WSL_LOG( + "OpenVmmMapPort", + TraceLoggingValue(m_vmIdString.c_str(), "VmId"), + TraceLoggingValue(HostPort, "HostPort"), + TraceLoggingValue(GuestPort, "GuestPort"), + TraceLoggingValue(Family, "Family")); + + THROW_IF_FAILED(m_ttrpcClient->BindPort(HostPort, GuestPort, true, Family)); + + m_boundPorts.insert(key); + return S_OK; +} +CATCH_RETURN() + +HRESULT OpenVmmVirtualMachine::UnmapPort(_In_ int Family, _In_ unsigned short HostPort, _In_ unsigned short GuestPort) +try +{ + std::lock_guard lock(m_lock); + + auto key = std::make_tuple(Family, HostPort, GuestPort); + if (!m_boundPorts.contains(key)) + { + return HRESULT_FROM_WIN32(ERROR_NOT_FOUND); + } + + THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), + "ttrpc client not connected for port unbind"); + + WSL_LOG( + "OpenVmmUnmapPort", + TraceLoggingValue(m_vmIdString.c_str(), "VmId"), + TraceLoggingValue(HostPort, "HostPort"), + TraceLoggingValue(GuestPort, "GuestPort"), + TraceLoggingValue(Family, "Family")); + + THROW_IF_FAILED(m_ttrpcClient->UnbindPort(HostPort, GuestPort, true, Family)); + + m_boundPorts.erase(key); + return S_OK; +} +CATCH_RETURN() + ULONG OpenVmmVirtualMachine::AllocateLun() { for (ULONG index = 0; index < 
gsl::narrow_cast(m_lunBitmap.size()); index += 1) diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.h b/src/windows/service/exe/OpenVmmVirtualMachine.h index 7796fad37b..182b1ff5e7 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.h +++ b/src/windows/service/exe/OpenVmmVirtualMachine.h @@ -24,6 +24,8 @@ Module Name: #include "Dmesg.h" #include #include +#include +#include #include #define MAX_VHD_COUNT 254 @@ -48,6 +50,8 @@ class OpenVmmVirtualMachine IFACEMETHOD(GetTerminationEvent)(_Out_ HANDLE* Event) override; IFACEMETHOD(ConnectToVsockPort)(_In_ ULONG Port, _Out_ HANDLE* Socket) override; IFACEMETHOD(AcceptCrashDumpConnection)(_Out_ HANDLE* Socket) override; + IFACEMETHOD(MapPort)(_In_ int Family, _In_ unsigned short HostPort, _In_ unsigned short GuestPort) override; + IFACEMETHOD(UnmapPort)(_In_ int Family, _In_ unsigned short HostPort, _In_ unsigned short GuestPort) override; private: struct DiskInfo @@ -143,6 +147,11 @@ class OpenVmmVirtualMachine // Shares: key is ShareId, value is Windows path. std::map m_shares; + // Bound ports: tracks (Family, HostPort, GuestPort) tuples. + // Same-family duplicates return ERROR_ALREADY_EXISTS (matching wslrelay behavior). + // Cross-family calls return S_OK since the dual-stack socket covers both. + std::set> m_boundPorts; + // Networking engine (ConsommeNetworking for the OpenVMM backend). std::unique_ptr m_networkEngine; @@ -150,6 +159,9 @@ class OpenVmmVirtualMachine std::filesystem::path m_ttrpcSocketPath; std::unique_ptr m_ttrpcClient; + // Termination callback to invoke when the VM exits. + wil::com_ptr m_terminationCallback; + // Dmesg collector for early boot and virtio serial console output. 
std::shared_ptr m_dmesgCollector; }; diff --git a/src/windows/service/exe/TtrpcClient.cpp b/src/windows/service/exe/TtrpcClient.cpp index f6f0ca2cb9..a7ff6ba599 100644 --- a/src/windows/service/exe/TtrpcClient.cpp +++ b/src/windows/service/exe/TtrpcClient.cpp @@ -239,13 +239,14 @@ try } CATCH_RETURN() -HRESULT TtrpcClient::AddShare(const std::string& tag, const std::string& rootPath) +HRESULT TtrpcClient::AddShare(const std::string& tag, const std::string& rootPath, bool readOnly) try { WSL_LOG( "TtrpcAddShare", TraceLoggingValue(tag.c_str(), "Tag"), - TraceLoggingValue(rootPath.c_str(), "RootPath")); + TraceLoggingValue(rootPath.c_str(), "RootPath"), + TraceLoggingValue(readOnly, "ReadOnly")); vmservice::ModifyResourceRequest request; request.set_type(vmservice::ADD); @@ -253,6 +254,7 @@ try auto* virtiofs = request.mutable_virtiofs(); virtiofs->set_tag(tag); virtiofs->set_root_path(rootPath); + virtiofs->set_read_only(readOnly); google::protobuf::Empty response; return Call(c_serviceName, c_modifyResourceMethod, request, &response); @@ -277,6 +279,62 @@ try } CATCH_RETURN() +HRESULT TtrpcClient::BindPort(uint16_t hostPort, uint16_t guestPort, bool tcp, int family) +try +{ + auto hostAddress = (family == AF_INET6) ? "::1" : "127.0.0.1"; + + WSL_LOG( + "TtrpcBindPort", + TraceLoggingValue(hostPort, "HostPort"), + TraceLoggingValue(guestPort, "GuestPort"), + TraceLoggingValue(tcp, "Tcp"), + TraceLoggingValue(hostAddress, "HostAddress")); + + vmservice::ModifyResourceRequest request; + request.set_type(vmservice::UPDATE); + + auto* nic = request.mutable_nic_config(); + auto* consomme = nic->mutable_consomme(); + auto* port = consomme->add_ports(); + port->set_host_port(hostPort); + port->set_guest_port(guestPort); + port->set_protocol(tcp ? 
vmservice::TCP : vmservice::UDP); + port->set_host_address(hostAddress); + + google::protobuf::Empty response; + return Call(c_serviceName, c_modifyResourceMethod, request, &response); +} +CATCH_RETURN() + +HRESULT TtrpcClient::UnbindPort(uint16_t hostPort, uint16_t guestPort, bool tcp, int family) +try +{ + auto hostAddress = (family == AF_INET6) ? "::1" : "127.0.0.1"; + + WSL_LOG( + "TtrpcUnbindPort", + TraceLoggingValue(hostPort, "HostPort"), + TraceLoggingValue(guestPort, "GuestPort"), + TraceLoggingValue(tcp, "Tcp"), + TraceLoggingValue(hostAddress, "HostAddress")); + + vmservice::ModifyResourceRequest request; + request.set_type(vmservice::REMOVE); + + auto* nic = request.mutable_nic_config(); + auto* consomme = nic->mutable_consomme(); + auto* port = consomme->add_ports(); + port->set_host_port(hostPort); + port->set_guest_port(guestPort); + port->set_protocol(tcp ? vmservice::TCP : vmservice::UDP); + port->set_host_address(hostAddress); + + google::protobuf::Empty response; + return Call(c_serviceName, c_modifyResourceMethod, request, &response); +} +CATCH_RETURN() + HRESULT TtrpcClient::CreateVm(const VmConfig& config) try { @@ -373,6 +431,34 @@ try } CATCH_RETURN() +HRESULT TtrpcClient::QuitVm() +try +{ + WSL_LOG("TtrpcQuitVm"); + + google::protobuf::Empty request; + std::vector requestPayload; + RETURN_IF_FAILED(SerializeMessage(request, requestPayload)); + + std::lock_guard lock(m_lock); + RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_NOT_CONNECTED), m_socket == INVALID_SOCKET); + + auto ttrpcPayload = TtrpcEnvelopeCodec::EncodeRequestEnvelope(c_serviceName, c_quitVmMethod, requestPayload); + + detail::TtrpcMessageHeader header{}; + TtrpcEnvelopeCodec::WriteBigEndian32(header.Length, static_cast(ttrpcPayload.size())); + TtrpcEnvelopeCodec::WriteBigEndian32(header.StreamId, m_nextStreamId); + header.MessageType = TtrpcEnvelopeCodec::c_messageTypeRequest; + header.Flags = 0; + m_nextStreamId += 2; + + RETURN_IF_FAILED(SendAll(&header, sizeof(header))); + 
RETURN_IF_FAILED(SendAll(ttrpcPayload.data(), ttrpcPayload.size())); + + return S_OK; +} +CATCH_RETURN() + HRESULT TtrpcClient::SendRequest( const std::string& service, const std::string& method, diff --git a/src/windows/service/exe/TtrpcClient.h b/src/windows/service/exe/TtrpcClient.h index 790bc34019..62e0e7c623 100644 --- a/src/windows/service/exe/TtrpcClient.h +++ b/src/windows/service/exe/TtrpcClient.h @@ -70,12 +70,20 @@ class TtrpcClient // SCSI disk hot-remove: ModifyResource(REMOVE, SCSIDisk { controller, lun }). HRESULT DetachScsiDisk(uint32_t controller, uint32_t lun); - // VirtioFS share hot-add: ModifyResource(ADD, VirtioFSConfig { tag, root_path }). - HRESULT AddShare(const std::string& tag, const std::string& rootPath); + // VirtioFS share hot-add: ModifyResource(ADD, VirtioFSConfig { tag, root_path, read_only }). + HRESULT AddShare(const std::string& tag, const std::string& rootPath, bool readOnly); // VirtioFS share hot-remove: ModifyResource(REMOVE, VirtioFSConfig { tag }). HRESULT RemoveShare(const std::string& tag); + // Consomme port bind: ModifyResource(UPDATE, NicConfig { consomme { ports } }). + // Creates a host-side listener in OpenVMM's consomme NAT and forwards to the guest port. + // family is AF_INET or AF_INET6. + HRESULT BindPort(uint16_t hostPort, uint16_t guestPort, bool tcp, int family); + + // Consomme port unbind: ModifyResource(REMOVE, NicConfig { consomme { ports } }). + HRESULT UnbindPort(uint16_t hostPort, uint16_t guestPort, bool tcp, int family); + // VM configuration for CreateVm. struct VmConfig { @@ -128,6 +136,10 @@ class TtrpcClient // TeardownVM: release all VM resources and unblock the WaitVM call. HRESULT TeardownVm(); + // Quit: tear down the VM and exit the openvmm process. + // Fire-and-forget — sends the request without waiting for a response. + HRESULT QuitVm(); + private: // ttrpc service and method names (from vmservice.proto). 
static constexpr char c_serviceName[] = "vmservice.VM"; @@ -135,6 +147,7 @@ class TtrpcClient static constexpr char c_resumeVmMethod[] = "ResumeVM"; static constexpr char c_waitVmMethod[] = "WaitVM"; static constexpr char c_teardownVmMethod[] = "TeardownVM"; + static constexpr char c_quitVmMethod[] = "Quit"; static constexpr char c_modifyResourceMethod[] = "ModifyResource"; // Send a ttrpc request payload and wait for the response payload. diff --git a/src/windows/service/exe/WSLCSessionManager.cpp b/src/windows/service/exe/WSLCSessionManager.cpp index 4dbc711fce..0fd39e1a5b 100644 --- a/src/windows/service/exe/WSLCSessionManager.cpp +++ b/src/windows/service/exe/WSLCSessionManager.cpp @@ -281,7 +281,7 @@ void WSLCSessionManagerImpl::CreateSession(const WSLCSessionSettings* Settings, { openVmmSettings = *Settings; WI_ClearFlag(openVmmSettings.FeatureFlags, WslcFeatureFlagsGPU); - WI_ClearFlag(openVmmSettings.FeatureFlags, WslcFeatureFlagsVirtioFs); + WI_SetFlag(openVmmSettings.FeatureFlags, WslcFeatureFlagsVirtioFs); WI_ClearFlag(openVmmSettings.FeatureFlags, WslcFeatureFlagsDnsTunneling); // OpenVMM provides networking via its built-in consomme backend. diff --git a/src/windows/service/inc/wslc.idl b/src/windows/service/inc/wslc.idl index f4b13be970..bc48a1146c 100644 --- a/src/windows/service/inc/wslc.idl +++ b/src/windows/service/inc/wslc.idl @@ -495,6 +495,14 @@ interface IWSLCVirtualMachine : IUnknown // For HCS VMs, this uses an HV socket listener on the crash dump port. // For OpenVMM VMs, this uses the hybrid_vsock Unix domain socket bridge. HRESULT AcceptCrashDumpConnection([out, system_handle(sh_socket)] HANDLE* Socket); + + // Maps a host port to a guest port via the VMM's built-in port forwarding. + // For OpenVMM consomme VMs, this creates a host-side listener in the VMM. + // For HCS VMs, returns E_NOTIMPL (port relay is used instead). 
+ HRESULT MapPort([in] int Family, [in] unsigned short HostPort, [in] unsigned short GuestPort); + + // Unmaps a previously mapped host port. + HRESULT UnmapPort([in] int Family, [in] unsigned short HostPort, [in] unsigned short GuestPort); } typedef enum _WSLCSessionStorageFlags diff --git a/src/windows/wslcsession/WSLCSession.cpp b/src/windows/wslcsession/WSLCSession.cpp index 97de532e6b..a1684e9dc4 100644 --- a/src/windows/wslcsession/WSLCSession.cpp +++ b/src/windows/wslcsession/WSLCSession.cpp @@ -2549,7 +2549,6 @@ try std::lock_guard networksLock(m_networksLock); m_containers.clear(); - m_volumes.reset(); m_networks.clear(); // Stop the IO relay. @@ -2565,9 +2564,6 @@ try m_allocatedPorts.clear(); } - m_eventTracker.reset(); - m_dockerClient.reset(); - // Check if the VM has already exited (e.g., killed externally). // If so, skip operations that require a live VM to avoid unnecessary waits. // N.B. m_vmExitedEvent may be uninitialized if Terminate() is called from the @@ -2596,6 +2592,13 @@ try WSL_LOG("ContainerdExit", TraceLoggingValue(containerdExitCode, "code")); } + // Detach VHD volumes while the VM is still alive and the init channel + // is usable (OnSessionTerminated removed the session-terminating event + // from the channel's exit events). This ensures the guest unmounts and + // flushes data to disk before the VM is destroyed. + // N.B. dockerd has exited by this point, so no container can be using the volumes. + m_volumes.reset(); + // N.B. dockerd has exited by this point, so unmounting the VHD is safe since no container can be running. try { @@ -2603,8 +2606,18 @@ try } CATCH_LOG(); } + else + { + m_volumes.reset(); + } } + // If the VM was already dead or never created, volumes may not have been reset above. 
+ m_volumes.reset(); + + m_eventTracker.reset(); + m_dockerClient.reset(); + m_dockerdProcess.reset(); m_containerdProcess.reset(); m_virtualMachine.reset(); diff --git a/src/windows/wslcsession/WSLCVirtualMachine.cpp b/src/windows/wslcsession/WSLCVirtualMachine.cpp index e3d96527fc..e4cf1afcc7 100644 --- a/src/windows/wslcsession/WSLCVirtualMachine.cpp +++ b/src/windows/wslcsession/WSLCVirtualMachine.cpp @@ -263,6 +263,8 @@ WSLCVirtualMachine::WSLCVirtualMachine(_In_ IWSLCVirtualMachine* Vm, _In_ const void WSLCVirtualMachine::Initialize() { THROW_IF_FAILED(m_vm->GetId(&m_vmId)); + + THROW_IF_FAILED(m_vm->GetTerminationEvent(&m_vmExitedEvent)); // Start crash dump collection thread. // The VM backend handles the listen socket creation (HV socket for HCS, @@ -308,21 +310,53 @@ WSLCVirtualMachine::~WSLCVirtualMachine() m_vmTerminatingEvent.SetEvent(); - m_initChannel.Close(); + // Tell the guest to halt by killing PID 1 (mini_init). The kernel + // command line includes panic=-1, so a PID 1 death causes an immediate + // kernel panic and halt. This allows the openvmm VM worker to observe + // the guest halt and exit cleanly, avoiding a force-terminate. + try + { + Signal(1, 9 /* SIGKILL */); + } + catch (...) + { + LOG_CAUGHT_EXCEPTION_MSG("Failed to signal PID 1 for guest halt"); + } - // Terminate the VM. - m_vm.reset(); + m_initChannel.Close(); - if (m_processExitThread.joinable()) + // Signal the VM exit event to unblock any pending AcceptCrashDumpConnection + // COM calls. The crash dump thread may hold an in-flight cross-process COM + // call to AcceptCrashDumpConnection, which blocks waiting for the VM exit + // event. COM prevents the server object from being destroyed while calls + // are pending, so releasing m_vm without unblocking this call first would + // deadlock. Signaling the event makes AcceptCrashDumpConnection return + // E_ABORT, allowing the crash dump thread to exit cleanly. 
+ if (m_vmExitedEvent) { - m_processExitThread.join(); + m_vmExitedEvent.SetEvent(); } + // Join the crash dump thread first — it makes cross-process COM calls to + // the VM backend. Once joined, no in-flight COM calls remain and the + // VM COM reference can be released without deadlock. if (m_crashDumpThread.joinable()) { m_crashDumpThread.join(); } + // Release the VM COM reference. This triggers the server-side destructor + // which terminates the openvmm process, breaking all socket connections + // (including the process exit thread's vsock channel). + m_vm.reset(); + + // Join the process exit thread after the VM is destroyed — it reads from + // a vsock channel that gets disconnected when the VM dies. + if (m_processExitThread.joinable()) + { + m_processExitThread.join(); + } + // Clear the state of all remaining processes now that the VM has exited. for (auto& e : m_trackedProcesses) { @@ -965,6 +999,17 @@ void WSLCVirtualMachine::MapPort(VMPortMapping& Mapping) MapRelayPort(Mapping.BindAddress.si_family, Mapping.HostPort(), Mapping.VmPort->Port(), false); } + else if (m_networkingMode == WSLCNetworkingModeConsomme) + { + THROW_HR_IF_MSG( + HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED), + !Mapping.IsLocalhost() || Mapping.Protocol != IPPROTO_TCP, + "Unsupported port mapping for consomme mode: %hs, protocol: %i", + Mapping.BindingAddressString().c_str(), + Mapping.Protocol); + + THROW_IF_FAILED(m_vm->MapPort(Mapping.BindAddress.si_family, Mapping.HostPort(), Mapping.VmPort->Port())); + } else { THROW_HR_MSG(E_UNEXPECTED, "Unexpected networking mode: %i", m_networkingMode); @@ -990,6 +1035,10 @@ void WSLCVirtualMachine::UnmapPort(VMPortMapping& Mapping) // TODO: Switch to using the native virtionet relay. 
MapRelayPort(Mapping.BindAddress.si_family, Mapping.HostPort(), Mapping.VmPort->Port(), true); } + else if (m_networkingMode == WSLCNetworkingModeConsomme) + { + THROW_IF_FAILED(m_vm->UnmapPort(Mapping.BindAddress.si_family, Mapping.HostPort(), Mapping.VmPort->Port())); + } else { THROW_HR_MSG(E_UNEXPECTED, "Unexpected networking mode: %i", m_networkingMode); @@ -1007,6 +1056,8 @@ HRESULT WSLCVirtualMachine::MountWindowsFolderImpl(_In_ LPCWSTR WindowsPath, _In try { std::filesystem::path path(WindowsPath); + auto absolute = path.is_absolute(); + WSL_LOG("MountWindowsFolder", TraceLoggingValue(WindowsPath, "WindowsPath"), TraceLoggingValue(LinuxPath, "LinuxPath"), TraceLoggingValue(static_cast(Flags), "Flags"), TraceLoggingValue(absolute, "IsAbsolute")); THROW_HR_IF_MSG(E_INVALIDARG, !path.is_absolute(), "Path is not absolute: '%ls'", WindowsPath); THROW_HR_IF_MSG( HRESULT_FROM_WIN32(ERROR_PATH_NOT_FOUND), !std::filesystem::is_directory(path), "Path is not a directory: '%ls'", WindowsPath); diff --git a/src/windows/wslcsession/WSLCVirtualMachine.h b/src/windows/wslcsession/WSLCVirtualMachine.h index 0b788d0378..b2c160172b 100644 --- a/src/windows/wslcsession/WSLCVirtualMachine.h +++ b/src/windows/wslcsession/WSLCVirtualMachine.h @@ -230,6 +230,7 @@ class WSLCVirtualMachine std::vector> m_trackedProcesses; wil::unique_event m_vmTerminatingEvent{wil::EventOptions::ManualReset}; + wil::unique_event m_vmExitedEvent; HANDLE m_sessionTerminatingEvent{}; wsl::shared::SocketChannel m_initChannel; diff --git a/test/windows/Common.cpp b/test/windows/Common.cpp index d18622dd61..fb8713ed99 100644 --- a/test/windows/Common.cpp +++ b/test/windows/Common.cpp @@ -2961,7 +2961,12 @@ void ExpectHttpResponse(LPCWSTR Url, std::optional expectedCode, bool retry if (retry) { wsl::shared::retry::RetryWithTimeout(sendRequest, std::chrono::milliseconds(500), std::chrono::seconds(30), [&]() { - return wil::ResultFromCaughtException() == HRESULT_FROM_WIN32(WININET_E_INVALID_SERVER_RESPONSE); 
+ auto hr = wil::ResultFromCaughtException(); + // WININET_E_INVALID_SERVER_RESPONSE: returned by the HCS/wslrelay backend when the + // guest service is not yet listening. + // WININET_E_CONNECTION_RESET: returned by the OpenVMM/Consomme backend when the + // guest kernel RSTs the connection because the service hasn't started listening yet. + return hr == HRESULT_FROM_WIN32(WININET_E_INVALID_SERVER_RESPONSE) || hr == WININET_E_CONNECTION_RESET; }); } else From 4b40714c475eddbfe384ee7f3c1d54d9ae72414b Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Fri, 29 May 2026 10:48:53 -0700 Subject: [PATCH 07/10] Make testing easier with openvmm --- test/windows/Common.cpp | 63 ++++++++++++++++++++++++++++++++++++++ test/windows/Common.h | 46 ++++++++++++++++++++++++++++ test/windows/WSLCTests.cpp | 8 +++++ 3 files changed, 117 insertions(+) diff --git a/test/windows/Common.cpp b/test/windows/Common.cpp index fb8713ed99..ab5862a9b5 100644 --- a/test/windows/Common.cpp +++ b/test/windows/Common.cpp @@ -17,6 +17,7 @@ Module Name: #include "precomp.h" #include "Common.h" #include "LxssDynamicFunction.h" +#include "filesystem.hpp" #include #include #include @@ -69,6 +70,7 @@ static std::wstring g_pipelineBuildId; std::wstring g_testDistroPath; std::wstring g_testDataPath; bool g_fastTestRun = false; // True when test.bat was invoked with -f +bool g_useOpenVmm = false; // True when Backend=openvmm is passed to te.exe static wil::unique_mta_usage_cookie g_mtaCookie; std::pair CreateSubprocessPipe(bool inheritRead, bool inheritWrite, DWORD bufferSize, _In_opt_ SECURITY_ATTRIBUTES* sa) @@ -1390,6 +1392,54 @@ WslConfigChange::~WslConfigChange() } } +WslcSettingsChange::WslcSettingsChange(const std::string& YamlContent) +{ + m_settingsPath = wsl::windows::common::filesystem::GetLocalAppDataPath(nullptr) / L"wslc" / L"settings.yaml"; + + m_fileExisted = std::filesystem::exists(m_settingsPath); + if (m_fileExisted) + { + std::ifstream existing(m_settingsPath); + m_originalContent = 
std::string{std::istreambuf_iterator(existing), {}}; + } + + std::filesystem::create_directories(m_settingsPath.parent_path()); + std::ofstream out(m_settingsPath, std::ios::trunc); + THROW_HR_IF(E_FAIL, !out.good()); + out << YamlContent; +} + +WslcSettingsChange::WslcSettingsChange(WslcSettingsChange&& other) + : m_settingsPath(std::move(other.m_settingsPath)), + m_originalContent(std::move(other.m_originalContent)), + m_fileExisted(other.m_fileExisted) +{ + other.m_fileExisted = false; + other.m_originalContent.reset(); +} + +WslcSettingsChange::~WslcSettingsChange() +{ + if (m_settingsPath.empty()) + { + return; + } + + if (m_fileExisted && m_originalContent.has_value()) + { + std::ofstream out(m_settingsPath, std::ios::trunc); + if (out.good()) + { + out << m_originalContent.value(); + } + } + else if (!m_fileExisted) + { + std::error_code ec; + std::filesystem::remove(m_settingsPath, ec); + } +} + std::wstring ReadFileContent(const std::string& Path) { std::ifstream configRead(Path); @@ -2094,6 +2144,19 @@ Return Value: g_testDataPath = getTestParam(L"TestDataPath"); + // Read optional Backend parameter (hcs or openvmm). Default is hcs. + const auto backend = getOptionalTestParam(L"Backend"); + if (backend.has_value() && _wcsicmp(backend->c_str(), L"openvmm") == 0) + { + g_useOpenVmm = true; + LogInfo("Backend: OpenVMM"); + } + else + { + g_useOpenVmm = false; + LogInfo("Backend: HCS"); + } + const auto setupScript = getOptionalTestParam(L"SetupScript"); if (!setupScript.has_value()) { diff --git a/test/windows/Common.h b/test/windows/Common.h index ad2706ebf2..e8da8940c7 100644 --- a/test/windows/Common.h +++ b/test/windows/Common.h @@ -105,6 +105,30 @@ using namespace std::chrono_literals; return; \ } +// +// Backend-specific skip macros for HCS vs OpenVMM testing. +// g_useOpenVmm is set from the "Backend" TAEF runtime parameter. 
+// +extern bool g_useOpenVmm; + +#define SKIP_TEST_OPENVMM() \ + { \ + if (g_useOpenVmm) \ + { \ + LogSkipped("This test is skipped for the OpenVMM backend"); \ + return; \ + } \ + } + +#define SKIP_TEST_HCS() \ + { \ + if (!g_useOpenVmm) \ + { \ + LogSkipped("This test is skipped for the HCS backend"); \ + return; \ + } \ + } + #define WSL_SETTINGS_TEST() \ if constexpr (!WSL_BUILD_WSL_SETTINGS) \ { \ @@ -188,6 +212,28 @@ class WslConfigChange std::optional m_originalContent; }; +// +// RAII Wrapper for WSLC settings.yaml changes. +// Writes the given YAML content to %LOCALAPPDATA%\wslc\settings.yaml +// and restores the original content (or removes the file) on destruction. +// +class WslcSettingsChange +{ +public: + WslcSettingsChange(const std::string& YamlContent); + ~WslcSettingsChange(); + + WslcSettingsChange(const WslcSettingsChange&) = delete; + WslcSettingsChange(WslcSettingsChange&& other); + const WslcSettingsChange& operator=(WslcSettingsChange&&) = delete; + const WslcSettingsChange& operator=(WslcSettingsChange&) = delete; + +private: + std::filesystem::path m_settingsPath; + std::optional m_originalContent; + bool m_fileExisted = false; +}; + template class RegistryKeyChange { diff --git a/test/windows/WSLCTests.cpp b/test/windows/WSLCTests.cpp index b0da4a00f7..5b84e05546 100644 --- a/test/windows/WSLCTests.cpp +++ b/test/windows/WSLCTests.cpp @@ -36,6 +36,7 @@ using WSLCE2ETests::StartLocalRegistry; extern std::wstring g_testDataPath; extern bool g_fastTestRun; +extern bool g_useOpenVmm; class WSLCTests { @@ -46,11 +47,15 @@ class WSLCTests WSLCSessionSettings m_defaultSessionSettings{}; wil::com_ptr m_defaultSession; static inline auto c_testSessionName = L"wslc-test"; + std::optional m_settingsChange; TEST_CLASS_SETUP(TestClassSetup) { THROW_IF_WIN32_ERROR(WSAStartup(MAKEWORD(2, 2), &m_wsadata)); + // Configure the VMM backend via settings.yaml before creating any sessions. 
+ m_settingsChange.emplace(std::format("session:\n openVmm: {}\n", g_useOpenVmm ? "true" : "false")); + // The WSLC SDK tests use this same storage to reduce pull overhead. m_storagePath = std::filesystem::current_path() / "test-storage"; m_defaultSessionSettings = GetDefaultSessionSettings(c_testSessionName, true, WSLCNetworkingModeVirtioProxy); @@ -103,6 +108,9 @@ class WSLCTests { m_defaultSession.reset(); + // Restore original settings.yaml. + m_settingsChange.reset(); + // Keep the VHD when running in -f mode, to speed up subsequent test runs. if (!g_fastTestRun && !m_storagePath.empty()) { From c66c9ad2d1893c941e5983f9b8e697dbbab63234 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 10 Jun 2026 13:40:03 -0700 Subject: [PATCH 08/10] Use dll from hvlite to interface with openvmm --- CMakeLists.txt | 51 +- UserConfig.cmake.sample | 13 + msipackage/CMakeLists.txt | 2 + msipackage/package.wix.in | 11 + src/windows/service/exe/CMakeLists.txt | 15 +- .../service/exe/OpenVmmVirtualMachine.cpp | 115 ++-- .../service/exe/OpenVmmVirtualMachine.h | 10 +- src/windows/service/exe/TtrpcClient.cpp | 569 ------------------ src/windows/service/exe/TtrpcClient.h | 173 ------ .../service/exe/TtrpcEnvelopeCodec.cpp | 204 ------- src/windows/service/exe/TtrpcEnvelopeCodec.h | 59 -- src/windows/service/inc/windowsdefs.idl | 53 +- src/windows/wslc/CMakeLists.txt | 4 - 13 files changed, 144 insertions(+), 1135 deletions(-) delete mode 100644 src/windows/service/exe/TtrpcClient.cpp delete mode 100644 src/windows/service/exe/TtrpcClient.h delete mode 100644 src/windows/service/exe/TtrpcEnvelopeCodec.cpp delete mode 100644 src/windows/service/exe/TtrpcEnvelopeCodec.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6153863ff0..3fd2f3b25b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,49 +106,6 @@ FetchContent_Declare( FetchContent_MakeAvailable(boost_headers) -set(protobuf_BUILD_TESTS OFF CACHE BOOL "" FORCE) -set(protobuf_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) 
-set(protobuf_INSTALL OFF CACHE BOOL "" FORCE) - -if (INCLUDE_OPENVMM) - FetchContent_Declare(protobuf - URL https://github.com/protocolbuffers/protobuf/releases/download/v34.1/protobuf-34.1.tar.gz - URL_HASH SHA256=e4e6ff10760cf747a2decd1867741f561b216bd60cc4038c87564713a6da1848) - - FetchContent_MakeAvailable(protobuf) - include(${protobuf_SOURCE_DIR}/cmake/protobuf-generate.cmake) -endif() - -# Adds protobuf-generated C++ sources from VMService.proto to the given TARGET. -# The target must already exist. This function handles code generation, include -# directories, library linking, and MSVC warning suppression for generated code. -function(wsl_add_openvmm_proto TARGET) - set(_proto_file ${WSL_DEVICE_HOST_SOURCE_DIR}/proto/VMService.proto) - set(_proto_out_dir ${CMAKE_CURRENT_BINARY_DIR}/generated) - - if (NOT EXISTS ${_proto_file}) - message(FATAL_ERROR "Expected DeviceHost proto file was not found: ${_proto_file}") - endif() - - protobuf_generate( - TARGET ${TARGET} - PROTOS ${_proto_file} - IMPORT_DIRS ${WSL_DEVICE_HOST_SOURCE_DIR}/proto ${protobuf_SOURCE_DIR}/src - PROTOC_OUT_DIR ${_proto_out_dir} - ) - - target_include_directories(${TARGET} PRIVATE ${_proto_out_dir}) - target_link_libraries(${TARGET} protobuf::libprotobuf) - - if (MSVC) - set_source_files_properties( - ${_proto_out_dir}/VMService.pb.cc - TARGET_DIRECTORY ${TARGET} - PROPERTIES COMPILE_OPTIONS "/wd4267;/wd4244;/wd4018") - endif() -endfunction() - - # Import modules list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") find_package(IDL REQUIRED) @@ -214,6 +171,14 @@ if (EXISTS "${CMAKE_CURRENT_LIST_DIR}/UserConfig.cmake") find_package(USER REQUIRED PATHS ${CMAKE_CURRENT_LIST_DIR}) endif() +# Resolve OpenVMM packages. UserConfig.cmake can set WSL_DEVICE_HOST_SOURCE_DIR +# and/or WSL_VM_SERVICE_SOURCE_DIR to local paths to skip the NuGet lookup. +# When INCLUDE_OPENVMM is set and WSL_VM_SERVICE_SOURCE_DIR is not already +# overridden, resolve from the NuGet feed. 
+if (INCLUDE_OPENVMM AND NOT WSL_VM_SERVICE_SOURCE_DIR) + find_nuget_package(Microsoft.WSL.VmService WSL_VM_SERVICE /build/native) +endif() + # --- Local OpenVMM binary override --- # Set OPENVMM_BUILD_DIR to use a locally built openvmm.exe instead of the version # from the Microsoft.WSL.DeviceHost NuGet package. This is useful when iterating diff --git a/UserConfig.cmake.sample b/UserConfig.cmake.sample index 6aeb93dc05..9bac3d3f04 100644 --- a/UserConfig.cmake.sample +++ b/UserConfig.cmake.sample @@ -55,6 +55,19 @@ set(OPENVMM_/BUILD_DIR "d:/src/openvmm/target/release") # # Uncomment to reduce the verbosity of the appx package build # set(WSL_SILENT_APPX_BUILD true) +# # --- Local hvlite / OpenVMM development --- +# # Set HVLITE_BUILD_DIR to consume locally built DLLs from your hvlite checkout +# # (wsldevicehost.dll, wslvmservice.dll). Set OPENVMM_BUILD_DIR for openvmm.exe, +# # which builds from the separate oss/ submodule workspace. +# # +# # Build in hvlite first: +# # cargo build -p wsldevicehost -p wslvmservice +# # cd oss && cargo build -p openvmm +# # +# # Then uncomment and adjust the paths: +# set(HVLITE_BUILD_DIR "d:/src/hvlite/target/debug") +# set(OPENVMM_BUILD_DIR "d:/src/hvlite/oss/target/debug") + # # Uncomment to change the pre-commit hook behavior (default: warn) # # warn - report formatting issues without blocking the commit # # error - block the commit when formatting issues are found diff --git a/msipackage/CMakeLists.txt b/msipackage/CMakeLists.txt index 5583fad210..4f1c21c4cc 100644 --- a/msipackage/CMakeLists.txt +++ b/msipackage/CMakeLists.txt @@ -32,6 +32,8 @@ if (INCLUDE_OPENVMM) foreach(binary ${WSL_DEVICE_HOST_BINARIES}) list(APPEND BINARIES_DEPENDENCIES "${WSL_DEVICE_HOST_SOURCE_DIR}/bin/${TARGET_PLATFORM}/${binary}") endforeach() + + list(APPEND BINARIES_DEPENDENCIES "${WSL_VM_SERVICE_SOURCE_DIR}/bin/${TARGET_PLATFORM}/wslvmservice.dll") endif() set(LINUX_BINARIES init;initrd.img) diff --git a/msipackage/package.wix.in 
b/msipackage/package.wix.in index 28cba85b07..9f0e92bf73 100644 --- a/msipackage/package.wix.in +++ b/msipackage/package.wix.in @@ -241,8 +241,19 @@ + + + + + + + + + + + diff --git a/src/windows/service/exe/CMakeLists.txt b/src/windows/service/exe/CMakeLists.txt index 757dfe2daa..69eeda725a 100644 --- a/src/windows/service/exe/CMakeLists.txt +++ b/src/windows/service/exe/CMakeLists.txt @@ -59,26 +59,15 @@ set(HEADERS if (INCLUDE_OPENVMM) list(APPEND SOURCES - OpenVmmVirtualMachine.cpp - TtrpcEnvelopeCodec.cpp - TtrpcClient.cpp) + OpenVmmVirtualMachine.cpp) list(APPEND HEADERS - OpenVmmVirtualMachine.h - TtrpcEnvelopeCodec.h - TtrpcClient.h) + OpenVmmVirtualMachine.h) endif() add_executable(wslservice ${SOURCES} ${HEADERS}) add_dependencies(wslservice wslserviceidl wslservicemc) target_compile_definitions(wslservice PRIVATE WSL_INCLUDE_OPENVMM=$) -if (INCLUDE_OPENVMM) - wsl_add_openvmm_proto(wslservice) - - if (MSVC) - set_source_files_properties(TtrpcClient.cpp PROPERTIES COMPILE_OPTIONS "/wd4267;/wd4244;/wd4018") - endif() -endif() add_compile_definitions(__WRL_CLASSIC_COM__) add_compile_definitions(__WRL_DISABLE_STATIC_INITIALIZE__) add_compile_definitions(USE_COM_CONTEXT_DEF=1) diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.cpp b/src/windows/service/exe/OpenVmmVirtualMachine.cpp index 3fc22c1fc3..4074c290cb 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.cpp +++ b/src/windows/service/exe/OpenVmmVirtualMachine.cpp @@ -31,7 +31,6 @@ Module Name: using namespace wsl::windows::common; using wsl::windows::service::wslc::OpenVmmVirtualMachine; -using wsl::windows::service::wslc::TtrpcClient; namespace wslutil = wsl::windows::common::wslutil; OpenVmmVirtualMachine::OpenVmmVirtualMachine(_In_ const WSLCSessionSettings* Settings) @@ -177,10 +176,10 @@ OpenVmmVirtualMachine::OpenVmmVirtualMachine(_In_ const WSLCSessionSettings* Set auto cleanupOnFailure = wil::scope_exit([this]() { m_vmExitEvent.SetEvent(); - if (m_ttrpcClient) + if (m_vmService) { 
- m_ttrpcClient->Disconnect(); - m_ttrpcClient.reset(); + m_vmService->Disconnect(); + m_vmService.reset(); } if (m_processHandle) @@ -272,35 +271,28 @@ std::wstring OpenVmmVirtualMachine::BuildCommandLine() const return cmd; } -TtrpcClient::VmConfig OpenVmmVirtualMachine::BuildVmConfig() const +void OpenVmmVirtualMachine::ConfigureVmService() const { - TtrpcClient::VmConfig config; - - config.KernelPath = wsl::shared::string::WideToMultiByte(m_kernelPath.wstring()); - config.InitrdPath = wsl::shared::string::WideToMultiByte(m_initrdPath.wstring()); + THROW_IF_FAILED(m_vmService->SetKernelPath(m_kernelPath.c_str())); + THROW_IF_FAILED(m_vmService->SetInitrdPath(m_initrdPath.c_str())); // Kernel command line — the server prepends "panic=-1 debug pci=off console=ttyS0 " // automatically via HyperVGen2LinuxDirect chipset type. - config.KernelCmdLine = wsl::shared::string::WideToMultiByte(m_kernelCmdLine); + THROW_IF_FAILED(m_vmService->SetKernelCmdLine(m_kernelCmdLine.c_str())); // Ensure 2MB granularity. Cap at 4GB because OpenVMM on WHP allocates guest RAM upfront. constexpr ULONG c_maxMemoryMb = 4096; - config.MemoryMb = std::min(m_memoryMb, c_maxMemoryMb) & ~0x1; + THROW_IF_FAILED(m_vmService->SetMemoryMb((std::min(m_memoryMb, c_maxMemoryMb) & ~0x1))); - config.ProcessorCount = m_cpuCount; + THROW_IF_FAILED(m_vmService->SetProcessorCount(m_cpuCount)); // HvSocket bridge via vsock path (for the guest init connection). - config.HvSocketPath = wsl::shared::string::WideToMultiByte(m_vsockPath.wstring()); + THROW_IF_FAILED(m_vmService->SetHvSocketPath(m_vsockPath.c_str())); // Boot disks: root VHD (LUN 0) and modules VHD (LUN 1), both read-only. 
for (const auto& [lun, disk] : m_attachedDisks) { - config.ScsiDisks.push_back({ - .Controller = 0, - .Lun = lun, - .HostPath = wsl::shared::string::WideToMultiByte(disk.Path), - .ReadOnly = disk.ReadOnly, - }); + THROW_IF_FAILED(m_vmService->AddBootDisk(0, lun, disk.Path.c_str(), disk.ReadOnly)); } if (m_networkingMode == WSLCNetworkingModeConsomme) @@ -310,25 +302,22 @@ TtrpcClient::VmConfig OpenVmmVirtualMachine::BuildVmConfig() const GUID nicGuid = m_vmId; nicGuid.Data1 ^= c_nicGuidXorMask; - config.Nic = TtrpcClient::VmConfig::ConsommeNic{ - .NicId = wsl::shared::string::GuidToString(nicGuid), - .MacAddress = c_defaultConsommeMacAddress, - }; + auto nicIdStr = wsl::shared::string::GuidToString(nicGuid, wsl::shared::string::GuidToStringFlags::None); + auto macStr = wsl::shared::string::MultiByteToWide(c_defaultConsommeMacAddress); + THROW_IF_FAILED(m_vmService->SetConsommeNic(nicIdStr.c_str(), macStr.c_str())); } - // COM1 (port 0) — earlycon output before hvc0 loads. - if (FeatureEnabled(WslcFeatureFlagsEarlyBootDmesg)) + // COM1 (port 0) — earlycon output before hvc0 loads. Only configure it when + // early-boot logging is enabled; otherwise EarlyConsoleName() is empty and the + // OpenVMM server would fail trying to connect to an empty serial socket path, + // aborting CreateVM. + if (const auto earlyConsoleName = m_dmesgCollector->EarlyConsoleName(); !earlyConsoleName.empty()) { - config.SerialPorts.push_back({ - .Port = 0, - .SocketPath = wsl::shared::string::WideToMultiByte(m_dmesgCollector->EarlyConsoleName()), - }); + THROW_IF_FAILED(m_vmService->AddSerialPort(0, earlyConsoleName.c_str())); } // Virtio console (/dev/hvc0) — primary console after boot. 
- config.VirtioConsolePath = wsl::shared::string::WideToMultiByte(m_dmesgCollector->VirtioConsoleName()); - - return config; + THROW_IF_FAILED(m_vmService->SetVirtioConsolePath(m_dmesgCollector->VirtioConsoleName().c_str())); } void OpenVmmVirtualMachine::LaunchOpenVmm() @@ -396,18 +385,18 @@ void OpenVmmVirtualMachine::LaunchOpenVmm() // Monitor the openvmm process and signal m_vmExitEvent on exit. m_processWatchThread = std::thread(&OpenVmmVirtualMachine::WatchProcessExit, this); - m_ttrpcClient = std::make_unique(); + m_vmService = wil::CoCreateInstance(CLSID_WslVmService, CLSCTX_INPROC_SERVER); THROW_IF_FAILED_MSG( - m_ttrpcClient->Connect(m_ttrpcSocketPath.wstring(), TtrpcClient::c_defaultTimeoutMs), + m_vmService->Connect(m_ttrpcSocketPath.c_str(), 30000), "Failed to connect to OpenVMM ttrpc server"); - auto vmConfig = BuildVmConfig(); + ConfigureVmService(); THROW_IF_FAILED_MSG( - m_ttrpcClient->CreateVm(vmConfig), + m_vmService->CreateVm(), "Failed to create VM via ttrpc CreateVM"); THROW_IF_FAILED_MSG( - m_ttrpcClient->ResumeVm(), + m_vmService->ResumeVm(), "Failed to resume VM via ttrpc ResumeVM"); } @@ -441,9 +430,12 @@ OpenVmmVirtualMachine::~OpenVmmVirtualMachine() // Signal termination to any pending operations. m_vmExitEvent.SetEvent(); - if (m_ttrpcClient) + // TeardownVM releases all VM resources and unblocks WaitVM. + if (m_vmService) { - LOG_IF_FAILED(m_ttrpcClient->QuitVm()); + LOG_IF_FAILED(m_vmService->TeardownVm()); + m_vmService->Disconnect(); + m_vmService.reset(); } // Wait for graceful exit, then force-terminate. @@ -456,9 +448,6 @@ OpenVmmVirtualMachine::~OpenVmmVirtualMachine() } } - // Clean up the ttrpc client now that the process has exited. 
- m_ttrpcClient.reset(); - if (m_processWatchThread.joinable()) { m_processWatchThread.join(); @@ -550,8 +539,8 @@ try std::lock_guard lock(m_lock); - THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), - "ttrpc client not connected for disk hot-add"); + THROW_HR_IF_MSG(E_FAIL, !m_vmService, + "VM service not available for disk hot-add"); DiskInfo disk{Path, ReadOnly != FALSE}; const ULONG allocatedLun = AllocateLun(); @@ -560,8 +549,7 @@ try FreeLun(allocatedLun); }); - auto hostPath = wsl::shared::string::WideToMultiByte(Path); - THROW_IF_FAILED(m_ttrpcClient->AttachScsiDisk(0, allocatedLun, hostPath, ReadOnly != FALSE)); + THROW_IF_FAILED(m_vmService->AttachScsiDisk(0, allocatedLun, Path, ReadOnly)); m_attachedDisks.emplace(allocatedLun, std::move(disk)); cleanup.release(); @@ -579,10 +567,10 @@ try auto it = m_attachedDisks.find(Lun); RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_NOT_FOUND), it == m_attachedDisks.end()); - THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), - "ttrpc client not connected for disk hot-remove"); + THROW_HR_IF_MSG(E_FAIL, !m_vmService, + "VM service not available for disk hot-remove"); - THROW_IF_FAILED(m_ttrpcClient->DetachScsiDisk(0, Lun)); + THROW_IF_FAILED(m_vmService->DetachScsiDisk(0, Lun)); FreeLun(Lun); m_attachedDisks.erase(it); @@ -600,13 +588,12 @@ try std::lock_guard lock(m_lock); - THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), - "ttrpc client not connected for share add"); + THROW_HR_IF_MSG(E_FAIL, !m_vmService, + "VM service not available for share add"); GUID shareIdLocal; THROW_IF_FAILED(CoCreateGuid(&shareIdLocal)); - auto shareTag = wsl::shared::string::GuidToString(shareIdLocal, wsl::shared::string::None); - auto hostPath = wsl::shared::string::WideToMultiByte(WindowsPath); + auto shareTag = wsl::shared::string::GuidToString(shareIdLocal, wsl::shared::string::None); WSL_LOG( "OpenVmmAddShare", @@ -615,7 +602,7 @@ try TraceLoggingValue(ReadOnly, 
"ReadOnly"), TraceLoggingValue(shareTag.c_str(), "Tag")); - THROW_IF_FAILED(m_ttrpcClient->AddShare(shareTag, hostPath, ReadOnly)); + THROW_IF_FAILED(m_vmService->AddShare(shareTag.c_str(), WindowsPath, ReadOnly)); m_shares.emplace(shareIdLocal, WindowsPath); *ShareId = shareIdLocal; @@ -631,17 +618,17 @@ try auto it = m_shares.find(ShareId); RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_NOT_FOUND), it == m_shares.end()); - THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), - "ttrpc client not connected for share remove"); + THROW_HR_IF_MSG(E_FAIL, !m_vmService, + "VM service not available for share remove"); - auto shareTag = wsl::shared::string::GuidToString(it->first, wsl::shared::string::None); + auto shareTag = wsl::shared::string::GuidToString(it->first, wsl::shared::string::None); WSL_LOG( "OpenVmmRemoveShare", TraceLoggingValue(m_vmIdString.c_str(), "VmId"), TraceLoggingValue(shareTag.c_str(), "Tag")); - THROW_IF_FAILED(m_ttrpcClient->RemoveShare(shareTag)); + THROW_IF_FAILED(m_vmService->RemoveShare(shareTag.c_str())); m_shares.erase(it); return S_OK; @@ -761,8 +748,8 @@ try return HRESULT_FROM_WIN32(ERROR_TOO_MANY_OPEN_FILES); } - THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), - "ttrpc client not connected for port bind"); + THROW_HR_IF_MSG(E_FAIL, !m_vmService, + "VM service not available for port bind"); WSL_LOG( "OpenVmmMapPort", @@ -771,7 +758,7 @@ try TraceLoggingValue(GuestPort, "GuestPort"), TraceLoggingValue(Family, "Family")); - THROW_IF_FAILED(m_ttrpcClient->BindPort(HostPort, GuestPort, true, Family)); + THROW_IF_FAILED(m_vmService->BindPort(HostPort, GuestPort, TRUE, Family)); m_boundPorts.insert(key); return S_OK; @@ -789,8 +776,8 @@ try return HRESULT_FROM_WIN32(ERROR_NOT_FOUND); } - THROW_HR_IF_MSG(E_FAIL, !m_ttrpcClient || !m_ttrpcClient->IsConnected(), - "ttrpc client not connected for port unbind"); + THROW_HR_IF_MSG(E_FAIL, !m_vmService, + "VM service not available for port unbind"); WSL_LOG( 
"OpenVmmUnmapPort", @@ -799,7 +786,7 @@ try TraceLoggingValue(GuestPort, "GuestPort"), TraceLoggingValue(Family, "Family")); - THROW_IF_FAILED(m_ttrpcClient->UnbindPort(HostPort, GuestPort, true, Family)); + THROW_IF_FAILED(m_vmService->UnbindPort(HostPort, GuestPort, TRUE, Family)); m_boundPorts.erase(key); return S_OK; diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.h b/src/windows/service/exe/OpenVmmVirtualMachine.h index 182b1ff5e7..a0200b1678 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.h +++ b/src/windows/service/exe/OpenVmmVirtualMachine.h @@ -20,8 +20,8 @@ Module Name: #include "wslc.h" #include "INetworkingEngine.h" -#include "TtrpcClient.h" #include "Dmesg.h" +#include #include #include #include @@ -65,8 +65,8 @@ class OpenVmmVirtualMachine // Build the openvmm.exe command line (ttrpc-only in orchestration mode). std::wstring BuildCommandLine() const; - // Build a ttrpc CreateVM configuration from stored VM settings. - TtrpcClient::VmConfig BuildVmConfig() const; + // Configure the VM via IWslVmService COM calls (kernel, disks, NIC, etc.). + void ConfigureVmService() const; // Create a Unix domain socket listener for the hybrid_vsock bridge at the given port. // Returns the listening socket and the filesystem path for cleanup. @@ -155,9 +155,9 @@ class OpenVmmVirtualMachine // Networking engine (ConsommeNetworking for the OpenVMM backend). std::unique_ptr m_networkEngine; - // ttrpc client for runtime VM management (disk hot-add/remove etc.). + // ttrpc client COM object for runtime VM management (disk hot-add/remove etc.). std::filesystem::path m_ttrpcSocketPath; - std::unique_ptr m_ttrpcClient; + wil::com_ptr m_vmService; // Termination callback to invoke when the VM exits. 
wil::com_ptr m_terminationCallback; diff --git a/src/windows/service/exe/TtrpcClient.cpp b/src/windows/service/exe/TtrpcClient.cpp deleted file mode 100644 index a7ff6ba599..0000000000 --- a/src/windows/service/exe/TtrpcClient.cpp +++ /dev/null @@ -1,569 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. - -/*++ - -Module Name: - - TtrpcClient.cpp - -Abstract: - - Minimal ttrpc client for communicating with OpenVMM's vmservice. - - This implementation uses generated protobuf types from VMService.proto for - vmservice payloads, while keeping a generic ttrpc transport layer that can - send any protobuf request/response pair. - - Wire format reference: openvmm/support/mesh/mesh_rpc/src/message.rs - ---*/ - -#include "precomp.h" - -#include "TtrpcClient.h" -#include "TtrpcEnvelopeCodec.h" - -#include - -#include "VMService.pb.h" -#include "google/protobuf/empty.pb.h" -#include "stringshared.h" - -using namespace wsl::windows::service::wslc; -using wsl::windows::service::wslc::detail::TtrpcEnvelopeCodec; - -namespace -{ -HRESULT DeserializeMessage(const std::vector& bytes, google::protobuf::Message* message) -{ - RETURN_HR_IF(E_POINTER, message == nullptr); - - if (bytes.empty()) - { - message->Clear(); - return S_OK; - } - - RETURN_HR_IF_MSG( - E_FAIL, - !message->ParseFromArray(bytes.data(), static_cast(bytes.size())), - "ttrpc: failed to parse protobuf response payload"); - - return S_OK; -} - -HRESULT SerializeMessage(const google::protobuf::Message& message, std::vector& bytes) -{ - std::string serialized; - RETURN_HR_IF_MSG(E_FAIL, !message.SerializeToString(&serialized), "ttrpc: failed to serialize protobuf request payload"); - - bytes.assign(serialized.begin(), serialized.end()); - return S_OK; -} - -HRESULT GrpcStatusToHresult(int32_t statusCode) -{ - // gRPC status codes: https://grpc.io/docs/guides/status-codes/ - constexpr int32_t c_grpcInvalidArgument = 3; - constexpr int32_t c_grpcNotFound = 5; - constexpr int32_t c_grpcResourceExhausted 
= 8; - constexpr int32_t c_grpcUnimplemented = 12; - - switch (statusCode) - { - case c_grpcInvalidArgument: - return E_INVALIDARG; - case c_grpcNotFound: - return HRESULT_FROM_WIN32(ERROR_NOT_FOUND); - case c_grpcResourceExhausted: - return HRESULT_FROM_WIN32(ERROR_NO_SYSTEM_RESOURCES); - case c_grpcUnimplemented: - return E_NOTIMPL; - default: - return E_FAIL; - } -} -} // namespace - -TtrpcClient::TtrpcClient() = default; - -TtrpcClient::~TtrpcClient() -{ - Disconnect(); -} - -HRESULT TtrpcClient::Connect(const std::wstring& socketPath, DWORD timeoutMs) -try -{ - std::lock_guard lock(m_lock); - - if (m_socket != INVALID_SOCKET) - { - return S_OK; - } - - auto narrowPath = wsl::shared::string::WideToMultiByte(socketPath); - - sockaddr_un addr{}; - addr.sun_family = AF_UNIX; - THROW_HR_IF_MSG( - E_INVALIDARG, - narrowPath.size() >= sizeof(addr.sun_path), - "ttrpc socket path too long: %hs", - narrowPath.c_str()); - memcpy(addr.sun_path, narrowPath.c_str(), narrowPath.size() + 1); - - constexpr DWORD c_initialBackoffMs = 100; - constexpr DWORD c_maxBackoffMs = 2000; - DWORD elapsed = 0; - DWORD backoff = c_initialBackoffMs; - - while (elapsed < timeoutMs) - { - SOCKET sock = ::socket(AF_UNIX, SOCK_STREAM, 0); - THROW_LAST_ERROR_IF(sock == INVALID_SOCKET); - - if (::connect(sock, reinterpret_cast(&addr), sizeof(addr)) == 0) - { - m_socket = sock; - m_nextStreamId = 1; - - // Set socket timeouts to prevent blocking forever if OpenVMM hangs. 
- setsockopt(m_socket, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&c_socketTimeoutMs), sizeof(c_socketTimeoutMs)); - setsockopt(m_socket, SOL_SOCKET, SO_SNDTIMEO, reinterpret_cast(&c_socketTimeoutMs), sizeof(c_socketTimeoutMs)); - - WSL_LOG( - "TtrpcClientConnected", - TraceLoggingValue(narrowPath.c_str(), "SocketPath"), - TraceLoggingValue(elapsed, "ElapsedMs")); - - return S_OK; - } - - closesocket(sock); - - DWORD sleepTime = std::min(backoff, timeoutMs - elapsed); - Sleep(sleepTime); - elapsed += sleepTime; - backoff = std::min(backoff * 2, c_maxBackoffMs); - } - - WSL_LOG( - "TtrpcClientConnectTimeout", - TraceLoggingValue(narrowPath.c_str(), "SocketPath"), - TraceLoggingValue(timeoutMs, "TimeoutMs")); - - return HRESULT_FROM_WIN32(ERROR_TIMEOUT); -} -CATCH_RETURN() - -void TtrpcClient::Disconnect() -{ - std::lock_guard lock(m_lock); - - if (m_socket != INVALID_SOCKET) - { - closesocket(m_socket); - m_socket = INVALID_SOCKET; - } -} - -bool TtrpcClient::IsConnected() const -{ - return m_socket != INVALID_SOCKET; -} - -HRESULT TtrpcClient::Call( - const std::string& service, - const std::string& method, - const google::protobuf::Message& request, - google::protobuf::Message* response) -{ - std::vector requestPayload; - RETURN_IF_FAILED(SerializeMessage(request, requestPayload)); - - std::vector responsePayload; - RETURN_IF_FAILED(SendRequest(service, method, requestPayload, &responsePayload)); - - if (response != nullptr) - { - RETURN_IF_FAILED(DeserializeMessage(responsePayload, response)); - } - - return S_OK; -} - -HRESULT TtrpcClient::AttachScsiDisk( - uint32_t controller, uint32_t lun, const std::string& hostPath, bool readOnly) -try -{ - WSL_LOG( - "TtrpcAttachScsiDisk", - TraceLoggingValue(controller, "Controller"), - TraceLoggingValue(lun, "Lun"), - TraceLoggingValue(hostPath.c_str(), "HostPath"), - TraceLoggingValue(readOnly, "ReadOnly")); - - vmservice::ModifyResourceRequest request; - request.set_type(vmservice::ADD); - - auto* scsiDisk = 
request.mutable_scsi_disk(); - scsiDisk->set_controller(controller); - scsiDisk->set_lun(lun); - scsiDisk->set_host_path(hostPath); - scsiDisk->set_type(vmservice::SCSI_DISK_TYPE_VHDX); - scsiDisk->set_read_only(readOnly); - - google::protobuf::Empty response; - return Call(c_serviceName, c_modifyResourceMethod, request, &response); -} -CATCH_RETURN() - -HRESULT TtrpcClient::DetachScsiDisk(uint32_t controller, uint32_t lun) -try -{ - WSL_LOG( - "TtrpcDetachScsiDisk", - TraceLoggingValue(controller, "Controller"), - TraceLoggingValue(lun, "Lun")); - - vmservice::ModifyResourceRequest request; - request.set_type(vmservice::REMOVE); - - auto* scsiDisk = request.mutable_scsi_disk(); - scsiDisk->set_controller(controller); - scsiDisk->set_lun(lun); - - google::protobuf::Empty response; - return Call(c_serviceName, c_modifyResourceMethod, request, &response); -} -CATCH_RETURN() - -HRESULT TtrpcClient::AddShare(const std::string& tag, const std::string& rootPath, bool readOnly) -try -{ - WSL_LOG( - "TtrpcAddShare", - TraceLoggingValue(tag.c_str(), "Tag"), - TraceLoggingValue(rootPath.c_str(), "RootPath"), - TraceLoggingValue(readOnly, "ReadOnly")); - - vmservice::ModifyResourceRequest request; - request.set_type(vmservice::ADD); - - auto* virtiofs = request.mutable_virtiofs(); - virtiofs->set_tag(tag); - virtiofs->set_root_path(rootPath); - virtiofs->set_read_only(readOnly); - - google::protobuf::Empty response; - return Call(c_serviceName, c_modifyResourceMethod, request, &response); -} -CATCH_RETURN() - -HRESULT TtrpcClient::RemoveShare(const std::string& tag) -try -{ - WSL_LOG( - "TtrpcRemoveShare", - TraceLoggingValue(tag.c_str(), "Tag")); - - vmservice::ModifyResourceRequest request; - request.set_type(vmservice::REMOVE); - - auto* virtiofs = request.mutable_virtiofs(); - virtiofs->set_tag(tag); - - google::protobuf::Empty response; - return Call(c_serviceName, c_modifyResourceMethod, request, &response); -} -CATCH_RETURN() - -HRESULT TtrpcClient::BindPort(uint16_t 
hostPort, uint16_t guestPort, bool tcp, int family) -try -{ - auto hostAddress = (family == AF_INET6) ? "::1" : "127.0.0.1"; - - WSL_LOG( - "TtrpcBindPort", - TraceLoggingValue(hostPort, "HostPort"), - TraceLoggingValue(guestPort, "GuestPort"), - TraceLoggingValue(tcp, "Tcp"), - TraceLoggingValue(hostAddress, "HostAddress")); - - vmservice::ModifyResourceRequest request; - request.set_type(vmservice::UPDATE); - - auto* nic = request.mutable_nic_config(); - auto* consomme = nic->mutable_consomme(); - auto* port = consomme->add_ports(); - port->set_host_port(hostPort); - port->set_guest_port(guestPort); - port->set_protocol(tcp ? vmservice::TCP : vmservice::UDP); - port->set_host_address(hostAddress); - - google::protobuf::Empty response; - return Call(c_serviceName, c_modifyResourceMethod, request, &response); -} -CATCH_RETURN() - -HRESULT TtrpcClient::UnbindPort(uint16_t hostPort, uint16_t guestPort, bool tcp, int family) -try -{ - auto hostAddress = (family == AF_INET6) ? "::1" : "127.0.0.1"; - - WSL_LOG( - "TtrpcUnbindPort", - TraceLoggingValue(hostPort, "HostPort"), - TraceLoggingValue(guestPort, "GuestPort"), - TraceLoggingValue(tcp, "Tcp"), - TraceLoggingValue(hostAddress, "HostAddress")); - - vmservice::ModifyResourceRequest request; - request.set_type(vmservice::REMOVE); - - auto* nic = request.mutable_nic_config(); - auto* consomme = nic->mutable_consomme(); - auto* port = consomme->add_ports(); - port->set_host_port(hostPort); - port->set_guest_port(guestPort); - port->set_protocol(tcp ? 
vmservice::TCP : vmservice::UDP); - port->set_host_address(hostAddress); - - google::protobuf::Empty response; - return Call(c_serviceName, c_modifyResourceMethod, request, &response); -} -CATCH_RETURN() - -HRESULT TtrpcClient::CreateVm(const VmConfig& config) -try -{ - WSL_LOG( - "TtrpcCreateVm", - TraceLoggingValue(config.KernelPath.c_str(), "KernelPath"), - TraceLoggingValue(config.MemoryMb, "MemoryMb"), - TraceLoggingValue(config.ProcessorCount, "ProcessorCount"), - TraceLoggingValue(static_cast(config.ScsiDisks.size()), "DiskCount"), - TraceLoggingValue(config.HvSocketPath.c_str(), "HvSocketPath")); - - vmservice::CreateVMRequest request; - auto* vmConfig = request.mutable_config(); - - vmConfig->mutable_memory_config()->set_memory_mb(config.MemoryMb); - vmConfig->mutable_processor_config()->set_processor_count(config.ProcessorCount); - - for (const auto& disk : config.ScsiDisks) - { - auto* scsiDisk = vmConfig->mutable_devices_config()->add_scsi_disks(); - scsiDisk->set_controller(disk.Controller); - scsiDisk->set_lun(disk.Lun); - scsiDisk->set_host_path(disk.HostPath); - scsiDisk->set_type(vmservice::SCSI_DISK_TYPE_VHDX); - scsiDisk->set_read_only(disk.ReadOnly); - } - - if (config.Nic.has_value()) - { - auto* nicConfig = vmConfig->mutable_devices_config()->add_nic_config(); - nicConfig->set_nic_id(config.Nic->NicId); - nicConfig->set_mac_address(config.Nic->MacAddress); - // Empty CIDR uses OpenVMM's default subnet. 
- nicConfig->mutable_consomme()->set_cidr(""); - } - - if (!config.VirtioConsolePath.empty()) - { - auto* virtioConsole = vmConfig->mutable_devices_config()->mutable_virtio_console(); - virtioConsole->set_socket_path(config.VirtioConsolePath); - virtioConsole->set_connect(true); - } - - for (const auto& serialPort : config.SerialPorts) - { - auto* portConfig = vmConfig->mutable_serial_config()->add_ports(); - portConfig->set_port(serialPort.Port); - portConfig->set_socket_path(serialPort.SocketPath); - portConfig->set_connect(true); - } - - auto* directBoot = vmConfig->mutable_direct_boot(); - directBoot->set_kernel_path(config.KernelPath); - directBoot->set_initrd_path(config.InitrdPath); - directBoot->set_kernel_cmdline(config.KernelCmdLine); - - vmConfig->mutable_hvsocket_config()->set_path(config.HvSocketPath); - - google::protobuf::Empty response; - return Call(c_serviceName, c_createVmMethod, request, &response); -} -CATCH_RETURN() - -HRESULT TtrpcClient::ResumeVm() -try -{ - WSL_LOG("TtrpcResumeVm"); - - google::protobuf::Empty request; - google::protobuf::Empty response; - return Call(c_serviceName, c_resumeVmMethod, request, &response); -} -CATCH_RETURN() - -HRESULT TtrpcClient::WaitVm() -try -{ - WSL_LOG("TtrpcWaitVm"); - - google::protobuf::Empty request; - google::protobuf::Empty response; - return Call(c_serviceName, c_waitVmMethod, request, &response); -} -CATCH_RETURN() - -HRESULT TtrpcClient::TeardownVm() -try -{ - WSL_LOG("TtrpcTeardownVm"); - - google::protobuf::Empty request; - google::protobuf::Empty response; - return Call(c_serviceName, c_teardownVmMethod, request, &response); -} -CATCH_RETURN() - -HRESULT TtrpcClient::QuitVm() -try -{ - WSL_LOG("TtrpcQuitVm"); - - google::protobuf::Empty request; - std::vector requestPayload; - RETURN_IF_FAILED(SerializeMessage(request, requestPayload)); - - std::lock_guard lock(m_lock); - RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_NOT_CONNECTED), m_socket == INVALID_SOCKET); - - auto ttrpcPayload = 
TtrpcEnvelopeCodec::EncodeRequestEnvelope(c_serviceName, c_quitVmMethod, requestPayload); - - detail::TtrpcMessageHeader header{}; - TtrpcEnvelopeCodec::WriteBigEndian32(header.Length, static_cast(ttrpcPayload.size())); - TtrpcEnvelopeCodec::WriteBigEndian32(header.StreamId, m_nextStreamId); - header.MessageType = TtrpcEnvelopeCodec::c_messageTypeRequest; - header.Flags = 0; - m_nextStreamId += 2; - - RETURN_IF_FAILED(SendAll(&header, sizeof(header))); - RETURN_IF_FAILED(SendAll(ttrpcPayload.data(), ttrpcPayload.size())); - - return S_OK; -} -CATCH_RETURN() - -HRESULT TtrpcClient::SendRequest( - const std::string& service, - const std::string& method, - const std::vector& payload, - std::vector* responsePayload) -{ - std::lock_guard lock(m_lock); - - RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_NOT_CONNECTED), m_socket == INVALID_SOCKET); - - auto ttrpcPayload = TtrpcEnvelopeCodec::EncodeRequestEnvelope(service, method, payload); - - detail::TtrpcMessageHeader header{}; - TtrpcEnvelopeCodec::WriteBigEndian32(header.Length, static_cast(ttrpcPayload.size())); - TtrpcEnvelopeCodec::WriteBigEndian32(header.StreamId, m_nextStreamId); - header.MessageType = TtrpcEnvelopeCodec::c_messageTypeRequest; - header.Flags = 0; - - uint32_t expectedStreamId = m_nextStreamId; - m_nextStreamId += 2; - - RETURN_IF_FAILED(SendAll(&header, sizeof(header))); - RETURN_IF_FAILED(SendAll(ttrpcPayload.data(), ttrpcPayload.size())); - - detail::TtrpcMessageHeader responseHeader{}; - RETURN_IF_FAILED(RecvAll(&responseHeader, sizeof(responseHeader))); - - RETURN_HR_IF_MSG( - E_FAIL, - responseHeader.MessageType != TtrpcEnvelopeCodec::c_messageTypeResponse, - "ttrpc: expected response (type 2), got type %d", - responseHeader.MessageType); - - uint32_t responseStreamId = TtrpcEnvelopeCodec::ReadBigEndian32(responseHeader.StreamId); - RETURN_HR_IF_MSG( - E_FAIL, - responseStreamId != expectedStreamId, - "ttrpc: stream ID mismatch: expected %u, got %u", - expectedStreamId, - responseStreamId); - - 
uint32_t responseLength = TtrpcEnvelopeCodec::ReadBigEndian32(responseHeader.Length); - RETURN_HR_IF_MSG( - E_FAIL, - responseLength > TtrpcEnvelopeCodec::c_maxMessageBytes, - "ttrpc: response too large: %u bytes", - responseLength); - - std::vector responseData(responseLength); - if (responseLength > 0) - { - RETURN_IF_FAILED(RecvAll(responseData.data(), responseLength)); - } - - TtrpcEnvelopeCodec::DecodedResponse decodedResponse; - RETURN_IF_FAILED(TtrpcEnvelopeCodec::DecodeResponseEnvelope(responseData, decodedResponse)); - - if (decodedResponse.HasStatus && decodedResponse.StatusCode != 0) - { - WSL_LOG( - "TtrpcRequestFailed", - TraceLoggingValue(decodedResponse.StatusCode, "GrpcCode"), - TraceLoggingValue(decodedResponse.StatusMessage.c_str(), "Message")); - - return GrpcStatusToHresult(decodedResponse.StatusCode); - } - - if (responsePayload != nullptr) - { - *responsePayload = std::move(decodedResponse.Payload); - } - - return S_OK; -} - -HRESULT TtrpcClient::SendAll(const void* data, size_t length) -{ - const auto* ptr = static_cast(data); - size_t remaining = length; - - while (remaining > 0) - { - int sent = ::send(m_socket, ptr, static_cast(remaining), 0); - RETURN_LAST_ERROR_IF(sent == SOCKET_ERROR); - RETURN_HR_IF(E_FAIL, sent == 0); - ptr += sent; - remaining -= sent; - } - - return S_OK; -} - -HRESULT TtrpcClient::RecvAll(void* data, size_t length) -{ - auto* ptr = static_cast(data); - size_t remaining = length; - - while (remaining > 0) - { - int received = ::recv(m_socket, ptr, static_cast(remaining), 0); - RETURN_LAST_ERROR_IF(received == SOCKET_ERROR); - RETURN_HR_IF_MSG(E_FAIL, received == 0, "ttrpc: connection closed unexpectedly"); - ptr += received; - remaining -= received; - } - - return S_OK; -} diff --git a/src/windows/service/exe/TtrpcClient.h b/src/windows/service/exe/TtrpcClient.h deleted file mode 100644 index 62e0e7c623..0000000000 --- a/src/windows/service/exe/TtrpcClient.h +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright (C) 
Microsoft Corporation. All rights reserved. - -/*++ - -Module Name: - - TtrpcClient.h - -Abstract: - - Minimal ttrpc client for communicating with OpenVMM's vmservice. - - Implements the ttrpc wire protocol and uses protobuf payloads generated - from VMService.proto for vmservice RPCs. - - The ttrpc protocol uses a 10-byte header (big-endian length, stream ID, - type, flags) followed by a protobuf-encoded Request/Response payload. - - See: openvmm/support/mesh/mesh_rpc/src/message.rs for the wire format. - ---*/ - -#pragma once - -#include -#include -#include -#include -#include - -namespace google::protobuf { -class Message; -} - -namespace wsl::windows::service::wslc { - -class TtrpcClient -{ -public: - TtrpcClient(); - ~TtrpcClient(); - - NON_COPYABLE(TtrpcClient); - NON_MOVABLE(TtrpcClient); - - // Default timeout for Connect() retries and socket I/O operations. - static constexpr DWORD c_defaultTimeoutMs = 30000; - - // Connect to the ttrpc Unix domain socket at the given path. - // Retries with backoff until the connection succeeds or timeoutMs expires. - HRESULT Connect(const std::wstring& socketPath, DWORD timeoutMs = c_defaultTimeoutMs); - - // Disconnect from the ttrpc server. - void Disconnect(); - - // Returns true if the client is connected. - bool IsConnected() const; - - // Generic ttrpc call using protobuf request/response messages. - // If response is null, any successful payload is ignored. - HRESULT Call(const std::string& service, - const std::string& method, - const google::protobuf::Message& request, - google::protobuf::Message* response = nullptr); - - // SCSI disk hot-add: ModifyResource(ADD, SCSIDisk { controller, lun, hostPath, VHDX, readOnly }). - HRESULT AttachScsiDisk(uint32_t controller, uint32_t lun, - const std::string& hostPath, bool readOnly); - - // SCSI disk hot-remove: ModifyResource(REMOVE, SCSIDisk { controller, lun }). 
- HRESULT DetachScsiDisk(uint32_t controller, uint32_t lun); - - // VirtioFS share hot-add: ModifyResource(ADD, VirtioFSConfig { tag, root_path, read_only }). - HRESULT AddShare(const std::string& tag, const std::string& rootPath, bool readOnly); - - // VirtioFS share hot-remove: ModifyResource(REMOVE, VirtioFSConfig { tag }). - HRESULT RemoveShare(const std::string& tag); - - // Consomme port bind: ModifyResource(UPDATE, NicConfig { consomme { ports } }). - // Creates a host-side listener in OpenVMM's consomme NAT and forwards to the guest port. - // family is AF_INET or AF_INET6. - HRESULT BindPort(uint16_t hostPort, uint16_t guestPort, bool tcp, int family); - - // Consomme port unbind: ModifyResource(REMOVE, NicConfig { consomme { ports } }). - HRESULT UnbindPort(uint16_t hostPort, uint16_t guestPort, bool tcp, int family); - - // VM configuration for CreateVm. - struct VmConfig - { - std::string KernelPath; - std::string InitrdPath; - std::string KernelCmdLine; - uint64_t MemoryMb{}; - uint32_t ProcessorCount{}; - std::string HvSocketPath; - - struct ScsiDisk - { - uint32_t Controller; - uint32_t Lun; - std::string HostPath; - bool ReadOnly; - }; - std::vector ScsiDisks; - - // NIC with consomme backend (self-contained NAT + DHCP). - struct ConsommeNic - { - std::string NicId; // GUID string - std::string MacAddress; // "12-34-56-78-9A-BC" - }; - std::optional Nic; - - // Serial ports (16550 UART COM ports, e.g. earlycon on port 0). - struct SerialPort - { - uint32_t Port; // 0-3 (COM1-COM4) - std::string SocketPath; // Named pipe or Unix domain socket path - }; - std::vector SerialPorts; - - // Virtio console device (/dev/hvc0 in the guest). - // Path to a named pipe or Unix domain socket for the console backend. - std::string VirtioConsolePath; - }; - - // CreateVM: configure and create the VM (left in paused state). - HRESULT CreateVm(const VmConfig& config); - - // ResumeVM: start a paused VM. 
- HRESULT ResumeVm(); - - // WaitVM: blocks until the VM halts or is torn down. - HRESULT WaitVm(); - - // TeardownVM: release all VM resources and unblock the WaitVM call. - HRESULT TeardownVm(); - - // Quit: tear down the VM and exit the openvmm process. - // Fire-and-forget — sends the request without waiting for a response. - HRESULT QuitVm(); - -private: - // ttrpc service and method names (from vmservice.proto). - static constexpr char c_serviceName[] = "vmservice.VM"; - static constexpr char c_createVmMethod[] = "CreateVM"; - static constexpr char c_resumeVmMethod[] = "ResumeVM"; - static constexpr char c_waitVmMethod[] = "WaitVM"; - static constexpr char c_teardownVmMethod[] = "TeardownVM"; - static constexpr char c_quitVmMethod[] = "Quit"; - static constexpr char c_modifyResourceMethod[] = "ModifyResource"; - - // Send a ttrpc request payload and wait for the response payload. - // Returns S_OK on success, or an HRESULT error if the server returned a - // status error or there was a communication failure. - HRESULT SendRequest(const std::string& service, - const std::string& method, - const std::vector& payload, - std::vector* responsePayload); - - // Socket send/recv timeout to prevent indefinite blocking. - static constexpr DWORD c_socketTimeoutMs = 30000; - - // Low-level socket I/O. - HRESULT SendAll(const void* data, size_t length); - HRESULT RecvAll(void* data, size_t length); - - std::recursive_mutex m_lock; - SOCKET m_socket = INVALID_SOCKET; - uint32_t m_nextStreamId = 1; // ttrpc client stream IDs must be odd -}; - -} // namespace wsl::windows::service::wslc diff --git a/src/windows/service/exe/TtrpcEnvelopeCodec.cpp b/src/windows/service/exe/TtrpcEnvelopeCodec.cpp deleted file mode 100644 index 3a14a19872..0000000000 --- a/src/windows/service/exe/TtrpcEnvelopeCodec.cpp +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. 
- -#include "precomp.h" - -#include "TtrpcEnvelopeCodec.h" - -using namespace wsl::windows::service::wslc::detail; - -void TtrpcEnvelopeCodec::WriteBigEndian32(uint8_t* dest, uint32_t value) -{ - dest[0] = static_cast((value >> 24) & 0xFF); - dest[1] = static_cast((value >> 16) & 0xFF); - dest[2] = static_cast((value >> 8) & 0xFF); - dest[3] = static_cast(value & 0xFF); -} - -uint32_t TtrpcEnvelopeCodec::ReadBigEndian32(const uint8_t* src) -{ - return (static_cast(src[0]) << 24) | - (static_cast(src[1]) << 16) | - (static_cast(src[2]) << 8) | - static_cast(src[3]); -} - -HRESULT TtrpcEnvelopeCodec::ReadVarint(const uint8_t*& ptr, const uint8_t* end, uint64_t& value) -{ - value = 0; - int shift = 0; - constexpr int c_maxVarintBytes = 10; // 64-bit varint is at most 10 bytes - int bytesRead = 0; - - while (ptr < end) - { - RETURN_HR_IF_MSG(E_FAIL, bytesRead >= c_maxVarintBytes, "ttrpc: varint too large"); - - const uint8_t byte = *ptr++; - bytesRead++; - - value |= static_cast(byte & 0x7F) << shift; - - if ((byte & 0x80) == 0) - { - return S_OK; - } - - shift += 7; - } - - return E_FAIL; -} - -void TtrpcEnvelopeCodec::EncodeVarint(uint64_t value, std::vector& buf) -{ - do - { - uint8_t byte = static_cast(value & 0x7F); - value >>= 7; - if (value != 0) - { - byte |= 0x80; - } - buf.push_back(byte); - } while (value != 0); -} - -void TtrpcEnvelopeCodec::EncodeTag(uint32_t field, uint32_t wireType, std::vector& buf) -{ - EncodeVarint((static_cast(field) << 3) | wireType, buf); -} - -void TtrpcEnvelopeCodec::EncodeStringField(uint32_t field, const std::string& value, std::vector& buf) -{ - if (value.empty()) - { - return; - } - - EncodeTag(field, c_wireTypeLengthDelimited, buf); - EncodeVarint(value.size(), buf); - buf.insert(buf.end(), value.begin(), value.end()); -} - -void TtrpcEnvelopeCodec::EncodeBytesField(uint32_t field, const std::vector& value, std::vector& buf) -{ - if (value.empty()) - { - return; - } - - EncodeTag(field, c_wireTypeLengthDelimited, buf); - 
EncodeVarint(value.size(), buf); - buf.insert(buf.end(), value.begin(), value.end()); -} - -std::vector TtrpcEnvelopeCodec::EncodeRequestEnvelope( - const std::string& service, - const std::string& method, - const std::vector& payload) -{ - std::vector buf; - EncodeStringField(1, service, buf); - EncodeStringField(2, method, buf); - EncodeBytesField(3, payload, buf); - return buf; -} - -HRESULT TtrpcEnvelopeCodec::DecodeResponseEnvelope( - const std::vector& responseData, - DecodedResponse& decoded) -{ - decoded = {}; - - const uint8_t* ptr = responseData.data(); - const uint8_t* end = ptr + responseData.size(); - - while (ptr < end) - { - uint64_t tag = 0; - RETURN_IF_FAILED(ReadVarint(ptr, end, tag)); - - const uint32_t fieldNumber = static_cast(tag >> 3); - const uint32_t wireType = static_cast(tag & 0x7); - - if (wireType == c_wireTypeVarint) - { - uint64_t ignored = 0; - RETURN_IF_FAILED(ReadVarint(ptr, end, ignored)); - continue; - } - - if (wireType != c_wireTypeLengthDelimited) - { - return E_FAIL; - } - - uint64_t length = 0; - RETURN_IF_FAILED(ReadVarint(ptr, end, length)); - - RETURN_HR_IF_MSG(E_FAIL, length > static_cast(end - ptr), "ttrpc: response truncated"); - - if (fieldNumber == 1) - { - decoded.HasStatus = true; - const uint8_t* statusEnd = ptr + length; - - while (ptr < statusEnd) - { - uint64_t innerTag = 0; - RETURN_IF_FAILED(ReadVarint(ptr, statusEnd, innerTag)); - - const uint32_t innerField = static_cast(innerTag >> 3); - const uint32_t innerWire = static_cast(innerTag & 0x7); - - if (innerWire == c_wireTypeVarint) - { - uint64_t value = 0; - RETURN_IF_FAILED(ReadVarint(ptr, statusEnd, value)); - - if (innerField == 1) - { - decoded.StatusCode = static_cast(value); - } - } - else if (innerWire == c_wireTypeLengthDelimited) - { - uint64_t innerLength = 0; - RETURN_IF_FAILED(ReadVarint(ptr, statusEnd, innerLength)); - - RETURN_HR_IF_MSG(E_FAIL, innerLength > static_cast(statusEnd - ptr), "ttrpc: status payload truncated"); - - if (innerField 
== 2) - { - decoded.StatusMessage.assign(reinterpret_cast(ptr), static_cast(innerLength)); - } - - ptr += innerLength; - } - else - { - ptr = statusEnd; - } - } - - continue; - } - - if (fieldNumber == 2) - { - RETURN_HR_IF_MSG( - E_FAIL, - length > c_maxMessageBytes, - "ttrpc: response payload too large: %llu bytes", - static_cast(length)); - - decoded.Payload.assign(ptr, ptr + length); - ptr += length; - continue; - } - - ptr += length; - } - - return S_OK; -} diff --git a/src/windows/service/exe/TtrpcEnvelopeCodec.h b/src/windows/service/exe/TtrpcEnvelopeCodec.h deleted file mode 100644 index 7439d13004..0000000000 --- a/src/windows/service/exe/TtrpcEnvelopeCodec.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. - -#pragma once - -#include -#include -#include - -namespace wsl::windows::service::wslc::detail { - -#pragma pack(push, 1) -struct TtrpcMessageHeader -{ - uint8_t Length[4]; // big-endian uint32 - uint8_t StreamId[4]; // big-endian uint32 - uint8_t MessageType; - uint8_t Flags; -}; -#pragma pack(pop) - -static_assert(sizeof(TtrpcMessageHeader) == 10, "ttrpc MessageHeader must be 10 bytes"); - -class TtrpcEnvelopeCodec -{ -public: - static constexpr uint8_t c_messageTypeRequest = 1; - static constexpr uint8_t c_messageTypeResponse = 2; - static constexpr uint32_t c_maxMessageBytes = 4 * 1024 * 1024; - - struct DecodedResponse - { - bool HasStatus = false; - int32_t StatusCode = 0; - std::string StatusMessage; - std::vector Payload; - }; - - static void WriteBigEndian32(uint8_t* dest, uint32_t value); - static uint32_t ReadBigEndian32(const uint8_t* src); - - static std::vector EncodeRequestEnvelope(const std::string& service, - const std::string& method, - const std::vector& payload); - - static HRESULT DecodeResponseEnvelope(const std::vector& responseData, - DecodedResponse& decoded); - -private: - static constexpr uint32_t c_wireTypeVarint = 0; - static constexpr uint32_t c_wireTypeLengthDelimited = 2; - - 
static HRESULT ReadVarint(const uint8_t*& ptr, const uint8_t* end, uint64_t& value); - static void EncodeVarint(uint64_t value, std::vector& buf); - static void EncodeTag(uint32_t field, uint32_t wireType, std::vector& buf); - static void EncodeStringField(uint32_t field, const std::string& value, std::vector& buf); - static void EncodeBytesField(uint32_t field, const std::vector& value, std::vector& buf); -}; - -} // namespace wsl::windows::service::wslc::detail diff --git a/src/windows/service/inc/windowsdefs.idl b/src/windows/service/inc/windowsdefs.idl index 3cf561921a..8ca9e6f4b4 100644 --- a/src/windows/service/inc/windowsdefs.idl +++ b/src/windows/service/inc/windowsdefs.idl @@ -204,4 +204,55 @@ cpp_quote("namespace p9fs") cpp_quote("{") cpp_quote("class DECLSPEC_UUID(\"AFC7B6DE-D642-41B7-AB0C-A01019510741\") Plan9FileSystem;") cpp_quote("}") -cpp_quote("#endif") \ No newline at end of file +cpp_quote("#endif") + +// +// IWslVmService - COM interface for managing an OpenVMM VM via ttrpc. +// +// Implemented by wslvmservice.dll (from hvlite). The Rust side handles +// ttrpc transport and protobuf serialization internally, so no protobuf +// dependency is needed in the C++ consumer. +// +[ + uuid(A3B2C1D0-4E5F-6A7B-8C9D-0E1F2A3B4C5D), + pointer_default(unique), + object +] +interface IWslVmService : IUnknown +{ + HRESULT Connect([in] LPCWSTR socketPath, [in] UINT32 timeoutMs); + HRESULT Disconnect(); + + // VM configuration (call before CreateVm). 
+ HRESULT SetKernelPath([in] LPCWSTR path); + HRESULT SetInitrdPath([in] LPCWSTR path); + HRESULT SetKernelCmdLine([in] LPCWSTR cmdline); + HRESULT SetMemoryMb([in] UINT64 memoryMb); + HRESULT SetProcessorCount([in] UINT32 count); + HRESULT SetHvSocketPath([in] LPCWSTR path); + HRESULT AddBootDisk([in] UINT32 controller, [in] UINT32 lun, [in] LPCWSTR hostPath, [in] BOOL readOnly); + HRESULT SetConsommeNic([in] LPCWSTR nicId, [in] LPCWSTR macAddress); + HRESULT AddSerialPort([in] UINT32 port, [in] LPCWSTR socketPath); + HRESULT SetVirtioConsolePath([in] LPCWSTR path); + + // VM lifecycle. + HRESULT CreateVm(); + HRESULT ResumeVm(); + HRESULT WaitVm([out, system_handle(sh_event)] HANDLE* completionEvent); + HRESULT TeardownVm(); + + // Runtime device management. + HRESULT AttachScsiDisk([in] UINT32 controller, [in] UINT32 lun, [in] LPCWSTR hostPath, [in] BOOL readOnly); + HRESULT DetachScsiDisk([in] UINT32 controller, [in] UINT32 lun); + + // Runtime directory sharing (plan9/virtiofs). 'tag' uniquely identifies the share. + HRESULT AddShare([in] LPCWSTR tag, [in] LPCWSTR hostPath, [in] BOOL readOnly); + HRESULT RemoveShare([in] LPCWSTR tag); + + // Runtime host<->guest port forwarding. 'tcp' selects TCP (TRUE) or UDP (FALSE); + // 'family' is the address family (AF_INET / AF_INET6). 
+ HRESULT BindPort([in] UINT16 hostPort, [in] UINT16 guestPort, [in] BOOL tcp, [in] INT32 family); + HRESULT UnbindPort([in] UINT16 hostPort, [in] UINT16 guestPort, [in] BOOL tcp, [in] INT32 family); +}; + +cpp_quote("DEFINE_GUID(CLSID_WslVmService, 0xE7A1B2C3, 0xD4E5, 0xF6A7, 0xB8, 0xC9, 0xD0, 0xE1, 0xF2, 0xA3, 0xB4, 0xC5);") \ No newline at end of file diff --git a/src/windows/wslc/CMakeLists.txt b/src/windows/wslc/CMakeLists.txt index 283d72ff24..d4a4ba6236 100644 --- a/src/windows/wslc/CMakeLists.txt +++ b/src/windows/wslc/CMakeLists.txt @@ -11,10 +11,6 @@ file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS ${SOURCE_PATTERNS}) add_library(wslclib OBJECT ${SOURCES} ${HEADERS}) target_include_directories(wslclib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${WSLC_SUBDIR_PATHS}) -if (INCLUDE_OPENVMM) - wsl_add_openvmm_proto(wslclib) -endif() - target_link_libraries(wslclib ${COMMON_LINK_LIBRARIES} yaml-cpp From 0e92b20611bb57a22a5f3ccc0f799bb1f6a53b86 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 10 Jun 2026 14:24:07 -0700 Subject: [PATCH 09/10] Avoid using COM --- msipackage/package.wix.in | 10 - .../service/exe/OpenVmmVirtualMachine.cpp | 2 +- .../service/exe/OpenVmmVirtualMachine.h | 5 +- src/windows/service/exe/WslVmServiceClient.h | 245 ++++++++++++++++++ src/windows/service/inc/windowsdefs.idl | 51 ---- 5 files changed, 249 insertions(+), 64 deletions(-) create mode 100644 src/windows/service/exe/WslVmServiceClient.h diff --git a/msipackage/package.wix.in b/msipackage/package.wix.in index 9f0e92bf73..a684afbf58 100644 --- a/msipackage/package.wix.in +++ b/msipackage/package.wix.in @@ -244,16 +244,6 @@ - - - - - - - - - - diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.cpp b/src/windows/service/exe/OpenVmmVirtualMachine.cpp index 4074c290cb..02d9b3f25c 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.cpp +++ b/src/windows/service/exe/OpenVmmVirtualMachine.cpp @@ -385,7 +385,7 @@ void OpenVmmVirtualMachine::LaunchOpenVmm() // Monitor the openvmm 
process and signal m_vmExitEvent on exit. m_processWatchThread = std::thread(&OpenVmmVirtualMachine::WatchProcessExit, this); - m_vmService = wil::CoCreateInstance(CLSID_WslVmService, CLSCTX_INPROC_SERVER); + m_vmService = std::make_unique(); THROW_IF_FAILED_MSG( m_vmService->Connect(m_ttrpcSocketPath.c_str(), 30000), "Failed to connect to OpenVMM ttrpc server"); diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.h b/src/windows/service/exe/OpenVmmVirtualMachine.h index a0200b1678..139ae88f3c 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.h +++ b/src/windows/service/exe/OpenVmmVirtualMachine.h @@ -21,6 +21,7 @@ Module Name: #include "wslc.h" #include "INetworkingEngine.h" #include "Dmesg.h" +#include "WslVmServiceClient.h" #include #include #include @@ -155,9 +156,9 @@ class OpenVmmVirtualMachine // Networking engine (ConsommeNetworking for the OpenVMM backend). std::unique_ptr m_networkEngine; - // ttrpc client COM object for runtime VM management (disk hot-add/remove etc.). + // ttrpc client (wslvmservice.dll) for runtime VM management (disk hot-add/remove etc.). std::filesystem::path m_ttrpcSocketPath; - wil::com_ptr m_vmService; + std::unique_ptr m_vmService; // Termination callback to invoke when the VM exits. wil::com_ptr m_terminationCallback; diff --git a/src/windows/service/exe/WslVmServiceClient.h b/src/windows/service/exe/WslVmServiceClient.h new file mode 100644 index 0000000000..187d8de0b5 --- /dev/null +++ b/src/windows/service/exe/WslVmServiceClient.h @@ -0,0 +1,245 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. + +/*++ + +Module Name: + + WslVmServiceClient.h + +Abstract: + + Thin C++ wrapper around wslvmservice.dll. + + wslvmservice.dll (built from the hvlite repo) exports a plain C ABI that + drives an OpenVMM VM over ttrpc. This wrapper loads the DLL by full path, + resolves the exports, and exposes them as member functions that return + HRESULT, so the rest of WSLC can call it like a normal object. 
+ + The Rust side owns an opaque handle; this class creates one on construction + and destroys it on teardown. + +--*/ + +#pragma once + +#include +#include +#include +#include +#include + +namespace wsl::windows::service::wslc { + +class WslVmServiceClient +{ +public: + WslVmServiceClient() + { + // Load the DLL by full path from the directory of the running module to + // avoid DLL search-order hijacking. wslvmservice.dll is installed + // alongside wslservice.exe. + wchar_t modulePath[MAX_PATH]{}; + THROW_LAST_ERROR_IF(GetModuleFileNameW(nullptr, modulePath, ARRAYSIZE(modulePath)) == 0); + + auto dllPath = std::filesystem::path(modulePath).parent_path() / L"wslvmservice.dll"; + m_module.reset(LoadLibraryExW(dllPath.c_str(), nullptr, LOAD_WITH_ALTERED_SEARCH_PATH)); + THROW_LAST_ERROR_IF_MSG(!m_module, "Failed to load wslvmservice.dll from %ls", dllPath.c_str()); + + m_create = GetProc("WslVmServiceCreate"); + m_destroy = GetProc("WslVmServiceDestroy"); + m_connect = GetProc("WslVmServiceConnect"); + m_disconnect = GetProc("WslVmServiceDisconnect"); + m_setKernelPath = GetProc("WslVmServiceSetKernelPath"); + m_setInitrdPath = GetProc("WslVmServiceSetInitrdPath"); + m_setKernelCmdLine = GetProc("WslVmServiceSetKernelCmdLine"); + m_setMemoryMb = GetProc("WslVmServiceSetMemoryMb"); + m_setProcessorCount = GetProc("WslVmServiceSetProcessorCount"); + m_setHvSocketPath = GetProc("WslVmServiceSetHvSocketPath"); + m_addBootDisk = GetProc("WslVmServiceAddBootDisk"); + m_setConsommeNic = GetProc("WslVmServiceSetConsommeNic"); + m_addSerialPort = GetProc("WslVmServiceAddSerialPort"); + m_setVirtioConsolePath = GetProc("WslVmServiceSetVirtioConsolePath"); + m_createVm = GetProc("WslVmServiceCreateVm"); + m_resumeVm = GetProc("WslVmServiceResumeVm"); + m_teardownVm = GetProc("WslVmServiceTeardownVm"); + m_attachScsiDisk = GetProc("WslVmServiceAttachScsiDisk"); + m_detachScsiDisk = GetProc("WslVmServiceDetachScsiDisk"); + m_addShare = GetProc("WslVmServiceAddShare"); + 
m_removeShare = GetProc("WslVmServiceRemoveShare"); + m_bindPort = GetProc("WslVmServiceBindPort"); + m_unbindPort = GetProc("WslVmServiceUnbindPort"); + + THROW_IF_FAILED_MSG(m_create(&m_handle), "Failed to create wslvmservice client"); + } + + ~WslVmServiceClient() + { + if (m_handle != nullptr) + { + m_destroy(m_handle); + m_handle = nullptr; + } + } + + WslVmServiceClient(const WslVmServiceClient&) = delete; + WslVmServiceClient& operator=(const WslVmServiceClient&) = delete; + WslVmServiceClient(WslVmServiceClient&&) = delete; + WslVmServiceClient& operator=(WslVmServiceClient&&) = delete; + + HRESULT Connect(LPCWSTR socketPath, UINT32 timeoutMs) const + { + return m_connect(m_handle, socketPath, timeoutMs); + } + + HRESULT Disconnect() const + { + return m_disconnect(m_handle); + } + + HRESULT SetKernelPath(LPCWSTR path) const + { + return m_setKernelPath(m_handle, path); + } + + HRESULT SetInitrdPath(LPCWSTR path) const + { + return m_setInitrdPath(m_handle, path); + } + + HRESULT SetKernelCmdLine(LPCWSTR cmdline) const + { + return m_setKernelCmdLine(m_handle, cmdline); + } + + HRESULT SetMemoryMb(UINT64 memoryMb) const + { + return m_setMemoryMb(m_handle, memoryMb); + } + + HRESULT SetProcessorCount(UINT32 count) const + { + return m_setProcessorCount(m_handle, count); + } + + HRESULT SetHvSocketPath(LPCWSTR path) const + { + return m_setHvSocketPath(m_handle, path); + } + + HRESULT AddBootDisk(UINT32 controller, UINT32 lun, LPCWSTR hostPath, BOOL readOnly) const + { + return m_addBootDisk(m_handle, controller, lun, hostPath, readOnly); + } + + HRESULT SetConsommeNic(LPCWSTR nicId, LPCWSTR macAddress) const + { + return m_setConsommeNic(m_handle, nicId, macAddress); + } + + HRESULT AddSerialPort(UINT32 port, LPCWSTR socketPath) const + { + return m_addSerialPort(m_handle, port, socketPath); + } + + HRESULT SetVirtioConsolePath(LPCWSTR path) const + { + return m_setVirtioConsolePath(m_handle, path); + } + + HRESULT CreateVm() const + { + return 
m_createVm(m_handle); + } + + HRESULT ResumeVm() const + { + return m_resumeVm(m_handle); + } + + HRESULT TeardownVm() const + { + return m_teardownVm(m_handle); + } + + HRESULT AttachScsiDisk(UINT32 controller, UINT32 lun, LPCWSTR hostPath, BOOL readOnly) const + { + return m_attachScsiDisk(m_handle, controller, lun, hostPath, readOnly); + } + + HRESULT DetachScsiDisk(UINT32 controller, UINT32 lun) const + { + return m_detachScsiDisk(m_handle, controller, lun); + } + + HRESULT AddShare(LPCWSTR tag, LPCWSTR hostPath, BOOL readOnly) const + { + return m_addShare(m_handle, tag, hostPath, readOnly); + } + + HRESULT RemoveShare(LPCWSTR tag) const + { + return m_removeShare(m_handle, tag); + } + + HRESULT BindPort(UINT16 hostPort, UINT16 guestPort, BOOL tcp, INT32 family) const + { + return m_bindPort(m_handle, hostPort, guestPort, tcp, family); + } + + HRESULT UnbindPort(UINT16 hostPort, UINT16 guestPort, BOOL tcp, INT32 family) const + { + return m_unbindPort(m_handle, hostPort, guestPort, tcp, family); + } + +private: + using WslVmServiceCreateFn = HRESULT(__cdecl*)(void** handle); + using WslVmServiceDestroyFn = void(__cdecl*)(void* handle); + using HandleOnlyFn = HRESULT(__cdecl*)(void* handle); + using ConnectFn = HRESULT(__cdecl*)(void* handle, LPCWSTR socketPath, UINT32 timeoutMs); + using StringFn = HRESULT(__cdecl*)(void* handle, LPCWSTR value); + using TwoStringFn = HRESULT(__cdecl*)(void* handle, LPCWSTR a, LPCWSTR b); + using SetMemoryMbFn = HRESULT(__cdecl*)(void* handle, UINT64 value); + using SetU32Fn = HRESULT(__cdecl*)(void* handle, UINT32 value); + using DiskFn = HRESULT(__cdecl*)(void* handle, UINT32 controller, UINT32 lun, LPCWSTR hostPath, BOOL readOnly); + using DetachScsiDiskFn = HRESULT(__cdecl*)(void* handle, UINT32 controller, UINT32 lun); + using SerialPortFn = HRESULT(__cdecl*)(void* handle, UINT32 port, LPCWSTR socketPath); + using AddShareFn = HRESULT(__cdecl*)(void* handle, LPCWSTR tag, LPCWSTR hostPath, BOOL readOnly); + using PortFn = 
HRESULT(__cdecl*)(void* handle, UINT16 hostPort, UINT16 guestPort, BOOL tcp, INT32 family); + + template + TFn GetProc(const char* name) const + { + auto proc = reinterpret_cast(GetProcAddress(m_module.get(), name)); + THROW_LAST_ERROR_IF_MSG(proc == nullptr, "wslvmservice.dll missing export: %hs", name); + return proc; + } + + wil::unique_hmodule m_module; + void* m_handle = nullptr; + + WslVmServiceCreateFn m_create = nullptr; + WslVmServiceDestroyFn m_destroy = nullptr; + ConnectFn m_connect = nullptr; + HandleOnlyFn m_disconnect = nullptr; + StringFn m_setKernelPath = nullptr; + StringFn m_setInitrdPath = nullptr; + StringFn m_setKernelCmdLine = nullptr; + SetMemoryMbFn m_setMemoryMb = nullptr; + SetU32Fn m_setProcessorCount = nullptr; + StringFn m_setHvSocketPath = nullptr; + DiskFn m_addBootDisk = nullptr; + TwoStringFn m_setConsommeNic = nullptr; + SerialPortFn m_addSerialPort = nullptr; + StringFn m_setVirtioConsolePath = nullptr; + HandleOnlyFn m_createVm = nullptr; + HandleOnlyFn m_resumeVm = nullptr; + HandleOnlyFn m_teardownVm = nullptr; + DiskFn m_attachScsiDisk = nullptr; + DetachScsiDiskFn m_detachScsiDisk = nullptr; + AddShareFn m_addShare = nullptr; + StringFn m_removeShare = nullptr; + PortFn m_bindPort = nullptr; + PortFn m_unbindPort = nullptr; +}; + +} // namespace wsl::windows::service::wslc diff --git a/src/windows/service/inc/windowsdefs.idl b/src/windows/service/inc/windowsdefs.idl index 8ca9e6f4b4..9b3fcc80d4 100644 --- a/src/windows/service/inc/windowsdefs.idl +++ b/src/windows/service/inc/windowsdefs.idl @@ -205,54 +205,3 @@ cpp_quote("{") cpp_quote("class DECLSPEC_UUID(\"AFC7B6DE-D642-41B7-AB0C-A01019510741\") Plan9FileSystem;") cpp_quote("}") cpp_quote("#endif") - -// -// IWslVmService - COM interface for managing an OpenVMM VM via ttrpc. -// -// Implemented by wslvmservice.dll (from hvlite). The Rust side handles -// ttrpc transport and protobuf serialization internally, so no protobuf -// dependency is needed in the C++ consumer. 
-// -[ - uuid(A3B2C1D0-4E5F-6A7B-8C9D-0E1F2A3B4C5D), - pointer_default(unique), - object -] -interface IWslVmService : IUnknown -{ - HRESULT Connect([in] LPCWSTR socketPath, [in] UINT32 timeoutMs); - HRESULT Disconnect(); - - // VM configuration (call before CreateVm). - HRESULT SetKernelPath([in] LPCWSTR path); - HRESULT SetInitrdPath([in] LPCWSTR path); - HRESULT SetKernelCmdLine([in] LPCWSTR cmdline); - HRESULT SetMemoryMb([in] UINT64 memoryMb); - HRESULT SetProcessorCount([in] UINT32 count); - HRESULT SetHvSocketPath([in] LPCWSTR path); - HRESULT AddBootDisk([in] UINT32 controller, [in] UINT32 lun, [in] LPCWSTR hostPath, [in] BOOL readOnly); - HRESULT SetConsommeNic([in] LPCWSTR nicId, [in] LPCWSTR macAddress); - HRESULT AddSerialPort([in] UINT32 port, [in] LPCWSTR socketPath); - HRESULT SetVirtioConsolePath([in] LPCWSTR path); - - // VM lifecycle. - HRESULT CreateVm(); - HRESULT ResumeVm(); - HRESULT WaitVm([out, system_handle(sh_event)] HANDLE* completionEvent); - HRESULT TeardownVm(); - - // Runtime device management. - HRESULT AttachScsiDisk([in] UINT32 controller, [in] UINT32 lun, [in] LPCWSTR hostPath, [in] BOOL readOnly); - HRESULT DetachScsiDisk([in] UINT32 controller, [in] UINT32 lun); - - // Runtime directory sharing (plan9/virtiofs). 'tag' uniquely identifies the share. - HRESULT AddShare([in] LPCWSTR tag, [in] LPCWSTR hostPath, [in] BOOL readOnly); - HRESULT RemoveShare([in] LPCWSTR tag); - - // Runtime host<->guest port forwarding. 'tcp' selects TCP (TRUE) or UDP (FALSE); - // 'family' is the address family (AF_INET / AF_INET6). 
- HRESULT BindPort([in] UINT16 hostPort, [in] UINT16 guestPort, [in] BOOL tcp, [in] INT32 family); - HRESULT UnbindPort([in] UINT16 hostPort, [in] UINT16 guestPort, [in] BOOL tcp, [in] INT32 family); -}; - -cpp_quote("DEFINE_GUID(CLSID_WslVmService, 0xE7A1B2C3, 0xD4E5, 0xF6A7, 0xB8, 0xC9, 0xD0, 0xE1, 0xF2, 0xA3, 0xB4, 0xC5);") \ No newline at end of file From 360b4353993ec24386e2af0287901bc483db7527 Mon Sep 17 00:00:00 2001 From: Daman Mulye Date: Wed, 10 Jun 2026 15:36:14 -0700 Subject: [PATCH 10/10] Remove unnecessary changes --- src/shared/inc/SocketChannel.h | 244 +----------------- .../service/exe/OpenVmmVirtualMachine.cpp | 9 - src/windows/service/inc/wslc.idl | 5 +- .../wslcsession/WSLCVirtualMachine.cpp | 23 +- 4 files changed, 7 insertions(+), 274 deletions(-) diff --git a/src/shared/inc/SocketChannel.h b/src/shared/inc/SocketChannel.h index 4a66f61d15..b202217367 100644 --- a/src/shared/inc/SocketChannel.h +++ b/src/shared/inc/SocketChannel.h @@ -110,7 +110,6 @@ class SocketChannel #ifdef WIN32 m_exitEvents = std::move(other.m_exitEvents); - m_blockingIO = other.m_blockingIO; #endif m_ignore_sequence = other.m_ignore_sequence; m_sent_non_transaction_messages = other.m_sent_non_transaction_messages; @@ -127,8 +126,7 @@ class SocketChannel #ifdef WIN32 SocketChannel(TSocket&& socket, std::string&& name, std::vector&& exitEvents) : - m_socket(std::move(socket)), m_exitEvents(std::move(exitEvents)), m_name(std::move(name)), - m_blockingIO(IsNonOverlappedSocket(m_socket.get())) + m_socket(std::move(socket)), m_exitEvents(std::move(exitEvents)), m_name(std::move(name)) { } @@ -144,11 +142,6 @@ class SocketChannel return m_exitEvents; } - bool IsBlockingIO() const - { - return m_blockingIO; - } - #endif template @@ -190,17 +183,10 @@ class SocketChannel #ifdef WIN32 - if (m_blockingIO) - { - BlockingSend(span, timeout); - } - else - { - auto io = CreateIO(); - io.AddHandle(std::make_unique(m_socket.get(), span)); + auto io = CreateIO(); + 
io.AddHandle(std::make_unique(m_socket.get(), span)); - io.Run(TimeoutToMilliseconds(timeout)); - } + io.Run(TimeoutToMilliseconds(timeout)); WSL_LOG( "SentMessage", @@ -645,27 +631,8 @@ class SocketChannel return std::chrono::milliseconds{timeout}; } - // Returns true if the socket does not support overlapped I/O (e.g. AF_UNIX on Windows). - static bool IsNonOverlappedSocket(SOCKET s) - { - WSAPROTOCOL_INFOW protocolInfo{}; - int infoLen = sizeof(protocolInfo); - if (getsockopt(s, SOL_SOCKET, SO_PROTOCOL_INFOW, - reinterpret_cast(&protocolInfo), &infoLen) == 0) - { - return protocolInfo.iAddressFamily == AF_UNIX; - } - - return false; - } - gsl::span ReceiveImpl(TTimeout timeout) { - if (m_blockingIO) - { - return BlockingReceive(timeout); - } - auto io = CreateIO(); gsl::span message; @@ -678,208 +645,6 @@ class SocketChannel return message; } - // Blocking send for sockets that do not support overlapped I/O (e.g. AF_UNIX on Windows). - // Handles WSAEWOULDBLOCK gracefully since the socket may be in non-blocking mode - // if a concurrent BlockingReceive is active (WSAEventSelect sets non-blocking). - void BlockingSend(gsl::span span, TTimeout timeout) - { - size_t offset = 0; - while (offset < span.size()) - { - int sent = ::send( - m_socket.get(), - reinterpret_cast(span.data() + offset), - static_cast(span.size() - offset), - 0); - - if (sent == SOCKET_ERROR) - { - if (WSAGetLastError() == WSAEWOULDBLOCK) - { - // Socket is temporarily in non-blocking mode. Wait for writability. 
- fd_set writeSet; - FD_ZERO(&writeSet); - FD_SET(m_socket.get(), &writeSet); - timeval tv{1, 0}; - if (select(0, nullptr, &writeSet, nullptr, &tv) > 0) - { - continue; - } - - THROW_HR_MSG(E_FAIL, "BlockingSend timed out waiting for writability on channel: %hs", m_name.c_str()); - } - - THROW_LAST_ERROR_MSG("BlockingSend failed on channel: %hs", m_name.c_str()); - } - - THROW_HR_IF_MSG(E_UNEXPECTED, sent == 0, "Socket closed during BlockingSend on channel: %hs", m_name.c_str()); - - offset += sent; - } - } - - // Blocking receive for sockets that do not support overlapped I/O (e.g. AF_UNIX on Windows). - // Uses WSAEventSelect + WaitForMultipleObjects to integrate exit event cancellation - // with non-blocking recv() on the data socket. - gsl::span BlockingReceive(TTimeout timeout) - { - // Set up a WSA event to detect when data is available or the socket closes. - wil::unique_event socketEvent(wil::EventOptions::ManualReset); - THROW_LAST_ERROR_IF( - WSAEventSelect(m_socket.get(), socketEvent.get(), FD_READ | FD_CLOSE) == SOCKET_ERROR); - - // Restore the socket to blocking mode on exit (WSAEventSelect sets non-blocking). - auto restoreBlocking = wil::scope_exit([&] { - WSAEventSelect(m_socket.get(), nullptr, 0); - u_long nonBlocking = 0; - ioctlsocket(m_socket.get(), FIONBIO, &nonBlocking); - }); - - // Build wait handle array: exit events first, then socket event. - std::vector waitHandles; - waitHandles.reserve(m_exitEvents.size() + 1); - for (const auto event : m_exitEvents) - { - waitHandles.push_back(event); - } - const DWORD socketEventIndex = static_cast(waitHandles.size()); - waitHandles.push_back(socketEvent.get()); - - auto messageSize = sizeof(MESSAGE_HEADER); - if (m_buffer.size() < messageSize) - { - m_buffer.resize(messageSize); - } - - size_t bytesNeeded = sizeof(MESSAGE_HEADER); - size_t currentOffset = 0; - bool readingHeader = true; - - for (;;) - { - // Try to read data that may already be buffered before waiting. 
- // This is critical because WSAEventSelect only signals on state - // transitions; data that arrived before the call would be missed - // if we waited first. - while (bytesNeeded > 0) - { - int received = ::recv( - m_socket.get(), - reinterpret_cast(m_buffer.data() + currentOffset), - static_cast(bytesNeeded), - 0); - - if (received == SOCKET_ERROR) - { - auto error = WSAGetLastError(); - if (error == WSAEWOULDBLOCK) - { - break; // No more data available, need to wait. - } - - if (error == WSAECONNABORTED || error == WSAECONNRESET) - { - return {}; // Clean close. - } - - THROW_WIN32(error); - } - - if (received == 0) - { - // Socket closed. - THROW_HR_IF_MSG( - E_UNEXPECTED, - currentOffset > 0, - "Socket closed mid-message during BlockingReceive. Offset: %zu, Remaining: %zu, channel: %hs", - currentOffset, - bytesNeeded, - m_name.c_str()); - - return {}; - } - - currentOffset += received; - bytesNeeded -= received; - } - - // When the header is fully read, parse the message size and set up for the body. - if (readingHeader && bytesNeeded == 0) - { - messageSize = gslhelpers::get_struct( - gsl::make_span(m_buffer.data(), sizeof(MESSAGE_HEADER)))->MessageSize; - - THROW_HR_IF_MSG(E_UNEXPECTED, messageSize < sizeof(MESSAGE_HEADER), - "Unexpected message size: %zu on channel: %hs", messageSize, m_name.c_str()); - THROW_HR_IF_MSG(E_UNEXPECTED, messageSize > 4 * 1024 * 1024, - "Message size too large: %zu on channel: %hs", messageSize, m_name.c_str()); - - if (messageSize > sizeof(MESSAGE_HEADER)) - { - if (m_buffer.size() < messageSize) - { - m_buffer.resize(messageSize); - } - - readingHeader = false; - bytesNeeded = messageSize - sizeof(MESSAGE_HEADER); - continue; // Try to read body data immediately. - } - } - - // Message complete. - if (bytesNeeded == 0) - { - break; - } - - // No data available (WSAEWOULDBLOCK). Wait for data or exit event. 
- // WSAEnumNetworkEvents atomically resets the event — never call - // ResetEvent manually, as that can clear a legitimate signal. - DWORD waitTimeout = (timeout == INFINITE) ? INFINITE : timeout; - auto waitResult = WaitForMultipleObjects( - static_cast(waitHandles.size()), waitHandles.data(), FALSE, waitTimeout); - - if (waitResult == WAIT_TIMEOUT) - { - THROW_HR_MSG( - HCS_E_CONNECTION_TIMEOUT, - "BlockingReceive timeout on channel: %hs", - m_name.c_str()); - } - - // An exit event was signaled. - if (waitResult >= WAIT_OBJECT_0 && waitResult < WAIT_OBJECT_0 + socketEventIndex) - { - THROW_HR_MSG(E_ABORT, "Exit event signaled during BlockingReceive on channel: %hs", m_name.c_str()); - } - - THROW_HR_IF(E_UNEXPECTED, waitResult < WAIT_OBJECT_0 || waitResult > WAIT_OBJECT_0 + socketEventIndex); - - // Reset the event and check what happened. - WSANETWORKEVENTS netEvents{}; - THROW_LAST_ERROR_IF( - WSAEnumNetworkEvents(m_socket.get(), socketEvent.get(), &netEvents) != 0); - - if (netEvents.lNetworkEvents & FD_CLOSE) - { - THROW_HR_IF_MSG( - E_UNEXPECTED, - currentOffset > 0, - "Socket closed mid-message during BlockingReceive. Offset: %zu, Remaining: %zu, channel: %hs", - currentOffset, - bytesNeeded, - m_name.c_str()); - - return {}; - } - - // FD_READ signaled — loop back to recv. - } - - return gsl::make_span(m_buffer.data(), messageSize); - } - #else gsl::span ReceiveImpl(TTimeout timeout) @@ -958,7 +723,6 @@ class SocketChannel #ifdef WIN32 std::vector m_exitEvents; - bool m_blockingIO = false; #endif uint32_t m_sent_non_transaction_messages = 0; diff --git a/src/windows/service/exe/OpenVmmVirtualMachine.cpp b/src/windows/service/exe/OpenVmmVirtualMachine.cpp index 02d9b3f25c..239743b018 100644 --- a/src/windows/service/exe/OpenVmmVirtualMachine.cpp +++ b/src/windows/service/exe/OpenVmmVirtualMachine.cpp @@ -510,9 +510,6 @@ try m_initListenSocket = INVALID_SOCKET; DeleteFileW(m_initListenPath.c_str()); - // Return the AF_UNIX socket directly. 
Callers that wrap it in a - // SocketChannel should use blocking I/O mode since AF_UNIX on Windows - // does not support overlapped I/O. *Socket = reinterpret_cast(unixSock); return S_OK; } @@ -691,9 +688,6 @@ try TraceLoggingValue(Port, "Port"), TraceLoggingValue(response, "Response")); - // Return the AF_UNIX socket directly. Callers that wrap it in a - // SocketChannel should use blocking I/O mode since AF_UNIX on Windows - // does not support overlapped I/O. closeUnix.release(); *Socket = reinterpret_cast(unixSock); return S_OK; @@ -719,9 +713,6 @@ try SOCKET unixSock = accept(m_crashDumpListenSocket, nullptr, nullptr); THROW_LAST_ERROR_IF(unixSock == INVALID_SOCKET); - // Return the AF_UNIX socket directly. Callers that wrap it in a - // SocketChannel should use blocking I/O mode since AF_UNIX on Windows - // does not support overlapped I/O. *Socket = reinterpret_cast(unixSock); return S_OK; } diff --git a/src/windows/service/inc/wslc.idl b/src/windows/service/inc/wslc.idl index bc48a1146c..a2d0d13398 100644 --- a/src/windows/service/inc/wslc.idl +++ b/src/windows/service/inc/wslc.idl @@ -484,9 +484,8 @@ interface IWSLCVirtualMachine : IUnknown HRESULT GetTerminationEvent([out, system_handle(sh_event)] HANDLE* Event); // Connects to a vsock port in the VM. Returns a socket handle. - // For HCS VMs, this uses hvsocket (supports overlapped I/O). - // For OpenVMM VMs, this uses the hybrid_vsock Unix domain socket bridge. - // AF_UNIX sockets do not support overlapped I/O + // For HCS VMs, this uses hvsocket. + // For OpenVMM VMs, this uses the hybrid_vsock Unix domain socket bridge. HRESULT ConnectToVsockPort([in] ULONG Port, [out, system_handle(sh_socket)] HANDLE* Socket); // Accepts a crash dump connection from the VM.
Blocks until a crash dump diff --git a/src/windows/wslcsession/WSLCVirtualMachine.cpp b/src/windows/wslcsession/WSLCVirtualMachine.cpp index e4cf1afcc7..eac28e0a06 100644 --- a/src/windows/wslcsession/WSLCVirtualMachine.cpp +++ b/src/windows/wslcsession/WSLCVirtualMachine.cpp @@ -1370,28 +1370,7 @@ void WSLCVirtualMachine::CollectCrashDumps() transaction.SendResultMessage(0); - // InterruptableRelay uses overlapped I/O which is not supported on AF_UNIX. - // Use a simple blocking recv-to-write loop for sockets in blocking I/O mode. - if (channel.IsBlockingIO()) - { - constexpr size_t bufferSize = 65536; - std::vector buf(bufferSize); - for (;;) - { - int bytesRead = ::recv(channel.Socket(), buf.data(), static_cast(buf.size()), 0); - if (bytesRead <= 0) - { - break; - } - - DWORD bytesWritten{}; - THROW_IF_WIN32_BOOL_FALSE(WriteFile(file.get(), buf.data(), static_cast(bytesRead), &bytesWritten, nullptr)); - } - } - else - { - relay::InterruptableRelay(reinterpret_cast(channel.Socket()), file.get(), nullptr); - } + relay::InterruptableRelay(reinterpret_cast(channel.Socket()), file.get(), nullptr); } CATCH_LOG() }