diff --git a/msipackage/package.wix.in b/msipackage/package.wix.in index 88dc3ea171..18b79acead 100644 --- a/msipackage/package.wix.in +++ b/msipackage/package.wix.in @@ -306,14 +306,6 @@ - - - - - - - - diff --git a/src/windows/WslcSDK/CMakeLists.txt b/src/windows/WslcSDK/CMakeLists.txt index 8c026430f6..45b1a0dd25 100644 --- a/src/windows/WslcSDK/CMakeLists.txt +++ b/src/windows/WslcSDK/CMakeLists.txt @@ -1,7 +1,6 @@ set(SOURCES IOCallback.cpp ProgressCallback.cpp - TerminationCallback.cpp CrashDumpCallback.cpp wslcsdk.cpp WslcsdkPrivate.cpp @@ -10,7 +9,6 @@ set(HEADERS Defaults.h IOCallback.h ProgressCallback.h - TerminationCallback.h CrashDumpCallback.h wslcsdk.h WslcsdkPrivate.h diff --git a/src/windows/WslcSDK/TerminationCallback.cpp b/src/windows/WslcSDK/TerminationCallback.cpp deleted file mode 100644 index bd98a59529..0000000000 --- a/src/windows/WslcSDK/TerminationCallback.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/*++ - -Copyright (c) Microsoft. All rights reserved. - -Module Name: - - TerminationCallback.cpp - -Abstract: - - Implementation of a type that implements ITerminationCallback. - ---*/ -#include "precomp.h" -#include "TerminationCallback.h" - -namespace { -WslcSessionTerminationReason ConvertReason(WSLCVirtualMachineTerminationReason Reason) -{ - switch (Reason) - { - case WSLCVirtualMachineTerminationReasonShutdown: - return WSLC_SESSION_TERMINATION_REASON_SHUTDOWN; - case WSLCVirtualMachineTerminationReasonCrashed: - return WSLC_SESSION_TERMINATION_REASON_CRASHED; - default: - return WSLC_SESSION_TERMINATION_REASON_UNKNOWN; - } -} -} // namespace - -TerminationCallback::TerminationCallback(WslcSessionTerminationCallback callback, PVOID context) : - m_callback(callback), m_context(context) -{ -} - -// TODO: Details from the runtime are dropped; should the SDK callback function be updated to include the reasons string? -HRESULT STDMETHODCALLTYPE TerminationCallback::OnTermination(WSLCVirtualMachineTerminationReason Reason, LPCWSTR) -{ - if (m_callback) - { - m_callback(ConvertReason(Reason), m_context); - } - - return S_OK; -} - -winrt::com_ptr TerminationCallback::CreateIf(const WslcSessionOptionsInternal* options) -{ - if (options->terminationCallback) - { - return winrt::make_self(options->terminationCallback, options->terminationCallbackContext); - } - else - { - return nullptr; - } -} diff --git a/src/windows/WslcSDK/TerminationCallback.h b/src/windows/WslcSDK/TerminationCallback.h deleted file mode 100644 index afd8542bf3..0000000000 --- a/src/windows/WslcSDK/TerminationCallback.h +++ /dev/null @@ -1,32 +0,0 @@ -/*++ - -Copyright (c) Microsoft. All rights reserved. - -Module Name: - - TerminationCallback.h - -Abstract: - - Header for a type that implements ITerminationCallback. - ---*/ -#pragma once -#include "wslc.h" -#include "wslcsdkprivate.h" -#include - -struct TerminationCallback : public winrt::implements -{ - TerminationCallback(WslcSessionTerminationCallback callback, PVOID context); - - // ITerminationCallback - HRESULT STDMETHODCALLTYPE OnTermination(WSLCVirtualMachineTerminationReason Reason, LPCWSTR Details) override; - - // Creates a TerminationCallback if the options provides a callback. - static winrt::com_ptr CreateIf(const WslcSessionOptionsInternal* options); - -private: - WslcSessionTerminationCallback m_callback = nullptr; - PVOID m_context = nullptr; -}; diff --git a/src/windows/WslcSDK/WslcsdkPrivate.h b/src/windows/WslcSDK/WslcsdkPrivate.h index 5000363abd..d487285665 100644 --- a/src/windows/WslcSDK/WslcsdkPrivate.h +++ b/src/windows/WslcSDK/WslcsdkPrivate.h @@ -33,8 +33,6 @@ typedef struct WslcSessionOptionsInternal WslcVhdRequirements vhdRequirements; WslcSessionFeatureFlags featureFlags; - WslcSessionTerminationCallback terminationCallback; - PVOID terminationCallbackContext; } WslcSessionOptionsInternal; static_assert(sizeof(WslcSessionOptionsInternal) == WSLC_SESSION_OPTIONS_SIZE, "WSLC_SESSION_OPTIONS_INTERNAL size mismatch"); @@ -107,7 +105,6 @@ const WslcContainerOptionsInternal* GetInternalType(const WslcContainerSettings* struct WslcSessionImpl { wil::com_ptr session; - wil::com_ptr terminationCallback; }; WslcSessionImpl* GetInternalType(WslcSession handle); diff --git a/src/windows/WslcSDK/winrt/Session.cpp b/src/windows/WslcSDK/winrt/Session.cpp index 5a29437c99..46bb7afdcc 100644 --- a/src/windows/WslcSDK/winrt/Session.cpp +++ b/src/windows/WslcSDK/winrt/Session.cpp @@ -52,12 +52,16 @@ void Session::Start() throw winrt::hresult_illegal_method_call(L"Session has already been started"); } - winrt::check_hresult(WslcSetSessionSettingsTerminationCallback(GetStructPointer(m_settings), TerminatedCallback, /* context */ this)); - wil::unique_cotaskmem_string errorMessage; auto hr = WslcCreateSession(GetStructPointer(m_settings), m_session.put(), errorMessage.put()); THROW_MSG_IF_FAILED(hr, errorMessage); m_settings = nullptr; + + winrt::check_hresult(WslcGetSessionTerminationEvent(m_session.get(), m_terminationEvent.put())); + + m_terminationWait.reset(CreateThreadpoolWait(&Session::OnTerminated, this, nullptr)); + THROW_LAST_ERROR_IF_NULL(m_terminationWait); + SetThreadpoolWait(m_terminationWait.get(), m_terminationEvent.get(), nullptr); } void Session::EnsureStarted() const @@ -300,11 +304,15 @@ WslcSession Session::ToHandle() return m_session.get(); } -void CALLBACK Session::TerminatedCallback(_In_ WslcSessionTerminationReason reason, _In_opt_ PVOID context) noexcept +void CALLBACK Session::OnTerminated(PTP_CALLBACK_INSTANCE /* instance */, PVOID context, PTP_WAIT /* wait */, TP_WAIT_RESULT /* waitResult */) noexcept { try { auto session = static_cast(context); + + WslcSessionTerminationReason reason = WSLC_SESSION_TERMINATION_REASON_UNKNOWN; + LOG_IF_FAILED(WslcGetSessionTerminationReason(session->m_session.get(), &reason)); + session->m_terminatedEvent(static_cast(reason)); } CATCH_LOG(); diff --git a/src/windows/WslcSDK/winrt/Session.h b/src/windows/WslcSDK/winrt/Session.h index 954591262c..f6e8f7bd14 100644 --- a/src/windows/WslcSDK/winrt/Session.h +++ b/src/windows/WslcSDK/winrt/Session.h @@ -45,12 +45,15 @@ struct Session : SessionT void EnsureStarted() const; winrt::Microsoft::WSL::Containers::SessionSettings m_settings; // Only kept until Start() is called - static void CALLBACK TerminatedCallback(_In_ WslcSessionTerminationReason reason, _In_opt_ PVOID context) noexcept; + // Threadpool callback that raises the Terminated event once the session's termination handle is signaled. + static void CALLBACK OnTerminated(PTP_CALLBACK_INSTANCE instance, PVOID context, PTP_WAIT wait, TP_WAIT_RESULT waitResult) noexcept; - // Releasing the session handle may trigger the termination callback. - // Keep these two in this order so that the session handle is released before the termination event is destructed. winrt::event m_terminatedEvent; wil::unique_any m_session{nullptr}; + + // Bridges the one-off termination event surfaced by the SDK to the WinRT Terminated event. + wil::unique_handle m_terminationEvent; + wil::unique_threadpool_wait m_terminationWait; }; } // namespace winrt::Microsoft::WSL::Containers::implementation namespace winrt::Microsoft::WSL::Containers::factory_implementation { diff --git a/src/windows/WslcSDK/wslcsdk.cpp b/src/windows/WslcSDK/wslcsdk.cpp index 25dcdf1426..f83ff24fc0 100644 --- a/src/windows/WslcSDK/wslcsdk.cpp +++ b/src/windows/WslcSDK/wslcsdk.cpp @@ -17,7 +17,6 @@ Module Name: #include "WslcsdkPrivate.h" #include "Defaults.h" #include "ProgressCallback.h" -#include "TerminationCallback.h" #include "CrashDumpCallback.h" #include "Localization.h" #include "WslInstall.h" @@ -435,12 +434,6 @@ try runtimeSettings.MemoryMb = internalType->memoryMb; runtimeSettings.BootTimeoutMs = internalType->timeoutMS; runtimeSettings.NetworkingMode = WSLCNetworkingModeVirtioProxy; - auto terminationCallback = TerminationCallback::CreateIf(internalType); - if (terminationCallback) - { - result->terminationCallback.attach(terminationCallback.as().detach()); - runtimeSettings.TerminationCallback = terminationCallback.get(); - } runtimeSettings.FeatureFlags = ConvertFlags(internalType->featureFlags); WI_SetFlag(runtimeSettings.FeatureFlags, WslcFeatureFlagsVirtioFs); WI_SetFlag(runtimeSettings.FeatureFlags, WslcFeatureFlagsDnsTunneling); @@ -587,15 +580,39 @@ try } CATCH_RETURN(); -STDAPI WslcSetSessionSettingsTerminationCallback( - _In_ WslcSessionSettings* sessionSettings, _In_opt_ WslcSessionTerminationCallback terminationCallback, _In_opt_ PVOID terminationContext) +STDAPI WslcGetSessionTerminationEvent(_In_ WslcSession session, _Out_ HANDLE* terminationEvent) try { - auto internalType = CheckAndGetInternalType(sessionSettings); - RETURN_HR_IF(E_INVALIDARG, terminationCallback == nullptr && terminationContext != nullptr); + RETURN_HR_IF_NULL(E_POINTER, terminationEvent); + *terminationEvent = nullptr; + + auto internalType = CheckAndGetInternalType(session); + RETURN_HR_IF_NULL(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), internalType->session); + + RETURN_HR(internalType->session->GetTerminationEvent(terminationEvent)); +} +CATCH_RETURN(); + +STDAPI WslcGetSessionTerminationReason(_In_ WslcSession session, _Out_ WslcSessionTerminationReason* reason) +try +{ + static_assert( + WSLC_SESSION_TERMINATION_REASON_UNKNOWN == WSLCVirtualMachineTerminationReasonUnknown && + WSLC_SESSION_TERMINATION_REASON_SHUTDOWN == WSLCVirtualMachineTerminationReasonShutdown && + WSLC_SESSION_TERMINATION_REASON_CRASHED == WSLCVirtualMachineTerminationReasonCrashed, + "Termination reason enum values mismatch."); + + RETURN_HR_IF_NULL(E_POINTER, reason); + *reason = WSLC_SESSION_TERMINATION_REASON_UNKNOWN; + + auto internalType = CheckAndGetInternalType(session); + RETURN_HR_IF_NULL(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), internalType->session); + + WSLCVirtualMachineTerminationReason runtimeReason = WSLCVirtualMachineTerminationReasonUnknown; + wil::unique_cotaskmem_string details; + RETURN_IF_FAILED(internalType->session->GetTerminationReason(&runtimeReason, &details)); - internalType->terminationCallback = terminationCallback; - internalType->terminationCallbackContext = terminationContext; + *reason = static_cast(runtimeReason); return S_OK; } @@ -649,10 +666,7 @@ try { auto internalType = CheckAndGetInternalTypeUniquePointer(session); - // Drop the session before the termination callback, in case session destruction triggers - // the termination callback. internalType->session.reset(); - internalType->terminationCallback.reset(); return S_OK; } diff --git a/src/windows/WslcSDK/wslcsdk.def b/src/windows/WslcSDK/wslcsdk.def index a96b4cf008..cc68fd6b1d 100644 --- a/src/windows/WslcSDK/wslcsdk.def +++ b/src/windows/WslcSDK/wslcsdk.def @@ -16,7 +16,8 @@ WslcReleaseContainer WslcReleaseProcess WslcSetSessionSettingsFeatureFlags -WslcSetSessionSettingsTerminationCallback +WslcGetSessionTerminationEvent +WslcGetSessionTerminationReason WslcSetSessionSettingsCpuCount WslcSetSessionSettingsMemory WslcSetSessionSettingsTimeout diff --git a/src/windows/WslcSDK/wslcsdk.h b/src/windows/WslcSDK/wslcsdk.h index 8db0757c3d..954e06be2a 100644 --- a/src/windows/WslcSDK/wslcsdk.h +++ b/src/windows/WslcSDK/wslcsdk.h @@ -42,7 +42,7 @@ EXTERN_C_START #define WSLC_E_REGISTRY_BLOCKED_BY_POLICY MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, WSLC_E_BASE + 13) /* 0x8004060D */ // Session values -#define WSLC_SESSION_OPTIONS_SIZE 88 +#define WSLC_SESSION_OPTIONS_SIZE 72 #define WSLC_SESSION_OPTIONS_ALIGNMENT 8 typedef struct WslcSessionSettings @@ -123,8 +123,6 @@ typedef enum WslcSessionTerminationReason WSLC_SESSION_TERMINATION_REASON_CRASHED = 2, } WslcSessionTerminationReason; -typedef __callback void(CALLBACK* WslcSessionTerminationCallback)(_In_ WslcSessionTerminationReason reason, _In_opt_ PVOID context); - typedef struct WslcSessionCrashDumpInfo { _Field_z_ PCWSTR dumpPath; @@ -153,9 +151,8 @@ STDAPI WslcSetSessionSettingsVhd(_In_ WslcSessionSettings* sessionSettings, _In_ STDAPI WslcSetSessionSettingsFeatureFlags(_In_ WslcSessionSettings* sessionSettings, _In_ WslcSessionFeatureFlags flags); -// Pass in Null for callback to clear the termination callback -STDAPI WslcSetSessionSettingsTerminationCallback( - _In_ WslcSessionSettings* sessionSettings, _In_opt_ WslcSessionTerminationCallback terminationCallback, _In_opt_ PVOID terminationContext); +STDAPI WslcGetSessionTerminationEvent(_In_ WslcSession session, _Out_ HANDLE* terminationEvent); +STDAPI WslcGetSessionTerminationReason(_In_ WslcSession session, _Out_ WslcSessionTerminationReason* reason); STDAPI WslcTerminateSession(_In_ WslcSession session); STDAPI WslcReleaseSession(_In_ WslcSession session); diff --git a/src/windows/service/exe/HcsVirtualMachine.cpp b/src/windows/service/exe/HcsVirtualMachine.cpp index 6778079396..84679878bb 100644 --- a/src/windows/service/exe/HcsVirtualMachine.cpp +++ b/src/windows/service/exe/HcsVirtualMachine.cpp @@ -285,12 +285,6 @@ HcsVirtualMachine::HcsVirtualMachine(_In_ const WSLCSessionSettings* Settings) m_guestDeviceManager = std::make_shared<::GuestDeviceManager>(m_vmIdString, m_vmId); } - // Configure termination callback - if (Settings->TerminationCallback) - { - m_terminationCallback = Settings->TerminationCallback; - } - hcs::RegisterCallback(m_computeSystem.get(), &HcsVirtualMachine::OnVmExitCallback, this); // Create a listening socket for mini_init to connect to once the VM is running. @@ -319,7 +313,11 @@ HcsVirtualMachine::HcsVirtualMachine(_In_ const WSLCSessionSettings* Settings) HcsVirtualMachine::~HcsVirtualMachine() { - std::lock_guard lock(m_lock); + // N.B. Do not hold m_lock here. OnExit() acquires m_lock to cache the termination reason + // before signaling m_vmExitEvent, and closing the compute system below drains any in-flight + // HCS exit/crash callbacks. Holding m_lock across the exit-event wait and HcsCloseComputeSystem + // would deadlock against an in-flight OnExit() that is blocked acquiring m_lock. By the time the + // compute system is closed no further callbacks can run, so the remaining teardown is unguarded. // Wait up to 5 seconds for the VM to terminate gracefully. bool forceTerminate = false; @@ -692,8 +690,6 @@ CATCH_LOG() void HcsVirtualMachine::OnExit(const HCS_EVENT* Event) { - m_vmExitEvent.SetEvent(); - const auto exitStatus = wsl::shared::FromJson(Event->EventData); auto reason = WSLCVirtualMachineTerminationReasonUnknown; @@ -715,12 +711,29 @@ void HcsVirtualMachine::OnExit(const HCS_EVENT* Event) } } - if (m_terminationCallback) + // Cache the termination reason and details before signaling the exit event. { - LOG_IF_FAILED(m_terminationCallback->OnTermination(reason, Event->EventData)); + std::lock_guard lock(m_lock); + m_terminationReason = reason; + m_terminationDetails = Event->EventData; } + + m_vmExitEvent.SetEvent(); } +HRESULT HcsVirtualMachine::GetTerminationReason(_Out_ WSLCVirtualMachineTerminationReason* Reason, _Out_ LPWSTR* Details) +try +{ + RETURN_HR_IF(E_POINTER, Reason == nullptr || Details == nullptr); + + std::lock_guard lock(m_lock); + *Reason = m_terminationReason; + *Details = wil::make_cotaskmem_string(m_terminationDetails.c_str()).release(); + + return S_OK; +} +CATCH_RETURN() + void HcsVirtualMachine::OnCrash(const HCS_EVENT* Event) { if (m_crashLogCaptured.load() && m_vmSavedStateCaptured.load()) @@ -863,7 +876,6 @@ WSLCVirtualMachineFactory::WSLCVirtualMachineFactory(_In_ const WSLCSessionSetti m_dmesgOutput.reset(wslutil::DuplicateHandle(wslutil::FromCOMInputHandle(Settings->DmesgOutput), GENERIC_WRITE | SYNCHRONIZE)); } - m_terminationCallback = Settings->TerminationCallback; m_maximumStorageSizeMb = Settings->MaximumStorageSizeMb; m_cpuCount = Settings->CpuCount; m_memoryMb = Settings->MemoryMb; @@ -883,7 +895,6 @@ WSLCSessionSettings WSLCVirtualMachineFactory::BuildSettings() settings.MemoryMb = m_memoryMb; settings.BootTimeoutMs = m_bootTimeoutMs; settings.NetworkingMode = m_networkingMode; - settings.TerminationCallback = m_terminationCallback.get(); settings.FeatureFlags = m_featureFlags; settings.StorageFlags = m_storageFlags; settings.RootVhdOverride = m_rootVhdOverride ? m_rootVhdOverride->c_str() : nullptr; diff --git a/src/windows/service/exe/HcsVirtualMachine.h b/src/windows/service/exe/HcsVirtualMachine.h index 3ad6a9eea5..2a862d1435 100644 --- a/src/windows/service/exe/HcsVirtualMachine.h +++ b/src/windows/service/exe/HcsVirtualMachine.h @@ -48,6 +48,7 @@ class HcsVirtualMachine IFACEMETHOD(RemoveShare)(_In_ REFGUID ShareId) override; IFACEMETHOD(ApplyGuestCapabilities)(_In_ const WSLCGuestCapabilities* Capabilities) override; IFACEMETHOD(GetTerminationEvent)(_Out_ HANDLE* Event) override; + IFACEMETHOD(GetTerminationReason)(_Out_ WSLCVirtualMachineTerminationReason* Reason, _Out_ LPWSTR* Details) override; private: struct DiskInfo @@ -104,7 +105,9 @@ class HcsVirtualMachine std::atomic m_vmSavedStateCaptured = false; std::atomic m_crashLogCaptured = false; - wil::com_ptr m_terminationCallback; + // Termination reason and details, cached when the VM exits (see OnExit). Guarded by m_lock. + WSLCVirtualMachineTerminationReason m_terminationReason{WSLCVirtualMachineTerminationReasonUnknown}; + std::wstring m_terminationDetails; }; // @@ -136,8 +139,6 @@ class WSLCVirtualMachineFactory // subsequent VMs reuse this duplicate, whose writes simply fail if the sink is gone. wil::unique_handle m_dmesgOutput; - wil::com_ptr m_terminationCallback; - ULONGLONG m_maximumStorageSizeMb{}; ULONG m_cpuCount{}; ULONG m_memoryMb{}; diff --git a/src/windows/service/exe/WSLCSessionManager.cpp b/src/windows/service/exe/WSLCSessionManager.cpp index 339f3ce6ca..e08883608d 100644 --- a/src/windows/service/exe/WSLCSessionManager.cpp +++ b/src/windows/service/exe/WSLCSessionManager.cpp @@ -270,8 +270,7 @@ void WSLCSessionManagerImpl::CreateSession( g_pluginManager, sessionId, creatorPid, std::wstring(resolvedDisplayName), wil::shared_handle(sharedToken), std::vector(storedSid)); // Create the VM factory in the SYSTEM service (privileged). The per-user session - // uses it to create the VM. Funneling VM creation through a factory lets the session - // own when VMs are created, rather than having one handed to it up front. + // uses it to create VMs on demand and recreate them after idle-termination. auto vmFactory = Microsoft::WRL::Make(Settings); // Launch per-user COM server factory and add it to a fresh per-session job object for crash cleanup. diff --git a/src/windows/service/inc/wslc.idl b/src/windows/service/inc/wslc.idl index 92e135d4f4..9bdf612e0c 100644 --- a/src/windows/service/inc/wslc.idl +++ b/src/windows/service/inc/wslc.idl @@ -99,16 +99,6 @@ typedef enum _WSLCSignal WSLCSignalSIGSYS = 31 } WSLCSignal; -[ - uuid(7BC4E198-6531-4FA6-ADE2-5EF3D2A04DFE), - pointer_default(unique), - object -] -interface ITerminationCallback : IUnknown -{ - HRESULT OnTermination(WSLCVirtualMachineTerminationReason Reason, LPCWSTR Details); -}; - [ uuid(8C5A7B14-9D26-4FAE-AB31-7E5BC23F4801), pointer_default(unique), @@ -536,6 +526,11 @@ interface IWSLCVirtualMachine : IUnknown // Returns an event that is signaled when the VM exits (graceful or forced). HRESULT GetTerminationEvent([out, system_handle(sh_event)] HANDLE* Event); + + // Returns the cached termination reason and details. The values are only meaningful + // after the termination event has been signaled; before that the reason is + // WSLCVirtualMachineTerminationReasonUnknown and Details is an empty string. + HRESULT GetTerminationReason([out] WSLCVirtualMachineTerminationReason* Reason, [out] LPWSTR* Details); } // @@ -576,7 +571,6 @@ typedef struct _WSLCSessionSettings { ULONG MemoryMb; ULONG BootTimeoutMs; WSLCNetworkingMode NetworkingMode; - [unique] ITerminationCallback* TerminationCallback; WSLCFeatureFlags FeatureFlags; WSLCHandle DmesgOutput; WSLCSessionStorageFlags StorageFlags; @@ -762,6 +756,18 @@ typedef enum _WSLCSessionState WSLCSessionStateTerminated = 1 } WSLCSessionState; +// Diagnostics describing the session's on-demand VM lifecycle. Used by tests and +// troubleshooting to observe lazy VM bring-up and idle termination without affecting +// the VM (querying these does not bring the VM up or count as activity). +typedef struct _WSLCVmDiagnostics +{ + // TRUE when the backing VM is currently running. + boolean Running; + + // Number of times the VM has been (re)created over the session's lifetime. + unsigned long StartCount; +} WSLCVmDiagnostics; + // Settings for IWSLCSession::Initialize - passed from service to per-user process typedef struct _WSLCSessionInitSettings { @@ -788,6 +794,19 @@ interface IWSLCSession : IUnknown HRESULT GetId([out] ULONG* Id); HRESULT GetState([out] WSLCSessionState* State); + // Returns a one-off event that is signaled when the session terminates, whether due to an + // explicit Terminate() call or an unexpected VM exit. The returned handle is owned by the + // caller and remains valid (and observes the signaled state) even after the session is released. + HRESULT GetTerminationEvent([out, system_handle(sh_event)] HANDLE* Event); + + // Returns the cached termination reason and details. Only valid once the session has terminated, + // i.e. after the event returned by GetTerminationEvent is signaled; before that it returns + // HRESULT_FROM_WIN32(ERROR_INVALID_STATE). On success the caller owns Details and must free it. + HRESULT GetTerminationReason([out] WSLCVirtualMachineTerminationReason* Reason, [out] LPWSTR* Details); + + // Reports on-demand VM lifecycle diagnostics. Does not bring the VM up or count as activity. + HRESULT GetVmDiagnostics([out] WSLCVmDiagnostics* Diagnostics); + // Image management. HRESULT PullImage([in] LPCSTR Image, [in, unique] LPCSTR RegistryAuthenticationInformation, [in, unique] IProgressCallback* ProgressCallback, [in, unique] IWarningCallback* WarningCallback); HRESULT BuildImage([in] const WSLCBuildImageOptions* Options, [in, unique] IProgressCallback* ProgressCallback, [in, unique, system_handle(sh_event)] HANDLE CancelEvent); @@ -804,6 +823,14 @@ interface IWSLCSession : IUnknown // Container management. HRESULT CreateContainer([in] const WSLCContainerOptions* Options, [in, unique] IWarningCallback* WarningCallback, [out] IWSLCContainer** Container); HRESULT OpenContainer([in, ref] LPCSTR Id, [out] IWSLCContainer** Container); + + // Keeps the VM alive for the duration of a client-side container operation. The CLI performs + // each mutation as two round-trips (OpenContainer followed by the operation) and may stream + // output afterwards. With on-demand VM idle-termination the VM could otherwise tear down + // between those calls, disconnecting the container wrapper and failing the second call with + // RPC_E_DISCONNECTED. The client holds the returned token for the whole operation; releasing + // it (or the client exiting) lets the VM idle-terminate again. + HRESULT BeginContainerOperation([out] IUnknown** Operation); HRESULT ListContainers([in, unique] const WSLCListContainersOptions* Options,[out, size_is(, *Count)] WSLCContainerEntry** Containers,[out] ULONG* Count, [out, size_is(, *PortsCount)] WSLCContainerPortMapping** Ports, [out] ULONG* PortsCount); HRESULT PruneContainers([in, unique, size_is(FiltersCount)] const WSLCFilter* Filters, [in] ULONG FiltersCount, [out] WSLCPruneContainersResults* Result); @@ -828,7 +855,9 @@ interface IWSLCSession : IUnknown // Returns a handle to this COM server process (used to add to job object). HRESULT GetProcessHandle([out, system_handle(sh_process)] HANDLE* ProcessHandle); - // Initializes the session with a VM factory. VMs are created through the factory. + // Initializes the session. The session creates VMs on demand via the supplied + // factory: a VM is brought up when work requires it and idle-terminated when there + // are no running containers and no in-flight operations. HRESULT Initialize( [in] const WSLCSessionInitSettings* Settings, [in] IWSLCVirtualMachineFactory* VmFactory, diff --git a/src/windows/wslc/services/ContainerService.cpp b/src/windows/wslc/services/ContainerService.cpp index 1015762ecc..4a712190aa 100644 --- a/src/windows/wslc/services/ContainerService.cpp +++ b/src/windows/wslc/services/ContainerService.cpp @@ -285,6 +285,7 @@ std::wstring ContainerService::FormatRelativeTime(ULONGLONG timestamp) int ContainerService::Attach(Session& session, const std::string& id) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); @@ -446,6 +447,7 @@ CreateContainerResult ContainerService::Create(Session& session, const std::stri int ContainerService::Start(Session& session, const std::string& id, bool attach) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); WSLCContainerStartFlags flags = attach ? WSLCContainerStartFlagsAttach : WSLCContainerStartFlagsNone; @@ -476,6 +478,7 @@ int ContainerService::Start(Session& session, const std::string& id, bool attach void ContainerService::Stop(Session& session, const std::string& id, StopContainerOptions options) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); THROW_IF_FAILED_EXCEPT(container->Stop(options.Signal, options.Timeout), WSLC_E_CONTAINER_NOT_RUNNING); @@ -483,6 +486,7 @@ void ContainerService::Stop(Session& session, const std::string& id, StopContain void ContainerService::Kill(Session& session, const std::string& id, WSLCSignal signal) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); THROW_IF_FAILED(container->Kill(signal)); @@ -490,6 +494,7 @@ void ContainerService::Kill(Session& session, const std::string& id, WSLCSignal void ContainerService::Delete(Session& session, const std::string& id, bool force) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); THROW_IF_FAILED(container->Delete(force ? WSLCDeleteFlagsForce : WSLCDeleteFlagsNone)); @@ -544,6 +549,7 @@ std::vector ContainerService::List( int ContainerService::Exec(Session& session, const std::string& id, ContainerOptions options) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); @@ -575,6 +581,7 @@ int ContainerService::Exec(Session& session, const std::string& id, ContainerOpt InspectContainer ContainerService::Inspect(Session& session, const std::string& id) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); wil::unique_cotaskmem_ansistring output; @@ -584,6 +591,7 @@ InspectContainer ContainerService::Inspect(Session& session, const std::string& void ContainerService::Logs(Session& session, const std::string& id, bool follow, bool timestamps, ULONGLONG since, ULONGLONG until, ULONGLONG tail) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); @@ -611,6 +619,7 @@ void ContainerService::Logs(Session& session, const std::string& id, bool follow wsl::windows::common::docker_schema::ContainerStats ContainerService::Stats(Session& session, const std::string& id) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); wil::unique_cotaskmem_ansistring output; diff --git a/src/windows/wslc/services/SessionModel.h b/src/windows/wslc/services/SessionModel.h index 7424a8c043..48ce0fe800 100644 --- a/src/windows/wslc/services/SessionModel.h +++ b/src/windows/wslc/services/SessionModel.h @@ -19,7 +19,8 @@ namespace wsl::windows::wslc::models { struct Session { - explicit Session(wil::com_ptr session) : m_session(std::move(session)) + explicit Session(wil::com_ptr session, wil::com_ptr warningCallback = {}) : + m_session(std::move(session)), m_warningCallback(std::move(warningCallback)) { } IWSLCSession* Get() const noexcept @@ -27,8 +28,23 @@ struct Session return m_session.get(); } + // Acquires an activity token that keeps the VM alive for the duration of a client-side + // container operation (resolve + operate, plus any streamed output). Hold the returned + // pointer for the whole operation; releasing it lets the VM idle-terminate again. + [[nodiscard]] wil::com_ptr BeginContainerOperation() const + { + wil::com_ptr operation; + THROW_IF_FAILED(m_session->BeginContainerOperation(&operation)); + return operation; + } + private: wil::com_ptr m_session; + + // Kept alive for the lifetime of the session model (i.e. the whole CLI command) so the service + // can deliver warnings emitted by lazy/background work — such as resource recovery on the first + // VM start — back to this CLI invocation, even though no single COM call carries the callback. + wil::com_ptr m_warningCallback; }; } // namespace wsl::windows::wslc::models \ No newline at end of file diff --git a/src/windows/wslc/services/SessionService.cpp b/src/windows/wslc/services/SessionService.cpp index d382cfea62..eb160bfaab 100644 --- a/src/windows/wslc/services/SessionService.cpp +++ b/src/windows/wslc/services/SessionService.cpp @@ -118,7 +118,10 @@ Session SessionService::CreateDefaultSession() auto warningCallback = Microsoft::WRL::Make(); THROW_IF_FAILED(sessionManager->CreateSession(nullptr, WSLCSessionFlagsNone, warningCallback.Get(), &session)); wsl::windows::common::security::ConfigureForCOMImpersonation(session.get()); - return Session(std::move(session)); + + // Hold the warning callback for the lifetime of the session so warnings emitted by the lazy VM + // start (e.g. resource recovery) are still delivered to this CLI invocation. + return Session(std::move(session), wil::com_ptr(warningCallback.Get())); } int SessionService::Enter(const std::wstring& storagePath, const std::wstring& displayName) diff --git a/src/windows/wslcsession/IORelay.cpp b/src/windows/wslcsession/IORelay.cpp index 6677bca87a..09f5e05cd4 100644 --- a/src/windows/wslcsession/IORelay.cpp +++ b/src/windows/wslcsession/IORelay.cpp @@ -68,11 +68,20 @@ void IORelay::Stop() } } +bool IORelay::IsRelayThread() const noexcept +{ + return m_thread.get_id() == std::this_thread::get_id(); +} + void IORelay::Run() try { common::wslutil::SetThreadDescription(L"IORelay"); + // Handle callbacks dispatched from this thread (e.g. unexpected VM exit) can tear the VM down, + // releasing cross-process COM proxies, so join the process MTA to avoid RPC_E_WRONG_THREAD. + const auto coInit = wil::CoInitializeEx(COINIT_MULTITHREADED); + windows::common::io::MultiHandleWait io; // N.B. All the IO must happen on the thread. diff --git a/src/windows/wslcsession/IORelay.h b/src/windows/wslcsession/IORelay.h index 879d3fee13..844c16dee6 100644 --- a/src/windows/wslcsession/IORelay.h +++ b/src/windows/wslcsession/IORelay.h @@ -30,6 +30,12 @@ class IORelay void Stop(); + // Returns true if the calling thread is the IORelay's own worker thread (i.e. the call + // is being made from a handle callback). Destroying the IORelay from this thread would + // join the thread with itself and call std::terminate(), so callers that may run on the + // relay thread must check this before destroying the object. + bool IsRelayThread() const noexcept; + private: void Start(); void Run(); diff --git a/src/windows/wslcsession/WSLCContainer.cpp b/src/windows/wslcsession/WSLCContainer.cpp index 7da28ca63d..f3a2c61287 100644 --- a/src/windows/wslcsession/WSLCContainer.cpp +++ b/src/windows/wslcsession/WSLCContainer.cpp @@ -675,6 +675,21 @@ void WSLCContainerImpl::CopyTo(IWSLCContainer** Container) const THROW_IF_FAILED(m_comWrapper.CopyTo(Container)); } +bool WSLCContainerImpl::IsExternallyReferenced() const noexcept +{ + auto lock = m_lock.lock_shared(); + + // The impl owns exactly one reference to the COM wrapper (m_comWrapper); any additional + // references belong to clients holding marshaled proxies. A null wrapper means the container has + // already been disconnected, so there is nothing left to keep the VM alive for. + if (m_comWrapper == nullptr) + { + return false; + } + + return m_comWrapper->HasExternalReference(); +} + void WSLCContainerImpl::Attach(LPCSTR DetachKeys, WSLCHandle* Stdin, WSLCHandle* Stdout, WSLCHandle* Stderr) const { auto lock = m_lock.lock_shared(); @@ -1248,6 +1263,12 @@ void WSLCContainerImpl::Exec(const WSLCProcessOptions* Options, const WSLCProces } while (!control->GetExitEvent().wait(100)); auto process = wil::MakeOrThrow(std::move(control), std::move(io), Options->Flags); + + // The exec'd process wrapper is handed to the client and is not retained internally, so its + // lifetime tracks the client's proxy. Bind a keep-alive token to it so the idle worker does + // not tear the VM down (killing the process) while the client still holds the proxy. + process->SetKeepAliveToken(m_wslcSession.CreateActivityToken()); + THROW_IF_FAILED(process.CopyTo(__uuidof(IWSLCProcess), (void**)Process)); } CATCH_AND_THROW_DOCKER_USER_ERROR("Failed to exec process in container %hs", m_id.c_str()); @@ -2129,11 +2150,54 @@ __requires_lock_held(m_lock) void WSLCContainerImpl::Transition(WSLCContainerSta m_state = State; m_stateChangedAt = stateChangedAt.value_or(static_cast(std::time(nullptr))); + + // A container transitioning to a terminal state (e.g. Exited) may leave the session idle. + // Ask the session to re-evaluate whether the VM can be torn down. This is a non-blocking signal. + m_wslcSession.RequestIdleCheck(); } WSLCContainer::WSLCContainer(WSLCContainerImpl* impl, WSLCSession& session, std::function&& OnDeleted) : COMImplClass(impl), m_session(session), m_onDeleted(std::move(OnDeleted)) { + // Bind the idle-check signaler to the session's shared idle state rather than to m_session, so + // Release() can wake the idle worker without dereferencing the (possibly torn-down) session. The + // captured shared_ptr keeps the idle state alive independently of the session's lifetime. + std::shared_ptr idleState = session.m_idleState; + m_requestIdleCheck = [idleState = std::move(idleState)]() { idleState->IdleCheckEvent.SetEvent(); }; +} + +ULONG STDMETHODCALLTYPE WSLCContainer::Release() +{ + // Snapshot the signaler on the stack BEFORE dropping our reference. Once Release() returns, this + // object may already be gone: a concurrent owner of the last remaining reference (e.g. container + // deletion releasing WSLCContainerImpl::m_comWrapper) can destroy it, and the session itself may + // be torn down while a client still holds this proxy. The captured shared state keeps the wake + // valid in both cases, so we never touch a member after Release(). + const std::function requestIdleCheck = m_requestIdleCheck; + + const ULONG count = RuntimeClassBase::Release(); + + // A count of 1 means only WSLCContainerImpl::m_comWrapper (the single internal reference) is + // left, i.e. a client just released its last proxy. Wake the idle worker so the now-idle VM can + // be reclaimed. N.B. at count 0 the object has already been destroyed; we deliberately signal + // only through the stack-local snapshot, never a member, on any post-Release path. + if (count == 1 && requestIdleCheck) + { + requestIdleCheck(); + } + + return count; +} + +bool WSLCContainer::HasExternalReference() noexcept +{ + // Read the current reference count without retaining a lasting reference. Call the base + // Release() directly (not the override above) so this query never triggers a spurious idle + // check, which would otherwise re-arm the idle worker in a busy loop. Safe because the caller + // (WSLCContainerImpl, via its m_comWrapper reference) guarantees the count cannot reach zero + // and destroy the object here. + AddRef(); + return RuntimeClassBase::Release() > 1; } HRESULT WSLCContainer::Attach(LPCSTR DetachKeys, WSLCHandle* Stdin, WSLCHandle* Stdout, WSLCHandle* Stderr) @@ -2274,6 +2338,11 @@ try THROW_HR_IF_MSG(E_INVALIDARG, WI_IsAnyFlagSet(Flags, ~WSLCDeleteFlagsValid), "Invalid flags: 0x%x", Flags); // Special case for Delete(): If deletion is successful, notify the WSLCSession that the container has been deleted. + // Hold a VM lease across the whole operation: deleting a container makes it inactive and + // can trigger an idle teardown. Without the lease the idle worker could take the session + // lock exclusively and clear m_containers (destroying this container) concurrently, racing + // the delete and inverting the container->session lock order. + auto vmLease = m_session.AcquireVmLease(); auto [lock, impl] = LockImpl(); impl->Delete(Flags); diff --git a/src/windows/wslcsession/WSLCContainer.h b/src/windows/wslcsession/WSLCContainer.h index c3ad79288c..00c5e82c9c 100644 --- a/src/windows/wslcsession/WSLCContainer.h +++ b/src/windows/wslcsession/WSLCContainer.h @@ -110,6 +110,12 @@ class WSLCContainerImpl void CopyTo(IWSLCContainer** Container) const; + // Returns true if a client still holds a reference to this container's COM wrapper (i.e. the + // wrapper's reference count exceeds the single reference owned internally by the impl). Used by + // the idle worker so the VM is not torn down out from under an outstanding container proxy, + // which would otherwise leave the client with RPC_E_DISCONNECTED. + bool IsExternallyReferenced() const noexcept; + const std::string& Image() const noexcept; const std::string& Name() const noexcept; WSLCContainerState State() const noexcept; @@ -222,6 +228,9 @@ class DECLSPEC_UUID("B1F1C4E3-C225-4CAE-AD8A-34C004DE1AE4") WSLCContainer { public: + using RuntimeClassBase = + Microsoft::WRL::RuntimeClass, IWSLCContainer, IFastRundown, ISupportErrorInfo>; + WSLCContainer(WSLCContainerImpl* impl, WSLCSession& session, std::function&& OnDeleted); IFACEMETHOD(Attach)(_In_opt_ LPCSTR DetachKeys, _Out_ WSLCHandle* Stdin, _Out_ WSLCHandle* Stdout, _Out_ WSLCHandle* Stderr) override; @@ -244,6 +253,21 @@ class DECLSPEC_UUID("B1F1C4E3-C225-4CAE-AD8A-34C004DE1AE4") WSLCContainer IFACEMETHOD(InterfaceSupportsErrorInfo)(REFIID riid); + // RuntimeClass reference-count override. When a client releases its last proxy (leaving only the + // single internal reference owned by WSLCContainerImpl::m_comWrapper), wake the idle worker so + // the VM can be reclaimed. This pairs with WSLCContainerImpl::IsExternallyReferenced(), which + // keeps the VM alive while a client still holds a container proxy; without this signal the idle + // worker would never re-evaluate after the proxy was released and the VM would stay up forever. + // The wake is delivered through the captured m_idleState (not m_session) so it stays safe even + // if the wrapper outlives the session or is concurrently destroyed once our reference drops. + ULONG STDMETHODCALLTYPE Release() override; + + // Returns true if a client still holds a reference to this wrapper, i.e. the reference count + // exceeds the single internal reference owned by WSLCContainerImpl::m_comWrapper. Reads the + // count via an AddRef + base Release round-trip that deliberately bypasses the Release() override + // above, so querying does not itself wake the idle worker. + bool HasExternalReference() noexcept; + // Cache read-only properties so they remain accessible after the impl is disconnected. // Called from WSLCContainerImpl::PrepareDisconnectComWrapper() while m_lock is held exclusively. void CacheState(const std::string& id, const std::string& name, WSLCContainerState state, const Microsoft::WRL::ComPtr& initProcess) noexcept; @@ -252,6 +276,14 @@ class DECLSPEC_UUID("B1F1C4E3-C225-4CAE-AD8A-34C004DE1AE4") WSLCContainer WSLCSession& m_session; std::function m_onDeleted; + // Wakes the idle worker when the last client proxy is released (see Release()). Bound at + // construction to a lambda that captures the session's shared idle state (WSLCSession::IdleState + // held via shared_ptr), so signalling never dereferences m_session: the wrapper can outlive the + // session (a client keeps this proxy past releasing the session) and can be concurrently + // destroyed the instant Release() drops our reference. The captured shared_ptr keeps the idle + // state alive and valid in both cases. + std::function m_requestIdleCheck; + // Cached read-only properties populated by CacheState() so they remain // accessible after the impl is disconnected. mutable wil::srwlock m_cacheLock; diff --git a/src/windows/wslcsession/WSLCExecutionContext.h b/src/windows/wslcsession/WSLCExecutionContext.h index 5d75bfed14..b3abec5a0e 100644 --- a/src/windows/wslcsession/WSLCExecutionContext.h +++ b/src/windows/wslcsession/WSLCExecutionContext.h @@ -27,7 +27,19 @@ class WSLCExecutionContext : public wsl::windows::common::COMServiceExecutionCon protected: bool CollectUserWarning(const std::wstring& warning) override { - if (m_warningCallback != nullptr) + IWarningCallback* callback = m_warningCallback; + + // When the operation carries no explicit callback, fall back to the callback supplied when + // the session was created/entered. This routes warnings emitted outside a callback-bearing + // operation (e.g. resource recovery during the lazy VM start) back to the session creator. + wil::com_ptr sessionCallback; + if (callback == nullptr && m_session != nullptr) + { + sessionCallback = m_session->AcquireWarningCallback(); + callback = sessionCallback.get(); + } + + if (callback != nullptr) { std::unique_ptr comCallback; if (m_session != nullptr) @@ -35,7 +47,7 @@ class WSLCExecutionContext : public wsl::windows::common::COMServiceExecutionCon comCallback = std::make_unique(m_session->RegisterUserCOMCallback()); } - auto hr = m_warningCallback->OnWarning(warning.c_str()); + auto hr = callback->OnWarning(warning.c_str()); if (SUCCEEDED(hr) || hr == RPC_E_CALL_CANCELED || hr == HRESULT_FROM_WIN32(ERROR_CANCELLED)) { return true; diff --git a/src/windows/wslcsession/WSLCProcess.h b/src/windows/wslcsession/WSLCProcess.h index cce5543d91..bc4bc0c04d 100644 --- a/src/windows/wslcsession/WSLCProcess.h +++ b/src/windows/wslcsession/WSLCProcess.h @@ -41,9 +41,19 @@ class DECLSPEC_UUID("AFBEA6D6-D8A4-4F81-8FED-F947EB74B33B") WSLCProcess HANDLE GetExitEvent(); int GetPid() const; + // Attaches an opaque keep-alive token whose lifetime is bound to this process object. A + // root-namespace process is not tracked as a container, so it relies on this token to hold an + // activity reference on the owning session for as long as the client keeps the process alive, + // preventing the idle worker from tearing the VM down (and killing the process) underneath it. + void SetKeepAliveToken(Microsoft::WRL::ComPtr&& Token) noexcept + { + m_keepAliveToken = std::move(Token); + } + private: WSLCProcessFlags m_flags; std::shared_ptr m_control; std::unique_ptr m_io; + Microsoft::WRL::ComPtr m_keepAliveToken; }; } // namespace wsl::windows::service::wslc \ No newline at end of file diff --git a/src/windows/wslcsession/WSLCProcessControl.cpp b/src/windows/wslcsession/WSLCProcessControl.cpp index 04e8ff2f8a..4e6da4e045 100644 --- a/src/windows/wslcsession/WSLCProcessControl.cpp +++ b/src/windows/wslcsession/WSLCProcessControl.cpp @@ -101,7 +101,15 @@ void DockerContainerProcessControl::OnContainerReleased() noexcept // Signal the exit event to prevent callers from being blocked on it. if (!m_exitEvent.is_signaled()) { - m_exitedCode = 128 + WSLCSignalSIGKILL; + // If the container already produced a real exit code (recorded by SetExitCode but not yet + // signaled — e.g. an --rm container whose init-exit signal is deferred to the Destroy + // event), preserve it. Only synthesize SIGKILL when the container is released without ever + // having produced an exit code (an abrupt teardown of a still-running container). + if (!m_exitedCode.has_value()) + { + m_exitedCode = 128 + WSLCSignalSIGKILL; + } + m_exitEvent.SetEvent(); } } diff --git a/src/windows/wslcsession/WSLCSession.cpp b/src/windows/wslcsession/WSLCSession.cpp index 2be375fe99..36fe67bb66 100644 --- a/src/windows/wslcsession/WSLCSession.cpp +++ b/src/windows/wslcsession/WSLCSession.cpp @@ -37,6 +37,12 @@ constexpr auto c_dockerdReadyLogLine = "API listen on /var/run/docker.sock"; constexpr DWORD c_processTerminateTimeoutMs = 30 * 1000; constexpr DWORD c_processKillTimeoutMs = 10 * 1000; +// Grace period to keep an otherwise-idle VM running before tearing it down. This avoids +// thrashing the VM (repeated teardown/recreate) when containers are created and destroyed, +// or operations issued, in quick succession. The clock restarts whenever the VM is observed +// to be non-idle, so a full grace period of continuous idleness is required before teardown. +constexpr auto c_vmIdleGracePeriod = std::chrono::seconds(30); + namespace { // Group policy: WSLContainerRegistryAllowlist restricts which container-image @@ -296,7 +302,7 @@ HRESULT WSLCSession::Initialize( try { RETURN_HR_IF(E_POINTER, Settings == nullptr || VmFactory == nullptr); - RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_ALREADY_INITIALIZED), m_virtualMachine.has_value()); + RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_ALREADY_INITIALIZED), m_vmFactoryGitCookie != 0); THROW_HR_IF_MSG( E_INVALIDARG, WI_IsAnyFlagSet(Settings->FeatureFlags, ~WSLCFeatureFlagsValid), "Invalid feature flags: 0x%x", Settings->FeatureFlags); @@ -307,9 +313,23 @@ try Settings->StorageFlags); // Set up a warning context for the duration of initialization so that non-fatal - // failures (e.g., container/volume/network recovery) are streamed to the CLI. + // failures are streamed to the CLI. WSLCExecutionContext warningContext(this, WarningCallback); + // The VM (and storage VHD) is created lazily on the first operation. Validate the storage + // configuration eagerly here so misconfiguration is reported at session creation rather than + // surfacing later on the first VM-starting operation. With WSLCSessionStorageFlagsNoCreate the + // storage VHD must already exist (ConfigureStorage will not create it). + if (Settings->StoragePath != nullptr && WI_IsFlagSet(Settings->StorageFlags, WSLCSessionStorageFlagsNoCreate)) + { + const std::filesystem::path storagePath{Settings->StoragePath}; + THROW_HR_WITH_USER_ERROR_IF(E_INVALIDARG, Localization::MessagePathNotAbsolute(Settings->StoragePath), !storagePath.is_absolute()); + THROW_HR_WITH_USER_ERROR_IF( + HRESULT_FROM_WIN32(ERROR_PATH_NOT_FOUND), + Localization::MessageWslcSessionStorageNotFound(Settings->StoragePath), + !std::filesystem::exists(storagePath / "storage.vhdx")); + } + // N.B. No locking is required because Initialize() is always called before the session is returned to the caller. m_id = Settings->SessionId; m_displayName = Settings->DisplayName ? Settings->DisplayName : L""; @@ -317,8 +337,24 @@ try m_featureFlags = Settings->FeatureFlags; m_pluginNotifier = PluginNotifier; - // Get user token for the current process + // Park the VM factory in the Global Interface Table. It is supplied here (on the call that + // creates the session) but used on demand from other threads/apartments; storing the raw + // proxy and calling it later would raise RPC_E_WRONG_THREAD. + m_git = wil::CoCreateInstance(CLSID_StdGlobalInterfaceTable, CLSCTX_INPROC_SERVER); + THROW_IF_FAILED(m_git->RegisterInterfaceInGlobal(VmFactory, __uuidof(IWSLCVirtualMachineFactory), &m_vmFactoryGitCookie)); + + // Park the warning callback too. The VM (and resource recovery) is created lazily on the + // first operation, which may not carry its own warning callback, so recovery warnings are + // routed back to this callback via AcquireWarningCallback()/WSLCExecutionContext. + if (WarningCallback != nullptr) + { + THROW_IF_FAILED(m_git->RegisterInterfaceInGlobal(WarningCallback, __uuidof(IWarningCallback), &m_warningCallbackGitCookie)); + } + + // Persist a deep copy of the settings (and the creating user's SID) required to + // (re)create the VM on demand. const auto tokenInfo = wil::get_token_information(GetCurrentProcessToken()); + PersistSettings(*Settings, tokenInfo->User.Sid); WSL_LOG( "SessionInitialized", @@ -326,58 +362,521 @@ try TraceLoggingValue(m_displayName.c_str(), "DisplayName"), TraceLoggingValue(m_creatorProcessName.c_str(), "CreatorProcess")); - // Create the VM through the factory. The VM produces crash events; the session multiplexes - // them out to any registered ICrashDumpCallback subscribers via OnCrashDumpWritten. + // The VM is created lazily on the first operation that requires it (see EnsureVmRunning) + // and torn down when the session becomes idle. Start the worker that performs idle teardown. + // The body is wrapped so an unexpected throw (e.g. COM initialization failure before the + // worker's own try/catch loop) is logged rather than escaping the thread and calling + // std::terminate(), which would crash the session process. + m_idleThread = std::thread([this]() { + try + { + IdleWorker(); + } + CATCH_LOG() + }); + + return S_OK; +} +CATCH_RETURN() + +void WSLCSession::PersistSettings(const WSLCSessionInitSettings& Settings, PSID UserSid) +{ + m_settings = Settings; + + // Repoint the string fields at storage owned by the session so they outlive the caller's buffers. + m_settings.DisplayName = m_displayName.c_str(); + + if (Settings.CreatorProcessName != nullptr) + { + m_settingsCreatorProcessName = Settings.CreatorProcessName; + m_settings.CreatorProcessName = m_settingsCreatorProcessName->c_str(); + } + else + { + m_settings.CreatorProcessName = nullptr; + } + + if (Settings.StoragePath != nullptr) + { + m_settingsStoragePath = Settings.StoragePath; + m_settings.StoragePath = m_settingsStoragePath->c_str(); + } + else + { + m_settings.StoragePath = nullptr; + } + + if (Settings.RootVhdTypeOverride != nullptr) + { + m_settingsRootVhdTypeOverride = Settings.RootVhdTypeOverride; + m_settings.RootVhdTypeOverride = m_settingsRootVhdTypeOverride->c_str(); + } + else + { + m_settings.RootVhdTypeOverride = nullptr; + } + + if (UserSid != nullptr) + { + const auto length = GetLengthSid(UserSid); + const auto* bytes = reinterpret_cast(UserSid); + m_userSid.assign(bytes, bytes + length); + } + else + { + m_userSid.clear(); + } +} + +bool WSLCSession::IdleTerminationEnabled() const noexcept +{ + // Only tear the VM down when there is persistent storage to recover from. A tmpfs-backed + // session would lose all image/container state on teardown, so its VM is kept alive once started. + return m_settings.StoragePath != nullptr; +} + +void WSLCSession::EnsureVmRunning() +{ + if (m_vmState.load() == VmState::Running) + { + return; + } + + auto lock = m_lock.lock_exclusive(); + + // Do not (re)start the VM once the session is terminating or has terminated. This also + // bounds VmLease's retry loop: a lease that races with Terminate() fails here instead of + // restarting a VM that is being permanently torn down. + THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), m_terminating.load() || m_sessionTerminatedEvent.is_signaled()); + + if (m_vmState.load() == VmState::Running) + { + return; + } + + StartVmLockHeld(); +} + +void WSLCSession::StartVmLockHeld() +{ + WI_ASSERT(m_vmState.load() != VmState::Running); + + WSL_LOG("WslcVmStarting", TraceLoggingValue(m_id, "SessionId")); + + m_vmState.store(VmState::Starting); + m_vmStopRequested.store(false); + + // Tear everything back down if bring-up fails partway through. + auto startCleanup = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { + TearDownVmLockHeld(); + m_vmState.store(VmState::None); + }); + + // Create a fresh IO relay for this VM instance. The previous one (if any) was stopped + // during teardown and cannot be restarted. + m_ioRelay.emplace(); + + // Create the VM via the factory. Re-fetch the factory from the GIT so we call it through a + // proxy marshalled into this thread's apartment (see m_git). The VM produces crash events; + // the session multiplexes them out to any registered ICrashDumpCallback subscribers via + // OnCrashDumpWritten. + wil::com_ptr vmFactory; + THROW_IF_FAILED(m_git->GetInterfaceFromGlobal(m_vmFactoryGitCookie, __uuidof(IWSLCVirtualMachineFactory), vmFactory.put_void())); + wil::com_ptr vm; - THROW_IF_FAILED(VmFactory->CreateVirtualMachine(&vm)); + THROW_IF_FAILED(vmFactory->CreateVirtualMachine(&vm)); m_virtualMachine.emplace( vm.get(), - Settings, + &m_settings, m_sessionTerminatingEvent.get(), std::bind(&WSLCSession::OnCrashDumpWritten, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5)); - - // Make sure that everything is destroyed correctly if an exception is thrown. - auto errorCleanup = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { LOG_IF_FAILED(Terminate()); }); - m_virtualMachine->Initialize(); // Get an event from the service that is signaled when the VM exits. + m_vmExitedEvent.reset(); THROW_IF_FAILED(vm->GetTerminationEvent(&m_vmExitedEvent)); // Configure storage. - ConfigureStorage(*Settings, tokenInfo->User.Sid); + ConfigureStorage(m_settings, m_userSid.empty() ? nullptr : reinterpret_cast(m_userSid.data())); - // Launch containerd first + // Launch containerd first, then dockerd with the external containerd socket. StartContainerd(); - // Launch dockerd with external containerd socket + // Reset the readiness event before (re)starting dockerd so a stale signal from a prior + // VM instance is not observed. + m_dockerdReadyEvent.ResetEvent(); StartDockerd(); // Wait for dockerd to be ready before starting the event tracker. THROW_WIN32_IF_MSG( - ERROR_TIMEOUT, !m_dockerdReadyEvent.wait(Settings->BootTimeoutMs), "Timed out waiting for dockerd to start"); + ERROR_TIMEOUT, !m_dockerdReadyEvent.wait(m_settings.BootTimeoutMs), "Timed out waiting for dockerd to start"); auto [_, __, channel] = m_virtualMachine->Fork(WSLC_FORK::Thread); m_dockerClient.emplace(std::move(channel), m_virtualMachine->TerminatingEvent(), m_virtualMachine->VmId(), 10 * 1000); // Start the event tracker. - m_eventTracker.emplace(m_dockerClient.value(), *this, m_ioRelay); + m_eventTracker.emplace(m_dockerClient.value(), *this, *m_ioRelay); m_volumes.emplace(m_dockerClient.value(), m_virtualMachine.value(), m_eventTracker.value(), m_storageVhdPath.parent_path()); // Monitor for unexpected VM exit. - m_ioRelay.AddHandle(std::make_unique(m_vmExitedEvent.get(), std::bind(&WSLCSession::OnVmExited, this))); + m_ioRelay->AddHandle(std::make_unique(m_vmExitedEvent.get(), std::bind(&WSLCSession::OnVmExited, this))); // Recover any existing resources from storage. RecoverExistingNetworks(); RecoverExistingContainers(); - errorCleanup.release(); - return S_OK; + m_vmState.store(VmState::Running); + m_vmStartCount.fetch_add(1); + startCleanup.release(); + + WSL_LOG("WslcVmStarted", TraceLoggingValue(m_id, "SessionId")); +} + +void WSLCSession::StopVmLockHeld() +{ + if (m_vmState.load() != VmState::Running) + { + return; + } + + WSL_LOG("WslcVmIdleStop", TraceLoggingValue(m_id, "SessionId")); + + // Flag the teardown as intentional so VM/dockerd/containerd exit callbacks (which fire + // from the IO relay thread while we hold the lock) do not treat it as a crash. + m_vmStopRequested.store(true); + m_vmState.store(VmState::Stopping); + + TearDownVmLockHeld(); + + m_vmState.store(VmState::None); + m_vmStopRequested.store(false); +} + +void WSLCSession::TearDownVmLockHeld(bool CaptureTerminationReason) +{ + std::lock_guard containersLock(m_containersLock); + std::lock_guard networksLock(m_networksLock); + + m_containers.clear(); + m_volumes.reset(); + m_networks.clear(); + + // Stop the IO relay. + // This stops: + // - container state monitoring. + // - container init process relays + // - execs relays + // - container logs relays + if (m_ioRelay) + { + m_ioRelay->Stop(); + } + + { + std::lock_guard allocatedPortsLock(m_allocatedPortsLock); + m_allocatedPorts.clear(); + } + + m_eventTracker.reset(); + m_dockerClient.reset(); + + if (CaptureTerminationReason) + { + // Default: an explicit/graceful teardown is a shutdown (the VM is still alive and we are + // bringing it down). Overridden below if the VM exited on its own and recorded a cause. + m_terminationReason = WSLCVirtualMachineTerminationReasonShutdown; + } + + // Check if the VM has already exited (e.g., killed externally). + // If so, skip operations that require a live VM to avoid unnecessary waits. + // N.B. m_vmExitedEvent may be uninitialized if teardown runs before GetTerminationEvent() succeeds. + if (m_vmExitedEvent && m_vmExitedEvent.is_signaled()) + { + WSL_LOG("SkippingGracefulShutdown_VmDead", TraceLoggingValue(m_id, "SessionId")); + + // The VM exited on its own, so it recorded the cause. + if (CaptureTerminationReason && m_virtualMachine) + { + wil::unique_cotaskmem_string details; + LOG_IF_FAILED(m_virtualMachine->GetTerminationReason(&m_terminationReason, &details)); + m_terminationDetails = details ? details.get() : L""; + } + } + else if (m_virtualMachine) + { + m_virtualMachine->OnSessionTerminated(); + + // Stop dockerd first, then containerd (dockerd is a client of containerd). + // N.B. dockerd waits a couple seconds if there are any outstanding HTTP request sockets opened. + if (m_dockerdProcess.has_value()) + { + auto dockerdExitCode = StopProcess(m_dockerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs); + WSL_LOG("DockerdExit", TraceLoggingValue(dockerdExitCode, "code")); + } + + if (m_containerdProcess.has_value()) + { + auto containerdExitCode = StopProcess(m_containerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs); + WSL_LOG("ContainerdExit", TraceLoggingValue(containerdExitCode, "code")); + } + + // N.B. dockerd has exited by this point, so unmounting the VHD is safe since no container can be running. + try + { + m_virtualMachine->Unmount(c_containerdStorage); + } + CATCH_LOG(); + } + + m_dockerdProcess.reset(); + m_containerdProcess.reset(); + m_virtualMachine.reset(); + + // Destroy the (stopped) relay so the next StartVm can create a fresh one. + // + // N.B. The relay must NOT be destroyed from its own thread: ~IORelay joins the relay + // thread, and joining a thread from itself calls std::terminate(). This situation arises + // on the unexpected-VM-exit path, where OnVmExited() runs on the relay thread and drives + // Terminate() -> TearDownVmLockHeld(). In that (terminal) case the stopped relay and the + // VM-exit event it watches are left in place and destroyed later by ~WSLCSession on a + // different thread. On the idle-stop and external-terminate paths this runs on a non-relay + // thread, so both are destroyed here. (StartVmLockHeld resets m_vmExitedEvent before reuse.) + if (!m_ioRelay || !m_ioRelay->IsRelayThread()) + { + m_ioRelay.reset(); + m_vmExitedEvent.reset(); + } + + // Delete the ephemeral swap VHD now that the VM is gone. + if (!m_swapVhdPath.empty()) + { + LOG_IF_WIN32_BOOL_FALSE(DeleteFileW(m_swapVhdPath.c_str())); + m_swapVhdPath.clear(); + } +} + +bool WSLCSession::HasActiveContainerLockHeld() +{ + std::lock_guard containersLock(m_containersLock); + + // A container in the Created or Running state keeps the VM alive (it is non-terminal and + // may still be started/used). Exited containers do not -- unless a client still holds a proxy + // to the container's COM wrapper, in which case tearing the VM down would disconnect that proxy + // (RPC_E_DISCONNECTED). Keep the VM alive while any container is non-terminal or still + // externally referenced. + return std::ranges::any_of(m_containers, [](const auto& entry) { + const auto state = entry.second->State(); + if (state == WslcContainerStateCreated || state == WslcContainerStateRunning) + { + return true; + } + + return entry.second->IsExternallyReferenced(); + }); +} + +void WSLCSession::RequestIdleCheck() noexcept +{ + if (m_idleState->IdleCheckEvent) + { + m_idleState->IdleCheckEvent.SetEvent(); + } +} + +void WSLCSession::IdleWorker() +{ + // Idle teardown releases cross-process COM proxies (the VM and its VM-scoped state), so this + // thread must join the process MTA; otherwise those Release/calls fail with RPC_E_WRONG_THREAD. + const auto coInit = wil::CoInitializeEx(COINIT_MULTITHREADED); + + const HANDLE handles[] = {m_idleState->IdleCheckEvent.get(), m_sessionTerminatingEvent.get()}; + + // Absolute time at which a continuously-idle VM becomes eligible for teardown. Unset while + // the VM is non-idle; (re)armed when the VM is first observed idle. The wait below times out + // at this deadline so teardown happens promptly once the grace period elapses. + std::optional idleDeadline; + + for (;;) + { + DWORD timeout = INFINITE; + if (idleDeadline.has_value()) + { + const auto now = std::chrono::steady_clock::now(); + timeout = (*idleDeadline <= now) + ? 0 + : static_cast(std::chrono::duration_cast(*idleDeadline - now).count()); + } + + const auto wait = WaitForMultipleObjects(ARRAYSIZE(handles), handles, FALSE, timeout); + + // handles[1] (session terminating) or a wait failure ends the worker. handles[0] (idle + // check) and WAIT_TIMEOUT (grace period may have elapsed) both trigger a re-evaluation. + if (wait != WAIT_OBJECT_0 && wait != WAIT_TIMEOUT) + { + break; + } + + if (wait == WAIT_OBJECT_0) + { + m_idleState->IdleCheckEvent.ResetEvent(); + + // An explicit idle-check signal means an operation or container state change just + // completed (it is raised on every lease/token release and terminal state change). + // Restart the grace clock so teardown happens a full grace period after the last + // activity, not after the first time the VM was ever observed idle. + idleDeadline.reset(); + } + + if (m_terminating.load()) + { + break; + } + + if (!IdleTerminationEnabled()) + { + idleDeadline.reset(); + continue; + } + + try + { + // Use a non-blocking acquire. A blocking exclusive acquire would queue behind any + // in-flight operation's shared VmLease and, because SRW locks favor a waiting writer, + // would stall every new operation behind it until that operation completed. A + // long-running operation (e.g. a blocking SaveImage/Export) would therefore serialize + // all concurrent operations. If the lock is currently held an operation is in flight, + // so treat it as activity and re-evaluate on the next idle-check signal (raised when + // that operation releases its lease). + auto lock = m_lock.try_lock_exclusive(); + if (!lock) + { + idleDeadline.reset(); + continue; + } + if (m_terminating.load() || m_vmState.load() != VmState::Running) + { + idleDeadline.reset(); + continue; + } + + // If the VM's exit event is already signaled, the VM has crashed and OnVmExited() + // (running on the IO relay thread) is about to drive Terminate(). Do NOT tear the VM + // down here: StopVmLockHeld() would join the relay thread while this exclusive lock is + // held, and the relay thread can be simultaneously blocked acquiring that same lock + // inside Terminate()'s retry loop — a deadlock. Leave crash handling to the relay-thread + // path, whose Stop() self-join is a no-op and therefore cannot deadlock. + if (m_vmExitedEvent && m_vmExitedEvent.is_signaled()) + { + idleDeadline.reset(); + continue; + } + + // Keep the VM alive while any operation is in flight or any container is non-terminal, + // and restart the grace clock so a fresh idle period is required afterwards. + if (m_idleState->ActivityCount.load() != 0 || HasActiveContainerLockHeld()) + { + idleDeadline.reset(); + continue; + } + + // The VM is idle. Arm the grace period if the clock is not already running, then + // defer teardown until it has fully elapsed. The clock is reset by any non-idle + // observation or explicit idle-check signal (see the WAIT_OBJECT_0 handling above), + // so a WAIT_TIMEOUT wake here means the VM has been idle for the whole grace period. + const auto now = std::chrono::steady_clock::now(); + if (!idleDeadline.has_value()) + { + idleDeadline = now + c_vmIdleGracePeriod; + } + + if (now < *idleDeadline) + { + continue; + } + + idleDeadline.reset(); + StopVmLockHeld(); + } + CATCH_LOG(); + } +} + +WSLCSession::VmLease WSLCSession::AcquireVmLease() +{ + return VmLease(*this); +} + +WSLCSession::VmLease::VmLease(WSLCSession& Session) : m_session(&Session) +{ + // Record an in-flight operation before bringing the VM up so the idle worker cannot tear + // it down between EnsureVmRunning() and acquiring the shared lock. + m_session->m_idleState->ActivityCount.fetch_add(1); + + auto countCleanup = wil::scope_exit([this]() { + m_session->m_idleState->ActivityCount.fetch_sub(1); + m_session = nullptr; + }); + + // The idle worker may complete a teardown in the window between EnsureVmRunning() and our + // shared-lock acquisition (it could have committed to the stop before our activity-count + // increment was visible). Our increment prevents any *future* idle teardown, so retry until + // we hold the shared lock with the VM running. This is bounded: once the increment is visible + // the idle worker will not stop the VM again, so at most one restart is needed. + // N.B. EnsureVmRunning() throws if the session has been terminated, which breaks the loop. + for (;;) + { + m_session->EnsureVmRunning(); + + m_lock = m_session->m_lock.lock_shared(); + + if (m_session->m_vmState.load() == VmState::Running) + { + break; + } + + m_lock.reset(); + } + + countCleanup.release(); +} + +WSLCSession::VmLease::VmLease(VmLease&& Other) noexcept : + m_session(std::exchange(Other.m_session, nullptr)), m_lock(std::move(Other.m_lock)) +{ +} + +WSLCSession::VmLease& WSLCSession::VmLease::operator=(VmLease&& Other) noexcept +{ + if (this != &Other) + { + if (m_session != nullptr) + { + m_lock.reset(); + m_session->m_idleState->ActivityCount.fetch_sub(1); + m_session->RequestIdleCheck(); + } + + m_session = std::exchange(Other.m_session, nullptr); + m_lock = std::move(Other.m_lock); + } + + return *this; +} + +WSLCSession::VmLease::~VmLease() +{ + if (m_session != nullptr) + { + // Release the shared lock before triggering the idle check so the idle worker can + // immediately take the exclusive lock if the session is now idle. + m_lock.reset(); + m_session->m_idleState->ActivityCount.fetch_sub(1); + m_session->RequestIdleCheck(); + } } -CATCH_RETURN() WSLCSession::~WSLCSession() { @@ -496,7 +995,7 @@ HRESULT WSLCSession::GetId(ULONG* Id) void WSLCSession::OnDockerdExited() { - if (!m_sessionTerminatingEvent.is_signaled()) + if (!m_sessionTerminatingEvent.is_signaled() && !m_vmStopRequested.load()) { WSL_LOG("UnexpectedDockerdExit", TraceLoggingValue(m_displayName.c_str(), "Name")); } @@ -504,7 +1003,7 @@ void WSLCSession::OnDockerdExited() void WSLCSession::OnContainerdExited() { - if (!m_sessionTerminatingEvent.is_signaled()) + if (!m_sessionTerminatingEvent.is_signaled() && !m_vmStopRequested.load()) { WSL_LOG("UnexpectedContainerdExit", TraceLoggingValue(m_displayName.c_str(), "Name")); } @@ -512,6 +1011,15 @@ void WSLCSession::OnContainerdExited() void WSLCSession::OnVmExited() { + // A teardown we initiated (idle shutdown) is in progress — the VM exit is expected and + // must not terminate the session. N.B. This runs on the IO relay thread; the flag is set + // under the exclusive lock before the relay is stopped, so it is visible here. + if (m_vmStopRequested.load()) + { + WSL_LOG("WslcVmExitedDuringStop", TraceLoggingValue(m_id, "SessionId")); + return; + } + WSL_LOG( "VmExited", TraceLoggingLevel(WINEVENT_LEVEL_WARNING), @@ -554,13 +1062,13 @@ ServiceRunningProcess WSLCSession::StartProcess( auto process = launcher.Launch(*m_virtualMachine); - m_ioRelay.AddHandle(std::make_unique( + m_ioRelay->AddHandle(std::make_unique( process.GetStdHandle(1), [this, LogSource](const auto& data) { OnProcessLog(data, LogSource); }, false)); - m_ioRelay.AddHandle(std::make_unique( + m_ioRelay->AddHandle(std::make_unique( process.GetStdHandle(2), [this, LogSource](const auto& data) { OnProcessLog(data, LogSource); }, false)); - m_ioRelay.AddHandle(std::make_unique(process.GetExitEvent(), std::move(ExitCallback))); + m_ioRelay->AddHandle(std::make_unique(process.GetExitEvent(), std::move(ExitCallback))); return process; } @@ -733,7 +1241,7 @@ try auto [repo, tagOrDigest] = wslutil::ParseImage(Image); EnforceRegistryAllowlist(repo); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); if (!tagOrDigest.has_value()) @@ -790,7 +1298,7 @@ try comCall = RegisterUserCOMCallback(); } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); @@ -1100,7 +1608,7 @@ try WSLCExecutionContext context(this, WarningCallback); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1126,7 +1634,7 @@ try THROW_HR_IF_MSG(E_INVALIDARG, !tagOrDigest.has_value(), "Expected tag for image import: %hs", ImageName); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1241,7 +1749,7 @@ try RETURN_HR_IF_NULL(E_POINTER, ImageNameOrID); RETURN_HR_IF(E_INVALIDARG, strlen(ImageNameOrID) > WSLC_MAX_IMAGE_NAME_LENGTH); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1274,7 +1782,7 @@ try names.emplace_back(ImageNames->Values[i]); } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1352,7 +1860,7 @@ try filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Options->Filters, Options->FiltersCount); } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1461,7 +1969,7 @@ try *DeletedImages = nullptr; *Count = 0; - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1535,7 +2043,7 @@ try RETURN_HR_IF_NULL(E_POINTER, Options->Tag); RETURN_HR_IF(E_INVALIDARG, strlen(Options->Repo) + strlen(Options->Tag) + 1 > WSLC_MAX_IMAGE_NAME_LENGTH); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1572,7 +2080,7 @@ try auto [repo, tagOrDigest] = wslutil::ParseImage(Image); EnforceRegistryAllowlist(repo); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); auto requestContext = m_dockerClient->PushImage(repo, tagOrDigest, RegistryAuthenticationInformation); @@ -1593,7 +2101,7 @@ try *Output = nullptr; - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); *Output = wil::make_unique_ansistring(InspectImageLockHeld(ImageNameOrId).c_str()).release(); @@ -1641,7 +2149,7 @@ try *IdentityToken = nullptr; - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); wil::unique_cotaskmem_ansistring token; @@ -1673,7 +2181,7 @@ try auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); docker_schema::PruneImageResult pruneResult; @@ -1734,7 +2242,7 @@ try "Invalid process flags: 0x%x", containerOptions->InitProcessOptions.Flags); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); auto result = wil::ResultFromException([&]() { CreateContainerImpl(containerOptions, Container); }); @@ -1810,7 +2318,7 @@ void WSLCSession::CreateContainerImpl(const WSLCContainerOptions* containerOptio std::bind(&WSLCSession::OnContainerDeleted, this, std::placeholders::_1), m_eventTracker.value(), m_dockerClient.value(), - m_ioRelay); + *m_ioRelay); // Key the map by Docker's container ID, which is set in the WSLCContainerImpl constructor and stable for its lifetime. auto [it, inserted] = m_containers.emplace(container->ID(), std::move(container)); @@ -1843,7 +2351,7 @@ try ValidateName(Id, WSLC_MAX_CONTAINER_NAME_LENGTH); // Look for an exact ID match first. - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); std::lock_guard containersLock{m_containersLock}; // Purge containers that were auto-deleted via OnEvent (--rm). @@ -1882,6 +2390,90 @@ try } CATCH_RETURN(); +namespace { + + // Activity token created by WSLCSession::CreateActivityToken (returned directly by + // BeginContainerOperation, and bound to a root-namespace process's lifetime by + // CreateRootNamespaceProcess). While a client holds it, the session's activity count is + // non-zero, so the idle worker will not tear the VM down. It runs a release callback (which + // decrements the activity count via the shared IdleState, without keeping the session alive) + // when the client releases it or exits. It implements IFastRundown so that if the holding + // client crashes, COM reclaims the stub promptly (rather than via slow default rundown) and the + // VM can idle-terminate without a multi-minute delay. + class ContainerOperation + : public Microsoft::WRL::RuntimeClass, IUnknown, IFastRundown> + { + public: + // Adopts an activity-count reference already taken by CreateActivityToken; the callback + // releases it. + void Initialize(std::function&& onRelease) noexcept + { + m_onRelease = std::move(onRelease); + } + + ~ContainerOperation() override + { + if (m_onRelease) + { + m_onRelease(); + } + } + + private: + std::function m_onRelease; + }; + +} // namespace + +Microsoft::WRL::ComPtr WSLCSession::CreateActivityToken() +{ + // Record the in-flight activity up front so the VM cannot idle-terminate before the caller + // takes ownership of the returned token. + m_idleState->ActivityCount.fetch_add(1); + auto countCleanup = wil::scope_exit([this]() { + m_idleState->ActivityCount.fetch_sub(1); + RequestIdleCheck(); + }); + + auto operation = Microsoft::WRL::Make(); + THROW_IF_NULL_ALLOC(operation.Get()); + + // Capture the shared idle state rather than the session itself: the token may outlive the + // session (e.g. a client keeps a root-namespace process proxy past releasing the session), and + // it must not keep the session alive. On release it decrements the activity count and wakes the + // idle worker; if the session is already gone this is a harmless no-op (no idle worker waits on + // the event). N.B. releasing the token therefore never blocks an explicit session teardown. + std::shared_ptr idleState = m_idleState; + operation->Initialize([idleState = std::move(idleState)]() { + idleState->ActivityCount.fetch_sub(1); + idleState->IdleCheckEvent.SetEvent(); + }); + + // The token now owns the activity-count reference and will release it on destruction. + countCleanup.release(); + + Microsoft::WRL::ComPtr token; + THROW_IF_FAILED(operation.As(&token)); + return token; +} + +HRESULT WSLCSession::BeginContainerOperation(IUnknown** Operation) +try +{ + WSLCExecutionContext context(this); + + RETURN_HR_IF_NULL(E_POINTER, Operation); + *Operation = nullptr; + + // Record the in-flight operation up front so the VM cannot idle-terminate before the client + // resolves the container and issues the operation (and streams any output). + auto token = CreateActivityToken(); + + RETURN_IF_FAILED(token.CopyTo(Operation)); + return S_OK; +} +CATCH_RETURN(); + HRESULT WSLCSession::ListContainers( const WSLCListContainersOptions* Options, WSLCContainerEntry** Containers, ULONG* Count, WSLCContainerPortMapping** Ports, ULONG* PortsCount) try @@ -1916,7 +2508,7 @@ try filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Options->Filters, Options->FiltersCount); } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); std::vector dockerContainers; @@ -1995,7 +2587,7 @@ try auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); std::lock_guard containersLock{m_containersLock}; @@ -2066,10 +2658,18 @@ try *Errno = -1; // Make sure not to return 0 if something fails. } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); auto process = m_virtualMachine->CreateLinuxProcess(Executable, *Options, TtyRows, TtyColumns, Errno); + + // The VmLease above is released when this call returns, but the process keeps running in the + // VM and the client holds the returned proxy. A root-namespace process is not tracked as a + // container, so attach an activity token bound to the process's lifetime; this keeps the VM + // alive for as long as the client holds the process, preventing the idle worker from tearing + // the VM down and killing the process out from under the client. + process->SetKeepAliveToken(CreateActivityToken()); + THROW_IF_FAILED(process.CopyTo(Process)); return S_OK; @@ -2092,7 +2692,7 @@ try THROW_HR_WITH_USER_ERROR_IF(E_INVALIDARG, Localization::MessagePathNotAbsolute(Path), !std::filesystem::path(Path).is_absolute()); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); // Attach the disk to the VM (AttachDisk() performs the access check for the VHD file). @@ -2120,7 +2720,7 @@ try auto driverOpts = wslutil::ParseKeyValuePairs(Options->DriverOpts, Options->DriverOptsCount); auto labels = wslutil::ParseKeyValuePairs(Options->Labels, Options->LabelsCount, WSLCVolumeMetadataLabel); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes); if (Options->Name != nullptr && Options->Name[0] != '\0') @@ -2140,7 +2740,7 @@ try RETURN_HR_IF_NULL(E_POINTER, Name); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes); m_volumes->DeleteVolume(Name); @@ -2161,7 +2761,7 @@ try auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes); auto volumeList = m_volumes->ListVolumes(std::move(filters)); @@ -2193,7 +2793,7 @@ try std::string name = Name; ValidateName(name.c_str(), WSLC_MAX_VOLUME_NAME_LENGTH); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes); std::string json = m_volumes->InspectVolume(name); @@ -2218,7 +2818,7 @@ try auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes); WSLCVolumes::PruneVolumesResult pruneResult; @@ -2298,7 +2898,7 @@ try auto driverOpts = wslutil::ParseKeyValuePairs(Options->DriverOpts, Options->DriverOptsCount); auto labels = wslutil::ParseKeyValuePairs(Options->Labels, Options->LabelsCount, WSLCNetworkManagedLabel); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); @@ -2399,7 +2999,7 @@ try std::string name = Name; ValidateName(name.c_str(), WSLC_MAX_NETWORK_NAME_LENGTH); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); @@ -2439,7 +3039,7 @@ try *Networks = nullptr; *Count = 0; - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); std::lock_guard networksLock(m_networksLock); if (m_networks.empty()) @@ -2478,7 +3078,7 @@ try std::string name = Name; ValidateName(name.c_str(), WSLC_MAX_NETWORK_NAME_LENGTH); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); std::lock_guard networksLock(m_networksLock); auto it = m_networks.find(name); @@ -2589,81 +3189,56 @@ try // Acquire an exclusive lock to ensure that no operation is running. WI_VERIFY(sessionLock); - std::lock_guard containersLock(m_containersLock); - std::lock_guard networksLock(m_networksLock); + // Tear down the VM (if running) and all VM-scoped state, capturing the termination reason. + // This mirrors the soft teardown used for idle shutdown, but here it is permanent. + TearDownVmLockHeld(/* CaptureTerminationReason */ true); - m_containers.clear(); - m_volumes.reset(); - m_networks.clear(); + m_vmState.store(VmState::None); - // Stop the IO relay. - // This stops: - // - container state monitoring. - // - container init process relays - // - execs relays - // - container logs relays - m_ioRelay.Stop(); + // Signal completion last so any observer of the terminated event sees a fully torn-down + // session and a populated termination reason. + m_sessionTerminatedEvent.SetEvent(); + + // Release the exclusive lock before joining the idle worker. If the worker is currently + // blocked acquiring the exclusive lock (about to evaluate idle teardown), it must be able + // to obtain it, observe m_terminating, and exit — otherwise the join below would deadlock. + sessionLock.reset(); + if (m_idleThread.joinable()) { - std::lock_guard allocatedPortsLock(m_allocatedPortsLock); - m_allocatedPorts.clear(); + m_idleThread.join(); } - m_eventTracker.reset(); - m_dockerClient.reset(); - - // Check if the VM has already exited (e.g., killed externally). - // If so, skip operations that require a live VM to avoid unnecessary waits. - // N.B. m_vmExitedEvent may be uninitialized if Terminate() is called from the - // Initialize() error path before GetTerminationEvent() succeeds. - if (m_vmExitedEvent && m_vmExitedEvent.is_signaled()) + // The idle worker has exited and no operation can run past termination, so the parked VM + // factory can no longer be re-fetched; revoke it from the GIT. + if (m_vmFactoryGitCookie != 0) { - WSL_LOG("SkippingGracefulShutdown_VmDead", TraceLoggingValue(m_id, "SessionId")); + LOG_IF_FAILED(m_git->RevokeInterfaceFromGlobal(m_vmFactoryGitCookie)); + m_vmFactoryGitCookie = 0; } - else - { - if (m_virtualMachine) - { - m_virtualMachine->OnSessionTerminated(); - // Stop dockerd first, then containerd (dockerd is a client of containerd). - // N.B. dockerd waits a couple seconds if there are any outstanding HTTP request sockets opened. - if (m_dockerdProcess.has_value()) - { - auto dockerdExitCode = StopProcess(m_dockerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs); - WSL_LOG("DockerdExit", TraceLoggingValue(dockerdExitCode, "code")); - } - - if (m_containerdProcess.has_value()) - { - auto containerdExitCode = StopProcess(m_containerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs); - WSL_LOG("ContainerdExit", TraceLoggingValue(containerdExitCode, "code")); - } - - // N.B. dockerd has exited by this point, so unmounting the VHD is safe since no container can be running. - try - { - m_virtualMachine->Unmount(c_containerdStorage); - } - CATCH_LOG(); - } + if (m_warningCallbackGitCookie != 0) + { + LOG_IF_FAILED(m_git->RevokeInterfaceFromGlobal(m_warningCallbackGitCookie)); + m_warningCallbackGitCookie = 0; } - m_dockerdProcess.reset(); - m_containerdProcess.reset(); - m_virtualMachine.reset(); + return S_OK; +} +CATCH_RETURN(); - // Delete the ephemeral swap VHD now that the VM is gone. - if (!m_swapVhdPath.empty()) +wil::com_ptr WSLCSession::AcquireWarningCallback() const +{ + wil::com_ptr callback; + if (m_warningCallbackGitCookie != 0) { - LOG_IF_WIN32_BOOL_FALSE(DeleteFileW(m_swapVhdPath.c_str())); - m_swapVhdPath.clear(); + // Best-effort: the creating client's proxy may already be gone (e.g. the CLI exited before + // a later VM restart), in which case the warning falls through to the default sink. + LOG_IF_FAILED(m_git->GetInterfaceFromGlobal(m_warningCallbackGitCookie, __uuidof(IWarningCallback), callback.put_void())); } - m_terminated = true; - return S_OK; + return callback; } -CATCH_RETURN(); HRESULT WSLCSession::RegisterCrashDumpCallback(_In_ ICrashDumpCallback* Callback, _Out_ IUnknown** Subscription) try @@ -2728,7 +3303,7 @@ try RETURN_HR_IF_NULL(E_POINTER, WindowsPath); RETURN_HR_IF_NULL(E_POINTER, LinuxPath); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); return m_virtualMachine->MountWindowsFolder(WindowsPath, LinuxPath, ReadOnly); @@ -2742,7 +3317,7 @@ try RETURN_HR_IF_NULL(E_POINTER, LinuxPath); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); return m_virtualMachine->UnmountWindowsFolder(LinuxPath); @@ -2754,7 +3329,7 @@ try { WSLCExecutionContext context(this); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); std::lock_guard allocatedPortsLock(m_allocatedPortsLock); @@ -2800,7 +3375,7 @@ try { WSLCExecutionContext context(this); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); std::lock_guard allocatedPortsLock(m_allocatedPortsLock); @@ -2948,7 +3523,11 @@ void WSLCSession::CancelUserCOMCallbacks() void WSLCSession::OnContainerDeleted(const WSLCContainerImpl* Container) { - auto lock = m_lock.lock_shared(); + // N.B. Invoked only from WSLCContainer::Delete, which already holds a VmLease (the shared + // session lock). The lease prevents a concurrent idle teardown from clearing m_containers, + // so this only needs m_containersLock. It must NOT re-acquire the shared session lock here: + // doing so while the idle worker is queued for the exclusive lock would deadlock (recursive + // shared acquire behind a pending writer). std::lock_guard containersLock(m_containersLock); WI_VERIFY(m_containers.erase(Container->ID()) == 1); @@ -2958,7 +3537,51 @@ HRESULT WSLCSession::GetState(_Out_ WSLCSessionState* State) { RETURN_HR_IF_NULL(E_POINTER, State); - *State = m_terminated ? WSLCSessionStateTerminated : WSLCSessionStateRunning; + *State = m_sessionTerminatedEvent.is_signaled() ? WSLCSessionStateTerminated : WSLCSessionStateRunning; + return S_OK; +} + +HRESULT WSLCSession::GetTerminationEvent(_Out_ HANDLE* Event) +try +{ + RETURN_HR_IF(E_POINTER, Event == nullptr); + + *Event = nullptr; + + // Duplicate the "terminated" event. The caller owns the returned handle, which stays valid even after the session is released. + *Event = wsl::windows::common::wslutil::DuplicateHandle(m_sessionTerminatedEvent.get(), SYNCHRONIZE); + + return S_OK; +} +CATCH_RETURN(); + +HRESULT WSLCSession::GetTerminationReason(_Out_ WSLCVirtualMachineTerminationReason* Reason, _Out_ LPWSTR* Details) +try +{ + RETURN_HR_IF(E_POINTER, Reason == nullptr || Details == nullptr); + + *Reason = WSLCVirtualMachineTerminationReasonUnknown; + *Details = nullptr; + + // m_terminationReason/m_terminationDetails are written once before m_sessionTerminatedEvent is + // signaled and never modified afterward, so observing the signaled event safely publishes them. + RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_sessionTerminatedEvent.is_signaled()); + + *Reason = m_terminationReason; + *Details = wil::make_cotaskmem_string(m_terminationDetails.c_str()).release(); + + return S_OK; +} +CATCH_RETURN(); + +HRESULT WSLCSession::GetVmDiagnostics(_Out_ WSLCVmDiagnostics* Diagnostics) +{ + RETURN_HR_IF_NULL(E_POINTER, Diagnostics); + + // Reads atomics only: this must not acquire a VM lease or otherwise bring the VM up, + // so callers can observe idle termination without keeping the VM alive. + Diagnostics->Running = m_vmState.load() == VmState::Running; + Diagnostics->StartCount = m_vmStartCount.load(); return S_OK; } @@ -2983,7 +3606,7 @@ void WSLCSession::RecoverExistingContainers() std::bind(&WSLCSession::OnContainerDeleted, this, std::placeholders::_1), m_eventTracker.value(), m_dockerClient.value(), - m_ioRelay); + *m_ioRelay); auto [it, inserted] = m_containers.emplace(container->ID(), std::move(container)); WI_ASSERT(inserted); diff --git a/src/windows/wslcsession/WSLCSession.h b/src/windows/wslcsession/WSLCSession.h index 5c7bee2227..f8e0a2c907 100644 --- a/src/windows/wslcsession/WSLCSession.h +++ b/src/windows/wslcsession/WSLCSession.h @@ -22,7 +22,10 @@ Module Name: #include "DockerEventTracker.h" #include "DockerHTTPClient.h" #include "IORelay.h" +#include #include +#include +#include #include namespace wsl::windows::service::wslc { @@ -70,11 +73,16 @@ class UserCOMCallback // // WSLCSession - Implements IWSLCSession for container management. // Runs in a per-user COM server process for security isolation. -// The SYSTEM service creates the VM and passes IWSLCVirtualMachine to Initialize(). +// The SYSTEM service passes an IWSLCVirtualMachineFactory to Initialize(); the VM is created +// lazily on first use and may be torn down when idle and recreated on demand. // class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession : public Microsoft::WRL::RuntimeClass, IWSLCSession, IFastRundown, ISupportErrorInfo> { + // WSLCContainer::Delete acquires a VmLease to keep the VM alive (and block idle + // teardown) for the duration of a container deletion. + friend class WSLCContainer; + public: WSLCSession() = default; @@ -98,6 +106,9 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession IFACEMETHOD(GetId)(_Out_ ULONG* Id) override; IFACEMETHOD(GetState)(_Out_ WSLCSessionState* State) override; + IFACEMETHOD(GetTerminationEvent)(_Out_ HANDLE* Event) override; + IFACEMETHOD(GetTerminationReason)(_Out_ WSLCVirtualMachineTerminationReason* Reason, _Out_ LPWSTR* Details) override; + IFACEMETHOD(GetVmDiagnostics)(_Out_ WSLCVmDiagnostics* Diagnostics) override; // Image management. IFACEMETHOD(PullImage)( @@ -135,6 +146,7 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession // Container management. IFACEMETHOD(CreateContainer)(_In_ const WSLCContainerOptions* Options, _In_opt_ IWarningCallback* WarningCallback, _Out_ IWSLCContainer** Container) override; IFACEMETHOD(OpenContainer)(_In_ LPCSTR Id, _In_ IWSLCContainer** Container) override; + IFACEMETHOD(BeginContainerOperation)(_Outptr_ IUnknown** Operation) override; IFACEMETHOD(ListContainers)( _In_opt_ const WSLCListContainersOptions* Options, _Out_ WSLCContainerEntry** Containers, @@ -201,6 +213,13 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession UserCOMCallback RegisterUserCOMCallback(); void UnregisterUserCOMCallback(DWORD ThreadId); + // Returns the warning callback supplied when the session was created/entered, re-marshalled + // into the calling apartment. Used as a fallback by WSLCExecutionContext so that warnings + // emitted by operations that carry no explicit callback (e.g. resource recovery during the + // lazy VM start) still reach the session creator. Returns null if no callback was supplied + // or the creating client's proxy is no longer reachable. + wil::com_ptr AcquireWarningCallback() const; + HANDLE SessionTerminatingEvent() const noexcept { return m_sessionTerminatingEvent.get(); @@ -213,9 +232,79 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession bool WaitForEventOrSessionTerminating(HANDLE Event, std::chrono::milliseconds Timeout) const; + // Signals the idle worker to re-evaluate whether the VM can be torn down. + // Safe to call from any thread, including IO relay / container callbacks. + void RequestIdleCheck() noexcept; + + // Creates an opaque activity token that holds a reference on this session's activity count for + // its lifetime, deferring idle teardown of the VM until every outstanding token is released. + // Used both for transient client operations (BeginContainerOperation) and to keep the VM alive + // for the lifetime of a process whose wrapper a client may keep (root-namespace and exec'd + // processes). + Microsoft::WRL::ComPtr CreateActivityToken(); + + // Idle-activity state shared between the session and any outstanding activity tokens. Held via + // shared_ptr so a token (or a container COM wrapper) can outlive the session (e.g. a client + // keeps a root-namespace process or container proxy past releasing the session) and still + // safely release its activity reference / wake the idle worker without keeping the session + // object alive. Tearing down the session therefore proceeds normally; a late token release + // simply decrements the count and signals an event with no waiter. Public so the container COM + // wrapper (WSLCContainer) can hold a shared_ptr to it; see WSLCContainer::Release(). + struct IdleState + { + std::atomic ActivityCount{0}; + wil::unique_event IdleCheckEvent{wil::EventOptions::ManualReset}; + }; + private: ULONG m_id = 0; + // VM lifecycle state for on-demand creation / idle termination. + enum class VmState + { + None, + Starting, + Running, + Stopping, + }; + + _Requires_exclusive_lock_held_(m_lock) + void StartVmLockHeld(); + _Requires_exclusive_lock_held_(m_lock) + void StopVmLockHeld(); + _Requires_exclusive_lock_held_(m_lock) + void TearDownVmLockHeld(bool CaptureTerminationReason = false); + _Requires_exclusive_lock_held_(m_lock) + bool HasActiveContainerLockHeld(); + void EnsureVmRunning(); + + void IdleWorker(); + bool IdleTerminationEnabled() const noexcept; + void PersistSettings(const WSLCSessionInitSettings& Settings, PSID UserSid); + + // RAII lease taken at the top of every VM-requiring operation. On construction it + // ensures the VM is running and records an in-flight operation so idle teardown is + // deferred; it then holds the shared session lock for the operation's duration. On + // destruction it releases the lock and triggers an idle check. + class VmLease + { + public: + VmLease() = default; + explicit VmLease(WSLCSession& Session); + VmLease(VmLease&& Other) noexcept; + VmLease& operator=(VmLease&& Other) noexcept; + ~VmLease(); + + VmLease(const VmLease&) = delete; + VmLease& operator=(const VmLease&) = delete; + + private: + WSLCSession* m_session{}; + wil::rwlock_release_shared_scope_exit m_lock; + }; + + [[nodiscard]] VmLease AcquireVmLease(); + __requires_lock_held(m_userHandlesLock) void CancelUserHandleIO(); __requires_lock_held(m_userCOMCallbacksLock) void CancelUserCOMCallbacks(); @@ -253,6 +342,19 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession void StreamImageOperation(DockerHTTPClient::HTTPRequestContext& requestContext, LPCSTR Image, LPCSTR OperationName, IProgressCallback* ProgressCallback); std::optional m_dockerClient; + + // The VM factory is a cross-process proxy supplied by the SYSTEM service at Initialize() time + // but first used later (on demand) from a different thread/apartment. A directly stored proxy + // would fail with RPC_E_WRONG_THREAD, so it is parked in the process Global Interface Table and + // re-fetched (re-marshalled into the calling apartment) each time a VM is created. + wil::com_ptr m_git; + DWORD m_vmFactoryGitCookie{}; + + // The warning callback supplied at Initialize() is parked in the GIT for the same reason as + // the VM factory: it is used later, on demand, from other threads/apartments (a directly + // stored proxy would fail with RPC_E_WRONG_THREAD). Zero if no callback was supplied. + DWORD m_warningCallbackGitCookie{}; + std::optional m_virtualMachine; std::optional m_eventTracker; wil::unique_event m_dockerdReadyEvent{wil::EventOptions::ManualReset}; @@ -271,15 +373,38 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession std::mutex m_networksLock; std::unordered_map m_networks; wil::unique_event m_sessionTerminatingEvent{wil::EventOptions::ManualReset}; + wil::unique_event m_sessionTerminatedEvent{wil::EventOptions::ManualReset}; wil::unique_event m_vmExitedEvent; + + WSLCVirtualMachineTerminationReason m_terminationReason{WSLCVirtualMachineTerminationReasonUnknown}; + std::wstring m_terminationDetails; wil::srwlock m_lock; - IORelay m_ioRelay; + std::optional m_ioRelay; + + // VM lifecycle / idle-termination state. + std::atomic m_vmState{VmState::None}; + std::atomic m_vmStopRequested{false}; + // Number of times the VM has been (re)created; surfaced via GetVmDiagnostics. + std::atomic m_vmStartCount{0}; + // In-flight activity count and idle-worker wake event, decoupled from this object's lifetime + // (see IdleState) so activity tokens never extend the session's lifetime. + std::shared_ptr m_idleState{std::make_shared()}; + std::thread m_idleThread; + + // Persisted settings required to (re)create the VM on demand. The string fields point + // into the owned storage members below (or m_displayName) so they remain valid for the + // lifetime of the session. + WSLCSessionInitSettings m_settings{}; + std::optional m_settingsCreatorProcessName; + std::optional m_settingsStoragePath; + std::optional m_settingsRootVhdTypeOverride; + std::vector m_userSid; + std::optional m_containerdProcess; std::optional m_dockerdProcess; WSLCFeatureFlags m_featureFlags{}; std::function m_destructionCallback; std::atomic m_terminating{false}; - std::atomic m_terminated{false}; wil::com_ptr m_pluginNotifier; diff --git a/src/windows/wslcsession/WSLCVirtualMachine.h b/src/windows/wslcsession/WSLCVirtualMachine.h index dbdf1e2483..f6e438c5c0 100644 --- a/src/windows/wslcsession/WSLCVirtualMachine.h +++ b/src/windows/wslcsession/WSLCVirtualMachine.h @@ -170,6 +170,12 @@ class WSLCVirtualMachine return m_vmTerminatingEvent.get(); } + // Retrieves the cached termination reason and details from the underlying VM. + HRESULT GetTerminationReason(_Out_ WSLCVirtualMachineTerminationReason* Reason, _Out_ LPWSTR* Details) const + { + return m_vm->GetTerminationReason(Reason, Details); + } + GUID VmId() const { return m_vmId; diff --git a/test/windows/WSLCTests.cpp b/test/windows/WSLCTests.cpp index 1327dbb907..a30a8911ab 100644 --- a/test/windows/WSLCTests.cpp +++ b/test/windows/WSLCTests.cpp @@ -2876,46 +2876,26 @@ class WSLCTests } } - WSLC_TEST_METHOD(TerminationCallback) + WSLC_TEST_METHOD(TerminationEvent) { - class DECLSPEC_UUID("7BC4E198-6531-4FA6-ADE2-5EF3D2A04DFF") CallbackInstance - : public Microsoft::WRL::RuntimeClass, ITerminationCallback, IFastRundown> - { - - public: - CallbackInstance(std::function&& callback) : - m_callback(std::move(callback)) - { - } + auto session = CreateSession(GetDefaultSessionSettings(L"termination-event-test")); - HRESULT OnTermination(WSLCVirtualMachineTerminationReason Reason, LPCWSTR Details) override - { - m_callback(Reason, Details); - return S_OK; - } + wil::unique_handle terminationEvent; + VERIFY_SUCCEEDED(session->GetTerminationEvent(&terminationEvent)); + VERIFY_IS_NOT_NULL(terminationEvent.get()); - private: - std::function m_callback; - }; + // The reason is unavailable until the session has terminated. + WSLCVirtualMachineTerminationReason reason{}; + wil::unique_cotaskmem_string details; + VERIFY_ARE_EQUAL(session->GetTerminationReason(&reason, &details), HRESULT_FROM_WIN32(ERROR_INVALID_STATE)); - std::promise> promise; + // Terminating the session should signal the event and record a graceful shutdown reason. + VERIFY_SUCCEEDED(session->Terminate()); - CallbackInstance callback{[&](WSLCVirtualMachineTerminationReason reason, LPCWSTR details) { - promise.set_value(std::make_pair(reason, details)); - }}; + VERIFY_ARE_EQUAL(WaitForSingleObject(terminationEvent.get(), 30 * 1000), static_cast(WAIT_OBJECT_0)); - WSLCSessionSettings sessionSettings = GetDefaultSessionSettings(L"termination-callback-test"); - sessionSettings.TerminationCallback = &callback; - - auto session = CreateSession(sessionSettings); - - session.reset(); - auto future = promise.get_future(); - auto result = future.wait_for(std::chrono::seconds(30)); - VERIFY_ARE_EQUAL(result, std::future_status::ready); - auto [reason, details] = future.get(); + VERIFY_SUCCEEDED(session->GetTerminationReason(&reason, &details)); VERIFY_ARE_EQUAL(reason, WSLCVirtualMachineTerminationReasonShutdown); - VERIFY_ARE_NOT_EQUAL(details, L""); } WSLC_TEST_METHOD(CrashDumpCallback) @@ -10358,6 +10338,10 @@ class WSLCTests auto settings = GetDefaultSessionSettings(c_sessionName); auto session = CreateSession(settings); + // Session creation is lazy, so start the VM by launching a process before killing it. + WSLCProcessLauncher launcher("/bin/sleep", {"/bin/sleep", "60"}); + auto process = launcher.Launch(*session); + KillVmByOwner(c_sessionName); WaitForSessionTermination(session.get()); @@ -10449,6 +10433,10 @@ class WSLCTests VERIFY_SUCCEEDED(sessionManager2->CreateSession(&settings2, WSLCSessionFlagsNone, warningCallback.Get(), &session2)); wsl::windows::common::security::ConfigureForCOMImpersonation(session2.get()); + // The VM (and container recovery) starts lazily on the first operation. Trigger it so + // recovery runs and its warning is delivered to the session's warning callback. + WSLCProcessLauncher("/bin/sh", {"/bin/sh", "-c", "exit 0"}).Launch(*session2).GetExitEvent().wait(30000); + // Verify the warning matches the expected localized message for the corrupt container. auto warnings = warningCallback->GetWarnings(); auto expectedWarning = std::format( @@ -10517,6 +10505,10 @@ class WSLCTests VERIFY_SUCCEEDED(sessionManager->CreateSession(&settings, WSLCSessionFlagsNone, warningCallback.Get(), &session)); wsl::windows::common::security::ConfigureForCOMImpersonation(session.get()); + // The VM (and volume recovery) starts lazily on the first operation. Trigger it so + // recovery runs and its warning is delivered to the session's warning callback. + WSLCProcessLauncher("/bin/sh", {"/bin/sh", "-c", "exit 0"}).Launch(*session).GetExitEvent().wait(30000); + // Verify the warning matches the expected localized message for the missing volume. auto warnings = warningCallback->GetWarnings(); auto expectedWarning = diff --git a/test/windows/WslcSdkTests.cpp b/test/windows/WslcSdkTests.cpp index 2588a13fe2..28b6be3e1e 100644 --- a/test/windows/WslcSdkTests.cpp +++ b/test/windows/WslcSdkTests.cpp @@ -267,60 +267,53 @@ class WslcSdkTests VERIFY_ARE_EQUAL(WslcCreateSession(nullptr, &session2, nullptr), E_POINTER); } - WSLC_TEST_METHOD(TerminationCallbackViaTerminate) + WSLC_TEST_METHOD(TerminationEventViaTerminate) { - std::promise promise; - - auto callback = [](WslcSessionTerminationReason reason, PVOID context) { - auto* p = static_cast*>(context); - p->set_value(reason); - }; - - std::filesystem::path extraStorage = m_storagePath / "wslc-termcb-term-storage"; + std::filesystem::path extraStorage = m_storagePath / "wslc-termevt-term-storage"; WslcSessionSettings sessionSettings; - VERIFY_SUCCEEDED(WslcInitSessionSettings(L"wslc-termcb-term-test", extraStorage.c_str(), &sessionSettings)); + VERIFY_SUCCEEDED(WslcInitSessionSettings(L"wslc-termevt-term-test", extraStorage.c_str(), &sessionSettings)); VERIFY_SUCCEEDED(WslcSetSessionSettingsTimeout(&sessionSettings, 30 * 1000)); - VERIFY_SUCCEEDED(WslcSetSessionSettingsTerminationCallback(&sessionSettings, callback, &promise)); UniqueSession session; VERIFY_SUCCEEDED(WslcCreateSession(&sessionSettings, &session, nullptr)); - // Terminating the session should trigger a graceful shutdown and fire the callback. + wil::unique_handle terminationEvent; + VERIFY_SUCCEEDED(WslcGetSessionTerminationEvent(session.get(), &terminationEvent)); + VERIFY_IS_NOT_NULL(terminationEvent.get()); + + // Terminating the session should trigger a graceful shutdown and signal the event. VERIFY_SUCCEEDED(WslcTerminateSession(session.get())); - auto future = promise.get_future(); - VERIFY_ARE_EQUAL(future.wait_for(std::chrono::seconds(30)), std::future_status::ready); - VERIFY_ARE_EQUAL(future.get(), WSLC_SESSION_TERMINATION_REASON_SHUTDOWN); + VERIFY_ARE_EQUAL(WaitForSingleObject(terminationEvent.get(), 30 * 1000), static_cast(WAIT_OBJECT_0)); + + WslcSessionTerminationReason reason = WSLC_SESSION_TERMINATION_REASON_UNKNOWN; + VERIFY_SUCCEEDED(WslcGetSessionTerminationReason(session.get(), &reason)); + VERIFY_ARE_EQUAL(reason, WSLC_SESSION_TERMINATION_REASON_SHUTDOWN); } - WSLC_TEST_METHOD(TerminationCallbackViaRelease) + WSLC_TEST_METHOD(TerminationEventViaRelease) { - std::promise promise; - - auto callback = [](WslcSessionTerminationReason reason, PVOID context) { - auto* p = static_cast*>(context); - p->set_value(reason); - }; - - std::filesystem::path extraStorage = m_storagePath / "wslc-termcb-release-storage"; + std::filesystem::path extraStorage = m_storagePath / "wslc-termevt-release-storage"; WslcSessionSettings sessionSettings; - VERIFY_SUCCEEDED(WslcInitSessionSettings(L"wslc-termcb-release-test", extraStorage.c_str(), &sessionSettings)); + VERIFY_SUCCEEDED(WslcInitSessionSettings(L"wslc-termevt-release-test", extraStorage.c_str(), &sessionSettings)); VERIFY_SUCCEEDED(WslcSetSessionSettingsTimeout(&sessionSettings, 30 * 1000)); - VERIFY_SUCCEEDED(WslcSetSessionSettingsTerminationCallback(&sessionSettings, callback, &promise)); UniqueSession session; VERIFY_SUCCEEDED(WslcCreateSession(&sessionSettings, &session, nullptr)); - // Releasing the session should trigger a graceful shutdown and fire the callback. + // The termination event is owned by the caller and stays valid even after the session is released. + wil::unique_handle terminationEvent; + VERIFY_SUCCEEDED(WslcGetSessionTerminationEvent(session.get(), &terminationEvent)); + VERIFY_IS_NOT_NULL(terminationEvent.get()); + + // Releasing the session should trigger a graceful shutdown and signal the event. VERIFY_SUCCEEDED(WslcReleaseSession(session.get())); - // Calling WslcSessionRelease will destroy the session + // Calling WslcReleaseSession will destroy the session. session.release(); - auto future = promise.get_future(); - VERIFY_ARE_EQUAL(future.wait_for(std::chrono::seconds(30)), std::future_status::ready); - VERIFY_ARE_EQUAL(future.get(), WSLC_SESSION_TERMINATION_REASON_SHUTDOWN); + VERIFY_ARE_EQUAL(WaitForSingleObject(terminationEvent.get(), 30 * 1000), static_cast(WAIT_OBJECT_0)); } WSLC_TEST_METHOD(CrashDumpCallback) diff --git a/test/windows/wslc/e2e/WSLCE2EHelpers.h b/test/windows/wslc/e2e/WSLCE2EHelpers.h index d05940c58e..79c6d2f8ff 100644 --- a/test/windows/wslc/e2e/WSLCE2EHelpers.h +++ b/test/windows/wslc/e2e/WSLCE2EHelpers.h @@ -111,6 +111,11 @@ struct TestSession return m_storagePath; } + IWSLCSession* Session() const + { + return m_session.get(); + } + private: std::wstring m_name; std::filesystem::path m_storagePath; diff --git a/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp b/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp index c10f432951..c512452cf8 100644 --- a/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp +++ b/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp @@ -109,8 +109,14 @@ class WSLCE2ESessionEnterTests WSLC_TEST_METHOD(WSLCE2E_SessionEnter_StoragePathNotFound) { auto result = RunWslc(L"system session enter does-not-exist"); + + // The CLI resolves the storage argument to an absolute path (see EnterSession task) and the + // service validates it eagerly at session creation, reporting the friendly "No WSLC session + // found in ''" message rather than a bare system error. + const auto storagePath = std::filesystem::absolute(L"does-not-exist").wstring(); result.Verify({ - .Stderr = L"The system cannot find the path specified. \r\nError code: ERROR_PATH_NOT_FOUND\r\n", + .Stderr = wsl::shared::Localization::MessageWslcSessionStorageNotFound(storagePath) + + L"\r\nError code: ERROR_PATH_NOT_FOUND\r\n", .ExitCode = 1, }); } diff --git a/test/windows/wslc/e2e/WSLCE2EVmIdleTests.cpp b/test/windows/wslc/e2e/WSLCE2EVmIdleTests.cpp new file mode 100644 index 0000000000..2c10b02698 --- /dev/null +++ b/test/windows/wslc/e2e/WSLCE2EVmIdleTests.cpp @@ -0,0 +1,213 @@ +/*++ + +Copyright (c) Microsoft. All rights reserved. + +Module Name: + + WSLCE2EVmIdleTests.cpp + +Abstract: + + End-to-end tests for on-demand / idle-terminating wslc VMs. A session's backing VM is + created lazily on the first VM-requiring operation and torn down again once there are no + active (Created or Running) containers and no in-flight operations, while the per-user + session survives across VM restarts. VM lifecycle is observed via + IWSLCSession::GetVmDiagnostics, which reads state without bringing the VM up. + +--*/ + +#include "precomp.h" +#include "windows/Common.h" +#include "WSLCExecutor.h" +#include "WSLCE2EHelpers.h" +#include + +namespace WSLCE2ETests { +using namespace wsl::shared; + +class WSLCE2EVmIdleTests +{ + WSLC_TEST_CLASS(WSLCE2EVmIdleTests) + + const TestImage& AlpineImage = AlpineTestImage(); + + static WSLCVmDiagnostics QueryDiagnostics(const TestSession& session) + { + WSLCVmDiagnostics diagnostics{}; + VERIFY_SUCCEEDED(session.Session()->GetVmDiagnostics(&diagnostics)); + return diagnostics; + } + + // Polls VM diagnostics until the VM reaches the desired running state. Idle teardown and + // on-demand bring-up happen asynchronously, so callers must wait rather than assume. + static void WaitForVmRunningState(const TestSession& session, bool running) + { + retry::RetryWithTimeout( + [&]() { + const auto diagnostics = QueryDiagnostics(session); + THROW_HR_IF(E_FAIL, static_cast(diagnostics.Running) != running); + }, + std::chrono::milliseconds(250), + std::chrono::seconds(60)); + } + + // A freshly created session has no VM until the first VM-requiring operation arrives, and + // the VM idle-terminates once that operation completes with nothing left active. + WSLC_TEST_METHOD(WSLCE2E_VmIdle_LazyStartAndIdleStop) + { + auto session = TestSession::Create(L"wslc-vmidle-lazy"); + + const auto initial = QueryDiagnostics(session); + VERIFY_IS_FALSE(static_cast(initial.Running)); + VERIFY_ARE_EQUAL(initial.StartCount, 0ul); + + // The first VM-requiring operation brings the VM up on demand. + EnsureImageIsLoaded(AlpineImage, session.Name()); + + // With no Created/Running containers and no in-flight operations, the VM tears down. + WaitForVmRunningState(session, false); + + const auto afterIdle = QueryDiagnostics(session); + VERIFY_IS_TRUE(afterIdle.StartCount >= 1ul); + } + + // After the VM idle-terminates, a subsequent operation recreates it from scratch and any + // previously loaded images remain available (storage persists across VM restarts). + WSLC_TEST_METHOD(WSLCE2E_VmIdle_RecreateOnDemandAndPersistState) + { + auto session = TestSession::Create(L"wslc-vmidle-recreate"); + + EnsureImageIsLoaded(AlpineImage, session.Name()); + WaitForVmRunningState(session, false); + const auto startCountBeforeRecreate = QueryDiagnostics(session).StartCount; + + // Running a container recreates the VM, runs to completion, then idles again. + RunWslcAndVerify( + std::format(L"container run --session {} --rm {} echo hello", session.Name(), AlpineImage.NameAndTag()), + {.Stderr = L"", .ExitCode = 0}); + + WaitForVmRunningState(session, false); + const auto startCountAfterRecreate = QueryDiagnostics(session).StartCount; + VERIFY_IS_TRUE(startCountAfterRecreate > startCountBeforeRecreate); + + // The image loaded before the restart survived the VM teardown/recreate cycle. + auto images = RunWslc(std::format(L"image list --session {}", session.Name())); + images.Verify({.Stderr = L"", .ExitCode = 0}); + VERIFY_IS_TRUE(images.StdoutContainsSubstring(L"alpine")); + } + + // A container in the Created state (created but never started) counts as active and keeps + // the VM alive; removing it lets the VM idle-terminate. + WSLC_TEST_METHOD(WSLCE2E_VmIdle_CreatedContainerKeepsVmAlive) + { + constexpr auto containerName = L"wslc-vmidle-created"; + auto session = TestSession::Create(L"wslc-vmidle-created-session"); + + EnsureImageIsLoaded(AlpineImage, session.Name()); + + RunWslcAndVerify( + std::format(L"container create --session {} --name {} {} sleep 3600", session.Name(), containerName, AlpineImage.NameAndTag()), + {.Stderr = L"", .ExitCode = 0}); + + // The VM must stay up while a Created container exists, even with nothing running. + WaitForVmRunningState(session, true); + std::this_thread::sleep_for(std::chrono::seconds(3)); + VERIFY_IS_TRUE(static_cast(QueryDiagnostics(session).Running)); + + // Removing the only container drops the active count to zero and the VM idles. + RunWslcAndVerify(std::format(L"container rm --session {} {}", session.Name(), containerName), {.Stderr = L"", .ExitCode = 0}); + + WaitForVmRunningState(session, false); + } + + // A long-lived root-namespace process (created via CreateRootNamespaceProcess) is not tracked + // as a container, so it does not contribute to the active-container check. It must nonetheless + // keep the VM alive for as long as the client holds the returned process, via the activity + // token bound to the process's lifetime. Without that token the idle worker would tear the VM + // down once the grace period elapsed, killing the process out from under the client. + WSLC_TEST_METHOD(WSLCE2E_VmIdle_RootProcessKeepsVmAlive) + { + auto session = TestSession::Create(L"wslc-vmidle-rootproc"); + + // Launch a long-running root-namespace process and keep the returned process object alive. + // This brings the VM up on demand to host the process. + wsl::windows::common::WSLCProcessLauncher launcher("/bin/sleep", {"/bin/sleep", "3600"}); + std::optional process = launcher.Launch(*session.Session()); + + WaitForVmRunningState(session, true); + + // The VM must remain running past the idle grace period (30s) while the process is held, + // even though there are no containers and no in-flight operations. Without the keep-alive + // token the idle worker would have torn the VM down ~30s after the creating call returned, + // so a generous margin past the grace period reliably catches that regression. + std::this_thread::sleep_for(std::chrono::seconds(40)); + VERIFY_IS_TRUE(static_cast(QueryDiagnostics(session).Running)); + + // Releasing the process proxy drops the activity count to zero and the VM idle-terminates. + process.reset(); + WaitForVmRunningState(session, false); + } + + // A client may hold a proxy to a container that has exited and is therefore no longer "active" + // by state. Tearing the VM down would disconnect that proxy (leaving the client with + // RPC_E_DISCONNECTED), so the idle worker must keep the VM alive while any container proxy is + // outstanding -- and reclaim it promptly once the client releases the proxy. This is the + // container analogue of the root-process keep-alive above. + WSLC_TEST_METHOD(WSLCE2E_VmIdle_HeldContainerProxyKeepsVmAlive) + { + auto session = TestSession::Create(L"wslc-vmidle-heldcontainer"); + + EnsureImageIsLoaded(AlpineImage, session.Name()); + + // Launch a container that exits almost immediately, then keep the returned proxy. Once it has + // exited it no longer counts as active by state, so only the held proxy can keep the VM up. + wsl::windows::common::WSLCContainerLauncher launcher( + wsl::shared::string::WideToMultiByte(AlpineImage.NameAndTag()), + "wslc-vmidle-heldcontainer", + {"/bin/true"}, + {}, + "none"); + + std::optional container = launcher.Launch(*session.Session(), WSLCContainerStartFlagsNone); + + // Exercise the pure proxy-release path (not container deletion) as the trigger for teardown. + container->SetDeleteOnClose(false); + + // Wait for the container to exit so it no longer keeps the VM alive by being Created/Running. + retry::RetryWithTimeout( + [&]() { THROW_HR_IF(E_FAIL, container->State() != WslcContainerStateExited); }, + std::chrono::milliseconds(250), + std::chrono::seconds(60)); + + // The VM must remain running well past the idle grace period (30s) while the exited + // container's proxy is held. Without the pin the idle worker would tear the VM down ~30s + // after the launch returned, so a generous margin past the grace period catches that + // regression reliably. + std::this_thread::sleep_for(std::chrono::seconds(40)); + VERIFY_IS_TRUE(static_cast(QueryDiagnostics(session).Running)); + + // Releasing the container proxy drops the last external reference and the VM idle-terminates. + container.reset(); + WaitForVmRunningState(session, false); + } + + // arrives while teardown may still be in flight. All operations must succeed (no spurious + // ERROR_INVALID_STATE from racing a VM that is stopping). + WSLC_TEST_METHOD(WSLCE2E_VmIdle_ConcurrentRecreateDoesNotFail) + { + auto session = TestSession::Create(L"wslc-vmidle-stress"); + + EnsureImageIsLoaded(AlpineImage, session.Name()); + + for (int i = 0; i < 12; i++) + { + // Intentionally do not wait between iterations so each lease races the previous + // run's idle teardown. + auto result = RunWslc( + std::format(L"container run --session {} --rm {} echo iteration-{}", session.Name(), AlpineImage.NameAndTag(), i)); + result.Verify({.Stderr = L"", .ExitCode = 0}); + } + } +}; + +} // namespace WSLCE2ETests