diff --git a/msipackage/package.wix.in b/msipackage/package.wix.in
index 88dc3ea171..18b79acead 100644
--- a/msipackage/package.wix.in
+++ b/msipackage/package.wix.in
@@ -306,14 +306,6 @@
-
-
-
-
-
-
-
-
diff --git a/src/windows/WslcSDK/CMakeLists.txt b/src/windows/WslcSDK/CMakeLists.txt
index 8c026430f6..45b1a0dd25 100644
--- a/src/windows/WslcSDK/CMakeLists.txt
+++ b/src/windows/WslcSDK/CMakeLists.txt
@@ -1,7 +1,6 @@
set(SOURCES
IOCallback.cpp
ProgressCallback.cpp
- TerminationCallback.cpp
CrashDumpCallback.cpp
wslcsdk.cpp
WslcsdkPrivate.cpp
@@ -10,7 +9,6 @@ set(HEADERS
Defaults.h
IOCallback.h
ProgressCallback.h
- TerminationCallback.h
CrashDumpCallback.h
wslcsdk.h
WslcsdkPrivate.h
diff --git a/src/windows/WslcSDK/TerminationCallback.cpp b/src/windows/WslcSDK/TerminationCallback.cpp
deleted file mode 100644
index bd98a59529..0000000000
--- a/src/windows/WslcSDK/TerminationCallback.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/*++
-
-Copyright (c) Microsoft. All rights reserved.
-
-Module Name:
-
- TerminationCallback.cpp
-
-Abstract:
-
- Implementation of a type that implements ITerminationCallback.
-
---*/
-#include "precomp.h"
-#include "TerminationCallback.h"
-
-namespace {
-WslcSessionTerminationReason ConvertReason(WSLCVirtualMachineTerminationReason Reason)
-{
- switch (Reason)
- {
- case WSLCVirtualMachineTerminationReasonShutdown:
- return WSLC_SESSION_TERMINATION_REASON_SHUTDOWN;
- case WSLCVirtualMachineTerminationReasonCrashed:
- return WSLC_SESSION_TERMINATION_REASON_CRASHED;
- default:
- return WSLC_SESSION_TERMINATION_REASON_UNKNOWN;
- }
-}
-} // namespace
-
-TerminationCallback::TerminationCallback(WslcSessionTerminationCallback callback, PVOID context) :
- m_callback(callback), m_context(context)
-{
-}
-
-// TODO: Details from the runtime are dropped; should the SDK callback function be updated to include the reasons string?
-HRESULT STDMETHODCALLTYPE TerminationCallback::OnTermination(WSLCVirtualMachineTerminationReason Reason, LPCWSTR)
-{
- if (m_callback)
- {
- m_callback(ConvertReason(Reason), m_context);
- }
-
- return S_OK;
-}
-
-winrt::com_ptr TerminationCallback::CreateIf(const WslcSessionOptionsInternal* options)
-{
- if (options->terminationCallback)
- {
- return winrt::make_self(options->terminationCallback, options->terminationCallbackContext);
- }
- else
- {
- return nullptr;
- }
-}
diff --git a/src/windows/WslcSDK/TerminationCallback.h b/src/windows/WslcSDK/TerminationCallback.h
deleted file mode 100644
index afd8542bf3..0000000000
--- a/src/windows/WslcSDK/TerminationCallback.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*++
-
-Copyright (c) Microsoft. All rights reserved.
-
-Module Name:
-
- TerminationCallback.h
-
-Abstract:
-
- Header for a type that implements ITerminationCallback.
-
---*/
-#pragma once
-#include "wslc.h"
-#include "wslcsdkprivate.h"
-#include
-
-struct TerminationCallback : public winrt::implements
-{
- TerminationCallback(WslcSessionTerminationCallback callback, PVOID context);
-
- // ITerminationCallback
- HRESULT STDMETHODCALLTYPE OnTermination(WSLCVirtualMachineTerminationReason Reason, LPCWSTR Details) override;
-
- // Creates a TerminationCallback if the options provides a callback.
- static winrt::com_ptr CreateIf(const WslcSessionOptionsInternal* options);
-
-private:
- WslcSessionTerminationCallback m_callback = nullptr;
- PVOID m_context = nullptr;
-};
diff --git a/src/windows/WslcSDK/WslcsdkPrivate.h b/src/windows/WslcSDK/WslcsdkPrivate.h
index 5000363abd..d487285665 100644
--- a/src/windows/WslcSDK/WslcsdkPrivate.h
+++ b/src/windows/WslcSDK/WslcsdkPrivate.h
@@ -33,8 +33,6 @@ typedef struct WslcSessionOptionsInternal
WslcVhdRequirements vhdRequirements;
WslcSessionFeatureFlags featureFlags;
- WslcSessionTerminationCallback terminationCallback;
- PVOID terminationCallbackContext;
} WslcSessionOptionsInternal;
static_assert(sizeof(WslcSessionOptionsInternal) == WSLC_SESSION_OPTIONS_SIZE, "WSLC_SESSION_OPTIONS_INTERNAL size mismatch");
@@ -107,7 +105,6 @@ const WslcContainerOptionsInternal* GetInternalType(const WslcContainerSettings*
struct WslcSessionImpl
{
wil::com_ptr session;
- wil::com_ptr terminationCallback;
};
WslcSessionImpl* GetInternalType(WslcSession handle);
diff --git a/src/windows/WslcSDK/winrt/Session.cpp b/src/windows/WslcSDK/winrt/Session.cpp
index 5a29437c99..46bb7afdcc 100644
--- a/src/windows/WslcSDK/winrt/Session.cpp
+++ b/src/windows/WslcSDK/winrt/Session.cpp
@@ -52,12 +52,16 @@ void Session::Start()
throw winrt::hresult_illegal_method_call(L"Session has already been started");
}
- winrt::check_hresult(WslcSetSessionSettingsTerminationCallback(GetStructPointer(m_settings), TerminatedCallback, /* context */ this));
-
wil::unique_cotaskmem_string errorMessage;
auto hr = WslcCreateSession(GetStructPointer(m_settings), m_session.put(), errorMessage.put());
THROW_MSG_IF_FAILED(hr, errorMessage);
m_settings = nullptr;
+
+ winrt::check_hresult(WslcGetSessionTerminationEvent(m_session.get(), m_terminationEvent.put()));
+
+ m_terminationWait.reset(CreateThreadpoolWait(&Session::OnTerminated, this, nullptr));
+ THROW_LAST_ERROR_IF_NULL(m_terminationWait);
+ SetThreadpoolWait(m_terminationWait.get(), m_terminationEvent.get(), nullptr);
}
void Session::EnsureStarted() const
@@ -300,11 +304,15 @@ WslcSession Session::ToHandle()
return m_session.get();
}
-void CALLBACK Session::TerminatedCallback(_In_ WslcSessionTerminationReason reason, _In_opt_ PVOID context) noexcept
+void CALLBACK Session::OnTerminated(PTP_CALLBACK_INSTANCE /* instance */, PVOID context, PTP_WAIT /* wait */, TP_WAIT_RESULT /* waitResult */) noexcept
{
try
{
auto session = static_cast(context);
+
+ WslcSessionTerminationReason reason = WSLC_SESSION_TERMINATION_REASON_UNKNOWN;
+ LOG_IF_FAILED(WslcGetSessionTerminationReason(session->m_session.get(), &reason));
+
session->m_terminatedEvent(static_cast(reason));
}
CATCH_LOG();
diff --git a/src/windows/WslcSDK/winrt/Session.h b/src/windows/WslcSDK/winrt/Session.h
index 954591262c..f6e8f7bd14 100644
--- a/src/windows/WslcSDK/winrt/Session.h
+++ b/src/windows/WslcSDK/winrt/Session.h
@@ -45,12 +45,15 @@ struct Session : SessionT
void EnsureStarted() const;
winrt::Microsoft::WSL::Containers::SessionSettings m_settings; // Only kept until Start() is called
- static void CALLBACK TerminatedCallback(_In_ WslcSessionTerminationReason reason, _In_opt_ PVOID context) noexcept;
+ // Threadpool callback that raises the Terminated event once the session's termination handle is signaled.
+ static void CALLBACK OnTerminated(PTP_CALLBACK_INSTANCE instance, PVOID context, PTP_WAIT wait, TP_WAIT_RESULT waitResult) noexcept;
- // Releasing the session handle may trigger the termination callback.
- // Keep these two in this order so that the session handle is released before the termination event is destructed.
winrt::event m_terminatedEvent;
wil::unique_any m_session{nullptr};
+
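+ // Bridges the one-off termination event surfaced by the SDK to the WinRT Terminated event.
+ // Keep these declared after m_terminatedEvent and m_session: OnTerminated uses both, and members are destroyed
+ // in reverse declaration order, so the wait (whose destructor drains in-flight callbacks) is torn down first.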
+ wil::unique_handle m_terminationEvent;
+ wil::unique_threadpool_wait m_terminationWait;
};
} // namespace winrt::Microsoft::WSL::Containers::implementation
namespace winrt::Microsoft::WSL::Containers::factory_implementation {
diff --git a/src/windows/WslcSDK/wslcsdk.cpp b/src/windows/WslcSDK/wslcsdk.cpp
index 25dcdf1426..f83ff24fc0 100644
--- a/src/windows/WslcSDK/wslcsdk.cpp
+++ b/src/windows/WslcSDK/wslcsdk.cpp
@@ -17,7 +17,6 @@ Module Name:
#include "WslcsdkPrivate.h"
#include "Defaults.h"
#include "ProgressCallback.h"
-#include "TerminationCallback.h"
#include "CrashDumpCallback.h"
#include "Localization.h"
#include "WslInstall.h"
@@ -435,12 +434,6 @@ try
runtimeSettings.MemoryMb = internalType->memoryMb;
runtimeSettings.BootTimeoutMs = internalType->timeoutMS;
runtimeSettings.NetworkingMode = WSLCNetworkingModeVirtioProxy;
- auto terminationCallback = TerminationCallback::CreateIf(internalType);
- if (terminationCallback)
- {
- result->terminationCallback.attach(terminationCallback.as().detach());
- runtimeSettings.TerminationCallback = terminationCallback.get();
- }
runtimeSettings.FeatureFlags = ConvertFlags(internalType->featureFlags);
WI_SetFlag(runtimeSettings.FeatureFlags, WslcFeatureFlagsVirtioFs);
WI_SetFlag(runtimeSettings.FeatureFlags, WslcFeatureFlagsDnsTunneling);
@@ -587,15 +580,39 @@ try
}
CATCH_RETURN();
-STDAPI WslcSetSessionSettingsTerminationCallback(
- _In_ WslcSessionSettings* sessionSettings, _In_opt_ WslcSessionTerminationCallback terminationCallback, _In_opt_ PVOID terminationContext)
+STDAPI WslcGetSessionTerminationEvent(_In_ WslcSession session, _Out_ HANDLE* terminationEvent)
try
{
- auto internalType = CheckAndGetInternalType(sessionSettings);
- RETURN_HR_IF(E_INVALIDARG, terminationCallback == nullptr && terminationContext != nullptr);
+ RETURN_HR_IF_NULL(E_POINTER, terminationEvent);
+ *terminationEvent = nullptr; // Clear the output first so every failure path below reports a null handle.
+
+ auto internalType = CheckAndGetInternalType(session);
+ RETURN_HR_IF_NULL(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), internalType->session); // No underlying session object to query.
+
+ RETURN_HR(internalType->session->GetTerminationEvent(terminationEvent)); // Forwards to the session; the caller owns (and closes) the returned handle.
+}
+CATCH_RETURN();
+
+STDAPI WslcGetSessionTerminationReason(_In_ WslcSession session, _Out_ WslcSessionTerminationReason* reason)
+try
+{
+ static_assert(
+ WSLC_SESSION_TERMINATION_REASON_UNKNOWN == WSLCVirtualMachineTerminationReasonUnknown &&
+ WSLC_SESSION_TERMINATION_REASON_SHUTDOWN == WSLCVirtualMachineTerminationReasonShutdown &&
+ WSLC_SESSION_TERMINATION_REASON_CRASHED == WSLCVirtualMachineTerminationReasonCrashed,
+ "Termination reason enum values mismatch."); // Guards the value-preserving cast at the end of this function.
+
+ RETURN_HR_IF_NULL(E_POINTER, reason);
+ *reason = WSLC_SESSION_TERMINATION_REASON_UNKNOWN; // Default reported on every failure path below.
+
+ auto internalType = CheckAndGetInternalType(session);
+ RETURN_HR_IF_NULL(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), internalType->session); // No underlying session object to query.
+
+ WSLCVirtualMachineTerminationReason runtimeReason = WSLCVirtualMachineTerminationReasonUnknown;
+ wil::unique_cotaskmem_string details; // Receives the runtime's details string; freed on scope exit and not surfaced by this API.
+ RETURN_IF_FAILED(internalType->session->GetTerminationReason(&runtimeReason, &details));
- internalType->terminationCallback = terminationCallback;
- internalType->terminationCallbackContext = terminationContext;
+ *reason = static_cast(runtimeReason); // Safe because the static_assert above pins the two enums to the same values.
return S_OK;
}
@@ -649,10 +666,7 @@ try
{
auto internalType = CheckAndGetInternalTypeUniquePointer(session);
- // Drop the session before the termination callback, in case session destruction triggers
- // the termination callback.
internalType->session.reset();
- internalType->terminationCallback.reset();
return S_OK;
}
diff --git a/src/windows/WslcSDK/wslcsdk.def b/src/windows/WslcSDK/wslcsdk.def
index a96b4cf008..cc68fd6b1d 100644
--- a/src/windows/WslcSDK/wslcsdk.def
+++ b/src/windows/WslcSDK/wslcsdk.def
@@ -16,7 +16,8 @@ WslcReleaseContainer
WslcReleaseProcess
WslcSetSessionSettingsFeatureFlags
-WslcSetSessionSettingsTerminationCallback
+WslcGetSessionTerminationEvent
+WslcGetSessionTerminationReason
WslcSetSessionSettingsCpuCount
WslcSetSessionSettingsMemory
WslcSetSessionSettingsTimeout
diff --git a/src/windows/WslcSDK/wslcsdk.h b/src/windows/WslcSDK/wslcsdk.h
index 8db0757c3d..954e06be2a 100644
--- a/src/windows/WslcSDK/wslcsdk.h
+++ b/src/windows/WslcSDK/wslcsdk.h
@@ -42,7 +42,7 @@ EXTERN_C_START
#define WSLC_E_REGISTRY_BLOCKED_BY_POLICY MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, WSLC_E_BASE + 13) /* 0x8004060D */
// Session values
-#define WSLC_SESSION_OPTIONS_SIZE 88
+#define WSLC_SESSION_OPTIONS_SIZE 72
#define WSLC_SESSION_OPTIONS_ALIGNMENT 8
typedef struct WslcSessionSettings
@@ -123,8 +123,6 @@ typedef enum WslcSessionTerminationReason
WSLC_SESSION_TERMINATION_REASON_CRASHED = 2,
} WslcSessionTerminationReason;
-typedef __callback void(CALLBACK* WslcSessionTerminationCallback)(_In_ WslcSessionTerminationReason reason, _In_opt_ PVOID context);
-
typedef struct WslcSessionCrashDumpInfo
{
_Field_z_ PCWSTR dumpPath;
@@ -153,9 +151,8 @@ STDAPI WslcSetSessionSettingsVhd(_In_ WslcSessionSettings* sessionSettings, _In_
STDAPI WslcSetSessionSettingsFeatureFlags(_In_ WslcSessionSettings* sessionSettings, _In_ WslcSessionFeatureFlags flags);
-// Pass in Null for callback to clear the termination callback
-STDAPI WslcSetSessionSettingsTerminationCallback(
- _In_ WslcSessionSettings* sessionSettings, _In_opt_ WslcSessionTerminationCallback terminationCallback, _In_opt_ PVOID terminationContext);
+STDAPI WslcGetSessionTerminationEvent(_In_ WslcSession session, _Out_ HANDLE* terminationEvent);
+STDAPI WslcGetSessionTerminationReason(_In_ WslcSession session, _Out_ WslcSessionTerminationReason* reason);
STDAPI WslcTerminateSession(_In_ WslcSession session);
STDAPI WslcReleaseSession(_In_ WslcSession session);
diff --git a/src/windows/service/exe/HcsVirtualMachine.cpp b/src/windows/service/exe/HcsVirtualMachine.cpp
index 6778079396..84679878bb 100644
--- a/src/windows/service/exe/HcsVirtualMachine.cpp
+++ b/src/windows/service/exe/HcsVirtualMachine.cpp
@@ -285,12 +285,6 @@ HcsVirtualMachine::HcsVirtualMachine(_In_ const WSLCSessionSettings* Settings)
m_guestDeviceManager = std::make_shared<::GuestDeviceManager>(m_vmIdString, m_vmId);
}
- // Configure termination callback
- if (Settings->TerminationCallback)
- {
- m_terminationCallback = Settings->TerminationCallback;
- }
-
hcs::RegisterCallback(m_computeSystem.get(), &HcsVirtualMachine::OnVmExitCallback, this);
// Create a listening socket for mini_init to connect to once the VM is running.
@@ -319,7 +313,11 @@ HcsVirtualMachine::HcsVirtualMachine(_In_ const WSLCSessionSettings* Settings)
HcsVirtualMachine::~HcsVirtualMachine()
{
- std::lock_guard lock(m_lock);
+ // N.B. Do not hold m_lock here. OnExit() acquires m_lock to cache the termination reason
+ // before signaling m_vmExitEvent, and closing the compute system below drains any in-flight
+ // HCS exit/crash callbacks. Holding m_lock across the exit-event wait and HcsCloseComputeSystem
+ // would deadlock against an in-flight OnExit() that is blocked acquiring m_lock. By the time the
+ // compute system is closed no further callbacks can run, so the remaining teardown is unguarded.
// Wait up to 5 seconds for the VM to terminate gracefully.
bool forceTerminate = false;
@@ -692,8 +690,6 @@ CATCH_LOG()
void HcsVirtualMachine::OnExit(const HCS_EVENT* Event)
{
- m_vmExitEvent.SetEvent();
-
const auto exitStatus = wsl::shared::FromJson(Event->EventData);
auto reason = WSLCVirtualMachineTerminationReasonUnknown;
@@ -715,12 +711,29 @@ void HcsVirtualMachine::OnExit(const HCS_EVENT* Event)
}
}
- if (m_terminationCallback)
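+ // Cache the termination reason and details before signaling the exit event, so that anyone woken
+ // by the event observes them. NOTE(review): the event used to be signaled first; if anything above
+ // throws (e.g. parsing EventData) it is now never signaled - consider a scope-exit guard.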
{
- LOG_IF_FAILED(m_terminationCallback->OnTermination(reason, Event->EventData));
+ std::lock_guard lock(m_lock);
+ m_terminationReason = reason;
+ m_terminationDetails = Event->EventData;
}
+
+ m_vmExitEvent.SetEvent();
}
+// Returns the termination reason and details cached by OnExit(). Until the VM has exited the reason
+// is WSLCVirtualMachineTerminationReasonUnknown and the details string is empty.
+// On success the caller owns *Details and must free it with CoTaskMemFree. On failure both outputs
+// are left well-defined (*Reason = Unknown, *Details = nullptr). Returns E_POINTER if either
+// argument is null, or the HRESULT of the caught exception if the string copy fails.
+HRESULT HcsVirtualMachine::GetTerminationReason(_Out_ WSLCVirtualMachineTerminationReason* Reason, _Out_ LPWSTR* Details)
+try
+{
+ RETURN_HR_IF(E_POINTER, Reason == nullptr || Details == nullptr);
+
+ // COM out-parameters must be reliably initialized on error returns, so clear them before any
+ // operation that can fail (the string copy below throws on allocation failure).
+ *Reason = WSLCVirtualMachineTerminationReasonUnknown;
+ *Details = nullptr;
+
+ std::lock_guard lock(m_lock);
+
+ // Allocate the copy first and publish afterwards, so the outputs are only written once the
+ // call can no longer fail.
+ auto details = wil::make_cotaskmem_string(m_terminationDetails.c_str());
+ *Reason = m_terminationReason;
+ *Details = details.release();
+
+ return S_OK;
+}
+CATCH_RETURN()
+
void HcsVirtualMachine::OnCrash(const HCS_EVENT* Event)
{
if (m_crashLogCaptured.load() && m_vmSavedStateCaptured.load())
@@ -863,7 +876,6 @@ WSLCVirtualMachineFactory::WSLCVirtualMachineFactory(_In_ const WSLCSessionSetti
m_dmesgOutput.reset(wslutil::DuplicateHandle(wslutil::FromCOMInputHandle(Settings->DmesgOutput), GENERIC_WRITE | SYNCHRONIZE));
}
- m_terminationCallback = Settings->TerminationCallback;
m_maximumStorageSizeMb = Settings->MaximumStorageSizeMb;
m_cpuCount = Settings->CpuCount;
m_memoryMb = Settings->MemoryMb;
@@ -883,7 +895,6 @@ WSLCSessionSettings WSLCVirtualMachineFactory::BuildSettings()
settings.MemoryMb = m_memoryMb;
settings.BootTimeoutMs = m_bootTimeoutMs;
settings.NetworkingMode = m_networkingMode;
- settings.TerminationCallback = m_terminationCallback.get();
settings.FeatureFlags = m_featureFlags;
settings.StorageFlags = m_storageFlags;
settings.RootVhdOverride = m_rootVhdOverride ? m_rootVhdOverride->c_str() : nullptr;
diff --git a/src/windows/service/exe/HcsVirtualMachine.h b/src/windows/service/exe/HcsVirtualMachine.h
index 3ad6a9eea5..2a862d1435 100644
--- a/src/windows/service/exe/HcsVirtualMachine.h
+++ b/src/windows/service/exe/HcsVirtualMachine.h
@@ -48,6 +48,7 @@ class HcsVirtualMachine
IFACEMETHOD(RemoveShare)(_In_ REFGUID ShareId) override;
IFACEMETHOD(ApplyGuestCapabilities)(_In_ const WSLCGuestCapabilities* Capabilities) override;
IFACEMETHOD(GetTerminationEvent)(_Out_ HANDLE* Event) override;
+ IFACEMETHOD(GetTerminationReason)(_Out_ WSLCVirtualMachineTerminationReason* Reason, _Out_ LPWSTR* Details) override;
private:
struct DiskInfo
@@ -104,7 +105,9 @@ class HcsVirtualMachine
std::atomic m_vmSavedStateCaptured = false;
std::atomic m_crashLogCaptured = false;
- wil::com_ptr m_terminationCallback;
+ // Termination reason and details, cached when the VM exits (see OnExit). Guarded by m_lock.
+ WSLCVirtualMachineTerminationReason m_terminationReason{WSLCVirtualMachineTerminationReasonUnknown};
+ std::wstring m_terminationDetails;
};
//
@@ -136,8 +139,6 @@ class WSLCVirtualMachineFactory
// subsequent VMs reuse this duplicate, whose writes simply fail if the sink is gone.
wil::unique_handle m_dmesgOutput;
- wil::com_ptr m_terminationCallback;
-
ULONGLONG m_maximumStorageSizeMb{};
ULONG m_cpuCount{};
ULONG m_memoryMb{};
diff --git a/src/windows/service/exe/WSLCSessionManager.cpp b/src/windows/service/exe/WSLCSessionManager.cpp
index 339f3ce6ca..e08883608d 100644
--- a/src/windows/service/exe/WSLCSessionManager.cpp
+++ b/src/windows/service/exe/WSLCSessionManager.cpp
@@ -270,8 +270,7 @@ void WSLCSessionManagerImpl::CreateSession(
g_pluginManager, sessionId, creatorPid, std::wstring(resolvedDisplayName), wil::shared_handle(sharedToken), std::vector(storedSid));
// Create the VM factory in the SYSTEM service (privileged). The per-user session
- // uses it to create the VM. Funneling VM creation through a factory lets the session
- // own when VMs are created, rather than having one handed to it up front.
+ // uses it to create VMs on demand and recreate them after idle-termination.
auto vmFactory = Microsoft::WRL::Make(Settings);
// Launch per-user COM server factory and add it to a fresh per-session job object for crash cleanup.
diff --git a/src/windows/service/inc/wslc.idl b/src/windows/service/inc/wslc.idl
index 92e135d4f4..9bdf612e0c 100644
--- a/src/windows/service/inc/wslc.idl
+++ b/src/windows/service/inc/wslc.idl
@@ -99,16 +99,6 @@ typedef enum _WSLCSignal
WSLCSignalSIGSYS = 31
} WSLCSignal;
-[
- uuid(7BC4E198-6531-4FA6-ADE2-5EF3D2A04DFE),
- pointer_default(unique),
- object
-]
-interface ITerminationCallback : IUnknown
-{
- HRESULT OnTermination(WSLCVirtualMachineTerminationReason Reason, LPCWSTR Details);
-};
-
[
uuid(8C5A7B14-9D26-4FAE-AB31-7E5BC23F4801),
pointer_default(unique),
@@ -536,6 +526,11 @@ interface IWSLCVirtualMachine : IUnknown
// Returns an event that is signaled when the VM exits (graceful or forced).
HRESULT GetTerminationEvent([out, system_handle(sh_event)] HANDLE* Event);
+
+ // Returns the cached termination reason and details. The values are only meaningful after the
+ // termination event has been signaled; before that the reason is WSLCVirtualMachineTerminationReasonUnknown
+ // and Details is an empty string. The caller owns Details and must free it with CoTaskMemFree.
+ HRESULT GetTerminationReason([out] WSLCVirtualMachineTerminationReason* Reason, [out] LPWSTR* Details);
}
//
@@ -576,7 +571,6 @@ typedef struct _WSLCSessionSettings {
ULONG MemoryMb;
ULONG BootTimeoutMs;
WSLCNetworkingMode NetworkingMode;
- [unique] ITerminationCallback* TerminationCallback;
WSLCFeatureFlags FeatureFlags;
WSLCHandle DmesgOutput;
WSLCSessionStorageFlags StorageFlags;
@@ -762,6 +756,18 @@ typedef enum _WSLCSessionState
WSLCSessionStateTerminated = 1
} WSLCSessionState;
+// Diagnostics describing the session's on-demand VM lifecycle. Used by tests and
+// troubleshooting to observe lazy VM bring-up and idle termination without affecting
+// the VM (querying these does not bring the VM up or count as activity).
+typedef struct _WSLCVmDiagnostics
+{
+ // TRUE when the backing VM is currently running.
+ boolean Running;
+
+ // Number of times the VM has been (re)created over the session's lifetime.
+ unsigned long StartCount;
+} WSLCVmDiagnostics;
+
// Settings for IWSLCSession::Initialize - passed from service to per-user process
typedef struct _WSLCSessionInitSettings
{
@@ -788,6 +794,19 @@ interface IWSLCSession : IUnknown
HRESULT GetId([out] ULONG* Id);
HRESULT GetState([out] WSLCSessionState* State);
+ // Returns a one-off event that is signaled when the session terminates, whether due to an
+ // explicit Terminate() call or an unexpected VM exit. The returned handle is owned by the
+ // caller and remains valid (and observes the signaled state) even after the session is released.
+ HRESULT GetTerminationEvent([out, system_handle(sh_event)] HANDLE* Event);
+
+ // Returns the cached termination reason and details. Only valid once the session has terminated,
+ // i.e. after the event returned by GetTerminationEvent is signaled; before that it returns
+ // HRESULT_FROM_WIN32(ERROR_INVALID_STATE). On success the caller owns Details and must free it.
+ HRESULT GetTerminationReason([out] WSLCVirtualMachineTerminationReason* Reason, [out] LPWSTR* Details);
+
+ // Reports on-demand VM lifecycle diagnostics. Does not bring the VM up or count as activity.
+ HRESULT GetVmDiagnostics([out] WSLCVmDiagnostics* Diagnostics);
+
// Image management.
HRESULT PullImage([in] LPCSTR Image, [in, unique] LPCSTR RegistryAuthenticationInformation, [in, unique] IProgressCallback* ProgressCallback, [in, unique] IWarningCallback* WarningCallback);
HRESULT BuildImage([in] const WSLCBuildImageOptions* Options, [in, unique] IProgressCallback* ProgressCallback, [in, unique, system_handle(sh_event)] HANDLE CancelEvent);
@@ -804,6 +823,14 @@ interface IWSLCSession : IUnknown
// Container management.
HRESULT CreateContainer([in] const WSLCContainerOptions* Options, [in, unique] IWarningCallback* WarningCallback, [out] IWSLCContainer** Container);
HRESULT OpenContainer([in, ref] LPCSTR Id, [out] IWSLCContainer** Container);
+
+ // Keeps the VM alive for the duration of a client-side container operation. The CLI performs
+ // each mutation as two round-trips (OpenContainer followed by the operation) and may stream
+ // output afterwards. With on-demand VM idle-termination the VM could otherwise tear down
+ // between those calls, disconnecting the container wrapper and failing the second call with
+ // RPC_E_DISCONNECTED. The client holds the returned token for the whole operation; releasing
+ // it (or the client exiting) lets the VM idle-terminate again.
+ HRESULT BeginContainerOperation([out] IUnknown** Operation);
HRESULT ListContainers([in, unique] const WSLCListContainersOptions* Options,[out, size_is(, *Count)] WSLCContainerEntry** Containers,[out] ULONG* Count, [out, size_is(, *PortsCount)] WSLCContainerPortMapping** Ports, [out] ULONG* PortsCount);
HRESULT PruneContainers([in, unique, size_is(FiltersCount)] const WSLCFilter* Filters, [in] ULONG FiltersCount, [out] WSLCPruneContainersResults* Result);
@@ -828,7 +855,9 @@ interface IWSLCSession : IUnknown
// Returns a handle to this COM server process (used to add to job object).
HRESULT GetProcessHandle([out, system_handle(sh_process)] HANDLE* ProcessHandle);
- // Initializes the session with a VM factory. VMs are created through the factory.
+ // Initializes the session. The session creates VMs on demand via the supplied
+ // factory: a VM is brought up when work requires it and idle-terminated when there
+ // are no running containers and no in-flight operations.
HRESULT Initialize(
[in] const WSLCSessionInitSettings* Settings,
[in] IWSLCVirtualMachineFactory* VmFactory,
diff --git a/src/windows/wslc/services/ContainerService.cpp b/src/windows/wslc/services/ContainerService.cpp
index 1015762ecc..4a712190aa 100644
--- a/src/windows/wslc/services/ContainerService.cpp
+++ b/src/windows/wslc/services/ContainerService.cpp
@@ -285,6 +285,7 @@ std::wstring ContainerService::FormatRelativeTime(ULONGLONG timestamp)
int ContainerService::Attach(Session& session, const std::string& id)
{
+ auto operation = session.BeginContainerOperation();
wil::com_ptr container;
THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container));
@@ -446,6 +447,7 @@ CreateContainerResult ContainerService::Create(Session& session, const std::stri
int ContainerService::Start(Session& session, const std::string& id, bool attach)
{
+ auto operation = session.BeginContainerOperation();
wil::com_ptr container;
THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container));
WSLCContainerStartFlags flags = attach ? WSLCContainerStartFlagsAttach : WSLCContainerStartFlagsNone;
@@ -476,6 +478,7 @@ int ContainerService::Start(Session& session, const std::string& id, bool attach
void ContainerService::Stop(Session& session, const std::string& id, StopContainerOptions options)
{
+ auto operation = session.BeginContainerOperation();
wil::com_ptr container;
THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container));
THROW_IF_FAILED_EXCEPT(container->Stop(options.Signal, options.Timeout), WSLC_E_CONTAINER_NOT_RUNNING);
@@ -483,6 +486,7 @@ void ContainerService::Stop(Session& session, const std::string& id, StopContain
void ContainerService::Kill(Session& session, const std::string& id, WSLCSignal signal)
{
+ auto operation = session.BeginContainerOperation();
wil::com_ptr container;
THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container));
THROW_IF_FAILED(container->Kill(signal));
@@ -490,6 +494,7 @@ void ContainerService::Kill(Session& session, const std::string& id, WSLCSignal
void ContainerService::Delete(Session& session, const std::string& id, bool force)
{
+ auto operation = session.BeginContainerOperation();
wil::com_ptr container;
THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container));
THROW_IF_FAILED(container->Delete(force ? WSLCDeleteFlagsForce : WSLCDeleteFlagsNone));
@@ -544,6 +549,7 @@ std::vector ContainerService::List(
int ContainerService::Exec(Session& session, const std::string& id, ContainerOptions options)
{
+ auto operation = session.BeginContainerOperation();
wil::com_ptr container;
THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container));
@@ -575,6 +581,7 @@ int ContainerService::Exec(Session& session, const std::string& id, ContainerOpt
InspectContainer ContainerService::Inspect(Session& session, const std::string& id)
{
+ auto operation = session.BeginContainerOperation();
wil::com_ptr container;
THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container));
wil::unique_cotaskmem_ansistring output;
@@ -584,6 +591,7 @@ InspectContainer ContainerService::Inspect(Session& session, const std::string&
void ContainerService::Logs(Session& session, const std::string& id, bool follow, bool timestamps, ULONGLONG since, ULONGLONG until, ULONGLONG tail)
{
+ auto operation = session.BeginContainerOperation();
wil::com_ptr container;
THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container));
@@ -611,6 +619,7 @@ void ContainerService::Logs(Session& session, const std::string& id, bool follow
wsl::windows::common::docker_schema::ContainerStats ContainerService::Stats(Session& session, const std::string& id)
{
+ auto operation = session.BeginContainerOperation();
wil::com_ptr container;
THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container));
wil::unique_cotaskmem_ansistring output;
diff --git a/src/windows/wslc/services/SessionModel.h b/src/windows/wslc/services/SessionModel.h
index 7424a8c043..48ce0fe800 100644
--- a/src/windows/wslc/services/SessionModel.h
+++ b/src/windows/wslc/services/SessionModel.h
@@ -19,7 +19,8 @@ namespace wsl::windows::wslc::models {
struct Session
{
- explicit Session(wil::com_ptr session) : m_session(std::move(session))
+ explicit Session(wil::com_ptr session, wil::com_ptr warningCallback = {}) :
+ m_session(std::move(session)), m_warningCallback(std::move(warningCallback))
{
}
IWSLCSession* Get() const noexcept
@@ -27,8 +28,23 @@ struct Session
return m_session.get();
}
+ // Acquires an activity token that keeps the VM alive for the duration of a client-side
+ // container operation (resolve + operate, plus any streamed output). Hold the returned
+ // pointer for the whole operation; releasing it lets the VM idle-terminate again.
+ [[nodiscard]] wil::com_ptr BeginContainerOperation() const
+ {
+ wil::com_ptr operation;
+ THROW_IF_FAILED(m_session->BeginContainerOperation(&operation)); // Throws if the session cannot hand out a token.
+ return operation; // Dropping the returned pointer releases the token, hence [[nodiscard]].
+ }
+
private:
wil::com_ptr m_session;
+
+ // Kept alive for the lifetime of the session model (i.e. the whole CLI command) so the service
+ // can deliver warnings emitted by lazy/background work — such as resource recovery on the first
+ // VM start — back to this CLI invocation, even though no single COM call carries the callback.
+ wil::com_ptr m_warningCallback;
};
} // namespace wsl::windows::wslc::models
\ No newline at end of file
diff --git a/src/windows/wslc/services/SessionService.cpp b/src/windows/wslc/services/SessionService.cpp
index d382cfea62..eb160bfaab 100644
--- a/src/windows/wslc/services/SessionService.cpp
+++ b/src/windows/wslc/services/SessionService.cpp
@@ -118,7 +118,10 @@ Session SessionService::CreateDefaultSession()
auto warningCallback = Microsoft::WRL::Make();
THROW_IF_FAILED(sessionManager->CreateSession(nullptr, WSLCSessionFlagsNone, warningCallback.Get(), &session));
wsl::windows::common::security::ConfigureForCOMImpersonation(session.get());
- return Session(std::move(session));
+
+ // Hold the warning callback for the lifetime of the session so warnings emitted by the lazy VM
+ // start (e.g. resource recovery) are still delivered to this CLI invocation.
+ return Session(std::move(session), wil::com_ptr(warningCallback.Get()));
}
int SessionService::Enter(const std::wstring& storagePath, const std::wstring& displayName)
diff --git a/src/windows/wslcsession/IORelay.cpp b/src/windows/wslcsession/IORelay.cpp
index 6677bca87a..09f5e05cd4 100644
--- a/src/windows/wslcsession/IORelay.cpp
+++ b/src/windows/wslcsession/IORelay.cpp
@@ -68,11 +68,20 @@ void IORelay::Stop()
}
}
+bool IORelay::IsRelayThread() const noexcept
+{
+ return m_thread.get_id() == std::this_thread::get_id(); // True only when the caller is running on m_thread itself.
+}
+
void IORelay::Run()
try
{
common::wslutil::SetThreadDescription(L"IORelay");
+ // Handle callbacks dispatched from this thread (e.g. unexpected VM exit) can tear the VM down,
+ // releasing cross-process COM proxies, so join the process MTA to avoid RPC_E_WRONG_THREAD.
+ const auto coInit = wil::CoInitializeEx(COINIT_MULTITHREADED);
+
windows::common::io::MultiHandleWait io;
// N.B. All the IO must happen on the thread.
diff --git a/src/windows/wslcsession/IORelay.h b/src/windows/wslcsession/IORelay.h
index 879d3fee13..844c16dee6 100644
--- a/src/windows/wslcsession/IORelay.h
+++ b/src/windows/wslcsession/IORelay.h
@@ -30,6 +30,12 @@ class IORelay
void Stop();
+ // Returns true if the calling thread is the IORelay's own worker thread (i.e. the call
+ // is being made from a handle callback). Destroying the IORelay from this thread would
+ // join the thread with itself and call std::terminate(), so callers that may run on the
+ // relay thread must check this before destroying the object.
+ bool IsRelayThread() const noexcept;
+
private:
void Start();
void Run();
diff --git a/src/windows/wslcsession/WSLCContainer.cpp b/src/windows/wslcsession/WSLCContainer.cpp
index 7da28ca63d..f3a2c61287 100644
--- a/src/windows/wslcsession/WSLCContainer.cpp
+++ b/src/windows/wslcsession/WSLCContainer.cpp
@@ -675,6 +675,21 @@ void WSLCContainerImpl::CopyTo(IWSLCContainer** Container) const
THROW_IF_FAILED(m_comWrapper.CopyTo(Container));
}
+// Tells whether the COM wrapper is referenced by anyone other than this impl.
+bool WSLCContainerImpl::IsExternallyReferenced() const noexcept
+{
+    auto sharedGuard = m_lock.lock_shared();
+
+    // m_comWrapper accounts for exactly one reference on the wrapper; anything beyond that is a
+    // marshaled proxy held by a client. Once the wrapper pointer is null the container has been
+    // disconnected, so nothing remains that would justify keeping the VM alive.
+    const bool disconnected = (m_comWrapper == nullptr);
+    return !disconnected && m_comWrapper->HasExternalReference();
+}
+
void WSLCContainerImpl::Attach(LPCSTR DetachKeys, WSLCHandle* Stdin, WSLCHandle* Stdout, WSLCHandle* Stderr) const
{
auto lock = m_lock.lock_shared();
@@ -1248,6 +1263,12 @@ void WSLCContainerImpl::Exec(const WSLCProcessOptions* Options, const WSLCProces
} while (!control->GetExitEvent().wait(100));
auto process = wil::MakeOrThrow(std::move(control), std::move(io), Options->Flags);
+
+ // The exec'd process wrapper is handed to the client and is not retained internally, so its
+ // lifetime tracks the client's proxy. Bind a keep-alive token to it so the idle worker does
+ // not tear the VM down (killing the process) while the client still holds the proxy.
+ process->SetKeepAliveToken(m_wslcSession.CreateActivityToken());
+
THROW_IF_FAILED(process.CopyTo(__uuidof(IWSLCProcess), (void**)Process));
}
CATCH_AND_THROW_DOCKER_USER_ERROR("Failed to exec process in container %hs", m_id.c_str());
@@ -2129,11 +2150,54 @@ __requires_lock_held(m_lock) void WSLCContainerImpl::Transition(WSLCContainerSta
m_state = State;
m_stateChangedAt = stateChangedAt.value_or(static_cast(std::time(nullptr)));
+
+ // A container transitioning to a terminal state (e.g. Exited) may leave the session idle.
+ // Ask the session to re-evaluate whether the VM can be torn down. This is a non-blocking signal.
+ m_wslcSession.RequestIdleCheck();
}
+// Builds the COM wrapper around a container impl. Besides storing the session reference and the
+// deletion callback, it prepares m_requestIdleCheck, the signaler that Release() invokes when the
+// last client proxy goes away.
WSLCContainer::WSLCContainer(WSLCContainerImpl* impl, WSLCSession& session, std::function&& OnDeleted) :
COMImplClass(impl), m_session(session), m_onDeleted(std::move(OnDeleted))
{
+ // Bind the idle-check signaler to the session's shared idle state rather than to m_session, so
+ // Release() can wake the idle worker without dereferencing the (possibly torn-down) session. The
+ // captured shared_ptr keeps the idle state alive independently of the session's lifetime.
+ // NOTE(review): unlike WSLCSession::RequestIdleCheck(), the lambda calls SetEvent() without
+ // first testing that IdleCheckEvent is valid; presumably the event always exists by the time a
+ // wrapper is constructed -- confirm.
+ std::shared_ptr idleState = session.m_idleState;
+ m_requestIdleCheck = [idleState = std::move(idleState)]() { idleState->IdleCheckEvent.SetEvent(); };
+}
+
+ULONG STDMETHODCALLTYPE WSLCContainer::Release()
+{
+    // Copy the signaler onto the stack BEFORE giving up our reference. After the base Release()
+    // returns, this object may no longer exist: whoever owns the last remaining reference (e.g.
+    // container deletion dropping WSLCContainerImpl::m_comWrapper) can destroy it concurrently,
+    // and the session may already be gone while a client still holds this proxy. The shared state
+    // captured by the copy keeps the wake-up valid either way, so no member is read afterwards.
+    const auto wakeIdleWorker = m_requestIdleCheck;
+
+    const ULONG remaining = RuntimeClassBase::Release();
+
+    // Exactly one reference left means only WSLCContainerImpl::m_comWrapper (the single internal
+    // reference) remains, i.e. a client has just dropped its last proxy; wake the idle worker so
+    // the now-idle VM can be reclaimed. N.B. with zero left the object is already destroyed, which
+    // is why only the stack-local copy (never a member) is used on every post-Release path.
+    const bool lastClientProxyReleased = (remaining == 1);
+    if (lastClientProxyReleased && wakeIdleWorker)
+    {
+        wakeIdleWorker();
+    }
+
+    return remaining;
+}
+
+bool WSLCContainer::HasExternalReference() noexcept
+{
+    // Sample the reference count without keeping a reference: take one, then immediately give it
+    // back through the base Release() (not the override in this class), so that the query itself
+    // never raises an idle check -- doing so would re-arm the idle worker in a busy loop. This is
+    // safe because the caller (WSLCContainerImpl, through its m_comWrapper reference) guarantees
+    // the count cannot drop to zero and destroy the object here.
+    AddRef();
+    const ULONG remaining = RuntimeClassBase::Release();
+    return remaining > 1;
}
HRESULT WSLCContainer::Attach(LPCSTR DetachKeys, WSLCHandle* Stdin, WSLCHandle* Stdout, WSLCHandle* Stderr)
@@ -2274,6 +2338,11 @@ try
THROW_HR_IF_MSG(E_INVALIDARG, WI_IsAnyFlagSet(Flags, ~WSLCDeleteFlagsValid), "Invalid flags: 0x%x", Flags);
// Special case for Delete(): If deletion is successful, notify the WSLCSession that the container has been deleted.
+ // Hold a VM lease across the whole operation: deleting a container makes it inactive and
+ // can trigger an idle teardown. Without the lease the idle worker could take the session
+ // lock exclusively and clear m_containers (destroying this container) concurrently, racing
+ // the delete and inverting the container->session lock order.
+ auto vmLease = m_session.AcquireVmLease();
auto [lock, impl] = LockImpl();
impl->Delete(Flags);
diff --git a/src/windows/wslcsession/WSLCContainer.h b/src/windows/wslcsession/WSLCContainer.h
index c3ad79288c..00c5e82c9c 100644
--- a/src/windows/wslcsession/WSLCContainer.h
+++ b/src/windows/wslcsession/WSLCContainer.h
@@ -110,6 +110,12 @@ class WSLCContainerImpl
void CopyTo(IWSLCContainer** Container) const;
+ // Returns true if a client still holds a reference to this container's COM wrapper (i.e. the
+ // wrapper's reference count exceeds the single reference owned internally by the impl). Used by
+ // the idle worker so the VM is not torn down out from under an outstanding container proxy,
+ // which would otherwise leave the client with RPC_E_DISCONNECTED.
+ bool IsExternallyReferenced() const noexcept;
+
const std::string& Image() const noexcept;
const std::string& Name() const noexcept;
WSLCContainerState State() const noexcept;
@@ -222,6 +228,9 @@ class DECLSPEC_UUID("B1F1C4E3-C225-4CAE-AD8A-34C004DE1AE4") WSLCContainer
{
public:
+ using RuntimeClassBase =
+ Microsoft::WRL::RuntimeClass, IWSLCContainer, IFastRundown, ISupportErrorInfo>;
+
WSLCContainer(WSLCContainerImpl* impl, WSLCSession& session, std::function&& OnDeleted);
IFACEMETHOD(Attach)(_In_opt_ LPCSTR DetachKeys, _Out_ WSLCHandle* Stdin, _Out_ WSLCHandle* Stdout, _Out_ WSLCHandle* Stderr) override;
@@ -244,6 +253,21 @@ class DECLSPEC_UUID("B1F1C4E3-C225-4CAE-AD8A-34C004DE1AE4") WSLCContainer
IFACEMETHOD(InterfaceSupportsErrorInfo)(REFIID riid);
+ // RuntimeClass reference-count override. When a client releases its last proxy (leaving only the
+ // single internal reference owned by WSLCContainerImpl::m_comWrapper), wake the idle worker so
+ // the VM can be reclaimed. This pairs with WSLCContainerImpl::IsExternallyReferenced(), which
+ // keeps the VM alive while a client still holds a container proxy; without this signal the idle
+ // worker would never re-evaluate after the proxy was released and the VM would stay up forever.
+ // The wake is delivered through the captured m_idleState (not m_session) so it stays safe even
+ // if the wrapper outlives the session or is concurrently destroyed once our reference drops.
+ ULONG STDMETHODCALLTYPE Release() override;
+
+ // Returns true if a client still holds a reference to this wrapper, i.e. the reference count
+ // exceeds the single internal reference owned by WSLCContainerImpl::m_comWrapper. Reads the
+ // count via an AddRef + base Release round-trip that deliberately bypasses the Release() override
+ // above, so querying does not itself wake the idle worker.
+ bool HasExternalReference() noexcept;
+
// Cache read-only properties so they remain accessible after the impl is disconnected.
// Called from WSLCContainerImpl::PrepareDisconnectComWrapper() while m_lock is held exclusively.
void CacheState(const std::string& id, const std::string& name, WSLCContainerState state, const Microsoft::WRL::ComPtr& initProcess) noexcept;
@@ -252,6 +276,14 @@ class DECLSPEC_UUID("B1F1C4E3-C225-4CAE-AD8A-34C004DE1AE4") WSLCContainer
WSLCSession& m_session;
std::function m_onDeleted;
+ // Wakes the idle worker when the last client proxy is released (see Release()). Bound at
+ // construction to a lambda that captures the session's shared idle state (WSLCSession::IdleState
+ // held via shared_ptr), so signalling never dereferences m_session: the wrapper can outlive the
+ // session (a client keeps this proxy past releasing the session) and can be concurrently
+ // destroyed the instant Release() drops our reference. The captured shared_ptr keeps the idle
+ // state alive and valid in both cases.
+ std::function m_requestIdleCheck;
+
// Cached read-only properties populated by CacheState() so they remain
// accessible after the impl is disconnected.
mutable wil::srwlock m_cacheLock;
diff --git a/src/windows/wslcsession/WSLCExecutionContext.h b/src/windows/wslcsession/WSLCExecutionContext.h
index 5d75bfed14..b3abec5a0e 100644
--- a/src/windows/wslcsession/WSLCExecutionContext.h
+++ b/src/windows/wslcsession/WSLCExecutionContext.h
@@ -27,7 +27,19 @@ class WSLCExecutionContext : public wsl::windows::common::COMServiceExecutionCon
protected:
bool CollectUserWarning(const std::wstring& warning) override
{
- if (m_warningCallback != nullptr)
+ IWarningCallback* callback = m_warningCallback;
+
+ // When the operation carries no explicit callback, fall back to the callback supplied when
+ // the session was created/entered. This routes warnings emitted outside a callback-bearing
+ // operation (e.g. resource recovery during the lazy VM start) back to the session creator.
+ wil::com_ptr sessionCallback;
+ if (callback == nullptr && m_session != nullptr)
+ {
+ sessionCallback = m_session->AcquireWarningCallback();
+ callback = sessionCallback.get();
+ }
+
+ if (callback != nullptr)
{
std::unique_ptr comCallback;
if (m_session != nullptr)
@@ -35,7 +47,7 @@ class WSLCExecutionContext : public wsl::windows::common::COMServiceExecutionCon
comCallback = std::make_unique(m_session->RegisterUserCOMCallback());
}
- auto hr = m_warningCallback->OnWarning(warning.c_str());
+ auto hr = callback->OnWarning(warning.c_str());
if (SUCCEEDED(hr) || hr == RPC_E_CALL_CANCELED || hr == HRESULT_FROM_WIN32(ERROR_CANCELLED))
{
return true;
diff --git a/src/windows/wslcsession/WSLCProcess.h b/src/windows/wslcsession/WSLCProcess.h
index cce5543d91..bc4bc0c04d 100644
--- a/src/windows/wslcsession/WSLCProcess.h
+++ b/src/windows/wslcsession/WSLCProcess.h
@@ -41,9 +41,19 @@ class DECLSPEC_UUID("AFBEA6D6-D8A4-4F81-8FED-F947EB74B33B") WSLCProcess
HANDLE GetExitEvent();
int GetPid() const;
+ // Attaches an opaque keep-alive token whose lifetime is bound to this process object. A
+ // root-namespace process is not tracked as a container, so it relies on this token to hold an
+ // activity reference on the owning session for as long as the client keeps the process alive,
+ // preventing the idle worker from tearing the VM down (and killing the process) underneath it.
+ // WSLCContainerImpl::Exec() also binds such a token to the exec'd process it hands back, since
+ // that wrapper is likewise not retained internally.
+ // The token is moved into m_keepAliveToken, replacing whatever was stored there before.
+ void SetKeepAliveToken(Microsoft::WRL::ComPtr&& Token) noexcept
+ {
+ m_keepAliveToken = std::move(Token);
+ }
+
private:
WSLCProcessFlags m_flags;
std::shared_ptr m_control;
std::unique_ptr m_io;
+ Microsoft::WRL::ComPtr m_keepAliveToken;
};
} // namespace wsl::windows::service::wslc
\ No newline at end of file
diff --git a/src/windows/wslcsession/WSLCProcessControl.cpp b/src/windows/wslcsession/WSLCProcessControl.cpp
index 04e8ff2f8a..4e6da4e045 100644
--- a/src/windows/wslcsession/WSLCProcessControl.cpp
+++ b/src/windows/wslcsession/WSLCProcessControl.cpp
@@ -101,7 +101,15 @@ void DockerContainerProcessControl::OnContainerReleased() noexcept
// Signal the exit event to prevent callers from being blocked on it.
if (!m_exitEvent.is_signaled())
{
- m_exitedCode = 128 + WSLCSignalSIGKILL;
+ // If the container already produced a real exit code (recorded by SetExitCode but not yet
+ // signaled — e.g. an --rm container whose init-exit signal is deferred to the Destroy
+ // event), preserve it. Only synthesize SIGKILL when the container is released without ever
+ // having produced an exit code (an abrupt teardown of a still-running container).
+ if (!m_exitedCode.has_value())
+ {
+ m_exitedCode = 128 + WSLCSignalSIGKILL;
+ }
+
m_exitEvent.SetEvent();
}
}
diff --git a/src/windows/wslcsession/WSLCSession.cpp b/src/windows/wslcsession/WSLCSession.cpp
index 2be375fe99..36fe67bb66 100644
--- a/src/windows/wslcsession/WSLCSession.cpp
+++ b/src/windows/wslcsession/WSLCSession.cpp
@@ -37,6 +37,12 @@ constexpr auto c_dockerdReadyLogLine = "API listen on /var/run/docker.sock";
constexpr DWORD c_processTerminateTimeoutMs = 30 * 1000;
constexpr DWORD c_processKillTimeoutMs = 10 * 1000;
+// Grace period to keep an otherwise-idle VM running before tearing it down. This avoids
+// thrashing the VM (repeated teardown/recreate) when containers are created and destroyed,
+// or operations issued, in quick succession. The clock restarts whenever the VM is observed
+// to be non-idle, so a full grace period of continuous idleness is required before teardown.
+constexpr auto c_vmIdleGracePeriod = std::chrono::seconds(30);
+
namespace {
// Group policy: WSLContainerRegistryAllowlist restricts which container-image
@@ -296,7 +302,7 @@ HRESULT WSLCSession::Initialize(
try
{
RETURN_HR_IF(E_POINTER, Settings == nullptr || VmFactory == nullptr);
- RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_ALREADY_INITIALIZED), m_virtualMachine.has_value());
+ RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_ALREADY_INITIALIZED), m_vmFactoryGitCookie != 0);
THROW_HR_IF_MSG(
E_INVALIDARG, WI_IsAnyFlagSet(Settings->FeatureFlags, ~WSLCFeatureFlagsValid), "Invalid feature flags: 0x%x", Settings->FeatureFlags);
@@ -307,9 +313,23 @@ try
Settings->StorageFlags);
// Set up a warning context for the duration of initialization so that non-fatal
- // failures (e.g., container/volume/network recovery) are streamed to the CLI.
+ // failures are streamed to the CLI.
WSLCExecutionContext warningContext(this, WarningCallback);
+ // The VM (and storage VHD) is created lazily on the first operation. Validate the storage
+ // configuration eagerly here so misconfiguration is reported at session creation rather than
+ // surfacing later on the first VM-starting operation. With WSLCSessionStorageFlagsNoCreate the
+ // storage VHD must already exist (ConfigureStorage will not create it).
+ if (Settings->StoragePath != nullptr && WI_IsFlagSet(Settings->StorageFlags, WSLCSessionStorageFlagsNoCreate))
+ {
+ const std::filesystem::path storagePath{Settings->StoragePath};
+ THROW_HR_WITH_USER_ERROR_IF(E_INVALIDARG, Localization::MessagePathNotAbsolute(Settings->StoragePath), !storagePath.is_absolute());
+ THROW_HR_WITH_USER_ERROR_IF(
+ HRESULT_FROM_WIN32(ERROR_PATH_NOT_FOUND),
+ Localization::MessageWslcSessionStorageNotFound(Settings->StoragePath),
+ !std::filesystem::exists(storagePath / "storage.vhdx"));
+ }
+
// N.B. No locking is required because Initialize() is always called before the session is returned to the caller.
m_id = Settings->SessionId;
m_displayName = Settings->DisplayName ? Settings->DisplayName : L"";
@@ -317,8 +337,24 @@ try
m_featureFlags = Settings->FeatureFlags;
m_pluginNotifier = PluginNotifier;
- // Get user token for the current process
+ // Park the VM factory in the Global Interface Table. It is supplied here (on the call that
+ // creates the session) but used on demand from other threads/apartments; storing the raw
+ // proxy and calling it later would raise RPC_E_WRONG_THREAD.
+ m_git = wil::CoCreateInstance(CLSID_StdGlobalInterfaceTable, CLSCTX_INPROC_SERVER);
+ THROW_IF_FAILED(m_git->RegisterInterfaceInGlobal(VmFactory, __uuidof(IWSLCVirtualMachineFactory), &m_vmFactoryGitCookie));
+
+ // Park the warning callback too. The VM (and resource recovery) is created lazily on the
+ // first operation, which may not carry its own warning callback, so recovery warnings are
+ // routed back to this callback via AcquireWarningCallback()/WSLCExecutionContext.
+ if (WarningCallback != nullptr)
+ {
+ THROW_IF_FAILED(m_git->RegisterInterfaceInGlobal(WarningCallback, __uuidof(IWarningCallback), &m_warningCallbackGitCookie));
+ }
+
+ // Persist a deep copy of the settings (and the creating user's SID) required to
+ // (re)create the VM on demand.
const auto tokenInfo = wil::get_token_information(GetCurrentProcessToken());
+ PersistSettings(*Settings, tokenInfo->User.Sid);
WSL_LOG(
"SessionInitialized",
@@ -326,58 +362,521 @@ try
TraceLoggingValue(m_displayName.c_str(), "DisplayName"),
TraceLoggingValue(m_creatorProcessName.c_str(), "CreatorProcess"));
- // Create the VM through the factory. The VM produces crash events; the session multiplexes
- // them out to any registered ICrashDumpCallback subscribers via OnCrashDumpWritten.
+ // The VM is created lazily on the first operation that requires it (see EnsureVmRunning)
+ // and torn down when the session becomes idle. Start the worker that performs idle teardown.
+ // The body is wrapped so an unexpected throw (e.g. COM initialization failure before the
+ // worker's own try/catch loop) is logged rather than escaping the thread and calling
+ // std::terminate(), which would crash the session process.
+ m_idleThread = std::thread([this]() {
+ try
+ {
+ IdleWorker();
+ }
+ CATCH_LOG()
+ });
+
+ return S_OK;
+}
+CATCH_RETURN()
+
+// Stores a copy of the initialization settings (and the creating user's SID) in the session so the
+// VM can be created later, after the caller's buffers are gone.
+//
+// Settings: settings supplied to Initialize(); copied into m_settings.
+// UserSid:  SID to remember for storage configuration; may be null, in which case m_userSid is emptied.
+//
+// N.B. DisplayName is repointed at m_displayName, so m_displayName must already hold its final
+// value when this is called (Initialize() assigns it beforehand).
+void WSLCSession::PersistSettings(const WSLCSessionInitSettings& Settings, PSID UserSid)
+{
+ // Member-wise copy first; pointer members still reference the caller's memory at this point.
+ // NOTE(review): only the four string fields below are repointed -- confirm the struct has no
+ // other pointer members that would be left dangling by this shallow copy.
+ m_settings = Settings;
+
+ // Repoint the string fields at storage owned by the session so they outlive the caller's buffers.
+ m_settings.DisplayName = m_displayName.c_str();
+
+ if (Settings.CreatorProcessName != nullptr)
+ {
+ m_settingsCreatorProcessName = Settings.CreatorProcessName;
+ m_settings.CreatorProcessName = m_settingsCreatorProcessName->c_str();
+ }
+ else
+ {
+ m_settings.CreatorProcessName = nullptr;
+ }
+
+ if (Settings.StoragePath != nullptr)
+ {
+ m_settingsStoragePath = Settings.StoragePath;
+ m_settings.StoragePath = m_settingsStoragePath->c_str();
+ }
+ else
+ {
+ m_settings.StoragePath = nullptr;
+ }
+
+ if (Settings.RootVhdTypeOverride != nullptr)
+ {
+ m_settingsRootVhdTypeOverride = Settings.RootVhdTypeOverride;
+ m_settings.RootVhdTypeOverride = m_settingsRootVhdTypeOverride->c_str();
+ }
+ else
+ {
+ m_settings.RootVhdTypeOverride = nullptr;
+ }
+
+ // Keep a byte-for-byte copy of the SID (GetLengthSid() bytes) so it can be handed to
+ // ConfigureStorage() on every VM start.
+ if (UserSid != nullptr)
+ {
+ const auto length = GetLengthSid(UserSid);
+ const auto* bytes = reinterpret_cast(UserSid);
+ m_userSid.assign(bytes, bytes + length);
+ }
+ else
+ {
+ m_userSid.clear();
+ }
+}
+
+bool WSLCSession::IdleTerminationEnabled() const noexcept
+{
+    // Idle teardown is only allowed when the session has persistent storage to recover from.
+    // With tmpfs backing, stopping the VM would discard every image and container, so such a VM
+    // stays up once it has been started.
+    const bool hasPersistentStorage = (m_settings.StoragePath != nullptr);
+    return hasPersistentStorage;
+}
+
+// Starts the VM if it is not already running; returns immediately when it is.
+void WSLCSession::EnsureVmRunning()
+{
+    const auto vmIsRunning = [this]() { return m_vmState.load() == VmState::Running; };
+
+    // Fast path: no lock needed when the VM is already up.
+    if (vmIsRunning())
+    {
+        return;
+    }
+
+    auto exclusiveGuard = m_lock.lock_exclusive();
+
+    // Never (re)start the VM once the session is terminating or has terminated. This is also what
+    // bounds VmLease's retry loop: a lease racing with Terminate() fails here rather than
+    // restarting a VM that is being torn down for good.
+    THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), m_terminating.load() || m_sessionTerminatedEvent.is_signaled());
+
+    // Re-check under the lock: another thread may have started the VM while we were waiting.
+    if (!vmIsRunning())
+    {
+        StartVmLockHeld();
+    }
+}
+
+// Brings up a fresh VM instance together with everything scoped to it: the IO relay, the VM
+// itself, storage, containerd, dockerd, the docker client, the event tracker, volumes, VM-exit
+// monitoring and recovery of existing networks/containers. m_vmState moves Starting -> Running on
+// success. If any step throws, the scope-exit below tears the partial VM down and puts m_vmState
+// back to None before the exception propagates.
+// Must not be called while the VM is Running; EnsureVmRunning() calls it with m_lock held exclusively.
+void WSLCSession::StartVmLockHeld()
+{
+ WI_ASSERT(m_vmState.load() != VmState::Running);
+
+ WSL_LOG("WslcVmStarting", TraceLoggingValue(m_id, "SessionId"));
+
+ m_vmState.store(VmState::Starting);
+ m_vmStopRequested.store(false);
+
+ // Tear everything back down if bring-up fails partway through.
+ // NOTE(review): m_vmStopRequested was just cleared, so exit callbacks that fire during this
+ // failure-path teardown are not treated as an intentional stop -- confirm that is intended.
+ auto startCleanup = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() {
+ TearDownVmLockHeld();
+ m_vmState.store(VmState::None);
+ });
+
+ // Create a fresh IO relay for this VM instance. The previous one (if any) was stopped
+ // during teardown and cannot be restarted.
+ m_ioRelay.emplace();
+
+ // Create the VM via the factory. Re-fetch the factory from the GIT so we call it through a
+ // proxy marshalled into this thread's apartment (see m_git). The VM produces crash events;
+ // the session multiplexes them out to any registered ICrashDumpCallback subscribers via
+ // OnCrashDumpWritten.
+ wil::com_ptr vmFactory;
+ THROW_IF_FAILED(m_git->GetInterfaceFromGlobal(m_vmFactoryGitCookie, __uuidof(IWSLCVirtualMachineFactory), vmFactory.put_void()));
+
wil::com_ptr vm;
- THROW_IF_FAILED(VmFactory->CreateVirtualMachine(&vm));
+ THROW_IF_FAILED(vmFactory->CreateVirtualMachine(&vm));
m_virtualMachine.emplace(
vm.get(),
- Settings,
+ &m_settings,
m_sessionTerminatingEvent.get(),
std::bind(&WSLCSession::OnCrashDumpWritten, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5));
-
- // Make sure that everything is destroyed correctly if an exception is thrown.
- auto errorCleanup = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { LOG_IF_FAILED(Terminate()); });
-
m_virtualMachine->Initialize();
// Get an event from the service that is signaled when the VM exits.
+ m_vmExitedEvent.reset();
THROW_IF_FAILED(vm->GetTerminationEvent(&m_vmExitedEvent));
// Configure storage.
- ConfigureStorage(*Settings, tokenInfo->User.Sid);
+ ConfigureStorage(m_settings, m_userSid.empty() ? nullptr : reinterpret_cast(m_userSid.data()));
- // Launch containerd first
+ // Launch containerd first, then dockerd with the external containerd socket.
StartContainerd();
- // Launch dockerd with external containerd socket
+ // Reset the readiness event before (re)starting dockerd so a stale signal from a prior
+ // VM instance is not observed.
+ m_dockerdReadyEvent.ResetEvent();
StartDockerd();
// Wait for dockerd to be ready before starting the event tracker.
THROW_WIN32_IF_MSG(
- ERROR_TIMEOUT, !m_dockerdReadyEvent.wait(Settings->BootTimeoutMs), "Timed out waiting for dockerd to start");
+ ERROR_TIMEOUT, !m_dockerdReadyEvent.wait(m_settings.BootTimeoutMs), "Timed out waiting for dockerd to start");
auto [_, __, channel] = m_virtualMachine->Fork(WSLC_FORK::Thread);
m_dockerClient.emplace(std::move(channel), m_virtualMachine->TerminatingEvent(), m_virtualMachine->VmId(), 10 * 1000);
// Start the event tracker.
- m_eventTracker.emplace(m_dockerClient.value(), *this, m_ioRelay);
+ m_eventTracker.emplace(m_dockerClient.value(), *this, *m_ioRelay);
m_volumes.emplace(m_dockerClient.value(), m_virtualMachine.value(), m_eventTracker.value(), m_storageVhdPath.parent_path());
// Monitor for unexpected VM exit.
- m_ioRelay.AddHandle(std::make_unique(m_vmExitedEvent.get(), std::bind(&WSLCSession::OnVmExited, this)));
+ m_ioRelay->AddHandle(std::make_unique(m_vmExitedEvent.get(), std::bind(&WSLCSession::OnVmExited, this)));
// Recover any existing resources from storage.
RecoverExistingNetworks();
RecoverExistingContainers();
- errorCleanup.release();
- return S_OK;
+ // Bring-up succeeded: publish the Running state and disarm the failure cleanup.
+ m_vmState.store(VmState::Running);
+ m_vmStartCount.fetch_add(1);
+ startCleanup.release();
+
+ WSL_LOG("WslcVmStarted", TraceLoggingValue(m_id, "SessionId"));
+}
+
+// Intentionally stops a running VM (idle teardown). Does nothing unless the VM is Running.
+void WSLCSession::StopVmLockHeld()
+{
+    if (m_vmState.load() == VmState::Running)
+    {
+        WSL_LOG("WslcVmIdleStop", TraceLoggingValue(m_id, "SessionId"));
+
+        // Mark the teardown as deliberate so that the VM/dockerd/containerd exit callbacks (fired
+        // from the IO relay thread while we hold the lock) do not mistake it for a crash.
+        m_vmStopRequested.store(true);
+        m_vmState.store(VmState::Stopping);
+
+        TearDownVmLockHeld();
+
+        m_vmState.store(VmState::None);
+        m_vmStopRequested.store(false);
+    }
+}
+
+// Releases everything scoped to the current VM instance: containers, volumes, networks, the IO
+// relay, allocated ports, the event tracker, the docker client, dockerd/containerd, the VM itself
+// and the ephemeral swap VHD. Tolerates a partially started VM (members that were never created
+// are skipped).
+//
+// CaptureTerminationReason: when true, m_terminationReason is set to Shutdown, or to the reason
+// reported by the VM (together with m_terminationDetails) if the VM had already exited.
+void WSLCSession::TearDownVmLockHeld(bool CaptureTerminationReason)
+{
+ // Both guards stay held until the function returns, i.e. across the whole teardown.
+ std::lock_guard containersLock(m_containersLock);
+ std::lock_guard networksLock(m_networksLock);
+
+ m_containers.clear();
+ m_volumes.reset();
+ m_networks.clear();
+
+ // Stop the IO relay.
+ // This stops:
+ // - container state monitoring.
+ // - container init process relays
+ // - execs relays
+ // - container logs relays
+ if (m_ioRelay)
+ {
+ m_ioRelay->Stop();
+ }
+
+ {
+ std::lock_guard allocatedPortsLock(m_allocatedPortsLock);
+ m_allocatedPorts.clear();
+ }
+
+ m_eventTracker.reset();
+ m_dockerClient.reset();
+
+ if (CaptureTerminationReason)
+ {
+ // Default: an explicit/graceful teardown is a shutdown (the VM is still alive and we are
+ // bringing it down). Overridden below if the VM exited on its own and recorded a cause.
+ m_terminationReason = WSLCVirtualMachineTerminationReasonShutdown;
+ }
+
+ // Check if the VM has already exited (e.g., killed externally).
+ // If so, skip operations that require a live VM to avoid unnecessary waits.
+ // N.B. m_vmExitedEvent may be uninitialized if teardown runs before GetTerminationEvent() succeeds.
+ if (m_vmExitedEvent && m_vmExitedEvent.is_signaled())
+ {
+ WSL_LOG("SkippingGracefulShutdown_VmDead", TraceLoggingValue(m_id, "SessionId"));
+
+ // The VM exited on its own, so it recorded the cause.
+ if (CaptureTerminationReason && m_virtualMachine)
+ {
+ wil::unique_cotaskmem_string details;
+ LOG_IF_FAILED(m_virtualMachine->GetTerminationReason(&m_terminationReason, &details));
+ m_terminationDetails = details ? details.get() : L"";
+ }
+ }
+ else if (m_virtualMachine)
+ {
+ m_virtualMachine->OnSessionTerminated();
+
+ // Stop dockerd first, then containerd (dockerd is a client of containerd).
+ // N.B. dockerd waits a couple seconds if there are any outstanding HTTP request sockets opened.
+ if (m_dockerdProcess.has_value())
+ {
+ auto dockerdExitCode = StopProcess(m_dockerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs);
+ WSL_LOG("DockerdExit", TraceLoggingValue(dockerdExitCode, "code"));
+ }
+
+ if (m_containerdProcess.has_value())
+ {
+ auto containerdExitCode = StopProcess(m_containerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs);
+ WSL_LOG("ContainerdExit", TraceLoggingValue(containerdExitCode, "code"));
+ }
+
+ // N.B. dockerd has exited by this point, so unmounting the VHD is safe since no container can be running.
+ // A failed unmount is logged and otherwise ignored so the rest of the teardown still runs.
+ try
+ {
+ m_virtualMachine->Unmount(c_containerdStorage);
+ }
+ CATCH_LOG();
+ }
+
+ m_dockerdProcess.reset();
+ m_containerdProcess.reset();
+ m_virtualMachine.reset();
+
+ // Destroy the (stopped) relay so the next StartVm can create a fresh one.
+ //
+ // N.B. The relay must NOT be destroyed from its own thread: ~IORelay joins the relay
+ // thread, and joining a thread from itself calls std::terminate(). This situation arises
+ // on the unexpected-VM-exit path, where OnVmExited() runs on the relay thread and drives
+ // Terminate() -> TearDownVmLockHeld(). In that (terminal) case the stopped relay and the
+ // VM-exit event it watches are left in place and destroyed later by ~WSLCSession on a
+ // different thread. On the idle-stop and external-terminate paths this runs on a non-relay
+ // thread, so both are destroyed here. (StartVmLockHeld resets m_vmExitedEvent before reuse.)
+ if (!m_ioRelay || !m_ioRelay->IsRelayThread())
+ {
+ m_ioRelay.reset();
+ m_vmExitedEvent.reset();
+ }
+
+ // Delete the ephemeral swap VHD now that the VM is gone.
+ // Best effort: a failed delete is only logged, and the path is forgotten either way.
+ if (!m_swapVhdPath.empty())
+ {
+ LOG_IF_WIN32_BOOL_FALSE(DeleteFileW(m_swapVhdPath.c_str()));
+ m_swapVhdPath.clear();
+ }
+}
+
+// Returns true when at least one tracked container should keep the VM alive.
+bool WSLCSession::HasActiveContainerLockHeld()
+{
+    std::lock_guard containersGuard(m_containersLock);
+
+    // Two things keep the VM alive on behalf of a container:
+    //  - it is in the Created or Running state (non-terminal, may still be started/used), or
+    //  - it has exited but a client still holds a proxy to its COM wrapper; tearing the VM down
+    //    would disconnect that proxy (RPC_E_DISCONNECTED).
+    for (const auto& entry : m_containers)
+    {
+        const auto& container = entry.second;
+
+        const auto state = container->State();
+        const bool nonTerminal = (state == WslcContainerStateCreated || state == WslcContainerStateRunning);
+        if (nonTerminal || container->IsExternallyReferenced())
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// Signals the idle-check event, provided the event handle is valid.
+void WSLCSession::RequestIdleCheck() noexcept
+{
+    auto& wakeEvent = m_idleState->IdleCheckEvent;
+    if (!wakeEvent)
+    {
+        return;
+    }
+
+    wakeEvent.SetEvent();
+}
+
+// Body of the idle-teardown thread. Sleeps until an idle check is requested, the grace period
+// expires, or the session starts terminating. On each wake it re-evaluates whether the VM is idle
+// (no in-flight activity, no container keeping it alive) and stops the VM once it has been
+// continuously idle for c_vmIdleGracePeriod. Returns when the session-terminating event fires,
+// m_terminating is set, or the wait fails.
+void WSLCSession::IdleWorker()
+{
+    // Idle teardown releases cross-process COM proxies (the VM and its VM-scoped state), so this
+    // thread must join the process MTA; otherwise those Release/calls fail with RPC_E_WRONG_THREAD.
+    const auto coInit = wil::CoInitializeEx(COINIT_MULTITHREADED);
+
+    const HANDLE handles[] = {m_idleState->IdleCheckEvent.get(), m_sessionTerminatingEvent.get()};
+
+    // Absolute time at which a continuously-idle VM becomes eligible for teardown. Unset while
+    // the VM is non-idle; (re)armed when the VM is first observed idle. The wait below times out
+    // at this deadline so teardown happens promptly once the grace period elapses.
+    std::optional<std::chrono::steady_clock::time_point> idleDeadline;
+
+    for (;;)
+    {
+        DWORD timeout = INFINITE;
+        if (idleDeadline.has_value())
+        {
+            const auto now = std::chrono::steady_clock::now();
+
+            // Round the remaining time UP to whole milliseconds. Truncating would turn a
+            // sub-millisecond remainder into a zero timeout, and the loop would then spin --
+            // taking the exclusive session lock on every pass -- until the deadline finally passed.
+            timeout = (*idleDeadline <= now)
+                          ? 0
+                          : static_cast<DWORD>(std::chrono::ceil<std::chrono::milliseconds>(*idleDeadline - now).count());
+        }
+
+        const auto wait = WaitForMultipleObjects(ARRAYSIZE(handles), handles, FALSE, timeout);
+
+        // handles[1] (session terminating) or a wait failure ends the worker. handles[0] (idle
+        // check) and WAIT_TIMEOUT (grace period may have elapsed) both trigger a re-evaluation.
+        if (wait != WAIT_OBJECT_0 && wait != WAIT_TIMEOUT)
+        {
+            break;
+        }
+
+        if (wait == WAIT_OBJECT_0)
+        {
+            m_idleState->IdleCheckEvent.ResetEvent();
+
+            // An explicit idle-check signal means an operation or container state change just
+            // completed (it is raised on every lease/token release and terminal state change).
+            // Restart the grace clock so teardown happens a full grace period after the last
+            // activity, not after the first time the VM was ever observed idle.
+            idleDeadline.reset();
+        }
+
+        if (m_terminating.load())
+        {
+            break;
+        }
+
+        if (!IdleTerminationEnabled())
+        {
+            idleDeadline.reset();
+            continue;
+        }
+
+        try
+        {
+            // Use a non-blocking acquire. A blocking exclusive acquire would queue behind any
+            // in-flight operation's shared VmLease and, because SRW locks favor a waiting writer,
+            // would stall every new operation behind it until that operation completed. A
+            // long-running operation (e.g. a blocking SaveImage/Export) would therefore serialize
+            // all concurrent operations. If the lock is currently held an operation is in flight,
+            // so treat it as activity and re-evaluate on the next idle-check signal (raised when
+            // that operation releases its lease).
+            auto lock = m_lock.try_lock_exclusive();
+            if (!lock)
+            {
+                idleDeadline.reset();
+                continue;
+            }
+            if (m_terminating.load() || m_vmState.load() != VmState::Running)
+            {
+                idleDeadline.reset();
+                continue;
+            }
+
+            // If the VM's exit event is already signaled, the VM has crashed and OnVmExited()
+            // (running on the IO relay thread) is about to drive Terminate(). Do NOT tear the VM
+            // down here: StopVmLockHeld() would join the relay thread while this exclusive lock is
+            // held, and the relay thread can be simultaneously blocked acquiring that same lock
+            // inside Terminate()'s retry loop — a deadlock. Leave crash handling to the relay-thread
+            // path, whose Stop() self-join is a no-op and therefore cannot deadlock.
+            if (m_vmExitedEvent && m_vmExitedEvent.is_signaled())
+            {
+                idleDeadline.reset();
+                continue;
+            }
+
+            // Keep the VM alive while any operation is in flight or any container is non-terminal,
+            // and restart the grace clock so a fresh idle period is required afterwards.
+            if (m_idleState->ActivityCount.load() != 0 || HasActiveContainerLockHeld())
+            {
+                idleDeadline.reset();
+                continue;
+            }
+
+            // The VM is idle. Arm the grace period if the clock is not already running, then
+            // defer teardown until it has fully elapsed. The clock is reset by any non-idle
+            // observation or explicit idle-check signal (see the WAIT_OBJECT_0 handling above),
+            // so a WAIT_TIMEOUT wake here means the VM has been idle for the whole grace period.
+            const auto now = std::chrono::steady_clock::now();
+            if (!idleDeadline.has_value())
+            {
+                idleDeadline = now + c_vmIdleGracePeriod;
+            }
+
+            if (now < *idleDeadline)
+            {
+                continue;
+            }
+
+            idleDeadline.reset();
+            StopVmLockHeld();
+        }
+        CATCH_LOG();
+    }
+}
+
+// Hands out a VmLease bound to this session; all the work happens in the VmLease constructor.
+WSLCSession::VmLease WSLCSession::AcquireVmLease()
+{
+    return VmLease{*this};
+}
+
+// Acquires a lease: counts the caller as in-flight activity, makes sure the VM is running, and
+// holds the session lock in shared mode. Throws if the VM cannot be started.
+WSLCSession::VmLease::VmLease(WSLCSession& Session) : m_session(&Session)
+{
+    // Register the in-flight operation before bringing the VM up, so the idle worker cannot tear
+    // it down between EnsureVmRunning() and the shared-lock acquisition.
+    m_session->m_idleState->ActivityCount.fetch_add(1);
+
+    // Undo the registration if anything below throws.
+    auto undoActivity = wil::scope_exit([this]() {
+        m_session->m_idleState->ActivityCount.fetch_sub(1);
+        m_session = nullptr;
+    });
+
+    // The idle worker may still finish a teardown in the window between EnsureVmRunning() and our
+    // shared-lock acquisition (it may have committed to the stop before our increment became
+    // visible). The increment blocks any *future* idle teardown, so keep retrying until the shared
+    // lock is held with the VM running. This is bounded: once the increment is visible the idle
+    // worker will not stop the VM again, so at most one restart is needed.
+    // N.B. EnsureVmRunning() throws once the session has been terminated, which ends the loop.
+    do
+    {
+        // Drop the lock taken by a previous, unsuccessful pass (no-op on the first pass).
+        m_lock.reset();
+
+        m_session->EnsureVmRunning();
+
+        m_lock = m_session->m_lock.lock_shared();
+    } while (m_session->m_vmState.load() != VmState::Running);
+
+    undoActivity.release();
+}
+
+// Move constructor: takes over Other's session pointer and shared lock, leaving Other empty so
+// its destructor does nothing.
+WSLCSession::VmLease::VmLease(VmLease&& Other) noexcept :
+    m_session(Other.m_session), m_lock(std::move(Other.m_lock))
+{
+    Other.m_session = nullptr;
+}
+
+// Move assignment: releases the lease currently held (if any), then takes over Other's.
+WSLCSession::VmLease& WSLCSession::VmLease::operator=(VmLease&& Other) noexcept
+{
+    if (this == &Other)
+    {
+        return *this;
+    }
+
+    // Give up our own lease first: unlock, drop the activity count, then ask for an idle check.
+    if (auto* previous = m_session; previous != nullptr)
+    {
+        m_lock.reset();
+        previous->m_idleState->ActivityCount.fetch_sub(1);
+        previous->RequestIdleCheck();
+    }
+
+    m_session = Other.m_session;
+    Other.m_session = nullptr;
+    m_lock = std::move(Other.m_lock);
+
+    return *this;
+}
+
+// Ends the lease. An empty lease (m_session is null, e.g. after being moved from) does nothing.
+WSLCSession::VmLease::~VmLease()
+{
+    if (m_session == nullptr)
+    {
+        return;
+    }
+
+    // Drop the shared lock before requesting the idle check, so the idle worker can take the
+    // exclusive lock right away if the session has just become idle.
+    m_lock.reset();
+    m_session->m_idleState->ActivityCount.fetch_sub(1);
+    m_session->RequestIdleCheck();
+}
-CATCH_RETURN()
WSLCSession::~WSLCSession()
{
@@ -496,7 +995,7 @@ HRESULT WSLCSession::GetId(ULONG* Id)
void WSLCSession::OnDockerdExited()
{
- if (!m_sessionTerminatingEvent.is_signaled())
+ if (!m_sessionTerminatingEvent.is_signaled() && !m_vmStopRequested.load())
{
WSL_LOG("UnexpectedDockerdExit", TraceLoggingValue(m_displayName.c_str(), "Name"));
}
@@ -504,7 +1003,7 @@ void WSLCSession::OnDockerdExited()
void WSLCSession::OnContainerdExited()
{
- if (!m_sessionTerminatingEvent.is_signaled())
+ if (!m_sessionTerminatingEvent.is_signaled() && !m_vmStopRequested.load())
{
WSL_LOG("UnexpectedContainerdExit", TraceLoggingValue(m_displayName.c_str(), "Name"));
}
@@ -512,6 +1011,15 @@ void WSLCSession::OnContainerdExited()
void WSLCSession::OnVmExited()
{
+ // A teardown we initiated (idle shutdown) is in progress — the VM exit is expected and
+ // must not terminate the session. N.B. This runs on the IO relay thread; the flag is set
+ // under the exclusive lock before the relay is stopped, so it is visible here.
+ if (m_vmStopRequested.load())
+ {
+ WSL_LOG("WslcVmExitedDuringStop", TraceLoggingValue(m_id, "SessionId"));
+ return;
+ }
+
WSL_LOG(
"VmExited",
TraceLoggingLevel(WINEVENT_LEVEL_WARNING),
@@ -554,13 +1062,13 @@ ServiceRunningProcess WSLCSession::StartProcess(
auto process = launcher.Launch(*m_virtualMachine);
- m_ioRelay.AddHandle(std::make_unique(
+ m_ioRelay->AddHandle(std::make_unique(
process.GetStdHandle(1), [this, LogSource](const auto& data) { OnProcessLog(data, LogSource); }, false));
- m_ioRelay.AddHandle(std::make_unique(
+ m_ioRelay->AddHandle(std::make_unique(
process.GetStdHandle(2), [this, LogSource](const auto& data) { OnProcessLog(data, LogSource); }, false));
- m_ioRelay.AddHandle(std::make_unique(process.GetExitEvent(), std::move(ExitCallback)));
+ m_ioRelay->AddHandle(std::make_unique(process.GetExitEvent(), std::move(ExitCallback)));
return process;
}
@@ -733,7 +1241,7 @@ try
auto [repo, tagOrDigest] = wslutil::ParseImage(Image);
EnforceRegistryAllowlist(repo);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
if (!tagOrDigest.has_value())
@@ -790,7 +1298,7 @@ try
comCall = RegisterUserCOMCallback();
}
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine);
@@ -1100,7 +1608,7 @@ try
WSLCExecutionContext context(this, WarningCallback);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
@@ -1126,7 +1634,7 @@ try
THROW_HR_IF_MSG(E_INVALIDARG, !tagOrDigest.has_value(), "Expected tag for image import: %hs", ImageName);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
@@ -1241,7 +1749,7 @@ try
RETURN_HR_IF_NULL(E_POINTER, ImageNameOrID);
RETURN_HR_IF(E_INVALIDARG, strlen(ImageNameOrID) > WSLC_MAX_IMAGE_NAME_LENGTH);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
@@ -1274,7 +1782,7 @@ try
names.emplace_back(ImageNames->Values[i]);
}
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
@@ -1352,7 +1860,7 @@ try
filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Options->Filters, Options->FiltersCount);
}
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
@@ -1461,7 +1969,7 @@ try
*DeletedImages = nullptr;
*Count = 0;
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
@@ -1535,7 +2043,7 @@ try
RETURN_HR_IF_NULL(E_POINTER, Options->Tag);
RETURN_HR_IF(E_INVALIDARG, strlen(Options->Repo) + strlen(Options->Tag) + 1 > WSLC_MAX_IMAGE_NAME_LENGTH);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
@@ -1572,7 +2080,7 @@ try
auto [repo, tagOrDigest] = wslutil::ParseImage(Image);
EnforceRegistryAllowlist(repo);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
auto requestContext = m_dockerClient->PushImage(repo, tagOrDigest, RegistryAuthenticationInformation);
@@ -1593,7 +2101,7 @@ try
*Output = nullptr;
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
*Output = wil::make_unique_ansistring(InspectImageLockHeld(ImageNameOrId).c_str()).release();
@@ -1641,7 +2149,7 @@ try
*IdentityToken = nullptr;
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
wil::unique_cotaskmem_ansistring token;
@@ -1673,7 +2181,7 @@ try
auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
docker_schema::PruneImageResult pruneResult;
@@ -1734,7 +2242,7 @@ try
"Invalid process flags: 0x%x",
containerOptions->InitProcessOptions.Flags);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
auto result = wil::ResultFromException([&]() { CreateContainerImpl(containerOptions, Container); });
@@ -1810,7 +2318,7 @@ void WSLCSession::CreateContainerImpl(const WSLCContainerOptions* containerOptio
std::bind(&WSLCSession::OnContainerDeleted, this, std::placeholders::_1),
m_eventTracker.value(),
m_dockerClient.value(),
- m_ioRelay);
+ *m_ioRelay);
// Key the map by Docker's container ID, which is set in the WSLCContainerImpl constructor and stable for its lifetime.
auto [it, inserted] = m_containers.emplace(container->ID(), std::move(container));
@@ -1843,7 +2351,7 @@ try
ValidateName(Id, WSLC_MAX_CONTAINER_NAME_LENGTH);
// Look for an exact ID match first.
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
std::lock_guard containersLock{m_containersLock};
// Purge containers that were auto-deleted via OnEvent (--rm).
@@ -1882,6 +2390,90 @@ try
}
CATCH_RETURN();
+namespace {
+
+ // Activity token created by WSLCSession::CreateActivityToken (returned directly by
+ // BeginContainerOperation, and bound to a root-namespace process's lifetime by
+ // CreateRootNamespaceProcess). While a client holds it, the session's activity count is
+ // non-zero, so the idle worker will not tear the VM down. It runs a release callback (which
+ // decrements the activity count via the shared IdleState, without keeping the session alive)
+ // when the client releases it or exits. It implements IFastRundown so that if the holding
+ // client crashes, COM reclaims the stub promptly (rather than via slow default rundown) and the
+ // VM can idle-terminate without a multi-minute delay.
+ class ContainerOperation
+ : public Microsoft::WRL::RuntimeClass, IUnknown, IFastRundown>
+ {
+ public:
+ // Adopts an activity-count reference already taken by CreateActivityToken; the callback
+ // releases it.
+ void Initialize(std::function&& onRelease) noexcept
+ {
+ m_onRelease = std::move(onRelease); // Replaces any callback installed by an earlier call.
+ }
+
+ ~ContainerOperation() override // Runs the release callback as the object is destroyed.
+ {
+ if (m_onRelease) // Skipped when no callback is installed.
+ {
+ m_onRelease();
+ }
+ }
+
+ private:
+ std::function m_onRelease; // Release callback; empty until Initialize() installs one.
+ };
+
+} // namespace
+
+Microsoft::WRL::ComPtr WSLCSession::CreateActivityToken() // Returns a COM token that holds one activity-count reference until it is destroyed; throws on failure.
+{
+ // Record the in-flight activity up front so the VM cannot idle-terminate before the caller
+ // takes ownership of the returned token.
+ m_idleState->ActivityCount.fetch_add(1);
+ auto countCleanup = wil::scope_exit([this]() { // Rollback guard: undoes the increment if anything throws before the hand-off below.
+ m_idleState->ActivityCount.fetch_sub(1);
+ RequestIdleCheck();
+ });
+
+ auto operation = Microsoft::WRL::Make();
+ THROW_IF_NULL_ALLOC(operation.Get()); // Allocation failure throws; countCleanup then restores the count.
+
+ // Capture the shared idle state rather than the session itself: the token may outlive the
+ // session (e.g. a client keeps a root-namespace process proxy past releasing the session), and
+ // it must not keep the session alive. On release it decrements the activity count and wakes the
+ // idle worker; if the session is already gone this is a harmless no-op (no idle worker waits on
+ // the event). N.B. releasing the token therefore never blocks an explicit session teardown.
+ std::shared_ptr idleState = m_idleState;
+ operation->Initialize([idleState = std::move(idleState)]() {
+ idleState->ActivityCount.fetch_sub(1);
+ idleState->IdleCheckEvent.SetEvent(); // Signals the idle-check event directly, since the session may no longer exist.
+ });
+
+ // The token now owns the activity-count reference and will release it on destruction.
+ countCleanup.release();
+
+ Microsoft::WRL::ComPtr token;
+ THROW_IF_FAILED(operation.As(&token)); // If As() fails, destroying 'operation' runs the release callback, so the count stays balanced.
+ return token;
+}
+
+HRESULT WSLCSession::BeginContainerOperation(IUnknown** Operation) // Hands the caller an activity token; idle teardown of the VM is deferred until the caller releases it.
+try
+{
+ WSLCExecutionContext context(this);
+
+ RETURN_HR_IF_NULL(E_POINTER, Operation);
+ *Operation = nullptr; // Keep the out-parameter null on every failure path below.
+
+ // Record the in-flight operation up front so the VM cannot idle-terminate before the client
+ // resolves the container and issues the operation (and streams any output).
+ auto token = CreateActivityToken(); // May throw; CATCH_RETURN converts the exception to an HRESULT.
+
+ RETURN_IF_FAILED(token.CopyTo(Operation)); // The caller gets its own reference; the local 'token' reference is dropped on return.
+ return S_OK;
+}
+CATCH_RETURN();
+
HRESULT WSLCSession::ListContainers(
const WSLCListContainersOptions* Options, WSLCContainerEntry** Containers, ULONG* Count, WSLCContainerPortMapping** Ports, ULONG* PortsCount)
try
@@ -1916,7 +2508,7 @@ try
filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Options->Filters, Options->FiltersCount);
}
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
std::vector dockerContainers;
@@ -1995,7 +2587,7 @@ try
auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value());
std::lock_guard containersLock{m_containersLock};
@@ -2066,10 +2658,18 @@ try
*Errno = -1; // Make sure not to return 0 if something fails.
}
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine);
auto process = m_virtualMachine->CreateLinuxProcess(Executable, *Options, TtyRows, TtyColumns, Errno);
+
+ // The VmLease above is released when this call returns, but the process keeps running in the
+ // VM and the client holds the returned proxy. A root-namespace process is not tracked as a
+ // container, so attach an activity token bound to the process's lifetime; this keeps the VM
+ // alive for as long as the client holds the process, preventing the idle worker from tearing
+ // the VM down and killing the process out from under the client.
+ process->SetKeepAliveToken(CreateActivityToken());
+
THROW_IF_FAILED(process.CopyTo(Process));
return S_OK;
@@ -2092,7 +2692,7 @@ try
THROW_HR_WITH_USER_ERROR_IF(E_INVALIDARG, Localization::MessagePathNotAbsolute(Path), !std::filesystem::path(Path).is_absolute());
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine);
// Attach the disk to the VM (AttachDisk() performs the access check for the VHD file).
@@ -2120,7 +2720,7 @@ try
auto driverOpts = wslutil::ParseKeyValuePairs(Options->DriverOpts, Options->DriverOptsCount);
auto labels = wslutil::ParseKeyValuePairs(Options->Labels, Options->LabelsCount, WSLCVolumeMetadataLabel);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes);
if (Options->Name != nullptr && Options->Name[0] != '\0')
@@ -2140,7 +2740,7 @@ try
RETURN_HR_IF_NULL(E_POINTER, Name);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes);
m_volumes->DeleteVolume(Name);
@@ -2161,7 +2761,7 @@ try
auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes);
auto volumeList = m_volumes->ListVolumes(std::move(filters));
@@ -2193,7 +2793,7 @@ try
std::string name = Name;
ValidateName(name.c_str(), WSLC_MAX_VOLUME_NAME_LENGTH);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes);
std::string json = m_volumes->InspectVolume(name);
@@ -2218,7 +2818,7 @@ try
auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes);
WSLCVolumes::PruneVolumesResult pruneResult;
@@ -2298,7 +2898,7 @@ try
auto driverOpts = wslutil::ParseKeyValuePairs(Options->DriverOpts, Options->DriverOptsCount);
auto labels = wslutil::ParseKeyValuePairs(Options->Labels, Options->LabelsCount, WSLCNetworkManagedLabel);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient);
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine);
@@ -2399,7 +2999,7 @@ try
std::string name = Name;
ValidateName(name.c_str(), WSLC_MAX_NETWORK_NAME_LENGTH);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient);
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine);
@@ -2439,7 +3039,7 @@ try
*Networks = nullptr;
*Count = 0;
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
std::lock_guard networksLock(m_networksLock);
if (m_networks.empty())
@@ -2478,7 +3078,7 @@ try
std::string name = Name;
ValidateName(name.c_str(), WSLC_MAX_NETWORK_NAME_LENGTH);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
std::lock_guard networksLock(m_networksLock);
auto it = m_networks.find(name);
@@ -2589,81 +3189,56 @@ try
// Acquire an exclusive lock to ensure that no operation is running.
WI_VERIFY(sessionLock);
- std::lock_guard containersLock(m_containersLock);
- std::lock_guard networksLock(m_networksLock);
+ // Tear down the VM (if running) and all VM-scoped state, capturing the termination reason.
+ // This mirrors the soft teardown used for idle shutdown, but here it is permanent.
+ TearDownVmLockHeld(/* CaptureTerminationReason */ true);
- m_containers.clear();
- m_volumes.reset();
- m_networks.clear();
+ m_vmState.store(VmState::None);
- // Stop the IO relay.
- // This stops:
- // - container state monitoring.
- // - container init process relays
- // - execs relays
- // - container logs relays
- m_ioRelay.Stop();
+ // Signal completion last so any observer of the terminated event sees a fully torn-down
+ // session and a populated termination reason.
+ m_sessionTerminatedEvent.SetEvent();
+
+ // Release the exclusive lock before joining the idle worker. If the worker is currently
+ // blocked acquiring the exclusive lock (about to evaluate idle teardown), it must be able
+ // to obtain it, observe m_terminating, and exit — otherwise the join below would deadlock.
+ sessionLock.reset();
+ if (m_idleThread.joinable())
{
- std::lock_guard allocatedPortsLock(m_allocatedPortsLock);
- m_allocatedPorts.clear();
+ m_idleThread.join();
}
- m_eventTracker.reset();
- m_dockerClient.reset();
-
- // Check if the VM has already exited (e.g., killed externally).
- // If so, skip operations that require a live VM to avoid unnecessary waits.
- // N.B. m_vmExitedEvent may be uninitialized if Terminate() is called from the
- // Initialize() error path before GetTerminationEvent() succeeds.
- if (m_vmExitedEvent && m_vmExitedEvent.is_signaled())
+ // The idle worker has exited and no operation can run past termination, so the parked VM
+ // factory can no longer be re-fetched; revoke it from the GIT.
+ if (m_vmFactoryGitCookie != 0)
{
- WSL_LOG("SkippingGracefulShutdown_VmDead", TraceLoggingValue(m_id, "SessionId"));
+ LOG_IF_FAILED(m_git->RevokeInterfaceFromGlobal(m_vmFactoryGitCookie));
+ m_vmFactoryGitCookie = 0;
}
- else
- {
- if (m_virtualMachine)
- {
- m_virtualMachine->OnSessionTerminated();
- // Stop dockerd first, then containerd (dockerd is a client of containerd).
- // N.B. dockerd waits a couple seconds if there are any outstanding HTTP request sockets opened.
- if (m_dockerdProcess.has_value())
- {
- auto dockerdExitCode = StopProcess(m_dockerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs);
- WSL_LOG("DockerdExit", TraceLoggingValue(dockerdExitCode, "code"));
- }
-
- if (m_containerdProcess.has_value())
- {
- auto containerdExitCode = StopProcess(m_containerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs);
- WSL_LOG("ContainerdExit", TraceLoggingValue(containerdExitCode, "code"));
- }
-
- // N.B. dockerd has exited by this point, so unmounting the VHD is safe since no container can be running.
- try
- {
- m_virtualMachine->Unmount(c_containerdStorage);
- }
- CATCH_LOG();
- }
+ if (m_warningCallbackGitCookie != 0)
+ {
+ LOG_IF_FAILED(m_git->RevokeInterfaceFromGlobal(m_warningCallbackGitCookie));
+ m_warningCallbackGitCookie = 0;
}
- m_dockerdProcess.reset();
- m_containerdProcess.reset();
- m_virtualMachine.reset();
+ return S_OK;
+}
+CATCH_RETURN();
- // Delete the ephemeral swap VHD now that the VM is gone.
- if (!m_swapVhdPath.empty())
+wil::com_ptr WSLCSession::AcquireWarningCallback() const // Returns the parked warning callback, or null if none was registered or it can no longer be fetched.
+{
+ wil::com_ptr callback;
+ if (m_warningCallbackGitCookie != 0)
{
- LOG_IF_WIN32_BOOL_FALSE(DeleteFileW(m_swapVhdPath.c_str()));
- m_swapVhdPath.clear();
+ // Best-effort: the creating client's proxy may already be gone (e.g. the CLI exited before
+ // a later VM restart), in which case the warning falls through to the default sink.
+ LOG_IF_FAILED(m_git->GetInterfaceFromGlobal(m_warningCallbackGitCookie, __uuidof(IWarningCallback), callback.put_void()));
}
- m_terminated = true;
- return S_OK;
+ return callback; // Still null when the cookie is zero or the lookup above failed (the failure is only logged).
}
-CATCH_RETURN();
HRESULT WSLCSession::RegisterCrashDumpCallback(_In_ ICrashDumpCallback* Callback, _Out_ IUnknown** Subscription)
try
@@ -2728,7 +3303,7 @@ try
RETURN_HR_IF_NULL(E_POINTER, WindowsPath);
RETURN_HR_IF_NULL(E_POINTER, LinuxPath);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine);
return m_virtualMachine->MountWindowsFolder(WindowsPath, LinuxPath, ReadOnly);
@@ -2742,7 +3317,7 @@ try
RETURN_HR_IF_NULL(E_POINTER, LinuxPath);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine);
return m_virtualMachine->UnmountWindowsFolder(LinuxPath);
@@ -2754,7 +3329,7 @@ try
{
WSLCExecutionContext context(this);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine);
std::lock_guard allocatedPortsLock(m_allocatedPortsLock);
@@ -2800,7 +3375,7 @@ try
{
WSLCExecutionContext context(this);
- auto lock = m_lock.lock_shared();
+ auto lock = AcquireVmLease();
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine);
std::lock_guard allocatedPortsLock(m_allocatedPortsLock);
@@ -2948,7 +3523,11 @@ void WSLCSession::CancelUserCOMCallbacks()
void WSLCSession::OnContainerDeleted(const WSLCContainerImpl* Container)
{
- auto lock = m_lock.lock_shared();
+ // N.B. Invoked only from WSLCContainer::Delete, which already holds a VmLease (the shared
+ // session lock). The lease prevents a concurrent idle teardown from clearing m_containers,
+ // so this only needs m_containersLock. It must NOT re-acquire the shared session lock here:
+ // doing so while the idle worker is queued for the exclusive lock would deadlock (recursive
+ // shared acquire behind a pending writer).
std::lock_guard containersLock(m_containersLock);
WI_VERIFY(m_containers.erase(Container->ID()) == 1);
@@ -2958,7 +3537,51 @@ HRESULT WSLCSession::GetState(_Out_ WSLCSessionState* State)
{
RETURN_HR_IF_NULL(E_POINTER, State);
- *State = m_terminated ? WSLCSessionStateTerminated : WSLCSessionStateRunning;
+ *State = m_sessionTerminatedEvent.is_signaled() ? WSLCSessionStateTerminated : WSLCSessionStateRunning;
+ return S_OK;
+}
+
+HRESULT WSLCSession::GetTerminationEvent(_Out_ HANDLE* Event) // Returns a caller-owned duplicate of the session's "terminated" event.
+try
+{
+ RETURN_HR_IF(E_POINTER, Event == nullptr);
+
+ *Event = nullptr; // Leave the out-parameter null if duplication fails below.
+
+ // Duplicate the "terminated" event. The caller owns the returned handle, which stays valid even after the session is released.
+ *Event = wsl::windows::common::wslutil::DuplicateHandle(m_sessionTerminatedEvent.get(), SYNCHRONIZE); // SYNCHRONIZE access only; presumably throws on failure (hence try/CATCH_RETURN) - confirm.
+
+ return S_OK;
+}
+CATCH_RETURN();
+
+HRESULT WSLCSession::GetTerminationReason(_Out_ WSLCVirtualMachineTerminationReason* Reason, _Out_ LPWSTR* Details) // Reports why the session terminated; fails with ERROR_INVALID_STATE until the terminated event is signaled.
+try
+{
+ RETURN_HR_IF(E_POINTER, Reason == nullptr || Details == nullptr);
+
+ *Reason = WSLCVirtualMachineTerminationReasonUnknown; // Values the caller sees on the failure paths below.
+ *Details = nullptr;
+
+ // m_terminationReason/m_terminationDetails are written once before m_sessionTerminatedEvent is
+ // signaled and never modified afterward, so observing the signaled event safely publishes them.
+ RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_sessionTerminatedEvent.is_signaled());
+
+ *Reason = m_terminationReason;
+ *Details = wil::make_cotaskmem_string(m_terminationDetails.c_str()).release(); // Ownership passes to the caller (CoTaskMem allocation); allocation failure throws and is converted by CATCH_RETURN.
+
+ return S_OK;
+}
+CATCH_RETURN();
+
+HRESULT WSLCSession::GetVmDiagnostics(_Out_ WSLCVmDiagnostics* Diagnostics) // Fills Diagnostics with a snapshot of the VM state and start count; takes no lock and no lease.
+{
+ RETURN_HR_IF_NULL(E_POINTER, Diagnostics);
+
+ // Reads atomics only: this must not acquire a VM lease or otherwise bring the VM up,
+ // so callers can observe idle termination without keeping the VM alive.
+ Diagnostics->Running = m_vmState.load() == VmState::Running; // True only in VmState::Running; every other state reports false.
+ Diagnostics->StartCount = m_vmStartCount.load(); // The two loads are independent, so the pair is not an atomic snapshot.
return S_OK;
}
@@ -2983,7 +3606,7 @@ void WSLCSession::RecoverExistingContainers()
std::bind(&WSLCSession::OnContainerDeleted, this, std::placeholders::_1),
m_eventTracker.value(),
m_dockerClient.value(),
- m_ioRelay);
+ *m_ioRelay);
auto [it, inserted] = m_containers.emplace(container->ID(), std::move(container));
WI_ASSERT(inserted);
diff --git a/src/windows/wslcsession/WSLCSession.h b/src/windows/wslcsession/WSLCSession.h
index 5c7bee2227..f8e0a2c907 100644
--- a/src/windows/wslcsession/WSLCSession.h
+++ b/src/windows/wslcsession/WSLCSession.h
@@ -22,7 +22,10 @@ Module Name:
#include "DockerEventTracker.h"
#include "DockerHTTPClient.h"
#include "IORelay.h"
+#include
#include
+#include
+#include
#include
namespace wsl::windows::service::wslc {
@@ -70,11 +73,16 @@ class UserCOMCallback
//
// WSLCSession - Implements IWSLCSession for container management.
// Runs in a per-user COM server process for security isolation.
-// The SYSTEM service creates the VM and passes IWSLCVirtualMachine to Initialize().
+// The SYSTEM service passes an IWSLCVirtualMachineFactory to Initialize(); the VM is created
+// lazily on first use and may be torn down when idle and recreated on demand.
//
class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession
: public Microsoft::WRL::RuntimeClass, IWSLCSession, IFastRundown, ISupportErrorInfo>
{
+ // WSLCContainer::Delete acquires a VmLease to keep the VM alive (and block idle
+ // teardown) for the duration of a container deletion.
+ friend class WSLCContainer;
+
public:
WSLCSession() = default;
@@ -98,6 +106,9 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession
IFACEMETHOD(GetId)(_Out_ ULONG* Id) override;
IFACEMETHOD(GetState)(_Out_ WSLCSessionState* State) override;
+ IFACEMETHOD(GetTerminationEvent)(_Out_ HANDLE* Event) override;
+ IFACEMETHOD(GetTerminationReason)(_Out_ WSLCVirtualMachineTerminationReason* Reason, _Out_ LPWSTR* Details) override;
+ IFACEMETHOD(GetVmDiagnostics)(_Out_ WSLCVmDiagnostics* Diagnostics) override;
// Image management.
IFACEMETHOD(PullImage)(
@@ -135,6 +146,7 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession
// Container management.
IFACEMETHOD(CreateContainer)(_In_ const WSLCContainerOptions* Options, _In_opt_ IWarningCallback* WarningCallback, _Out_ IWSLCContainer** Container) override;
IFACEMETHOD(OpenContainer)(_In_ LPCSTR Id, _In_ IWSLCContainer** Container) override;
+ IFACEMETHOD(BeginContainerOperation)(_Outptr_ IUnknown** Operation) override;
IFACEMETHOD(ListContainers)(
_In_opt_ const WSLCListContainersOptions* Options,
_Out_ WSLCContainerEntry** Containers,
@@ -201,6 +213,13 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession
UserCOMCallback RegisterUserCOMCallback();
void UnregisterUserCOMCallback(DWORD ThreadId);
+ // Returns the warning callback supplied when the session was created/entered, re-marshalled
+ // into the calling apartment. Used as a fallback by WSLCExecutionContext so that warnings
+ // emitted by operations that carry no explicit callback (e.g. resource recovery during the
+ // lazy VM start) still reach the session creator. Returns null if no callback was supplied
+ // or the creating client's proxy is no longer reachable.
+ wil::com_ptr AcquireWarningCallback() const;
+
HANDLE SessionTerminatingEvent() const noexcept
{
return m_sessionTerminatingEvent.get();
@@ -213,9 +232,79 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession
bool WaitForEventOrSessionTerminating(HANDLE Event, std::chrono::milliseconds Timeout) const;
+ // Signals the idle worker to re-evaluate whether the VM can be torn down.
+ // Safe to call from any thread, including IO relay / container callbacks.
+ void RequestIdleCheck() noexcept;
+
+ // Creates an opaque activity token that holds a reference on this session's activity count for
+ // its lifetime, deferring idle teardown of the VM until every outstanding token is released.
+ // Used both for transient client operations (BeginContainerOperation) and to keep the VM alive
+ // for the lifetime of a process whose wrapper a client may keep (root-namespace and exec'd
+ // processes).
+ Microsoft::WRL::ComPtr CreateActivityToken();
+
+ // Idle-activity state shared between the session and any outstanding activity tokens. Held via
+ // shared_ptr so a token (or a container COM wrapper) can outlive the session (e.g. a client
+ // keeps a root-namespace process or container proxy past releasing the session) and still
+ // safely release its activity reference / wake the idle worker without keeping the session
+ // object alive. Tearing down the session therefore proceeds normally; a late token release
+ // simply decrements the count and signals an event with no waiter. Public so the container COM
+ // wrapper (WSLCContainer) can hold a shared_ptr to it; see WSLCContainer::Release().
+ struct IdleState
+ {
+ std::atomic ActivityCount{0}; // Outstanding VM leases and activity tokens: each holder adds 1 on acquire and subtracts 1 on release.
+ wil::unique_event IdleCheckEvent{wil::EventOptions::ManualReset}; // Manual-reset event set after a release to wake the idle worker.
+ };
+
private:
ULONG m_id = 0;
+ // VM lifecycle state for on-demand creation / idle termination.
+ enum class VmState
+ {
+ None, // Default value of m_vmState.
+ Starting, // NOTE(review): presumably set while the VM is being created - confirm against StartVmLockHeld().
+ Running, // The state VmLease's constructor retries for, and the only one GetVmDiagnostics reports as running.
+ Stopping, // NOTE(review): presumably set while the VM is being torn down - confirm against StopVmLockHeld().
+ };
+
+ _Requires_exclusive_lock_held_(m_lock)
+ void StartVmLockHeld();
+ _Requires_exclusive_lock_held_(m_lock)
+ void StopVmLockHeld();
+ _Requires_exclusive_lock_held_(m_lock)
+ void TearDownVmLockHeld(bool CaptureTerminationReason = false);
+ _Requires_exclusive_lock_held_(m_lock)
+ bool HasActiveContainerLockHeld();
+ void EnsureVmRunning();
+
+ void IdleWorker();
+ bool IdleTerminationEnabled() const noexcept;
+ void PersistSettings(const WSLCSessionInitSettings& Settings, PSID UserSid);
+
+ // RAII lease taken at the top of every VM-requiring operation. On construction it
+ // ensures the VM is running and records an in-flight operation so idle teardown is
+ // deferred; it then holds the shared session lock for the operation's duration. On
+ // destruction it releases the lock and triggers an idle check.
+ class VmLease
+ {
+ public:
+ VmLease() = default; // Empty lease: holds no session, no lock and no activity reference.
+ explicit VmLease(WSLCSession& Session); // Acquires the lease; can throw (EnsureVmRunning() throws once the session has been terminated).
+ VmLease(VmLease&& Other) noexcept; // Transfers the lease; Other is left empty.
+ VmLease& operator=(VmLease&& Other) noexcept; // Releases the current lease (if any), then takes Other's.
+ ~VmLease(); // Releases the shared lock, drops the activity reference, then requests an idle check.
+
+ VmLease(const VmLease&) = delete; // Move-only: a lease stands for exactly one activity-count reference.
+ VmLease& operator=(const VmLease&) = delete;
+
+ private:
+ WSLCSession* m_session{}; // Non-owning; null marks an empty or moved-from lease.
+ wil::rwlock_release_shared_scope_exit m_lock; // Shared hold on WSLCSession::m_lock for the lease's duration.
+ };
+
+ [[nodiscard]] VmLease AcquireVmLease();
+
__requires_lock_held(m_userHandlesLock) void CancelUserHandleIO();
__requires_lock_held(m_userCOMCallbacksLock) void CancelUserCOMCallbacks();
@@ -253,6 +342,19 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession
void StreamImageOperation(DockerHTTPClient::HTTPRequestContext& requestContext, LPCSTR Image, LPCSTR OperationName, IProgressCallback* ProgressCallback);
std::optional m_dockerClient;
+
+ // The VM factory is a cross-process proxy supplied by the SYSTEM service at Initialize() time
+ // but first used later (on demand) from a different thread/apartment. A directly stored proxy
+ // would fail with RPC_E_WRONG_THREAD, so it is parked in the process Global Interface Table and
+ // re-fetched (re-marshalled into the calling apartment) each time a VM is created.
+ wil::com_ptr m_git;
+ DWORD m_vmFactoryGitCookie{};
+
+ // The warning callback supplied at Initialize() is parked in the GIT for the same reason as
+ // the VM factory: it is used later, on demand, from other threads/apartments (a directly
+ // stored proxy would fail with RPC_E_WRONG_THREAD). Zero if no callback was supplied.
+ DWORD m_warningCallbackGitCookie{};
+
std::optional m_virtualMachine;
std::optional m_eventTracker;
wil::unique_event m_dockerdReadyEvent{wil::EventOptions::ManualReset};
@@ -271,15 +373,38 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession
std::mutex m_networksLock;
std::unordered_map m_networks;
wil::unique_event m_sessionTerminatingEvent{wil::EventOptions::ManualReset};
+ wil::unique_event m_sessionTerminatedEvent{wil::EventOptions::ManualReset};
wil::unique_event m_vmExitedEvent;
+
+ WSLCVirtualMachineTerminationReason m_terminationReason{WSLCVirtualMachineTerminationReasonUnknown};
+ std::wstring m_terminationDetails;
wil::srwlock m_lock;
- IORelay m_ioRelay;
+ std::optional m_ioRelay;
+
+ // VM lifecycle / idle-termination state.
+ std::atomic m_vmState{VmState::None};
+ std::atomic m_vmStopRequested{false};
+ // Number of times the VM has been (re)created; surfaced via GetVmDiagnostics.
+ std::atomic m_vmStartCount{0};
+ // In-flight activity count and idle-worker wake event, decoupled from this object's lifetime
+ // (see IdleState) so activity tokens never extend the session's lifetime.
+ std::shared_ptr m_idleState{std::make_shared()};
+ std::thread m_idleThread;
+
+ // Persisted settings required to (re)create the VM on demand. The string fields point
+ // into the owned storage members below (or m_displayName) so they remain valid for the
+ // lifetime of the session.
+ WSLCSessionInitSettings m_settings{};
+ std::optional m_settingsCreatorProcessName;
+ std::optional m_settingsStoragePath;
+ std::optional m_settingsRootVhdTypeOverride;
+ std::vector m_userSid;
+
std::optional m_containerdProcess;
std::optional m_dockerdProcess;
WSLCFeatureFlags m_featureFlags{};
std::function m_destructionCallback;
std::atomic m_terminating{false};
- std::atomic m_terminated{false};
wil::com_ptr m_pluginNotifier;
diff --git a/src/windows/wslcsession/WSLCVirtualMachine.h b/src/windows/wslcsession/WSLCVirtualMachine.h
index dbdf1e2483..f6e438c5c0 100644
--- a/src/windows/wslcsession/WSLCVirtualMachine.h
+++ b/src/windows/wslcsession/WSLCVirtualMachine.h
@@ -170,6 +170,12 @@ class WSLCVirtualMachine
return m_vmTerminatingEvent.get();
}
+ // Retrieves the cached termination reason and details from the underlying VM.
+ HRESULT GetTerminationReason(_Out_ WSLCVirtualMachineTerminationReason* Reason, _Out_ LPWSTR* Details) const
+ {
+ return m_vm->GetTerminationReason(Reason, Details);
+ }
+
GUID VmId() const
{
return m_vmId;
diff --git a/test/windows/WSLCTests.cpp b/test/windows/WSLCTests.cpp
index 1327dbb907..a30a8911ab 100644
--- a/test/windows/WSLCTests.cpp
+++ b/test/windows/WSLCTests.cpp
@@ -2876,46 +2876,26 @@ class WSLCTests
}
}
- WSLC_TEST_METHOD(TerminationCallback)
+ WSLC_TEST_METHOD(TerminationEvent)
{
- class DECLSPEC_UUID("7BC4E198-6531-4FA6-ADE2-5EF3D2A04DFF") CallbackInstance
- : public Microsoft::WRL::RuntimeClass, ITerminationCallback, IFastRundown>
- {
-
- public:
- CallbackInstance(std::function&& callback) :
- m_callback(std::move(callback))
- {
- }
+ auto session = CreateSession(GetDefaultSessionSettings(L"termination-event-test"));
- HRESULT OnTermination(WSLCVirtualMachineTerminationReason Reason, LPCWSTR Details) override
- {
- m_callback(Reason, Details);
- return S_OK;
- }
+ wil::unique_handle terminationEvent;
+ VERIFY_SUCCEEDED(session->GetTerminationEvent(&terminationEvent));
+ VERIFY_IS_NOT_NULL(terminationEvent.get());
- private:
- std::function m_callback;
- };
+ // The reason is unavailable until the session has terminated.
+ WSLCVirtualMachineTerminationReason reason{};
+ wil::unique_cotaskmem_string details;
+ VERIFY_ARE_EQUAL(session->GetTerminationReason(&reason, &details), HRESULT_FROM_WIN32(ERROR_INVALID_STATE));
- std::promise> promise;
+ // Terminating the session should signal the event and record a graceful shutdown reason.
+ VERIFY_SUCCEEDED(session->Terminate());
- CallbackInstance callback{[&](WSLCVirtualMachineTerminationReason reason, LPCWSTR details) {
- promise.set_value(std::make_pair(reason, details));
- }};
+ VERIFY_ARE_EQUAL(WaitForSingleObject(terminationEvent.get(), 30 * 1000), static_cast(WAIT_OBJECT_0));
- WSLCSessionSettings sessionSettings = GetDefaultSessionSettings(L"termination-callback-test");
- sessionSettings.TerminationCallback = &callback;
-
- auto session = CreateSession(sessionSettings);
-
- session.reset();
- auto future = promise.get_future();
- auto result = future.wait_for(std::chrono::seconds(30));
- VERIFY_ARE_EQUAL(result, std::future_status::ready);
- auto [reason, details] = future.get();
+ VERIFY_SUCCEEDED(session->GetTerminationReason(&reason, &details));
VERIFY_ARE_EQUAL(reason, WSLCVirtualMachineTerminationReasonShutdown);
- VERIFY_ARE_NOT_EQUAL(details, L"");
}
WSLC_TEST_METHOD(CrashDumpCallback)
@@ -10358,6 +10338,10 @@ class WSLCTests
auto settings = GetDefaultSessionSettings(c_sessionName);
auto session = CreateSession(settings);
+ // Session creation is lazy, so start the VM by launching a process before killing it.
+ WSLCProcessLauncher launcher("/bin/sleep", {"/bin/sleep", "60"});
+ auto process = launcher.Launch(*session);
+
KillVmByOwner(c_sessionName);
WaitForSessionTermination(session.get());
@@ -10449,6 +10433,10 @@ class WSLCTests
VERIFY_SUCCEEDED(sessionManager2->CreateSession(&settings2, WSLCSessionFlagsNone, warningCallback.Get(), &session2));
wsl::windows::common::security::ConfigureForCOMImpersonation(session2.get());
+ // The VM (and container recovery) starts lazily on the first operation. Trigger it so
+ // recovery runs and its warning is delivered to the session's warning callback.
+ WSLCProcessLauncher("/bin/sh", {"/bin/sh", "-c", "exit 0"}).Launch(*session2).GetExitEvent().wait(30000);
+
// Verify the warning matches the expected localized message for the corrupt container.
auto warnings = warningCallback->GetWarnings();
auto expectedWarning = std::format(
@@ -10517,6 +10505,10 @@ class WSLCTests
VERIFY_SUCCEEDED(sessionManager->CreateSession(&settings, WSLCSessionFlagsNone, warningCallback.Get(), &session));
wsl::windows::common::security::ConfigureForCOMImpersonation(session.get());
+ // The VM (and volume recovery) starts lazily on the first operation. Trigger it so
+ // recovery runs and its warning is delivered to the session's warning callback.
+ WSLCProcessLauncher("/bin/sh", {"/bin/sh", "-c", "exit 0"}).Launch(*session).GetExitEvent().wait(30000);
+
// Verify the warning matches the expected localized message for the missing volume.
auto warnings = warningCallback->GetWarnings();
auto expectedWarning =
diff --git a/test/windows/WslcSdkTests.cpp b/test/windows/WslcSdkTests.cpp
index 2588a13fe2..28b6be3e1e 100644
--- a/test/windows/WslcSdkTests.cpp
+++ b/test/windows/WslcSdkTests.cpp
@@ -267,60 +267,53 @@ class WslcSdkTests
VERIFY_ARE_EQUAL(WslcCreateSession(nullptr, &session2, nullptr), E_POINTER);
}
- WSLC_TEST_METHOD(TerminationCallbackViaTerminate)
+ WSLC_TEST_METHOD(TerminationEventViaTerminate)
{
- std::promise promise;
-
- auto callback = [](WslcSessionTerminationReason reason, PVOID context) {
- auto* p = static_cast*>(context);
- p->set_value(reason);
- };
-
- std::filesystem::path extraStorage = m_storagePath / "wslc-termcb-term-storage";
+ std::filesystem::path extraStorage = m_storagePath / "wslc-termevt-term-storage";
WslcSessionSettings sessionSettings;
- VERIFY_SUCCEEDED(WslcInitSessionSettings(L"wslc-termcb-term-test", extraStorage.c_str(), &sessionSettings));
+ VERIFY_SUCCEEDED(WslcInitSessionSettings(L"wslc-termevt-term-test", extraStorage.c_str(), &sessionSettings));
VERIFY_SUCCEEDED(WslcSetSessionSettingsTimeout(&sessionSettings, 30 * 1000));
- VERIFY_SUCCEEDED(WslcSetSessionSettingsTerminationCallback(&sessionSettings, callback, &promise));
UniqueSession session;
VERIFY_SUCCEEDED(WslcCreateSession(&sessionSettings, &session, nullptr));
- // Terminating the session should trigger a graceful shutdown and fire the callback.
+ wil::unique_handle terminationEvent;
+ VERIFY_SUCCEEDED(WslcGetSessionTerminationEvent(session.get(), &terminationEvent));
+ VERIFY_IS_NOT_NULL(terminationEvent.get());
+
+ // Terminating the session should trigger a graceful shutdown and signal the event.
VERIFY_SUCCEEDED(WslcTerminateSession(session.get()));
- auto future = promise.get_future();
- VERIFY_ARE_EQUAL(future.wait_for(std::chrono::seconds(30)), std::future_status::ready);
- VERIFY_ARE_EQUAL(future.get(), WSLC_SESSION_TERMINATION_REASON_SHUTDOWN);
+ VERIFY_ARE_EQUAL(WaitForSingleObject(terminationEvent.get(), 30 * 1000), static_cast(WAIT_OBJECT_0));
+
+ WslcSessionTerminationReason reason = WSLC_SESSION_TERMINATION_REASON_UNKNOWN;
+ VERIFY_SUCCEEDED(WslcGetSessionTerminationReason(session.get(), &reason));
+ VERIFY_ARE_EQUAL(reason, WSLC_SESSION_TERMINATION_REASON_SHUTDOWN);
}
- WSLC_TEST_METHOD(TerminationCallbackViaRelease)
+ WSLC_TEST_METHOD(TerminationEventViaRelease)
{
- std::promise promise;
-
- auto callback = [](WslcSessionTerminationReason reason, PVOID context) {
- auto* p = static_cast*>(context);
- p->set_value(reason);
- };
-
- std::filesystem::path extraStorage = m_storagePath / "wslc-termcb-release-storage";
+ std::filesystem::path extraStorage = m_storagePath / "wslc-termevt-release-storage";
WslcSessionSettings sessionSettings;
- VERIFY_SUCCEEDED(WslcInitSessionSettings(L"wslc-termcb-release-test", extraStorage.c_str(), &sessionSettings));
+ VERIFY_SUCCEEDED(WslcInitSessionSettings(L"wslc-termevt-release-test", extraStorage.c_str(), &sessionSettings));
VERIFY_SUCCEEDED(WslcSetSessionSettingsTimeout(&sessionSettings, 30 * 1000));
- VERIFY_SUCCEEDED(WslcSetSessionSettingsTerminationCallback(&sessionSettings, callback, &promise));
UniqueSession session;
VERIFY_SUCCEEDED(WslcCreateSession(&sessionSettings, &session, nullptr));
- // Releasing the session should trigger a graceful shutdown and fire the callback.
+ // The termination event is owned by the caller and stays valid even after the session is released.
+ wil::unique_handle terminationEvent;
+ VERIFY_SUCCEEDED(WslcGetSessionTerminationEvent(session.get(), &terminationEvent));
+ VERIFY_IS_NOT_NULL(terminationEvent.get());
+
+ // Releasing the session should trigger a graceful shutdown and signal the event.
VERIFY_SUCCEEDED(WslcReleaseSession(session.get()));
- // Calling WslcSessionRelease will destroy the session
+ // Calling WslcReleaseSession will destroy the session.
session.release();
- auto future = promise.get_future();
- VERIFY_ARE_EQUAL(future.wait_for(std::chrono::seconds(30)), std::future_status::ready);
- VERIFY_ARE_EQUAL(future.get(), WSLC_SESSION_TERMINATION_REASON_SHUTDOWN);
+ VERIFY_ARE_EQUAL(WaitForSingleObject(terminationEvent.get(), 30 * 1000), static_cast(WAIT_OBJECT_0));
}
WSLC_TEST_METHOD(CrashDumpCallback)
diff --git a/test/windows/wslc/e2e/WSLCE2EHelpers.h b/test/windows/wslc/e2e/WSLCE2EHelpers.h
index d05940c58e..79c6d2f8ff 100644
--- a/test/windows/wslc/e2e/WSLCE2EHelpers.h
+++ b/test/windows/wslc/e2e/WSLCE2EHelpers.h
@@ -111,6 +111,11 @@ struct TestSession
return m_storagePath;
}
+ IWSLCSession* Session() const
+ {
+ return m_session.get();
+ }
+
private:
std::wstring m_name;
std::filesystem::path m_storagePath;
diff --git a/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp b/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp
index c10f432951..c512452cf8 100644
--- a/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp
+++ b/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp
@@ -109,8 +109,14 @@ class WSLCE2ESessionEnterTests
WSLC_TEST_METHOD(WSLCE2E_SessionEnter_StoragePathNotFound)
{
auto result = RunWslc(L"system session enter does-not-exist");
+
+ // The CLI resolves the storage argument to an absolute path (see EnterSession task) and the
+ // service validates it eagerly at session creation, reporting the friendly "No WSLC session
+ // found in '<path>'" message rather than a bare system error.
+ const auto storagePath = std::filesystem::absolute(L"does-not-exist").wstring();
result.Verify({
- .Stderr = L"The system cannot find the path specified. \r\nError code: ERROR_PATH_NOT_FOUND\r\n",
+ .Stderr = wsl::shared::Localization::MessageWslcSessionStorageNotFound(storagePath) +
+ L"\r\nError code: ERROR_PATH_NOT_FOUND\r\n",
.ExitCode = 1,
});
}
diff --git a/test/windows/wslc/e2e/WSLCE2EVmIdleTests.cpp b/test/windows/wslc/e2e/WSLCE2EVmIdleTests.cpp
new file mode 100644
index 0000000000..2c10b02698
--- /dev/null
+++ b/test/windows/wslc/e2e/WSLCE2EVmIdleTests.cpp
@@ -0,0 +1,213 @@
+/*++
+
+Copyright (c) Microsoft. All rights reserved.
+
+Module Name:
+
+ WSLCE2EVmIdleTests.cpp
+
+Abstract:
+
+ End-to-end tests for on-demand / idle-terminating wslc VMs. A session's backing VM is
+ created lazily on the first VM-requiring operation and torn down again once there are no
+ active (Created or Running) containers and no in-flight operations, while the per-user
+ session survives across VM restarts. VM lifecycle is observed via
+ IWSLCSession::GetVmDiagnostics, which reads state without bringing the VM up.
+
+--*/
+
+#include "precomp.h"
+#include "windows/Common.h"
+#include "WSLCExecutor.h"
+#include "WSLCE2EHelpers.h"
+#include
+
+namespace WSLCE2ETests {
+using namespace wsl::shared;
+
+class WSLCE2EVmIdleTests
+{
+ WSLC_TEST_CLASS(WSLCE2EVmIdleTests)
+
+ const TestImage& AlpineImage = AlpineTestImage();
+
+ static WSLCVmDiagnostics QueryDiagnostics(const TestSession& session)
+ {
+ WSLCVmDiagnostics diagnostics{};
+ VERIFY_SUCCEEDED(session.Session()->GetVmDiagnostics(&diagnostics));
+ return diagnostics;
+ }
+
+ // Polls VM diagnostics until the VM reaches the desired running state. Idle teardown and
+ // on-demand bring-up happen asynchronously, so callers must wait rather than assume.
+ static void WaitForVmRunningState(const TestSession& session, bool running)
+ {
+ retry::RetryWithTimeout(
+ [&]() {
+ const auto diagnostics = QueryDiagnostics(session);
+ THROW_HR_IF(E_FAIL, static_cast(diagnostics.Running) != running);
+ },
+ std::chrono::milliseconds(250),
+ std::chrono::seconds(60));
+ }
+
+ // A freshly created session has no VM until the first VM-requiring operation arrives, and
+ // the VM idle-terminates once that operation completes with nothing left active.
+ WSLC_TEST_METHOD(WSLCE2E_VmIdle_LazyStartAndIdleStop)
+ {
+ auto session = TestSession::Create(L"wslc-vmidle-lazy");
+
+ const auto initial = QueryDiagnostics(session);
+ VERIFY_IS_FALSE(static_cast(initial.Running));
+ VERIFY_ARE_EQUAL(initial.StartCount, 0ul);
+
+ // The first VM-requiring operation brings the VM up on demand.
+ EnsureImageIsLoaded(AlpineImage, session.Name());
+
+ // With no Created/Running containers and no in-flight operations, the VM tears down.
+ WaitForVmRunningState(session, false);
+
+ const auto afterIdle = QueryDiagnostics(session);
+ VERIFY_IS_TRUE(afterIdle.StartCount >= 1ul);
+ }
+
+ // After the VM idle-terminates, a subsequent operation recreates it from scratch and any
+ // previously loaded images remain available (storage persists across VM restarts).
+ WSLC_TEST_METHOD(WSLCE2E_VmIdle_RecreateOnDemandAndPersistState)
+ {
+ auto session = TestSession::Create(L"wslc-vmidle-recreate");
+
+ EnsureImageIsLoaded(AlpineImage, session.Name());
+ WaitForVmRunningState(session, false);
+ const auto startCountBeforeRecreate = QueryDiagnostics(session).StartCount;
+
+ // Running a container recreates the VM, runs to completion, then idles again.
+ RunWslcAndVerify(
+ std::format(L"container run --session {} --rm {} echo hello", session.Name(), AlpineImage.NameAndTag()),
+ {.Stderr = L"", .ExitCode = 0});
+
+ WaitForVmRunningState(session, false);
+ const auto startCountAfterRecreate = QueryDiagnostics(session).StartCount;
+ VERIFY_IS_TRUE(startCountAfterRecreate > startCountBeforeRecreate);
+
+ // The image loaded before the restart survived the VM teardown/recreate cycle.
+ auto images = RunWslc(std::format(L"image list --session {}", session.Name()));
+ images.Verify({.Stderr = L"", .ExitCode = 0});
+ VERIFY_IS_TRUE(images.StdoutContainsSubstring(L"alpine"));
+ }
+
+ // A container in the Created state (created but never started) counts as active and keeps
+ // the VM alive; removing it lets the VM idle-terminate.
+ WSLC_TEST_METHOD(WSLCE2E_VmIdle_CreatedContainerKeepsVmAlive)
+ {
+ constexpr auto containerName = L"wslc-vmidle-created";
+ auto session = TestSession::Create(L"wslc-vmidle-created-session");
+
+ EnsureImageIsLoaded(AlpineImage, session.Name());
+
+ RunWslcAndVerify(
+ std::format(L"container create --session {} --name {} {} sleep 3600", session.Name(), containerName, AlpineImage.NameAndTag()),
+ {.Stderr = L"", .ExitCode = 0});
+
+ // The VM must stay up while a Created container exists, even with nothing running.
+ WaitForVmRunningState(session, true);
+ std::this_thread::sleep_for(std::chrono::seconds(3));
+ VERIFY_IS_TRUE(static_cast(QueryDiagnostics(session).Running));
+
+ // Removing the only container drops the active count to zero and the VM idles.
+ RunWslcAndVerify(std::format(L"container rm --session {} {}", session.Name(), containerName), {.Stderr = L"", .ExitCode = 0});
+
+ WaitForVmRunningState(session, false);
+ }
+
+ // A long-lived root-namespace process (created via CreateRootNamespaceProcess) is not tracked
+ // as a container, so it does not contribute to the active-container check. It must nonetheless
+ // keep the VM alive for as long as the client holds the returned process, via the activity
+ // token bound to the process's lifetime. Without that token the idle worker would tear the VM
+ // down once the grace period elapsed, killing the process out from under the client.
+ WSLC_TEST_METHOD(WSLCE2E_VmIdle_RootProcessKeepsVmAlive)
+ {
+ auto session = TestSession::Create(L"wslc-vmidle-rootproc");
+
+ // Launch a long-running root-namespace process and keep the returned process object alive.
+ // This brings the VM up on demand to host the process.
+ wsl::windows::common::WSLCProcessLauncher launcher("/bin/sleep", {"/bin/sleep", "3600"});
+ std::optional process = launcher.Launch(*session.Session());
+
+ WaitForVmRunningState(session, true);
+
+ // The VM must remain running past the idle grace period (30s) while the process is held,
+ // even though there are no containers and no in-flight operations. Without the keep-alive
+ // token the idle worker would have torn the VM down ~30s after the creating call returned,
+ // so a generous margin past the grace period reliably catches that regression.
+ std::this_thread::sleep_for(std::chrono::seconds(40));
+ VERIFY_IS_TRUE(static_cast(QueryDiagnostics(session).Running));
+
+ // Releasing the process proxy drops the activity count to zero and the VM idle-terminates.
+ process.reset();
+ WaitForVmRunningState(session, false);
+ }
+
+ // A client may hold a proxy to a container that has exited and is therefore no longer "active"
+ // by state. Tearing the VM down would disconnect that proxy (leaving the client with
+ // RPC_E_DISCONNECTED), so the idle worker must keep the VM alive while any container proxy is
+ // outstanding -- and reclaim it promptly once the client releases the proxy. This is the
+ // container analogue of the root-process keep-alive above.
+ WSLC_TEST_METHOD(WSLCE2E_VmIdle_HeldContainerProxyKeepsVmAlive)
+ {
+ auto session = TestSession::Create(L"wslc-vmidle-heldcontainer");
+
+ EnsureImageIsLoaded(AlpineImage, session.Name());
+
+ // Launch a container that exits almost immediately, then keep the returned proxy. Once it has
+ // exited it no longer counts as active by state, so only the held proxy can keep the VM up.
+ wsl::windows::common::WSLCContainerLauncher launcher(
+ wsl::shared::string::WideToMultiByte(AlpineImage.NameAndTag()),
+ "wslc-vmidle-heldcontainer",
+ {"/bin/true"},
+ {},
+ "none");
+
+ std::optional container = launcher.Launch(*session.Session(), WSLCContainerStartFlagsNone);
+
+ // Exercise the pure proxy-release path (not container deletion) as the trigger for teardown.
+ container->SetDeleteOnClose(false);
+
+ // Wait for the container to exit so it no longer keeps the VM alive by being Created/Running.
+ retry::RetryWithTimeout(
+ [&]() { THROW_HR_IF(E_FAIL, container->State() != WslcContainerStateExited); },
+ std::chrono::milliseconds(250),
+ std::chrono::seconds(60));
+
+ // The VM must remain running well past the idle grace period (30s) while the exited
+ // container's proxy is held. Without the pin the idle worker would tear the VM down ~30s
+ // after the launch returned, so a generous margin past the grace period catches that
+ // regression reliably.
+ std::this_thread::sleep_for(std::chrono::seconds(40));
+ VERIFY_IS_TRUE(static_cast(QueryDiagnostics(session).Running));
+
+ // Releasing the container proxy drops the last external reference and the VM idle-terminates.
+ container.reset();
+ WaitForVmRunningState(session, false);
+ }
+
+ // Back-to-back operations: each one arrives while the previous run's teardown may still be in
+ // flight. All operations must succeed (no spurious ERROR_INVALID_STATE from a stopping VM).
+ WSLC_TEST_METHOD(WSLCE2E_VmIdle_ConcurrentRecreateDoesNotFail)
+ {
+ auto session = TestSession::Create(L"wslc-vmidle-stress");
+
+ EnsureImageIsLoaded(AlpineImage, session.Name());
+
+ for (int i = 0; i < 12; i++)
+ {
+ // Intentionally do not wait between iterations so each lease races the previous
+ // run's idle teardown.
+ auto result = RunWslc(
+ std::format(L"container run --session {} --rm {} echo iteration-{}", session.Name(), AlpineImage.NameAndTag(), i));
+ result.Verify({.Stderr = L"", .ExitCode = 0});
+ }
+ }
+};
+
+} // namespace WSLCE2ETests