diff --git a/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs b/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs index 0582961..62c380d 100644 --- a/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs +++ b/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs @@ -14,15 +14,20 @@ public class ScreenCapability : NodeCapabilityBase private static readonly string[] _commands = new[] { "screen.capture", - "screen.list" - // Future: "screen.record" + "screen.list", + "screen.record", + "screen.record.start", + "screen.record.stop", }; - + public override IReadOnlyList Commands => _commands; - + // Events for UI/platform-specific implementation public event Func>? CaptureRequested; public event Func>? ListRequested; + public event Func>? RecordRequested; + public event Func>? StartRequested; + public event Func>? StopRequested; public ScreenCapability(IOpenClawLogger logger) : base(logger) { @@ -32,8 +37,11 @@ public override async Task ExecuteAsync(NodeInvokeRequest re { return request.Command switch { - "screen.capture" => await HandleCaptureAsync(request), - "screen.list" => await HandleListAsync(request), + "screen.capture" => await HandleCaptureAsync(request), + "screen.list" => await HandleListAsync(request), + "screen.record" => await HandleRecordAsync(request), + "screen.record.start" => await HandleStartAsync(request), + "screen.record.stop" => await HandleStopAsync(request), _ => Error($"Unknown command: {request.Command}") }; } @@ -114,6 +122,143 @@ private async Task HandleListAsync(NodeInvokeRequest request return Error($"List failed: {ex.Message}"); } } + + private async Task HandleRecordAsync(NodeInvokeRequest request) + { + var durationMs = GetIntArg(request.Args, "durationMs", 5000); + var fps = GetIntArg(request.Args, "fps", 10); + var screenIndex = GetIntArg(request.Args, "screenIndex", GetIntArg(request.Args, "monitor", 0)); + + Logger.Info($"screen.record: durationMs={durationMs} fps={fps} screenIndex={screenIndex}"); + + if (RecordRequested 
== null) + return Error("Screen recording not available"); + + try + { + var result = await RecordRequested(new ScreenRecordArgs + { + DurationMs = durationMs, + Fps = fps, + ScreenIndex = screenIndex, + }); + + return Success(new + { + format = result.Format, + base64 = result.Base64, + filePath = result.FilePath, + durationMs = result.DurationMs, + fps = result.Fps, + screenIndex = result.ScreenIndex, + width = result.Width, + height = result.Height, + hasAudio = result.HasAudio, + }); + } + catch (Exception ex) + { + Logger.Error("screen.record failed", ex); + return Error($"Record failed: {ex.GetType().Name}: {ex.Message} | {ex.StackTrace?.Split('\n').FirstOrDefault()?.Trim()}"); + } + } + + private async Task HandleStartAsync(NodeInvokeRequest request) + { + var fps = GetIntArg(request.Args, "fps", 10); + var screenIndex = GetIntArg(request.Args, "screenIndex", GetIntArg(request.Args, "monitor", 0)); + + Logger.Info($"screen.record.start: fps={fps} screenIndex={screenIndex}"); + + if (StartRequested == null) + return Error("Screen recording not available"); + + try + { + var recordingId = await StartRequested(new ScreenRecordStartArgs + { + Fps = fps, + ScreenIndex = screenIndex, + }); + return Success(new { recordingId }); + } + catch (Exception ex) + { + Logger.Error("screen.record.start failed", ex); + return Error($"Start failed: {ex.Message}"); + } + } + + private async Task HandleStopAsync(NodeInvokeRequest request) + { + var recordingId = GetStringArg(request.Args, "recordingId", ""); + + Logger.Info($"screen.record.stop: recordingId={recordingId}"); + + if (string.IsNullOrEmpty(recordingId)) + return Error("recordingId is required"); + + if (StopRequested == null) + return Error("Screen recording not available"); + + try + { + var result = await StopRequested(recordingId); + return Success(new + { + format = result.Format, + base64 = result.Base64, + filePath = result.FilePath, + durationMs = result.DurationMs, + fps = result.Fps, + screenIndex = 
result.ScreenIndex, + width = result.Width, + height = result.Height, + hasAudio = result.HasAudio, + }); + } + catch (Exception ex) + { + Logger.Error("screen.record.stop failed", ex); + return Error($"Stop failed: {ex.Message}"); + } + } +} + +/// +/// Parameters for a fixed-duration screen recording. +/// Memory usage: width × height × 4 bytes × (durationMs/1000 × fps) frames. +/// Recommended limits: durationMs ≤ 10 000, fps ≤ 10 for 1080p to stay under 500 MB. +/// The service enforces a hard 500 MB frame-buffer cap and stops capture early if exceeded. +/// +public class ScreenRecordArgs +{ + public int DurationMs { get; set; } = 5000; + public int Fps { get; set; } = 10; + public int ScreenIndex { get; set; } +} + +/// +/// Parameters for an open-ended screen recording session (screen.record.start / screen.record.stop). +/// The same 500 MB frame-buffer cap applies; capture stops automatically if the limit is hit. +/// +public class ScreenRecordStartArgs +{ + public int Fps { get; set; } = 10; + public int ScreenIndex { get; set; } +} + +public class ScreenRecordResult +{ + public string Base64 { get; set; } = ""; + public string Format { get; set; } = "mp4"; + public string? FilePath { get; set; } + public int DurationMs { get; set; } + public int Fps { get; set; } + public int ScreenIndex { get; set; } + public int Width { get; set; } + public int Height { get; set; } + public bool HasAudio { get; set; } } public class ScreenCaptureArgs diff --git a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs index 731359f..4915855 100644 --- a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs @@ -20,6 +20,7 @@ public class NodeService : IDisposable private WindowsNodeClient? _nodeClient; private CanvasWindow? _canvasWindow; private ScreenCaptureService? _screenCaptureService; + private ScreenRecordingService? _screenRecordingService; private CameraCaptureService? 
_cameraCaptureService; private DateTime _lastScreenCaptureNotification = DateTime.MinValue; private string? _a2uiHostUrl; @@ -49,8 +50,9 @@ public NodeService(IOpenClawLogger logger, DispatcherQueue dispatcherQueue, stri _logger = logger; _dispatcherQueue = dispatcherQueue; _dataPath = dataPath; - _screenCaptureService = new ScreenCaptureService(logger); - _cameraCaptureService = new CameraCaptureService(logger); + _screenCaptureService = new ScreenCaptureService(logger); + _screenRecordingService = new ScreenRecordingService(logger); + _cameraCaptureService = new CameraCaptureService(logger); } /// @@ -92,7 +94,9 @@ public async Task DisconnectAsync() _nodeClient.Dispose(); _nodeClient = null; } - + + _screenRecordingService?.StopAllSessions(); + // Close canvas window if (_canvasWindow != null && !_canvasWindow.IsClosed) { @@ -125,8 +129,11 @@ private void RegisterCapabilities() // Screen capability _screenCapability = new ScreenCapability(_logger); - _screenCapability.ListRequested += OnScreenList; + _screenCapability.ListRequested += OnScreenList; _screenCapability.CaptureRequested += OnScreenCapture; + _screenCapability.RecordRequested += OnScreenRecord; + _screenCapability.StartRequested += OnScreenRecordStart; + _screenCapability.StopRequested += OnScreenRecordStop; _nodeClient.RegisterCapability(_screenCapability); // Camera capability @@ -432,7 +439,31 @@ private async Task OnScreenCapture(ScreenCaptureArgs args) return await _screenCaptureService.CaptureAsync(args); } - + + private Task OnScreenRecord(ScreenRecordArgs args) + { + if (_screenRecordingService == null) + throw new InvalidOperationException("Screen recording service not available"); + + return _screenRecordingService.RecordAsync(args); + } + + private Task OnScreenRecordStart(ScreenRecordStartArgs args) + { + if (_screenRecordingService == null) + throw new InvalidOperationException("Screen recording service not available"); + + return _screenRecordingService.StartAsync(args); + } + + private 
Task OnScreenRecordStop(string recordingId) + { + if (_screenRecordingService == null) + throw new InvalidOperationException("Screen recording service not available"); + + return _screenRecordingService.StopAsync(recordingId); + } + #endregion #region Camera Capability Handlers @@ -483,6 +514,7 @@ public void Dispose() _nodeClient = null; try { client?.Dispose(); } catch { /* ignore */ } + try { _screenRecordingService?.Dispose(); } catch { /* ignore */ } try { _cameraCaptureService?.Dispose(); } catch { /* ignore */ } if (_canvasWindow != null && !_canvasWindow.IsClosed) diff --git a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs new file mode 100644 index 0000000..a13a740 --- /dev/null +++ b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs @@ -0,0 +1,593 @@ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Threading; +using System.Threading.Tasks; +using Windows.Graphics.Capture; +using Windows.Graphics.DirectX; +using Windows.Graphics.DirectX.Direct3D11; +using Windows.Graphics.Imaging; +using Windows.Media.Core; +using Windows.Media.MediaProperties; +using Windows.Media.Transcoding; +using Windows.Storage.Streams; +using OpenClaw.Shared; +using OpenClaw.Shared.Capabilities; +using WinRT; + +namespace OpenClawTray.Services; + +/// +/// Records the screen using Windows.Graphics.Capture and encodes to MP4 via MediaTranscoder. +/// +internal sealed class ScreenRecordingService : IDisposable +{ + private readonly IOpenClawLogger _logger; + private readonly ConcurrentDictionary _sessions = new(); + + private const int MaxFps = 60; + private const int MinFps = 1; + private const int MinDurationMs = 250; + private const int MaxDurationMs = 60_000; + private const int PoolBuffers = 2; + + // BGRA frame buffer safety cap: ~500 MB across all queued frames. 
+ // At 1080p (8 MB/frame) this allows ~62 frames; at 720p (~4 MB) ~125 frames. + // Frames beyond this limit are dropped to prevent OOM on long/high-fps recordings. + private const long MaxFrameBufferBytes = 500L * 1024 * 1024; + + public ScreenRecordingService(IOpenClawLogger logger) + { + _logger = logger; + } + + // ── Public API ──────────────────────────────────────────────────────────── + + public async Task RecordAsync(ScreenRecordArgs args) + { + var durationMs = Math.Clamp(args.DurationMs, MinDurationMs, MaxDurationMs); + var fps = Math.Clamp(args.Fps, MinFps, MaxFps); + var screenIndex = args.ScreenIndex; + + _logger.Info($"[ScreenRecording] duration={durationMs}ms fps={fps} screen={screenIndex}"); + + var item = CreateCaptureItem(screenIndex); + var width = item.Size.Width; + var height = item.Size.Height; + var d3d = CreateDirect3DDevice(); + + Direct3D11CaptureFramePool? pool = null; + GraphicsCaptureSession? session = null; + var latestFrame = (Direct3D11CaptureFrame?)null; + using var ready = new SemaphoreSlim(0, 1); + var frames = new List(); + var frameBytes = (long)width * height * 4; // BGRA bytes per frame + + try + { + pool = Direct3D11CaptureFramePool.CreateFreeThreaded( + d3d, + DirectXPixelFormat.B8G8R8A8UIntNormalized, + PoolBuffers, + new global::Windows.Graphics.SizeInt32 { Width = width, Height = height }); + + session = pool.CreateCaptureSession(item); + session.IsCursorCaptureEnabled = false; + + pool.FrameArrived += (p, _) => + { + var f = p.TryGetNextFrame(); + if (f == null) return; + Interlocked.Exchange(ref latestFrame, f)?.Dispose(); + try { ready.Release(); } catch { /* already signaled */ } + }; + + session.StartCapture(); + + var intervalMs = 1000 / fps; + var deadline = DateTime.UtcNow.AddMilliseconds(durationMs); + var nextCapture = DateTime.UtcNow; + + while (DateTime.UtcNow < deadline) + { + var waitMs = (int)(nextCapture - DateTime.UtcNow).TotalMilliseconds; + if (waitMs > 0) + await Task.Delay(waitMs); + + if (!await 
ready.WaitAsync(intervalMs * 2)) + continue; + + var frame = Interlocked.Exchange(ref latestFrame, null); + if (frame == null) continue; + + using (frame) + { + if (frames.Count * frameBytes >= MaxFrameBufferBytes) + { + _logger.Warn($"[ScreenRecording] Frame buffer cap reached ({MaxFrameBufferBytes / 1024 / 1024} MB), stopping early."); + break; + } + + try + { + using var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface); + frames.Add(ExtractBitmapBytes(bmp)); + } + catch (Exception ex) + { + _logger.Warn($"[ScreenRecording] Frame skipped: {ex.Message}"); + } + } + + nextCapture = nextCapture.AddMilliseconds(intervalMs); + } + } + finally + { + session?.Dispose(); + pool?.Dispose(); + (d3d as IDisposable)?.Dispose(); + Interlocked.Exchange(ref latestFrame, null)?.Dispose(); + } + + _logger.Info($"[ScreenRecording] Captured {frames.Count} frames, encoding..."); + + var base64 = await EncodeToMp4Async(frames, width, height, fps); + var filePath = SaveToTempFile(base64); + + return new ScreenRecordResult + { + Format = "mp4", + Base64 = base64, + FilePath = filePath, + DurationMs = durationMs, + Fps = fps, + ScreenIndex = screenIndex, + Width = width, + Height = height, + HasAudio = false, + }; + } + + public Task StartAsync(ScreenRecordStartArgs args) + { + var fps = Math.Clamp(args.Fps, MinFps, MaxFps); + var screenIndex = args.ScreenIndex; + + _logger.Info($"[ScreenRecording] start fps={fps} screen={screenIndex}"); + + var item = CreateCaptureItem(screenIndex); + var width = item.Size.Width; + var height = item.Size.Height; + var d3d = CreateDirect3DDevice(); + + var pool = Direct3D11CaptureFramePool.CreateFreeThreaded( + d3d, + DirectXPixelFormat.B8G8R8A8UIntNormalized, + PoolBuffers, + new global::Windows.Graphics.SizeInt32 { Width = width, Height = height }); + + var captureSession = pool.CreateCaptureSession(item); + captureSession.IsCursorCaptureEnabled = false; + + var session = new ActiveSession(screenIndex, fps, width, height, d3d, 
pool, captureSession, _logger); + _sessions[session.Id] = session; + + _logger.Info($"[ScreenRecording] started session {session.Id}"); + return Task.FromResult(session.Id); + } + + public async Task StopAsync(string recordingId) + { + if (!_sessions.TryRemove(recordingId, out var session)) + throw new KeyNotFoundException($"Recording session '{recordingId}' not found"); + + _logger.Info($"[ScreenRecording] stopping session {recordingId}..."); + + List frames; + int width, height, fps, screenIndex, durationMs; + using (session) + { + (frames, durationMs) = await session.StopAsync(); + width = session.Width; + height = session.Height; + fps = session.Fps; + screenIndex = session.ScreenIndex; + } + + _logger.Info($"[ScreenRecording] session {recordingId}: {frames.Count} frames, encoding..."); + var base64 = await EncodeToMp4Async(frames, width, height, fps); + var filePath = SaveToTempFile(base64); + + return new ScreenRecordResult + { + Format = "mp4", + Base64 = base64, + FilePath = filePath, + DurationMs = durationMs, + Fps = fps, + ScreenIndex = screenIndex, + Width = width, + Height = height, + HasAudio = false, + }; + } + + public void StopAllSessions() + { + foreach (var kv in _sessions) + { + if (_sessions.TryRemove(kv.Key, out var s)) + try { s.Dispose(); } catch { } + } + } + + public void Dispose() => StopAllSessions(); + + // ── Temp file ───────────────────────────────────────────────────────────── + + private string SaveToTempFile(string base64) + { + var dir = Path.Combine(Path.GetTempPath(), "openclaw"); + Directory.CreateDirectory(dir); + CleanupOldTempRecordings(dir); + var path = Path.Combine(dir, $"openclaw-screen-record-{Guid.NewGuid()}.mp4"); + File.WriteAllBytes(path, Convert.FromBase64String(base64)); + _logger.Info($"[ScreenRecording] Saved to {path}"); + return path; + } + + private void CleanupOldTempRecordings(string dir) + { + try + { + foreach (var file in Directory.EnumerateFiles(dir, "openclaw-screen-record-*.mp4")) + { + try + { + if 
(new FileInfo(file).CreationTimeUtc < DateTime.UtcNow.AddHours(-24)) + File.Delete(file); + } + catch (IOException) { } + catch (UnauthorizedAccessException) { } + } + } + catch (Exception ex) + { + _logger.Warn($"[ScreenRecording] Temp cleanup failed: {ex.Message}"); + } + } + + // ── Encoding ────────────────────────────────────────────────────────────── + + private static async Task EncodeToMp4Async( + List frames, int width, int height, int fps) + { + if (frames.Count == 0) + throw new InvalidOperationException("No frames to encode"); + + var encWidth = (uint)(width & ~1); + var encHeight = (uint)(height & ~1); + var fi = new[] { 0 }; + + MediaStreamSource MakeMss() + { + fi[0] = 0; + var inputProps = VideoEncodingProperties.CreateUncompressed( + MediaEncodingSubtypes.Nv12, encWidth, encHeight); + inputProps.FrameRate.Numerator = (uint)fps; + inputProps.FrameRate.Denominator = 1; + var mss = new MediaStreamSource(new VideoStreamDescriptor(inputProps)); + mss.BufferTime = TimeSpan.Zero; + mss.SampleRequested += (_, e) => + { + if (fi[0] >= frames.Count) { e.Request.Sample = null; return; } + var nv12 = BgraToNv12(frames[fi[0]], width, height, (int)encWidth, (int)encHeight); + var ts = TimeSpan.FromTicks((long)(fi[0] * 10_000_000.0 / fps)); + var dur = TimeSpan.FromTicks((long)(10_000_000.0 / fps)); + using var dw = new DataWriter(); + dw.WriteBytes(nv12); + var sample = MediaStreamSample.CreateFromBuffer(dw.DetachBuffer(), ts); + sample.Duration = dur; + e.Request.Sample = sample; + fi[0]++; + }; + return mss; + } + + MediaEncodingProfile MakeProfile() + { + var profile = MediaEncodingProfile.CreateMp4(VideoEncodingQuality.Auto); + profile.Video.Width = encWidth; + profile.Video.Height = encHeight; + profile.Video.Bitrate = 4_000_000; + profile.Video.FrameRate.Numerator = (uint)fps; + profile.Video.FrameRate.Denominator = 1; + profile.Audio = null; + return profile; + } + + foreach (var hwEnabled in new[] { true, false }) + { + using var output = new 
InMemoryRandomAccessStream(); + var transcoder = new MediaTranscoder { HardwareAccelerationEnabled = hwEnabled }; + PrepareTranscodeResult result; + try + { + result = await transcoder + .PrepareMediaStreamSourceTranscodeAsync(MakeMss(), output, MakeProfile()); + } + catch (System.Runtime.InteropServices.COMException) when (hwEnabled) + { + continue; + } + if (!result.CanTranscode) continue; + await result.TranscodeAsync(); + var size = (uint)output.Size; + if (size == 0) continue; + var dr = new DataReader(output.GetInputStreamAt(0)); + await dr.LoadAsync(size); + var bytes = new byte[size]; + dr.ReadBytes(bytes); + return Convert.ToBase64String(bytes); + } + + throw new InvalidOperationException("No encoder available (hardware or software)"); + } + + private static byte[] BgraToNv12(byte[] bgra, int srcWidth, int srcHeight, + int encWidth, int encHeight) + { + var nv12 = new byte[encWidth * encHeight * 3 / 2]; + for (int y = 0; y < encHeight; y++) + for (int x = 0; x < encWidth; x++) + { + int i = (y * srcWidth + x) * 4; + int b = bgra[i], g = bgra[i + 1], r = bgra[i + 2]; + nv12[y * encWidth + x] = (byte)(((66 * r + 129 * g + 25 * b + 128) >> 8) + 16); + } + int uvBase = encWidth * encHeight; + for (int y = 0; y < encHeight; y += 2) + for (int x = 0; x < encWidth; x += 2) + { + int i = (y * srcWidth + x) * 4; + int b = bgra[i], g = bgra[i + 1], r = bgra[i + 2]; + int uvIdx = uvBase + (y / 2) * encWidth + x; + nv12[uvIdx] = (byte)(((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128); + nv12[uvIdx + 1] = (byte)(((112 * r - 94 * g - 18 * b + 128) >> 8) + 128); + } + return nv12; + } + + // ── D3D11 / WinRT interop ───────────────────────────────────────────────── + + // IID_IDXGIDevice + private static readonly Guid IID_DXGIDevice = + new Guid("54ec77fa-1377-44e6-8c32-88fd5f44c84c"); + + private static IDirect3DDevice CreateDirect3DDevice() + { + // D3D_DRIVER_TYPE_HARDWARE=1, D3D11_CREATE_DEVICE_BGRA_SUPPORT=0x20, D3D11_SDK_VERSION=7 + D3D11CreateDevice(IntPtr.Zero, 
1, IntPtr.Zero, 0x20, IntPtr.Zero, 0, 7, + out var d3dPtr, IntPtr.Zero, IntPtr.Zero); + + var iid = IID_DXGIDevice; + Marshal.QueryInterface(d3dPtr, ref iid, out var dxgiPtr); + Marshal.Release(d3dPtr); + + NativeCreateDirect3D11DeviceFromDXGIDevice(dxgiPtr, out var winrtPtr); + Marshal.Release(dxgiPtr); + + var device = MarshalInterface.FromAbi(winrtPtr); + Marshal.Release(winrtPtr); + return device; + } + + private static GraphicsCaptureItem CreateCaptureItem(int screenIndex) + { + var monitors = GetMonitorHandles(); + if (monitors.Count == 0) + throw new InvalidOperationException("No screens available for capture"); + if (screenIndex < 0 || screenIndex >= monitors.Count) + throw new ArgumentOutOfRangeException(nameof(screenIndex), + $"Screen index {screenIndex} is out of range (0\u2013{monitors.Count - 1})"); + + const string classId = "Windows.Graphics.Capture.GraphicsCaptureItem"; + var iid = typeof(IGraphicsCaptureItemInterop).GUID; + + WindowsCreateString(classId, classId.Length, out var hstring); + try + { + RoGetActivationFactory(hstring, ref iid, out var factoryPtr); + var factory = (IGraphicsCaptureItemInterop)Marshal.GetObjectForIUnknown(factoryPtr); + Marshal.Release(factoryPtr); + + var itemIid = new Guid("AF86E2E0-B12D-4C6A-9C5A-D7AA65101E90"); // IInspectable + factory.CreateForMonitor(monitors[screenIndex], in itemIid, out var itemPtr); + + var item = MarshalInspectable.FromAbi(itemPtr); + Marshal.Release(itemPtr); + return item; + } + finally + { + WindowsDeleteString(hstring); + } + } + + private static List GetMonitorHandles() + { + var handles = new List(); + EnumDisplayMonitors(IntPtr.Zero, IntPtr.Zero, + (hMon, _, ref _, _) => { handles.Add(hMon); return true; }, + IntPtr.Zero); + return handles; + } + + private static byte[] ExtractBitmapBytes(SoftwareBitmap bitmap) + { + var capacity = (uint)(bitmap.PixelWidth * bitmap.PixelHeight * 4); + var buf = new global::Windows.Storage.Streams.Buffer(capacity); + bitmap.CopyToBuffer(buf); + using var 
dr = DataReader.FromBuffer(buf); + var bytes = new byte[buf.Length]; + dr.ReadBytes(bytes); + return bytes; + } + + // ── P/Invoke declarations ───────────────────────────────────────────────── + + [DllImport("d3d11.dll")] + private static extern int D3D11CreateDevice( + IntPtr pAdapter, uint DriverType, IntPtr Software, uint Flags, + IntPtr pFeatureLevels, uint FeatureLevels, uint SDKVersion, + out IntPtr ppDevice, IntPtr pFeatureLevel, IntPtr ppImmediateContext); + + [DllImport("d3d11.dll", EntryPoint = "CreateDirect3D11DeviceFromDXGIDevice")] + private static extern int NativeCreateDirect3D11DeviceFromDXGIDevice( + IntPtr dxgiDevice, out IntPtr graphicsDevice); + + [DllImport("combase.dll")] + private static extern int WindowsCreateString( + [MarshalAs(UnmanagedType.LPWStr)] string sourceString, int length, out IntPtr hstring); + + [DllImport("combase.dll")] + private static extern int WindowsDeleteString(IntPtr hstring); + + [DllImport("combase.dll")] + private static extern int RoGetActivationFactory( + IntPtr runtimeClassId, ref Guid iid, out IntPtr factory); + + [DllImport("user32.dll")] + private static extern bool EnumDisplayMonitors( + IntPtr hdc, IntPtr lprcClip, MonitorEnumProc lpfnEnum, IntPtr dwData); + + private delegate bool MonitorEnumProc( + IntPtr hMonitor, IntPtr hdcMonitor, ref RECT lprcMonitor, IntPtr dwData); + + [StructLayout(LayoutKind.Sequential)] + private struct RECT { public int Left, Top, Right, Bottom; } + + [ComImport] + [Guid("3628E81B-3CAC-4C60-B7F4-23CE0E0C3356")] + [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)] + private interface IGraphicsCaptureItemInterop + { + void CreateForWindow(IntPtr hwnd, in Guid riid, out IntPtr ppv); + void CreateForMonitor(IntPtr hMonitor, in Guid riid, out IntPtr ppv); + } + + // ── Active session (start/stop) ─────────────────────────────────────────── + + private sealed class ActiveSession : IDisposable + { + public readonly string Id = Guid.NewGuid().ToString("N")[..12]; + public readonly 
int ScreenIndex; + public readonly int Fps; + public readonly int Width; + public readonly int Height; + + private readonly IOpenClawLogger _logger; + private readonly IDirect3DDevice _device; + private readonly List _frames = new(); + private readonly object _framesLock = new(); + private readonly CancellationTokenSource _cts = new(); + private readonly Direct3D11CaptureFramePool _pool; + private readonly GraphicsCaptureSession _session; + private readonly DateTime _startedAt = DateTime.UtcNow; + private volatile Direct3D11CaptureFrame? _latestFrame; + private readonly SemaphoreSlim _ready = new(0, 1); + private readonly Task _captureTask; + + public ActiveSession(int screenIndex, int fps, int width, int height, + IDirect3DDevice device, Direct3D11CaptureFramePool pool, GraphicsCaptureSession session, + IOpenClawLogger logger) + { + ScreenIndex = screenIndex; Fps = fps; Width = width; Height = height; + _device = device; _pool = pool; _session = session; _logger = logger; + + pool.FrameArrived += OnFrameArrived; + session.StartCapture(); + _captureTask = RunAsync(_cts.Token); + } + + private void OnFrameArrived(Direct3D11CaptureFramePool pool, object _) + { + var f = pool.TryGetNextFrame(); + if (f == null) return; + Interlocked.Exchange(ref _latestFrame, f)?.Dispose(); + try { _ready.Release(); } catch { /* already signaled */ } + } + + private async Task RunAsync(CancellationToken ct) + { + var intervalMs = 1000 / Fps; + var nextCapture = DateTime.UtcNow; + var frameBytes = (long)Width * Height * 4; + + while (!ct.IsCancellationRequested) + { + try + { + var waitMs = (int)(nextCapture - DateTime.UtcNow).TotalMilliseconds; + if (waitMs > 0) await Task.Delay(waitMs, ct); + + if (!await _ready.WaitAsync(intervalMs * 2, ct)) continue; + } + catch (OperationCanceledException) { break; } + + var frame = Interlocked.Exchange(ref _latestFrame, null); + if (frame == null) continue; + + using (frame) + { + int frameCount; + lock (_framesLock) frameCount = _frames.Count; + 
if (frameCount * frameBytes >= MaxFrameBufferBytes) + { + _logger.Warn($"[ScreenRecording] Session {Id}: frame buffer cap reached ({MaxFrameBufferBytes / 1024 / 1024} MB), stopping capture."); + _cts.Cancel(); + break; + } + + try + { + using var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface); + var bytes = ExtractBitmapBytes(bmp); + lock (_framesLock) _frames.Add(bytes); + } + catch (Exception ex) + { + _logger.Warn($"[ScreenRecording] Session {Id} frame skipped: {ex.Message}"); + } + } + + nextCapture = nextCapture.AddMilliseconds(intervalMs); + } + } + + public async Task<(List frames, int durationMs)> StopAsync() + { + _cts.Cancel(); + try { await _captureTask; } catch (OperationCanceledException) { } catch { } + + var durationMs = (int)(DateTime.UtcNow - _startedAt).TotalMilliseconds; + List snapshot; + lock (_framesLock) snapshot = new List(_frames); + return (snapshot, durationMs); + } + + public void Dispose() + { + _cts.Cancel(); + try { _captureTask.GetAwaiter().GetResult(); } catch { } + try { _session.Dispose(); } catch { } + try { _pool.Dispose(); } catch { } + try { (_device as IDisposable)?.Dispose(); } catch { } + Interlocked.Exchange(ref _latestFrame, null)?.Dispose(); + _cts.Dispose(); + _ready.Dispose(); + } + } +} diff --git a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs index 67de774..4d6a57f 100644 --- a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs +++ b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs @@ -683,7 +683,8 @@ public void CanHandle_ScreenCommands() var cap = new ScreenCapability(NullLogger.Instance); Assert.True(cap.CanHandle("screen.capture")); Assert.True(cap.CanHandle("screen.list")); - Assert.False(cap.CanHandle("screen.record")); + Assert.True(cap.CanHandle("screen.record")); + Assert.False(cap.CanHandle("screen.unknown")); Assert.Equal("screen", cap.Category); } @@ -835,6 +836,298 @@ public async Task Capture_UsesMonitorAlias_ForScreenIndex() 
Assert.NotNull(receivedArgs); Assert.Equal(2, receivedArgs!.MonitorIndex); } + + [Fact] + public async Task Record_ReturnsError_WhenNoHandler() + { + var cap = new ScreenCapability(NullLogger.Instance); + var req = new NodeInvokeRequest { Id = "sr1", Command = "screen.record", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("not available", res.Error, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task Record_CallsHandler_WithArgs() + { + var cap = new ScreenCapability(NullLogger.Instance); + ScreenRecordArgs? receivedArgs = null; + cap.RecordRequested += (args) => + { + receivedArgs = args; + return Task.FromResult(new ScreenRecordResult + { + Format = "mp4", Base64 = "vid", DurationMs = 2000, Fps = 10, + ScreenIndex = 1, Width = 1920, Height = 1080 + }); + }; + + var req = new NodeInvokeRequest + { + Id = "sr2", + Command = "screen.record", + Args = Parse("""{"durationMs":2000,"fps":10,"screenIndex":1}""") + }; + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.NotNull(receivedArgs); + Assert.Equal(2000, receivedArgs!.DurationMs); + Assert.Equal(10, receivedArgs.Fps); + Assert.Equal(1, receivedArgs.ScreenIndex); + + var json = JsonSerializer.Serialize(res.Payload); + using var doc = JsonDocument.Parse(json); + var root = doc.RootElement; + Assert.Equal("mp4", root.GetProperty("format").GetString()); + Assert.Equal("vid", root.GetProperty("base64").GetString()); + Assert.Equal(2000, root.GetProperty("durationMs").GetInt32()); + Assert.Equal(10, root.GetProperty("fps").GetInt32()); + Assert.Equal(1, root.GetProperty("screenIndex").GetInt32()); + Assert.Equal(1920, root.GetProperty("width").GetInt32()); + Assert.Equal(1080, root.GetProperty("height").GetInt32()); + Assert.False( root.GetProperty("hasAudio").GetBoolean()); + } + + [Fact] + public async Task Record_UsesDefaults_WhenArgsMissing() + { + var cap = new ScreenCapability(NullLogger.Instance); + 
ScreenRecordArgs? receivedArgs = null;
        cap.RecordRequested += (args) =>
        {
            receivedArgs = args;
            return Task.FromResult(new ScreenRecordResult());
        };

        var req = new NodeInvokeRequest { Id = "sr3", Command = "screen.record", Args = Parse("""{}""") };
        var res = await cap.ExecuteAsync(req);
        Assert.True(res.Ok);
        Assert.Equal(5000, receivedArgs!.DurationMs);
        Assert.Equal(10, receivedArgs.Fps);
        Assert.Equal(0, receivedArgs.ScreenIndex);
    }

    /// <summary>
    /// screen.record: a "monitor" argument is used as the screen index when
    /// "screenIndex" is not supplied.
    /// </summary>
    [Fact]
    public async Task Record_UsesMonitorAlias_ForScreenIndex()
    {
        ScreenRecordArgs? captured = null;
        var capability = new ScreenCapability(NullLogger.Instance);
        capability.RecordRequested += recordArgs =>
        {
            captured = recordArgs;
            return Task.FromResult(new ScreenRecordResult());
        };
        var request = new NodeInvokeRequest
        {
            Id = "sr4",
            Command = "screen.record",
            Args = Parse("""{"monitor":2}"""),
        };

        var response = await capability.ExecuteAsync(request);

        Assert.True(response.Ok);
        Assert.Equal(2, captured!.ScreenIndex);
    }

    /// <summary>
    /// screen.record: an exception thrown by the handler is reported as a failed
    /// response whose error text contains the exception message.
    /// </summary>
    [Fact]
    public async Task Record_ReturnsError_WhenHandlerThrows()
    {
        var capability = new ScreenCapability(NullLogger.Instance);
        capability.RecordRequested += _ => throw new InvalidOperationException("GPU capture failed");
        var request = new NodeInvokeRequest
        {
            Id = "sr5",
            Command = "screen.record",
            Args = Parse("""{}"""),
        };

        var response = await capability.ExecuteAsync(request);

        Assert.False(response.Ok);
        Assert.Contains("GPU capture failed", response.Error);
    }

    /// <summary>
    /// screen.record: an out-of-range failure raised by the handler is reported
    /// as an error that mentions the offending "screenIndex" parameter.
    /// </summary>
    [Fact]
    public async Task Record_PropagatesOutOfRangeAsError()
    {
        var capability = new ScreenCapability(NullLogger.Instance);
        capability.RecordRequested += (_) =>
            throw new ArgumentOutOfRangeException("screenIndex", "Screen index 5 is out of range (0\u20131)");
        var request = new NodeInvokeRequest
        {
            Id = "sr6",
            Command = "screen.record",
            Args = Parse("""{"screenIndex":5}"""),
        };

        var response = await capability.ExecuteAsync(request);

        Assert.False(response.Ok);
        Assert.Contains("screenIndex", response.Error ?? "");
    }

    // ── screen.record.start ────────────────────────────────────────────────────

    /// <summary>
    /// The capability advertises the start/stop commands and rejects a
    /// command it does not list (screen.record.pause).
    /// </summary>
    [Fact]
    public void CanHandle_RecordStartStop()
    {
        var capability = new ScreenCapability(NullLogger.Instance);

        Assert.True(capability.CanHandle("screen.record.start"));
        Assert.True(capability.CanHandle("screen.record.stop"));
        Assert.False(capability.CanHandle("screen.record.pause"));
    }

    /// <summary>
    /// screen.record.start: with no StartRequested subscriber the call fails
    /// with a "not available" error.
    /// </summary>
    [Fact]
    public async Task Start_ReturnsError_WhenNoHandler()
    {
        var capability = new ScreenCapability(NullLogger.Instance);
        var request = new NodeInvokeRequest
        {
            Id = "ss1",
            Command = "screen.record.start",
            Args = Parse("""{}"""),
        };

        var response = await capability.ExecuteAsync(request);

        Assert.False(response.Ok);
        Assert.Contains("not available", response.Error!, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// screen.record.start: the parsed fps/screenIndex reach the handler and the
    /// id it returns is exposed as "recordingId" in the payload.
    /// </summary>
    [Fact]
    public async Task Start_CallsHandler_WithArgs_AndReturnsRecordingId()
    {
        ScreenRecordStartArgs? captured = null;
        var capability = new ScreenCapability(NullLogger.Instance);
        capability.StartRequested += startArgs =>
        {
            captured = startArgs;
            return Task.FromResult("abc123");
        };
        var request = new NodeInvokeRequest
        {
            Id = "ss2",
            Command = "screen.record.start",
            Args = Parse("""{"fps":15,"screenIndex":2}"""),
        };

        var response = await capability.ExecuteAsync(request);

        Assert.True(response.Ok);
        Assert.NotNull(captured);
        Assert.Equal(15, captured!.Fps);
        Assert.Equal(2, captured.ScreenIndex);

        // Round-trip the payload through JSON so the serialized property name is checked.
        var payloadJson = JsonSerializer.Serialize(response.Payload);
        using var payloadDoc = JsonDocument.Parse(payloadJson);
        Assert.Equal("abc123", payloadDoc.RootElement.GetProperty("recordingId").GetString());
    }

    /// <summary>
    /// screen.record.start: a "monitor" argument is used as the screen index
    /// when "screenIndex" is not supplied.
    /// </summary>
    [Fact]
    public async Task Start_UsesMonitorAlias_ForScreenIndex()
    {
        ScreenRecordStartArgs? captured = null;
        var capability = new ScreenCapability(NullLogger.Instance);
        capability.StartRequested += startArgs =>
        {
            captured = startArgs;
            return Task.FromResult("id1");
        };
        var request = new NodeInvokeRequest
        {
            Id = "ss3",
            Command = "screen.record.start",
            Args = Parse("""{"monitor":1}"""),
        };

        await capability.ExecuteAsync(request);

        Assert.Equal(1, captured!.ScreenIndex);
    }

    /// <summary>
    /// screen.record.start: an exception thrown by the handler is reported as a
    /// failed response whose error text contains the exception message.
    /// </summary>
    [Fact]
    public async Task Start_ReturnsError_WhenHandlerThrows()
    {
        var capability = new ScreenCapability(NullLogger.Instance);
        capability.StartRequested += (_) => throw new InvalidOperationException("D3D init failed");
        var request = new NodeInvokeRequest
        {
            Id = "ss4",
            Command = "screen.record.start",
            Args = Parse("""{}"""),
        };

        var response = await capability.ExecuteAsync(request);

        Assert.False(response.Ok);
        Assert.Contains("D3D init failed", response.Error);
    }

    // ── screen.record.stop ─────────────────────────────────────────────────────

    /// <summary>
    /// screen.record.stop: with no StopRequested subscriber the call fails with
    /// a "not available" error, even when a recordingId is supplied.
    /// </summary>
    [Fact]
    public async Task Stop_ReturnsError_WhenNoHandler()
    {
        var capability = new ScreenCapability(NullLogger.Instance);
        var request = new NodeInvokeRequest
        {
            Id = "st1",
            Command = "screen.record.stop",
            Args = Parse("""{"recordingId":"abc"}"""),
        };

        var response = await capability.ExecuteAsync(request);

        Assert.False(response.Ok);
        Assert.Contains("not available", response.Error!, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// screen.record.stop: omitting "recordingId" yields an error that names the
    /// missing argument.
    /// </summary>
    [Fact]
    public async Task Stop_ReturnsError_WhenMissingRecordingId()
    {
        var capability = new ScreenCapability(NullLogger.Instance);
        capability.StopRequested += (_) => Task.FromResult(new ScreenRecordResult());
        var request = new NodeInvokeRequest
        {
            Id = "st2",
            Command = "screen.record.stop",
            Args = Parse("""{}"""),
        };

        var response = await capability.ExecuteAsync(request);

        Assert.False(response.Ok);
        Assert.Contains("recordingId", response.Error!, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// screen.record.stop: the recordingId reaches the handler and every field of
    /// the handler's result appears in the serialized payload.
    /// </summary>
    [Fact]
    public async Task Stop_CallsHandler_WithRecordingId_AndReturnsFullPayload()
    {
        string? seenRecordingId = null;
        var capability = new ScreenCapability(NullLogger.Instance);
        capability.StopRequested += recordingId =>
        {
            seenRecordingId = recordingId;
            return Task.FromResult(new ScreenRecordResult
            {
                Format = "mp4",
                Base64 = "dGVzdA==",
                DurationMs = 3200,
                Fps = 15,
                ScreenIndex = 1,
                Width = 1920,
                Height = 1080,
                HasAudio = false,
            });
        };
        var request = new NodeInvokeRequest
        {
            Id = "st3",
            Command = "screen.record.stop",
            Args = Parse("""{"recordingId":"myRecId"}"""),
        };

        var response = await capability.ExecuteAsync(request);

        Assert.True(response.Ok);
        Assert.Equal("myRecId", seenRecordingId);

        // Inspect the payload as JSON so the serialized property names are checked too.
        var payloadJson = JsonSerializer.Serialize(response.Payload);
        using var payloadDoc = JsonDocument.Parse(payloadJson);
        var payload = payloadDoc.RootElement;
        Assert.Equal("mp4", payload.GetProperty("format").GetString());
        Assert.Equal("dGVzdA==", payload.GetProperty("base64").GetString());
        Assert.Equal(3200, payload.GetProperty("durationMs").GetInt32());
        Assert.Equal(15, payload.GetProperty("fps").GetInt32());
        Assert.Equal(1, payload.GetProperty("screenIndex").GetInt32());
        Assert.Equal(1920, payload.GetProperty("width").GetInt32());
        Assert.Equal(1080, payload.GetProperty("height").GetInt32());
        Assert.False(payload.GetProperty("hasAudio").GetBoolean());
    }

    /// <summary>
    /// screen.record.stop: an exception thrown by the handler is reported as a
    /// failed response whose error text contains the exception message.
    /// </summary>
    [Fact]
    public async Task Stop_ReturnsError_WhenHandlerThrows()
    {
        var capability = new ScreenCapability(NullLogger.Instance);
        capability.StopRequested += (_) => throw new KeyNotFoundException("session not found");
        var request = new NodeInvokeRequest
        {
            Id = "st4",
            Command = "screen.record.stop",
            Args = Parse("""{"recordingId":"bad"}"""),
        };

        var response = await capability.ExecuteAsync(request);

        Assert.False(response.Ok);
        Assert.Contains("session not found", response.Error);
    }
}

public class CameraCapabilityTests