Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -183,16 +183,25 @@ extension RunnerTests {
if let requestedFps = command.fps, (requestedFps < minRecordingFps || requestedFps > maxRecordingFps) {
return Response(ok: false, error: ErrorPayload(message: "recordStart fps must be between \(minRecordingFps) and \(maxRecordingFps)"))
}
if let requestedQuality = command.quality, (requestedQuality < minRecordingQuality || requestedQuality > maxRecordingQuality) {
return Response(ok: false, error: ErrorPayload(message: "recordStart quality must be between \(minRecordingQuality) and \(maxRecordingQuality)"))
}
do {
let resolvedOutPath = resolveRecordingOutPath(requestedOutPath)
let fpsLabel = command.fps.map(String.init) ?? String(RunnerTests.defaultRecordingFps)
let qualityLabel = command.quality.map(String.init) ?? "native"
NSLog(
"AGENT_DEVICE_RUNNER_RECORD_START requestedOutPath=%@ resolvedOutPath=%@ fps=%@",
"AGENT_DEVICE_RUNNER_RECORD_START requestedOutPath=%@ resolvedOutPath=%@ fps=%@ quality=%@",
requestedOutPath,
resolvedOutPath,
fpsLabel
fpsLabel,
qualityLabel
)
let recorder = ScreenRecorder(
outputPath: resolvedOutPath,
fps: command.fps.map { Int32($0) },
quality: command.quality
)
let recorder = ScreenRecorder(outputPath: resolvedOutPath, fps: command.fps.map { Int32($0) })
try recorder.start { [weak self] in
return self?.captureRunnerFrame()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ struct Command: Codable {
let scale: Double?
let outPath: String?
let fps: Int?
let quality: Int?
let interactiveOnly: Bool?
let compact: Bool?
let depth: Int?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ extension RunnerTests {
final class ScreenRecorder {
private let outputPath: String
private let fps: Int32?
private let quality: Int?
private var effectiveFps: Int32 {
max(1, fps ?? RunnerTests.defaultRecordingFps)
}
Expand All @@ -25,9 +26,10 @@ extension RunnerTests {
private var startedSession = false
private var startError: Error?

init(outputPath: String, fps: Int32?) {
init(outputPath: String, fps: Int32?, quality: Int?) {
self.outputPath = outputPath
self.fps = fps
self.quality = quality
}

func start(captureFrame: @escaping () -> RunnerImage?) throws {
Expand All @@ -48,7 +50,7 @@ extension RunnerTests {
while Date() < bootstrapDeadline {
if let image = captureFrame(), let cgImage = runnerCGImage(from: image) {
bootstrapImage = image
dimensions = CGSize(width: cgImage.width, height: cgImage.height)
dimensions = scaledDimensions(width: cgImage.width, height: cgImage.height)
break
}
Thread.sleep(forTimeInterval: 0.05)
Expand Down Expand Up @@ -240,11 +242,13 @@ extension RunnerTests {

CVPixelBufferLockBaseAddress(pixelBuffer, [])
defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, []) }
let width = CVPixelBufferGetWidth(pixelBuffer)
let height = CVPixelBufferGetHeight(pixelBuffer)
guard
let context = CGContext(
data: CVPixelBufferGetBaseAddress(pixelBuffer),
width: image.width,
height: image.height,
width: width,
height: height,
bitsPerComponent: 8,
bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer),
space: CGColorSpaceCreateDeviceRGB(),
Expand All @@ -253,8 +257,23 @@ extension RunnerTests {
else {
return nil
}
context.draw(image, in: CGRect(x: 0, y: 0, width: image.width, height: image.height))
context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))
return pixelBuffer
}

/// Returns the recording canvas size for a captured frame of `width` x `height` pixels.
///
/// When no quality was requested, or the requested quality is 10 or higher, the frame
/// size is returned unchanged. Otherwise both sides are scaled by `quality / 10` and
/// snapped to an even pixel count via `scaledEvenDimension`.
private func scaledDimensions(width: Int, height: Int) -> CGSize {
    if let quality, quality < 10 {
        let factor = Double(quality) / 10.0
        let scaledWidth = scaledEvenDimension(width, scale: factor)
        let scaledHeight = scaledEvenDimension(height, scale: factor)
        return CGSize(width: scaledWidth, height: scaledHeight)
    }
    return CGSize(width: width, height: height)
}

/// Scales `value` by `scale` and rounds the result to the nearest even integer,
/// never returning less than 2.
private func scaledEvenDimension(_ value: Int, scale: Double) -> Int {
    // Round the number of 2-pixel pairs, then double it, so the result is always even.
    let pairCount = (Double(value) * scale / 2.0).rounded()
    let evenValue = Int(pairCount) * 2
    return evenValue > 2 ? evenValue : 2
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ final class RunnerTests: XCTestCase {
let tvRemoteDoublePressDelayDefault: TimeInterval = 0.0
let minRecordingFps = 1
let maxRecordingFps = 120
let minRecordingQuality = 5
let maxRecordingQuality = 10
var needsPostSnapshotInteractionDelay = false
var needsFirstInteractionDelay = false
var activeRecording: ScreenRecorder?
Expand Down
182 changes: 182 additions & 0 deletions ios-runner/AgentDeviceRunner/RecordingScripts/recording-resize.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import AVFoundation
import Foundation

/// Failures reported by the resize script. Each case renders as a plain message
/// through `description`.
enum ResizeError: Error, CustomStringConvertible {
    /// A command-line argument problem; the payload is the message to show.
    case invalidArgs(String)
    /// The input asset exposes no video track.
    case missingVideoTrack
    /// Building or running the export failed; the payload is the message to show.
    case exportFailed(String)

    var description: String {
        switch self {
        case let .invalidArgs(detail), let .exportFailed(detail):
            // Both cases already carry their full user-facing text.
            return detail
        case .missingVideoTrack:
            return "Input video does not contain a video track."
        }
    }
}

// Script entry point: run the resize and, on any failure, report it on stderr
// with a "recording-resize: " prefix and exit with status 1.
do {
    try run()
} catch let failure {
    let report = "recording-resize: \(failure)\n"
    FileHandle.standardError.write(Data(report.utf8))
    exit(1)
}

/// Re-exports the input video at a reduced resolution.
///
/// Reads `--input`, `--output` and `--quality` from the command line, builds a
/// composition containing the source video track (plus the first audio track, if
/// any), renders it onto a canvas sized by `scaledRenderSize`, and writes an MP4
/// to the output path. Blocks until the export finishes or 120 seconds elapse.
///
/// - Throws: `ResizeError.invalidArgs` for bad arguments or when input and output
///   are the same file, `ResizeError.missingVideoTrack` when the input has no video
///   track, `ResizeError.exportFailed` when the export cannot be created, times out,
///   or does not complete, and any error thrown while removing a stale output file
///   or inserting the video track.
func run() throws {
    let arguments = Array(CommandLine.arguments.dropFirst())
    let parsedArgs = try parseArguments(arguments)
    let inputURL = URL(fileURLWithPath: parsedArgs.inputPath)
    let outputURL = URL(fileURLWithPath: parsedArgs.outputPath)

    // Resizing in place would delete the input below before it is ever read.
    guard inputURL.standardizedFileURL.path != outputURL.standardizedFileURL.path else {
        throw ResizeError.invalidArgs("--input and --output must be different files")
    }

    let asset = AVURLAsset(url: inputURL)
    guard let sourceVideoTrack = asset.tracks(withMediaType: .video).first else {
        throw ResizeError.missingVideoTrack
    }

    // Clear a stale output only after the input is known to be usable, so a bad
    // input does not destroy a previous result.
    if FileManager.default.fileExists(atPath: outputURL.path) {
        try FileManager.default.removeItem(at: outputURL)
    }

    let renderSize = scaledRenderSize(for: sourceVideoTrack, quality: parsedArgs.quality)
    let composition = AVMutableComposition()
    let fullRange = CMTimeRange(start: .zero, duration: asset.duration)

    guard let compositionVideoTrack = composition.addMutableTrack(
        withMediaType: .video,
        preferredTrackID: kCMPersistentTrackID_Invalid
    ) else {
        throw ResizeError.exportFailed("Failed to create composition video track.")
    }
    try compositionVideoTrack.insertTimeRange(fullRange, of: sourceVideoTrack, at: .zero)

    // Audio is best-effort: a failure here must not abort the resize.
    if let sourceAudioTrack = asset.tracks(withMediaType: .audio).first,
       let compositionAudioTrack = composition.addMutableTrack(
           withMediaType: .audio,
           preferredTrackID: kCMPersistentTrackID_Invalid
       ) {
        do {
            try compositionAudioTrack.insertTimeRange(fullRange, of: sourceAudioTrack, at: .zero)
        } catch {
            // Drop the track again so the exporter is not handed an empty audio track.
            composition.removeTrack(compositionAudioTrack)
        }
    }

    let scale = CGFloat(parsedArgs.quality) / 10.0
    let videoComposition = AVMutableVideoComposition()
    videoComposition.renderSize = renderSize
    videoComposition.frameDuration = resolvedFrameDuration(for: sourceVideoTrack)

    let instruction = AVMutableVideoCompositionInstruction()
    instruction.timeRange = fullRange
    let layerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: compositionVideoTrack)
    // Scale the full preferred transform (including translation) to match the smaller render canvas.
    // NOTE(review): renderSize is snapped to even pixel counts, so it can differ from
    // the uniformly scaled frame by up to a pixel per axis.
    let scaledTransform = scaledPreferredTransform(sourceVideoTrack.preferredTransform, scale: scale)
    layerInstruction.setTransform(scaledTransform, at: .zero)
    instruction.layerInstructions = [layerInstruction]
    videoComposition.instructions = [instruction]

    guard let exporter = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetHighestQuality) else {
        throw ResizeError.exportFailed("Failed to create export session.")
    }

    exporter.outputURL = outputURL
    exporter.outputFileType = .mp4
    exporter.videoComposition = videoComposition
    exporter.shouldOptimizeForNetworkUse = true

    // The script is synchronous, so block on the asynchronous export with a deadline.
    let semaphore = DispatchSemaphore(value: 0)
    exporter.exportAsynchronously {
        semaphore.signal()
    }
    if semaphore.wait(timeout: .now() + 120) == .timedOut {
        exporter.cancelExport()
        throw ResizeError.exportFailed("Resize export timed out.")
    }

    if exporter.status != .completed {
        throw ResizeError.exportFailed(exporter.error?.localizedDescription ?? "Resize export failed.")
    }
}

/// Parses the script's `--input`, `--output` and `--quality` flags.
///
/// - Parameter arguments: Command-line arguments without the executable name.
/// - Returns: The input path, output path and quality (an integer from 5 to 10).
/// - Throws: `ResizeError.invalidArgs` when a flag has no value, the quality is not
///   an integer in 5...10, an unknown argument appears, or any of the three flags
///   is missing.
func parseArguments(_ arguments: [String]) throws -> (inputPath: String, outputPath: String, quality: Int) {
    var input: String?
    var output: String?
    var requestedQuality: Int?
    var remaining = arguments.makeIterator()

    // Each recognised flag consumes the argument that follows it as its value.
    while let flag = remaining.next() {
        switch flag {
        case "--input":
            guard let value = remaining.next() else {
                throw ResizeError.invalidArgs("--input requires a value")
            }
            input = value
        case "--output":
            guard let value = remaining.next() else {
                throw ResizeError.invalidArgs("--output requires a value")
            }
            output = value
        case "--quality":
            guard let value = remaining.next() else {
                throw ResizeError.invalidArgs("--quality requires a value")
            }
            guard let number = Int(value), (5...10).contains(number) else {
                throw ResizeError.invalidArgs("--quality must be an integer between 5 and 10")
            }
            requestedQuality = number
        default:
            throw ResizeError.invalidArgs("Unknown argument: \(flag)")
        }
    }

    if let input, let output, let requestedQuality {
        return (input, output, requestedQuality)
    }
    throw ResizeError.invalidArgs(
        "Usage: recording-resize.swift --input <video> --output <video> --quality <5-10>"
    )
}

/// Returns the track's display size: its natural size with the preferred transform
/// applied, with both components made non-negative.
func resolvedRenderSize(for track: AVAssetTrack) -> CGSize {
    let oriented = track.naturalSize.applying(track.preferredTransform)
    // Applying the transform can produce negative components; keep only the magnitudes.
    let displayWidth = abs(oriented.width)
    let displayHeight = abs(oriented.height)
    return CGSize(width: displayWidth, height: displayHeight)
}

/// Returns the export canvas size for `track` at the given quality.
///
/// A quality of 10 or more keeps the resolved render size as is. Lower values
/// scale both sides by `quality / 10` and pass each through `scaledDimension`.
func scaledRenderSize(for track: AVAssetTrack, quality: Int) -> CGSize {
    let nativeSize = resolvedRenderSize(for: track)
    if quality >= 10 {
        return nativeSize
    }
    let factor = CGFloat(quality) / 10.0
    let width = scaledDimension(nativeSize.width, scale: factor)
    let height = scaledDimension(nativeSize.height, scale: factor)
    return CGSize(width: width, height: height)
}

/// Scales `value` by `scale` and rounds the result to the nearest even whole number,
/// never returning less than 2.
func scaledDimension(_ value: CGFloat, scale: CGFloat) -> CGFloat {
    // Round the number of 2-pixel pairs, then double it, so the result is always even.
    let pairCount = (Double(value * scale) / 2.0).rounded()
    let evenPixels = Int(pairCount) * 2
    return CGFloat(evenPixels < 2 ? 2 : evenPixels)
}

/// Picks the frame duration for the video composition.
///
/// Preference order: the track's `minFrameDuration` when it is valid, not indefinite
/// and positive; otherwise one frame at the track's nominal frame rate rounded to a
/// whole number (at least 1); otherwise 1/60 of a second.
func resolvedFrameDuration(for track: AVAssetTrack) -> CMTime {
    let declared = track.minFrameDuration
    let declaredIsUsable = declared.isValid && !declared.isIndefinite && declared.seconds > 0
    guard !declaredIsUsable else {
        return declared
    }

    let framesPerSecond = track.nominalFrameRate
    guard framesPerSecond > 0 else {
        return CMTime(value: 1, timescale: 60)
    }
    let wholeRate = max(1, framesPerSecond.rounded())
    return CMTime(value: 1, timescale: Int32(wholeRate))
}

/// Returns `transform` with all six components (the 2x2 matrix and the translation)
/// multiplied by `scale`.
func scaledPreferredTransform(_ transform: CGAffineTransform, scale: CGFloat) -> CGAffineTransform {
    var scaled = transform
    scaled.a *= scale
    scaled.b *= scale
    scaled.c *= scale
    scaled.d *= scale
    // The translation shrinks too, so the frame stays anchored in the smaller canvas.
    scaled.tx *= scale
    scaled.ty *= scale
    return scaled
}
2 changes: 1 addition & 1 deletion ios-runner/RUNNER_PROTOCOL.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Examples:
```

```json
{ "command": "recordStart", "outPath": "/tmp/demo.mp4", "fps": 30 }
{ "command": "recordStart", "outPath": "/tmp/demo.mp4", "fps": 30, "quality": 7 }
```

```json
Expand Down
1 change: 1 addition & 0 deletions skills/agent-device/references/verification.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ agent-device record stop
- On iOS, recording is a wrapper around `simctl` for simulators and the corresponding device capture path for physical devices.
- On Android, recording is a wrapper around `adb`.
- Recording writes a video artifact and a gesture-telemetry sidecar JSON.
- Use `record start <path> --quality 5` when a smaller video is easier to inspect or share. `--quality` accepts an integer from 5 to 10, where 10 is native resolution and lower values reduce the resolution; omit it to keep the native resolution.
- On macOS hosts, touch overlay burn-in is available for supported recordings.
- On non-macOS hosts, recording still succeeds but the video stays raw and `record stop` can return an `overlayWarning`.
- If the agent already knows the interaction sequence and wants a more lifelike, uninterrupted recording, drive the flow with `batch` while recording instead of replanning between each step.
Expand Down
1 change: 1 addition & 0 deletions src/cli/commands/generic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ export const genericClientCommandHandlers = {
action: readStartStop(positionals[0], 'record'),
path: positionals[1],
fps: flags.fps,
quality: flags.quality as 5 | 6 | 7 | 8 | 9 | 10 | undefined,
hideTouches: flags.hideTouches,
}),
),
Expand Down
1 change: 1 addition & 0 deletions src/client-normalizers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ export function buildFlags(options: InternalRequestOptions): CommandFlags {
out: options.out,
count: options.count,
fps: options.fps,
quality: options.quality,
hideTouches: options.hideTouches,
intervalMs: options.intervalMs,
delayMs: options.delayMs,
Expand Down
4 changes: 4 additions & 0 deletions src/client-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -651,10 +651,13 @@ export type NetworkOptions = AgentDeviceRequestOverrides & {
include?: 'summary' | 'headers' | 'body' | 'all';
};

type RecordingQuality = 5 | 6 | 7 | 8 | 9 | 10;

export type RecordOptions = AgentDeviceRequestOverrides & {
action: 'start' | 'stop';
path?: string;
fps?: number;
quality?: RecordingQuality;
hideTouches?: boolean;
};

Expand Down Expand Up @@ -716,6 +719,7 @@ type CommandExecutionOptions = {
screenshotFullscreen?: boolean;
count?: number;
fps?: number;
quality?: RecordingQuality;
hideTouches?: boolean;
intervalMs?: number;
delayMs?: number;
Expand Down
4 changes: 2 additions & 2 deletions src/daemon/__tests__/session-store.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,15 @@ test('writeSessionLog persists record --hide-touches flags in script output', ()
store.recordAction(session, {
command: 'record',
positionals: ['start', './capture.mp4'],
flags: { platform: 'ios', fps: 30, hideTouches: true },
flags: { platform: 'ios', fps: 30, quality: 8, hideTouches: true },
result: { action: 'start', showTouches: false },
});

store.writeSessionLog(session);
const scriptFile = fs.readdirSync(root).find((file) => file.endsWith('.ad'));
assert.ok(scriptFile);
const script = fs.readFileSync(path.join(root, scriptFile!), 'utf8');
assert.match(script, /record start "\.\/capture\.mp4" --fps 30 --hide-touches/);
assert.match(script, /record start "\.\/capture\.mp4" --fps 30 --quality 8 --hide-touches/);
});

test('writeSessionLog persists screenshot --fullscreen in script output', () => {
Expand Down
Loading
Loading