Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion skills/agent-device/references/verification.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Open this file when the task needs evidence, regression checks, replay maintenan

- `screenshot`
- `diff snapshot`
- `diff screenshot`
- `record`
- `replay -u`
- `perf`
Expand Down Expand Up @@ -41,12 +42,27 @@ agent-device diff snapshot -i
- Run `diff snapshot` to confirm the expected structural change.
- Re-run full `snapshot` only when you need fresh refs.

## Visual artifacts
## Screenshot artifacts

Use `screenshot` when the proof needs a rendered image instead of a structural tree.

- Add `--overlay-refs` when you want the saved PNG to show fresh `@eN` refs burned into the screenshot.

## Visual regression with diff screenshot

Use `diff screenshot` when comparing the current rendered screen against a saved visual baseline.

```bash
agent-device diff screenshot --baseline ./baseline.png --out /tmp/diff.png
agent-device diff screenshot --baseline ./baseline.png --out /tmp/diff.png --overlay-refs
```

- Text output includes ranked changed regions with screen-space rectangles, shape, size, density, average color, and luminance. JSON also includes normalized bounds.
- The diff PNG uses a light grayscale current-screen context with changed pixels tinted red and changed regions outlined.
- Install `tesseract` when you want `diff screenshot` to add best-effort OCR text deltas, movement clusters, and bbox size-change hints. OCR improves the text/JSON descriptions only; it does not change the pixel comparison or the diff PNG.
- When OCR is available, `diff screenshot` also reports best-effort non-text visual deltas by masking OCR text boxes out of the pixel diff and clustering the remaining residuals. Treat these as hints for icons, controls, and separators, not semantic icon recognition.
- Add `--overlay-refs` to `diff screenshot` when you also want a separate current-screen overlay guide. The raw screenshot is still used for pixel comparison; the overlay guide is only context for non-text controls, icons, and tappable regions. When overlay refs intersect changed regions, the output lists the best current-screen ref matches under the affected region.

## Session recording

Use `record` for debugging, documentation, or shareable verification artifacts.
Expand Down
66 changes: 65 additions & 1 deletion src/__tests__/cli-diff.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,25 @@ async function runCliCapture(
fs.mkdirSync(path.dirname(outPath), { recursive: true });
fs.writeFileSync(outPath, solidPngBuffer(10, 10, { r: 255, g: 255, b: 255 }));
}
return { ok: true, data: { path: outPath } };
return {
ok: true,
data: {
path: outPath,
...(req.flags?.overlayRefs
? {
overlayRefs: [
{
ref: 'e1',
label: 'Continue',
rect: { x: 1, y: 2, width: 3, height: 4 },
overlayRect: { x: 1, y: 2, width: 3, height: 4 },
center: { x: 3, y: 4 },
},
],
}
: {}),
},
};
}
return {
ok: true,
Expand Down Expand Up @@ -249,11 +267,13 @@ describe('cli diff commands', () => {
'screenshot',
'--baseline',
baseline,
'--overlay-refs',
'--threshold',
'0.2',
]);
assert.equal(result.code, null);
// The client-backed command captures a screenshot via the daemon client
// and skips a second overlay capture when there is no diff to map.
assert.equal(result.calls.length, 1);
const call = result.calls[0]!;
assert.equal(call.command, 'screenshot');
Expand Down Expand Up @@ -287,12 +307,15 @@ describe('cli diff commands', () => {
const originalHome = process.env.HOME;
const baselineRelative = path.join('fixtures', 'baseline.png');
const diffRelative = path.join('fixtures', 'diff.png');
const overlayRelative = path.join('fixtures', 'diff.current-overlay.png');
const baseline = path.join(fakeHome, baselineRelative);
const diffOut = path.join(fakeHome, diffRelative);
const overlayOut = path.join(fakeHome, overlayRelative);

fs.mkdirSync(path.dirname(baseline), { recursive: true });
fs.writeFileSync(baseline, solidPngBuffer(10, 10, { r: 255, g: 255, b: 255 }));
fs.writeFileSync(diffOut, 'stale diff');
fs.writeFileSync(overlayOut, 'stale overlay');
process.env.HOME = fakeHome;

try {
Expand All @@ -304,6 +327,7 @@ describe('cli diff commands', () => {
`~/${baselineRelative}`,
'--out',
`~/${diffRelative}`,
'--overlay-refs',
'--json',
],
{ preserveHome: true },
Expand All @@ -315,10 +339,50 @@ describe('cli diff commands', () => {
assert.equal(payload.success, true);
assert.equal(payload.data.match, true);
assert.equal(fs.existsSync(diffOut), false);
assert.equal(fs.existsSync(overlayOut), false);
} finally {
if (typeof originalHome === 'string') process.env.HOME = originalHome;
else delete process.env.HOME;
fs.rmSync(fakeHome, { recursive: true, force: true });
}
});

// Checks that a mismatching `diff screenshot --overlay-refs --out <path>` run
// performs a second screenshot capture for the overlay guide and reports it,
// together with per-region ref overlaps, in the text output.
test('diff screenshot --overlay-refs writes a separate current overlay guide', async () => {
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cli-diff-test-'));
const baseline = path.join(dir, 'baseline.png');
const diffOut = path.join(dir, 'diff.png');
// Expected overlay location: the diff output path with `.current-overlay`
// inserted before the extension.
const overlayOut = path.join(dir, 'diff.current-overlay.png');
// Solid black baseline. NOTE(review): the stubbed capture in runCliCapture
// appears to write a solid white image, which is what the 100% density /
// #000000->#ffffff expectation below relies on — confirm against the stub.
fs.writeFileSync(baseline, solidPngBuffer(10, 10, { r: 0, g: 0, b: 0 }));

try {
const result = await runCliCapture([
'diff',
'screenshot',
'--baseline',
baseline,
'--out',
diffOut,
'--overlay-refs',
'--threshold',
'0',
]);
assert.equal(result.code, null);
// Two captures are expected: the first without the overlayRefs flag (the
// image used for comparison), the second with overlayRefs enabled and
// targeted at the derived overlay path.
assert.equal(result.calls.length, 2);
assert.equal(result.calls[0]?.command, 'screenshot');
assert.equal(result.calls[0]?.flags?.overlayRefs, undefined);
assert.equal(result.calls[1]?.command, 'screenshot');
assert.equal(result.calls[1]?.flags?.overlayRefs, true);
assert.equal(result.calls[1]?.positionals?.[0], overlayOut);
// Text output must mention both artifacts and the overlay ref count.
assert.match(result.stdout, /Diff image:/);
assert.match(result.stdout, /Current overlay:/);
assert.match(result.stdout, /diff\.current-overlay\.png \(1 refs\)/);
assert.match(
result.stdout,
/size=large shape=large-area density=100% avgColor=#000000->#ffffff luminance=0->255/,
);
// The overlay ref returned by the capture should be listed under the changed
// region it intersects.
assert.match(result.stdout, /overlaps @e1 "Continue", 12% of region/);
} finally {
// Always remove the temporary working directory, even when an assertion fails.
fs.rmSync(dir, { recursive: true, force: true });
}
});
});
39 changes: 39 additions & 0 deletions src/cli/commands/screenshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import path from 'node:path';
import { formatScreenshotDiffText, formatSnapshotDiffText } from '../../utils/output.ts';
import { AppError } from '../../utils/errors.ts';
import { compareScreenshots, type ScreenshotDiffResult } from '../../utils/screenshot-diff.ts';
import { attachCurrentOverlayMatches } from '../../utils/screenshot-diff-overlay-matches.ts';
import { resolveUserPath } from '../../utils/path-resolution.ts';
import { buildSelectionOptions, writeCommandOutput } from './shared.ts';
import type { ClientCommandHandler } from './router.ts';
Expand Down Expand Up @@ -71,6 +72,26 @@ export const diffCommand: ClientCommandHandler = async ({ positionals, flags, cl
threshold: thresholdNum,
outputPath,
});
if (flags.overlayRefs && !result.match && !result.dimensionMismatch) {
const overlayResult = await client.capture.screenshot({
path: outputPath ? deriveCurrentOverlayPath(outputPath) : undefined,
overlayRefs: true,
});
Comment on lines +75 to +79
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Delete stale current-overlay artifact on no-diff runs

The overlay guide is only captured on mismatch, but there is no cleanup when a later diff screenshot --overlay-refs --out ... run matches (or has a dimension mismatch). That leaves an old *.current-overlay.* file on disk, which can be mistaken for fresh output by users or scripts that rely on the deterministic filename.

Useful? React with 👍 / 👎.

result = {
...result,
currentOverlayPath: overlayResult.path,
...(overlayResult.overlayRefs
? { currentOverlayRefCount: overlayResult.overlayRefs.length }
: {}),
...(result.regions && overlayResult.overlayRefs
? {
regions: attachCurrentOverlayMatches(result.regions, overlayResult.overlayRefs),
}
: {}),
};
} else if (flags.overlayRefs && outputPath) {
removeStaleCurrentOverlay(outputPath);
}
} finally {
try {
fs.unlinkSync(currentPath);
Expand All @@ -83,3 +104,21 @@ export const diffCommand: ClientCommandHandler = async ({ positionals, flags, cl
writeCommandOutput(flags, result, () => formatScreenshotDiffText(result));
return true;
};

/**
 * Builds the file path used for the "current overlay" companion image of a
 * diff output file.
 *
 * The `.current-overlay` marker is inserted immediately before the file
 * extension. When `outputPath` has no extension, `.png` is appended.
 *
 * @param outputPath - Path of the diff image the overlay belongs to.
 * @returns The sibling path for the overlay guide image.
 */
function deriveCurrentOverlayPath(outputPath: string): string {
  const ext = path.extname(outputPath);
  if (ext === '') {
    // No extension to preserve: fall back to a PNG file name.
    return `${outputPath}.current-overlay.png`;
  }
  const stem = outputPath.substring(0, outputPath.length - ext.length);
  return `${stem}.current-overlay${ext}`;
}

/**
 * Deletes the overlay guide image that belongs to `outputPath`, if one exists.
 *
 * A missing file (`ENOENT`) is treated as success; any other failure is
 * rethrown to the caller.
 *
 * @param outputPath - Path of the diff image whose overlay sibling is removed.
 * @throws Whatever `fs.unlinkSync` throws, except for `ENOENT`.
 */
function removeStaleCurrentOverlay(outputPath: string): void {
  const stalePath = deriveCurrentOverlayPath(outputPath);
  try {
    fs.unlinkSync(stalePath);
  } catch (error) {
    // Nothing to clean up when the overlay was never written.
    if (isFsError(error, 'ENOENT')) return;
    throw error;
  }
}

/**
 * Type guard that reports whether `error` is a non-null object carrying a
 * `code` property strictly equal to `code`.
 *
 * @param error - The caught value to inspect.
 * @param code - The error code to match, e.g. `'ENOENT'`.
 * @returns `true` when the value is an object whose `code` equals `code`.
 */
function isFsError(error: unknown, code: string): error is NodeJS.ErrnoException {
  if (typeof error !== 'object' || error === null) {
    return false;
  }
  if (!('code' in error)) {
    return false;
  }
  return error.code === code;
}
99 changes: 99 additions & 0 deletions src/utils/__tests__/output.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -664,11 +664,110 @@ test('formatScreenshotDiffText renders mismatch with pixel counts without color'
totalPixels: 10000,
mismatchPercentage: 5,
diffPath: '/tmp/test/diff.png',
currentOverlayPath: '/tmp/test/diff.current-overlay.png',
currentOverlayRefCount: 1,
regions: [
{
index: 1,
rect: { x: 10, y: 20, width: 100, height: 40 },
normalizedRect: { x: 10, y: 20, width: 100, height: 40 },
differentPixels: 350,
shareOfDiffPercentage: 70,
densityPercentage: 8.75,
shape: 'horizontal-band',
size: 'medium',
location: 'top-left',
averageBaselineColorHex: '#141414',
averageCurrentColorHex: '#dcdcdc',
baselineLuminance: 20,
currentLuminance: 220,
dominantChange: 'brighter',
currentOverlayMatches: [
{
ref: 'e1',
label: 'Continue',
rect: { x: 1, y: 2, width: 3, height: 4 },
regionCoveragePercentage: 12,
},
],
},
],
ocr: {
provider: 'tesseract',
baselineBlocks: 2,
currentBlocks: 2,
matches: [
{
text: 'Wi-Fi',
baselineRect: { x: 120, y: 320, width: 60, height: 22 },
currentRect: { x: 130, y: 332, width: 70, height: 22 },
delta: { x: 10, y: 12, width: 10, height: 0 },
confidence: 94,
possibleTextMetricMismatch: true,
},
],
movementClusters: [
{
texts: ['Wi-Fi', 'Bluetooth'],
xRange: { min: 10, max: 12 },
yRange: { min: 10, max: 14 },
},
],
},
nonTextDeltas: [
{
index: 1,
regionIndex: 1,
slot: 'leading',
likelyKind: 'icon',
rect: { x: 80, y: 318, width: 30, height: 30 },
nearestText: 'Wi-Fi',
},
{
index: 2,
regionIndex: 1,
slot: 'separator',
likelyKind: 'separator',
rect: { x: 90, y: 360, width: 120, height: 2 },
},
],
}),
);
assert.match(text, /✗ 5% pixels differ/);
assert.match(text, /Diff image:/);
assert.match(text, /Current overlay:/);
assert.match(text, /diff\.current-overlay\.png \(1 refs\)/);
assert.match(text, /500 different \/ 10000 total pixels/);
assert.match(text, /Hints:/);
assert.match(
text,
/text movement cluster: "Wi-Fi", "Bluetooth" dx=\+10\.\.\+12px dy=\+10\.\.\+14px/,
);
assert.match(text, /non-text controls: icon near "Wi-Fi" r1/);
assert.match(text, /non-text boundaries: separator r1/);
assert.match(text, /Changed regions:/);
assert.match(text, /1\. top-left x=10 y=20 100x40, 70% of diff, change=brighter/);
assert.match(
text,
/size=medium shape=horizontal-band density=8\.75% avgColor=#141414->#dcdcdc luminance=20->220/,
);
assert.match(text, /overlaps @e1 "Continue", 12% of region/);
assert.match(
text,
/OCR text deltas \(tesseract; baselineBlocks=2 currentBlocks=2; showing 1\/1; px\):/,
);
assert.match(
text,
/item \| text \| movePx \| sizeDeltaPx \| bboxBaseline \| bboxCurrent \| confidence \| issueHint/,
);
assert.match(
text,
/1 \| "Wi-Fi" \| \+10,\+12 \| \+10,0 \| x=120,y=320,w=60,h=22 \| x=130,y=332,w=70,h=22 \| 94 \| ocr-bbox-size-change/,
);
assert.match(text, /Non-text visual deltas \(showing 2\/2; px\):/);
assert.match(text, /item \| region \| slot \| kind \| bboxCurrent \| nearestText/);
assert.match(text, /1 \| r1 \| leading \| icon \| x=80,y=318,w=30,h=30 \| "Wi-Fi"/);
assert.match(text, /2 \| r1 \| separator \| separator \| x=90,y=360,w=120,h=2 \| -/);
assert.equal(text.includes('\x1b['), false);
});

Expand Down
Loading
Loading