diff --git a/docs/agents/web-backend.md b/docs/agents/web-backend.md index 596724bb3..657f1878c 100644 --- a/docs/agents/web-backend.md +++ b/docs/agents/web-backend.md @@ -15,6 +15,8 @@ Default first-run flow: agent-device web setup agent-device open "https://example.com" --platform web agent-device snapshot -i --platform web +agent-device viewport 1280 900 --platform web +agent-device screenshot ./artifacts/web-full.png --platform web --fullscreen agent-device network dump 25 --platform web agent-device close --platform web ``` diff --git a/src/client-types.ts b/src/client-types.ts index 210d9c449..c88a800fc 100644 --- a/src/client-types.ts +++ b/src/client-types.ts @@ -509,6 +509,16 @@ export type PrepareCommandOptions = DeviceCommandBaseOptions & { timeoutMs?: number; }; +export type ViewportCommandOptions = DeviceCommandBaseOptions & { + width: number; + height: number; +}; + +export type ViewportCommandResult = CommandRequestResult & { + width: number; + height: number; +}; + export type AgentDeviceCommandClient = { wait: (options: WaitCommandOptions) => Promise; alert: (options?: AlertCommandOptions) => Promise; @@ -521,6 +531,7 @@ export type AgentDeviceCommandClient = { clipboard: (options: ClipboardCommandOptions) => Promise; reactNative: (options: ReactNativeCommandOptions) => Promise; prepare: (options: PrepareCommandOptions) => Promise; + viewport: (options: ViewportCommandOptions) => Promise; }; type SelectorSnapshotCommandOptions = Pick; diff --git a/src/client.ts b/src/client.ts index 81916a116..99ec9a678 100644 --- a/src/client.ts +++ b/src/client.ts @@ -41,6 +41,7 @@ import type { Lease, MaterializationReleaseOptions, MetroPrepareOptions, + ViewportCommandResult, } from './client-types.ts'; import { readSerializedSnapshotCaptureAnnotations } from './snapshot-capture-annotations.ts'; import { readSnapshotDiagnosticsSummary } from './snapshot-diagnostics.ts'; @@ -101,6 +102,7 @@ export function createAgentDeviceClient( clipboard: async (options) => 
await executeCommand('clipboard', options), reactNative: async (options) => await executeCommand('react-native', options), prepare: async (options) => await executeCommand('prepare', options), + viewport: async (options) => await executeCommand('viewport', options), }, devices: { list: async (options = {}) => { diff --git a/src/command-catalog.ts b/src/command-catalog.ts index 45957990f..467302263 100644 --- a/src/command-catalog.ts +++ b/src/command-catalog.ts @@ -44,6 +44,7 @@ export const PUBLIC_COMMANDS = { trace: 'trace', triggerAppEvent: 'trigger-app-event', type: 'type', + viewport: 'viewport', wait: 'wait', } as const; diff --git a/src/commands/capture/index.test.ts b/src/commands/capture/index.test.ts index 90a5e50b1..186605ab4 100644 --- a/src/commands/capture/index.test.ts +++ b/src/commands/capture/index.test.ts @@ -82,11 +82,18 @@ describe('capture command interface', () => { ['page.png'], flags({ screenshotFullscreen: true, screenshotMaxSize: 1024 }), ); - expect(input).toMatchObject({ path: 'page.png', fullscreen: true, maxSize: 1024 }); + expect(input).toMatchObject({ + path: 'page.png', + fullscreen: true, + maxSize: 1024, + }); expect(screenshotDaemonWriter(input)).toMatchObject({ command: 'screenshot', positionals: ['page.png'], - options: { screenshotFullscreen: true, screenshotMaxSize: 1024 }, + options: { + screenshotFullscreen: true, + screenshotMaxSize: 1024, + }, }); }); diff --git a/src/commands/capture/screenshot-options.test.ts b/src/commands/capture/screenshot-options.test.ts index 077e99fef..9d24ca054 100644 --- a/src/commands/capture/screenshot-options.test.ts +++ b/src/commands/capture/screenshot-options.test.ts @@ -31,13 +31,13 @@ test('screenshot flag projection maps public options to request flags', () => { assert.deepEqual( screenshotFlagsFromOptions({ overlayRefs: true, - fullscreen: false, + fullscreen: true, maxSize: 512, stabilize: false, }), { overlayRefs: true, - screenshotFullscreen: false, + screenshotFullscreen: true, 
screenshotMaxSize: 512, screenshotNoStabilize: true, }, @@ -48,7 +48,11 @@ test('screenshot script flags use the shared recorded flag contract', () => { const parts: string[] = []; const flags = {}; - let result = readScreenshotScriptFlag({ args: ['--fullscreen'], index: 0, flags }); + let result = readScreenshotScriptFlag({ args: ['--full'], index: 0, flags }); + assert.deepEqual(result, { handled: true, nextIndex: 0 }); + result = readScreenshotScriptFlag({ args: ['-f'], index: 0, flags }); + assert.deepEqual(result, { handled: true, nextIndex: 0 }); + result = readScreenshotScriptFlag({ args: ['--fullscreen'], index: 0, flags }); assert.deepEqual(result, { handled: true, nextIndex: 0 }); result = readScreenshotScriptFlag({ args: ['--max-size', '640'], index: 0, flags }); assert.deepEqual(result, { handled: true, nextIndex: 1 }); diff --git a/src/commands/capture/screenshot.ts b/src/commands/capture/screenshot.ts index 5f9b9aa1c..73ad00d57 100644 --- a/src/commands/capture/screenshot.ts +++ b/src/commands/capture/screenshot.ts @@ -37,8 +37,9 @@ const screenshotCommandDefinition = defineExecutableCommand( const screenshotCliSchema = { helpDescription: - 'Capture screenshot (macOS app sessions default to the app window; use --fullscreen for full desktop, --max-size to downscale, --overlay-refs to annotate current refs, or --no-stabilize for low-latency Android capture loops)', - summary: 'Capture screenshot with optional desktop, downscale, or ref overlay modes', + 'Capture screenshot (web defaults to the viewport; use --fullscreen, --full, or -f for the entire page. 
macOS app sessions default to the app window; use --fullscreen for full desktop, --max-size to downscale, --overlay-refs to annotate current refs, or --no-stabilize for low-latency Android capture loops)', + summary: + 'Capture screenshot with optional web full-page, desktop, downscale, or ref overlay modes', positionalArgs: ['path?'], allowedFlags: SCREENSHOT_COMMAND_FLAG_KEYS, } as const; diff --git a/src/commands/management/index.ts b/src/commands/management/index.ts index fabaac020..1d63dae3c 100644 --- a/src/commands/management/index.ts +++ b/src/commands/management/index.ts @@ -5,6 +5,7 @@ import { installManagementCommandFacets } from './install.ts'; import { prepareCommandFacet } from './prepare.ts'; import { pushManagementCommandFacets } from './push.ts'; import { sessionCommandFacet } from './session.ts'; +import { viewportCommandFacet } from './viewport.ts'; export const managementCommandFamily = defineCommandFamilyFromFacets({ name: 'management', @@ -15,6 +16,7 @@ export const managementCommandFamily = defineCommandFamilyFromFacets({ sessionCommandFacet, openCommandFacet, closeCommandFacet, + viewportCommandFacet, ...installManagementCommandFacets, ...pushManagementCommandFacets, ], diff --git a/src/commands/management/output.ts b/src/commands/management/output.ts index f2566766c..ac1bf3cde 100644 --- a/src/commands/management/output.ts +++ b/src/commands/management/output.ts @@ -117,6 +117,7 @@ export const managementCliOutputFormatters = { reinstall: resultOutput(deployCliOutput), 'install-from-source': resultOutput(installFromSourceCliOutput), prepare: messageOutput, + viewport: messageOutput, } as const satisfies Record; function formatDeviceLine(device: AgentDeviceDevice): string { diff --git a/src/commands/management/viewport.ts b/src/commands/management/viewport.ts new file mode 100644 index 000000000..b121ca035 --- /dev/null +++ b/src/commands/management/viewport.ts @@ -0,0 +1,61 @@ +import { PUBLIC_COMMANDS } from '../../command-catalog.ts'; 
+import type { ViewportCommandOptions } from '../../client-types.ts'; +import type { CommandSchemaOverride } from '../../utils/cli-command-schema-types.ts'; +import { AppError } from '../../utils/errors.ts'; +import { integerField, requiredField } from '../command-input.ts'; +import { defineExecutableCommand } from '../command-contract.ts'; +import { commonInputFromFlags, direct } from '../cli-grammar/common.ts'; +import type { CliReader, DaemonWriter } from '../cli-grammar/types.ts'; +import { defineCommandFacet } from '../family/types.ts'; +import { defineFieldCommandMetadata } from '../field-command-contract.ts'; +import { managementCliOutputFormatters } from './output.ts'; + +const viewportCommandMetadata = defineFieldCommandMetadata( + 'viewport', + 'Resize the active web viewport.', + { + width: requiredField(integerField('Viewport width in CSS pixels.', { min: 1 })), + height: requiredField(integerField('Viewport height in CSS pixels.', { min: 1 })), + }, +); + +const viewportCommandDefinition = defineExecutableCommand( + viewportCommandMetadata, + (client, input) => client.command.viewport(input), +); + +const viewportCliSchema = { + helpDescription: + 'Resize the active web viewport before taking snapshots or screenshots. 
Useful for fixed-layout or 100vh apps where changing the viewport reveals different content.', + summary: 'Resize the active web viewport for the current session', + positionalArgs: ['width', 'height'], +} as const satisfies CommandSchemaOverride; + +const viewportCliReader: CliReader = (positionals, flags) => ({ + ...commonInputFromFlags(flags), + width: readViewportDimension(positionals[0], 'width'), + height: readViewportDimension(positionals[1], 'height'), +}); + +const viewportDaemonWriter: DaemonWriter = direct(PUBLIC_COMMANDS.viewport, (input) => { + const { width, height } = input as ViewportCommandOptions; + return [String(width), String(height)]; +}); + +export const viewportCommandFacet = defineCommandFacet({ + name: 'viewport', + metadata: viewportCommandMetadata, + definition: viewportCommandDefinition, + cliSchema: viewportCliSchema, + cliReader: viewportCliReader, + daemonWriter: viewportDaemonWriter, + cliOutputFormatter: managementCliOutputFormatters.viewport, +}); + +function readViewportDimension(value: string | undefined, label: 'width' | 'height'): number { + const parsed = value === undefined ? 
NaN : Number(value); + if (!Number.isInteger(parsed) || parsed < 1) { + throw new AppError('INVALID_ARGS', `viewport ${label} must be a positive integer`); + } + return parsed; +} diff --git a/src/contracts/screenshot.ts b/src/contracts/screenshot.ts index 2db31593c..abb983015 100644 --- a/src/contracts/screenshot.ts +++ b/src/contracts/screenshot.ts @@ -28,10 +28,11 @@ type ScreenshotSpecificFlagDefinition = { export const SCREENSHOT_SPECIFIC_FLAG_DEFINITIONS: readonly ScreenshotSpecificFlagDefinition[] = [ { key: 'screenshotFullscreen', - names: ['--fullscreen'], + names: ['--fullscreen', '--full', '-f'], type: 'boolean', - usageLabel: '--fullscreen', - usageDescription: 'Screenshot: capture the full screen instead of the app window', + usageLabel: '--fullscreen, --full, -f', + usageDescription: + 'Screenshot: on web capture the full page; on macOS app sessions capture the full desktop instead of the app window', }, { key: 'screenshotMaxSize', @@ -124,7 +125,7 @@ export function readScreenshotScriptFlag(params: { }): { handled: true; nextIndex: number } | { handled: false } { const { args, flags, index } = params; const token = args[index]; - if (token === '--fullscreen') { + if (token === '--fullscreen' || token === '--full' || token === '-f') { flags.screenshotFullscreen = true; return { handled: true, nextIndex: index }; } diff --git a/src/core/__tests__/web-interactor.test.ts b/src/core/__tests__/web-interactor.test.ts index 7b40737c5..a4a49c1fe 100644 --- a/src/core/__tests__/web-interactor.test.ts +++ b/src/core/__tests__/web-interactor.test.ts @@ -24,6 +24,9 @@ test('web interactor delegates first-slice operations to the scoped provider', a async screenshot(outPath, options) { calls.push(`screenshot:${outPath}:${options?.fullscreen === true}`); }, + async setViewport(width, height) { + calls.push(`viewport:${width}:${height}`); + }, async click(x, y) { calls.push(`click:${x}:${y}`); }, @@ -48,6 +51,7 @@ test('web interactor delegates first-slice operations 
to the scoped provider', a await interactor.type('world', 6); await interactor.scroll('down', { pixels: 400 }); await interactor.screenshot('/tmp/web.png', { fullscreen: true }); + await interactor.setViewport?.(1280, 900); return await interactor.snapshot({ scope: 'main' }); }); @@ -61,6 +65,7 @@ test('web interactor delegates first-slice operations to the scoped provider', a 'type:world:6', 'scroll:down:400', 'screenshot:/tmp/web.png:true', + 'viewport:1280:900', 'snapshot:main', ]); assert.equal(snapshot.backend, 'web'); @@ -86,6 +91,7 @@ function makeWebProvider(overrides: Partial = {}): WebProvider { close: async () => {}, snapshot: async () => ({ nodes: [] }), screenshot: async () => {}, + setViewport: async () => {}, click: async () => {}, fill: async () => {}, typeText: async () => {}, diff --git a/src/core/capabilities.ts b/src/core/capabilities.ts index a2511c748..91cc871b4 100644 --- a/src/core/capabilities.ts +++ b/src/core/capabilities.ts @@ -52,10 +52,12 @@ const WEB_QUERY_COMMANDS = [ 'wait', ] as const; const WEB_INTERACTION_COMMANDS = ['click', 'fill', 'focus', 'press', 'scroll', 'type'] as const; +const WEB_SETTING_COMMANDS = ['viewport'] as const; const WEB_SUPPORTED_COMMANDS = new Set([ ...WEB_RUNTIME_COMMANDS, ...WEB_QUERY_COMMANDS, ...WEB_INTERACTION_COMMANDS, + ...WEB_SETTING_COMMANDS, ]); const ALL_DEVICE_COMMAND_CAPABILITY = { apple: { simulator: true, device: true }, @@ -259,6 +261,13 @@ const BASE_COMMAND_CAPABILITY_MATRIX: Record = { supports: (device) => device.platform === 'android' || device.platform === 'macos' || device.kind === 'simulator', }, + // Web-only today: this change adds setViewport to the web interactor only, and web + // support is declared through WEB_SUPPORTED_COMMANDS, so native targets stay off. + viewport: { + apple: { simulator: false, device: false }, + android: { emulator: false, device: false, unknown: false }, + linux: LINUX_NONE, + }, 'trigger-app-event': { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, diff --git a/src/core/dispatch.ts b/src/core/dispatch.ts index 30fd7ec76..154b6ce4a 100644 --- a/src/core/dispatch.ts +++ 
b/src/core/dispatch.ts @@ -139,6 +139,8 @@ async function dispatchKnownCommand( return await handleTriggerAppEventCommand(device, interactor, positionals, context); case 'screenshot': return await handleScreenshotCommand(interactor, positionals, outPath, context); + case 'viewport': + return await handleViewportCommand(interactor, positionals); case 'back': await interactor.back(context?.backMode); return { action: 'back', mode: context?.backMode ?? 'in-app', ...successText('Back') }; @@ -282,6 +284,27 @@ async function handleScreenshotCommand( return { path: screenshotPath, ...successText(`Saved screenshot: ${screenshotPath}`) }; } +/** + * Resize the viewport via the interactor; positionals are [width, height]. + * Throws INVALID_ARGS for a wrong argument count or a bad dimension, and + * UNSUPPORTED_OPERATION when the interactor does not implement setViewport. + */ +async function handleViewportCommand( + interactor: Interactor, + positionals: string[], +): Promise> { + if (positionals.length !== 2) { + throw new AppError('INVALID_ARGS', 'viewport requires exactly two arguments: <width> <height>'); + } + const width = readViewportDimension(positionals[0], 'width'); + const height = readViewportDimension(positionals[1], 'height'); + if (!interactor.setViewport) { + throw new AppError('UNSUPPORTED_OPERATION', 'viewport is not supported by this backend'); + } + await interactor.setViewport(width, height); + return { width, height, ...successText(`Viewport set: ${width}x${height}`) }; +} + async function handleClipboardCommand( interactor: Interactor, positionals: string[], @@ -309,6 +332,18 @@ async function handleClipboardCommand( }; } +/** + * Parse one viewport positional as a positive decimal integer, or throw INVALID_ARGS. + * Digits only: Number() alone would also accept '1e3', '0x500', or ' 12 '. + */ +function readViewportDimension(value: string | undefined, label: 'width' | 'height'): number { + const parsed = value !== undefined && /^\d+$/.test(value) ? 
Number(value) : NaN; + if (!Number.isInteger(parsed) || parsed < 1) { + throw new AppError('INVALID_ARGS', `viewport ${label} must be a positive integer`); + } + return parsed; +} + async function handleKeyboardCommand( device: DeviceInfo, positionals: string[], diff --git a/src/core/interactor-types.ts b/src/core/interactor-types.ts index a7d6fb7df..73ce2f199 100644 --- a/src/core/interactor-types.ts +++ b/src/core/interactor-types.ts @@ -118,6 +118,7 @@ export type Interactor = { ): Promise | void>; pinch(scale: number, x?: number, y?: number): Promise | void>; screenshot(outPath: string, options?: ScreenshotOptions): Promise; + setViewport?(width: number, height: number): Promise | void>; snapshot(options?: SnapshotOptions): Promise; back(mode?: BackMode): Promise; home(): Promise; diff --git a/src/core/interactors/web.ts b/src/core/interactors/web.ts index 753e0981b..241be4534 100644 --- a/src/core/interactors/web.ts +++ b/src/core/interactors/web.ts @@ -21,6 +21,7 @@ export function createWebInteractor(): Interactor { scroll: (direction, options) => provider().scroll(direction, options), pinch: () => unsupportedWebOperation('pinch'), screenshot: (outPath, options) => provider().screenshot(outPath, options), + setViewport: (width, height) => provider().setViewport(width, height), snapshot: async (options) => { const result = await withDiagnosticTimer( 'snapshot_capture', diff --git a/src/daemon/__tests__/request-handler-catalog.test.ts b/src/daemon/__tests__/request-handler-catalog.test.ts index 47244076d..db53cafef 100644 --- a/src/daemon/__tests__/request-handler-catalog.test.ts +++ b/src/daemon/__tests__/request-handler-catalog.test.ts @@ -45,6 +45,7 @@ test('catalog commands use generic routing only when intentionally passthrough o PUBLIC_COMMANDS.screenshot, PUBLIC_COMMANDS.scroll, PUBLIC_COMMANDS.swipe, + PUBLIC_COMMANDS.viewport, ].sort(); const genericCatalogCommands = [ ...Object.values(PUBLIC_COMMANDS), diff --git 
a/src/daemon/__tests__/request-platform-providers.test.ts b/src/daemon/__tests__/request-platform-providers.test.ts index 730cf4b31..dab8dcee9 100644 --- a/src/daemon/__tests__/request-platform-providers.test.ts +++ b/src/daemon/__tests__/request-platform-providers.test.ts @@ -274,6 +274,7 @@ function makeWebProvider(overrides: Partial = {}): WebProvider { close: async () => {}, snapshot: async () => ({ nodes: [] }), screenshot: async () => {}, + setViewport: async () => {}, click: async () => {}, fill: async () => {}, typeText: async () => {}, diff --git a/src/daemon/daemon-command-registry.ts b/src/daemon/daemon-command-registry.ts index a3ca2e176..e535f797a 100644 --- a/src/daemon/daemon-command-registry.ts +++ b/src/daemon/daemon-command-registry.ts @@ -158,6 +158,7 @@ const DAEMON_COMMAND_DESCRIPTORS = [ ), descriptor(PUBLIC_COMMANDS.focus, 'generic', { androidBlockingDialogGuard: true }), descriptor(PUBLIC_COMMANDS.screenshot, 'generic', { replayScopedAction: true }), + descriptor(PUBLIC_COMMANDS.viewport, 'generic', { replayScopedAction: true }), ...descriptors( 'generic', { androidBlockingDialogGuard: true }, diff --git a/src/platforms/web/agent-browser-provider.test.ts b/src/platforms/web/agent-browser-provider.test.ts index 1584f9475..2f088e394 100644 --- a/src/platforms/web/agent-browser-provider.test.ts +++ b/src/platforms/web/agent-browser-provider.test.ts @@ -27,6 +27,7 @@ test('agent-browser provider maps supported operations to session-scoped JSON co await withCommandExecutorOverride(recordingExecutor(calls), async () => { await provider.open('https://example.test'); await provider.screenshot('/tmp/page.png', { fullscreen: true }); + await provider.setViewport(1280, 900); await provider.click(10.4, 20.6); await provider.clickRef?.('@e3'); await provider.fill(11, 22, 'Ada'); @@ -41,6 +42,7 @@ test('agent-browser provider maps supported operations to session-scoped JSON co [ ['open', 'https://example.test', '--json', '--session', 'web-session'], 
['screenshot', '--full', '/tmp/page.png', '--json', '--session', 'web-session'], + ['set', 'viewport', '1280', '900', '--json', '--session', 'web-session'], ['mouse', 'move', '10', '21', '--json', '--session', 'web-session'], ['mouse', 'down', '--json', '--session', 'web-session'], ['mouse', 'up', '--json', '--session', 'web-session'], diff --git a/src/platforms/web/agent-browser-provider.ts b/src/platforms/web/agent-browser-provider.ts index 5efe34cd7..00fed4e6e 100644 --- a/src/platforms/web/agent-browser-provider.ts +++ b/src/platforms/web/agent-browser-provider.ts @@ -42,6 +42,9 @@ export function createAgentBrowserWebProvider( async screenshot(outPath, screenshotOptions) { await runJson(['screenshot', ...(screenshotOptions?.fullscreen ? ['--full'] : []), outPath]); }, + async setViewport(width, height) { + await runJson(['set', 'viewport', String(width), String(height)]); + }, async click(x, y) { await clickCoordinates(runJson, x, y); }, diff --git a/src/platforms/web/provider.ts b/src/platforms/web/provider.ts index 07077a0be..1d95a0cac 100644 --- a/src/platforms/web/provider.ts +++ b/src/platforms/web/provider.ts @@ -34,6 +34,7 @@ export type WebProvider = { close(target?: string): Promise; snapshot(options?: WebSnapshotOptions): Promise; screenshot(outPath: string, options?: WebScreenshotOptions): Promise; + setViewport(width: number, height: number): Promise; click(x: number, y: number): Promise; clickRef?(ref: string): Promise; fill(x: number, y: number, text: string, options?: { delayMs?: number }): Promise; diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts index 7598b9a39..62014eed9 100644 --- a/src/utils/__tests__/args.test.ts +++ b/src/utils/__tests__/args.test.ts @@ -892,7 +892,16 @@ test('parseArgs recognizes record --hide-touches flag', () => { test('parseArgs recognizes screenshot flags', () => { const parsed = parseArgs( - ['screenshot', 'page.png', '--fullscreen', '--max-size', '1024', '--no-stabilize'], + [ + 
'screenshot', + 'page.png', + '--full', + '-f', + '--fullscreen', + '--max-size', + '1024', + '--no-stabilize', + ], { strictFlags: true, }, @@ -904,13 +913,24 @@ test('parseArgs recognizes screenshot flags', () => { assert.equal(parsed.flags.screenshotNoStabilize, true); }); -test('usageForCommand documents screenshot stabilization tradeoff', () => { +test('usageForCommand documents screenshot web aliases and stabilization flags', () => { const help = usageForCommand('screenshot'); if (help === null) throw new Error('Expected screenshot help text'); + assert.match(help, /--fullscreen, --full, -f/); + assert.match(help, /entire page/i); assert.match(help, /--no-stabilize/); assert.match(help, /low-latency Android capture loops/); }); +test('parseArgs recognizes viewport command', () => { + const parsed = parseArgs(['viewport', '1280', '900', '--platform', 'web'], { + strictFlags: true, + }); + assert.equal(parsed.command, 'viewport'); + assert.deepEqual(parsed.positionals, ['1280', '900']); + assert.equal(parsed.flags.platform, 'web'); +}); + test('parseArgs rejects invalid record --fps range', () => { assert.throws( () => parseArgs(['record', 'start', './capture.mp4', '--fps', '0'], { strictFlags: true }), @@ -1775,7 +1795,10 @@ test('usage renders concise commands inline with descriptions', () => { assert.match(help, / proxy\s{2,}Expose a local daemon through cloudflared, ngrok/); assert.match(help, / batch --steps \| --steps-file \s{2,}Run multiple commands/); assert.match(help, / test \.\.\.\s{2,}Run replay test suites/); - assert.match(help, / screenshot \[path\]\s{2,}Capture screenshot with optional desktop/); + assert.match( + help, + / screenshot \[path\]\s{2,}Capture screenshot with optional web full-page, desktop/, + ); assert.match( help, / session\s{2,}List active sessions or print the effective daemon state directory/, diff --git a/src/utils/cli-help.ts b/src/utils/cli-help.ts index 22679fc58..2d9125b9d 100644 --- a/src/utils/cli-help.ts +++ 
b/src/utils/cli-help.ts @@ -253,6 +253,8 @@ Validation and evidence: agent-device wait text "Welcome" 3000 --platform web agent-device network dump 25 --include headers --platform web agent-device screenshot ./artifacts/web-home.png --platform web + agent-device screenshot ./artifacts/web-full.png --platform web --fullscreen + agent-device viewport 1280 900 --platform web agent-device close --platform web Minimal web support is for browser sessions with open, snapshot, find, get, is, click/press, fill/type, wait, network dump, screenshot, close, and replay over those commands. Use agent-browser directly for browser-specific features that agent-device does not surface, such as tab/devtools management, advanced page scripting, network routing/HAR, or raw browser debugging. macOS menu bar: open ... --platform macos --surface menubar; snapshot -i --platform macos --surface menubar. @@ -660,6 +662,8 @@ First-slice loop: agent-device wait text "Welcome" 3000 --platform web agent-device network dump 25 --include headers --platform web agent-device screenshot ./artifacts/web-home.png --platform web + agent-device screenshot ./artifacts/web-full.png --platform web --fullscreen + agent-device viewport 1280 900 --platform web agent-device close --platform web Supported in agent-device web sessions: diff --git a/test/integration/provider-scenarios/web-desktop.test.ts b/test/integration/provider-scenarios/web-desktop.test.ts index 5099c130c..b0149f492 100644 --- a/test/integration/provider-scenarios/web-desktop.test.ts +++ b/test/integration/provider-scenarios/web-desktop.test.ts @@ -105,7 +105,13 @@ test('Provider-backed integration web desktop flow uses semantic web provider ca expectData: { pixels: 240 }, }, { - name: 'capture web screenshot artifact', + name: 'resize viewport', + command: 'viewport', + positionals: ['1280', '900'], + expectData: { width: 1280, height: 900 }, + }, + { + name: 'capture full-page web screenshot artifact', command: 'screenshot', positionals: 
[screenshotPath], flags: { @@ -157,6 +163,7 @@ test('Provider-backed integration web desktop flow uses semantic web provider ca assertFlatToolCall(semanticCalls, ['web', 'fillRef', '@e3', 'qa@example.test', '1']); assertFlatToolCall(semanticCalls, ['web', 'type', ' ok', '0']); assertFlatToolCall(semanticCalls, ['web', 'scroll', 'down', '', '240']); + assertFlatToolCall(semanticCalls, ['web', 'viewport', '1280', '900']); assertFlatToolCall(semanticCalls, [ 'web', 'screenshot', diff --git a/test/integration/provider-scenarios/web-provider.test.ts b/test/integration/provider-scenarios/web-provider.test.ts index 4bf47a3c8..4096d9ccb 100644 --- a/test/integration/provider-scenarios/web-provider.test.ts +++ b/test/integration/provider-scenarios/web-provider.test.ts @@ -41,6 +41,9 @@ test('web provider is scoped through the request router and dispatch path', asyn async screenshot() { calls.push('screenshot'); }, + async setViewport(width, height) { + calls.push(`viewport:${width}:${height}`); + }, async click(x, y) { calls.push(`click:${x}:${y}`); }, @@ -140,6 +143,17 @@ test('web provider is scoped through the request router and dispatch path', asyn ]); assert.equal(network.json.result.data.backend, 'agent-browser'); assert.equal(network.json.result.data.include, 'headers'); + const viewport = await harness.callCommand( + 'viewport', + ['1280', '900'], + { platform: 'web' }, + { meta: { requestId: 'req-web-viewport' } }, + ); + assert.deepEqual(viewport.json.result.data, { + width: 1280, + height: 900, + message: 'Viewport set: 1280x900', + }); assert.deepEqual(calls, [ 'scope:none:agent-browser-chrome', 'open:https://example.test', @@ -147,6 +161,8 @@ test('web provider is scoped through the request router and dispatch path', asyn 'snapshot:main', 'scope:default:agent-browser-chrome', 'network:5:headers', + 'scope:default:agent-browser-chrome', + 'viewport:1280:900', ]); } finally { await harness.close(); diff --git a/test/integration/provider-scenarios/web-world.ts 
b/test/integration/provider-scenarios/web-world.ts index d587c3c2d..9d7baf7d7 100644 --- a/test/integration/provider-scenarios/web-world.ts +++ b/test/integration/provider-scenarios/web-world.ts @@ -60,6 +60,9 @@ export async function createWebDesktopWorld(): Promise { ]); fs.writeFileSync(outPath, validPng()); }, + setViewport: async (width, height) => { + semanticCalls.push(['web', 'viewport', String(width), String(height)]); + }, click: async (x, y) => { semanticCalls.push(['web', 'click', String(x), String(y)]); if (pointInRect(x, y, BUTTON_RECT)) { diff --git a/test/integration/smoke-web-platform.test.ts b/test/integration/smoke-web-platform.test.ts index d339ac449..f5093650e 100644 --- a/test/integration/smoke-web-platform.test.ts +++ b/test/integration/smoke-web-platform.test.ts @@ -174,7 +174,7 @@ async function assertWebScreenshot(context: WebSmokeContext): Promise { await assertCommandData( context, 'capture screenshot artifact', - ['screenshot', context.screenshotPath, '--fullscreen', '--no-stabilize'], + ['screenshot', context.screenshotPath, '--full', '--no-stabilize'], { path: context.screenshotPath }, ); assertPngFile(context.screenshotPath); diff --git a/test/skillgym/suites/agent-device-smoke-suite.ts b/test/skillgym/suites/agent-device-smoke-suite.ts index 0a68c27fb..e1d722f51 100644 --- a/test/skillgym/suites/agent-device-smoke-suite.ts +++ b/test/skillgym/suites/agent-device-smoke-suite.ts @@ -840,6 +840,27 @@ const SKILL_GUIDANCE_CASES: Case[] = [ strictFinalOutput: true, allowOnlyLocalCliHelpCommands: true, }), + makeCase({ + id: 'web-fixed-layout-viewport-and-fullshot', + contract: [ + 'Platform: web', + 'Target URL: https://example.com/app', + 'agent-device web setup already passed', + 'The app uses a fixed 100vh layout, so a taller viewport is needed before taking evidence screenshots', + 'Visual evidence path: ./artifacts/web-app.png', + ], + task: 'Plan the agent-device commands to open the web app, resize the viewport to 1280x900, 
capture a full-page screenshot, and close.', + outputs: [ + /(?:^|\n)(?:agent-device\s+)?open\s+https:\/\/example\.com\/app\b[^\n]*--platform\s+web/i, + /(?:^|\n)(?:agent-device\s+)?viewport\s+1280\s+900\b[^\n]*--platform\s+web/i, + // Lookaheads accept the flags in either order; the docs show `--platform web --fullscreen`. + /(?:^|\n)(?:agent-device\s+)?screenshot\s+\.\/artifacts\/web-app\.png\b(?=[^\n]*\s(?:--fullscreen|--full|-f)(?=\s|$))(?=[^\n]*--platform\s+web)/i, + /(?:^|\n)(?:agent-device\s+)?close\b[^\n]*--platform\s+web/i, + ], + forbiddenOutputs: [/agent-browser/i, /--full-page\b/i], + strictFinalOutput: true, + allowOnlyLocalCliHelpCommands: true, + }), makeCase({ id: 'inspect-visible-text-readonly', contract: [ diff --git a/website/docs/docs/commands.md b/website/docs/docs/commands.md index 7dcd45edb..c8b17b481 100644 --- a/website/docs/docs/commands.md +++ b/website/docs/docs/commands.md @@ -124,6 +124,8 @@ agent-device fill @e13 "test@example.com" --platform web agent-device wait text "Welcome" --platform web agent-device network dump 25 --include headers --platform web agent-device screenshot ./artifacts/web-home.png --platform web +agent-device screenshot ./artifacts/web-full.png --platform web --fullscreen +agent-device viewport 1280 900 --platform web agent-device close --platform web ``` @@ -756,6 +758,8 @@ agent-device screenshot # Auto filename agent-device screenshot page.png # Explicit screenshot path agent-device screenshot page.png --max-size 1024 # Downscale longest edge for agent-friendly artifacts agent-device screenshot page.png --overlay-refs # Draw current @eN refs and target rectangles onto the PNG +agent-device screenshot page.png --platform web --fullscreen # On web, --fullscreen/--full/-f captures the entire document +agent-device viewport 1280 900 --platform web # Resize the active web viewport for fixed-layout or 100vh apps agent-device screenshot textedit.png # App-session window capture on macOS agent-device screenshot --fullscreen # Force full-screen capture on macOS app sessions agent-device open --platform macos --surface desktop && 
agent-device screenshot desktop.png