import * as fs from 'fs';
import * as path from 'path';
import { execSync } from 'child_process';
import { loadConfig } from '../utils/config.js';
import { ab } from '../utils/exec.js';
import { loadSession, saveSession, type SessionState } from '../session/state.js';

/** File name of the session log inside a session directory. */
const SESSION_LOG_FILENAME = 'session-log.json';

/** Shape of one entry stored in the session log file. */
export interface SessionLogEntry {
  action: string;
  relativeTimeSec: number;
  timestamp: string;
  element?: {
    label: string;
    bbox: { x: number; y: number; width: number; height: number };
    viewport: { width: number; height: number };
  };
}

/**
 * Load existing session log entries from disk.
 *
 * This is best-effort: a missing file, an unreadable file, invalid JSON, or
 * JSON whose top-level value is not an array all yield an empty list.
 *
 * @param sessionDir Directory that contains `session-log.json`.
 * @returns The parsed entries, or `[]` when nothing usable is on disk.
 */
export function loadSessionLog(sessionDir: string): SessionLogEntry[] {
  const logPath = path.join(sessionDir, SESSION_LOG_FILENAME);
  if (!fs.existsSync(logPath)) return [];
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(logPath, 'utf-8'));
    // Only the array shape is checked here; individual entries are trusted.
    return Array.isArray(parsed) ? (parsed as SessionLogEntry[]) : [];
  } catch {
    // Deliberate best-effort: a corrupt or unreadable log is treated as empty.
    return [];
  }
}

/**
 * For screenshot commands, resolve relative paths into the session directory
 * so agents can just say `proofshot exec screenshot step-name.png`.
 *
 * The last argument is taken to be the output path. Arguments are returned
 * untouched when the command is not `screenshot`, when no path is given, when
 * the path is already absolute, or when the last argument is an option flag.
 *
 * @param args Command arguments, with the sub-command at index 0.
 * @param sessionDir Directory that relative screenshot paths are resolved into.
 * @returns The original array, or a copy with the last argument resolved.
 */
function resolveScreenshotPath(args: string[], sessionDir: string): string[] {
  if (args[0] !== 'screenshot' || args.length < 2) return args;

  const screenshotPath = args[args.length - 1];

  // A trailing option flag (e.g. `--full`) is not a path; do not rewrite it.
  if (screenshotPath.startsWith('-')) return args;

  // If it's already absolute, leave it alone
  if (path.isAbsolute(screenshotPath)) return args;

  // Resolve relative to session dir
  return [...args.slice(0, -1), path.join(sessionDir, screenshotPath)];
}

/**
 * Build the shell command string for agent-browser.
 *
 * For `eval` commands, we need to pass the JS code as a single quoted argument
 * to prevent the shell from interpreting parentheses, brackets, etc.
 * For other commands, each argument is single-quoted only when it is empty or
 * contains whitespace or a shell-special character; plain arguments are passed
 * through unchanged.
 *
 * Quoting uses POSIX-style single quotes, with embedded single quotes
 * rewritten as `'\''`.
 *
 * @param args Command arguments, with the sub-command at index 0.
 * @returns The full command line, starting with `agent-browser`.
 */
function buildShellCommand(args: string[]): string {
  // Close the quote, emit an escaped quote, then reopen: ' -> '\''
  const singleQuote = (text: string): string => `'${text.replace(/'/g, "'\\''")}'`;

  if (args[0] === 'eval' && args.length > 1) {
    // Everything after 'eval' is one piece of JS code, passed as a single argument.
    return `agent-browser eval ${singleQuote(args.slice(1).join(' '))}`;
  }

  const quotedArgs = args.map((arg) =>
    // An empty argument must be quoted or it disappears from the command line;
    // \s covers tabs and newlines as well as spaces.
    arg === '' || /[\s(){}[\]$`!#&|;<>*?"'\\]/.test(arg) ? singleQuote(arg) : arg,
  );
  return `agent-browser ${quotedArgs.join(' ')}`;
}

/**
 * Parse an element ref (@eN) from command args.
 *
 * The ref must not be preceded by a word character and must end on a word
 * boundary, so ordinary text such as `bob@e2e.io` is not mistaken for `@e2`.
 *
 * @param args Command arguments to scan, in order.
 * @returns The first ref found (e.g. `@e12`), or `null` when there is none.
 */
function parseElementRef(args: string[]): string | null {
  for (const arg of args) {
    const match = arg.match(/(?<!\w)@e\d+\b/);
    if (match) return match[0];
  }
  return null;
}

/**
 * Capture element bounding box and label before action execution.
 *
 * agent-browser's `get box` doesn't support @eN refs, but `get text` and
 * `get attr` do. Strategy:
 * 1. Try `get attr @eN id` — if found, use `get box #` (reliable for inputs)
 * 2. Otherwise try `get text @eN` — use `get box "text=