/** * @license * Copyright 2025 Google LLC * SPDX-License-Identifier: Apache-2.0 */ import { execSync, spawn } from 'child_process'; import { parse } from 'shell-quote'; import { mkdirSync, writeFileSync, readFileSync } from 'fs'; import { join, dirname } from 'path'; import { fileURLToPath } from 'url'; import { env } from 'process'; import fs from 'fs'; const __dirname = dirname(fileURLToPath(import.meta.url)); function sanitizeTestName(name: string) { return name .toLowerCase() .replace(/[^a-z0-9]/g, '-') .replace(/-+/g, '-'); } // Helper to create detailed error messages export function createToolCallErrorMessage( expectedTools: string | string[], foundTools: string[], result: string, ) { const expectedStr = Array.isArray(expectedTools) ? expectedTools.join(' or ') : expectedTools; return ( `Expected to find ${expectedStr} tool call(s). ` + `Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` + `Output preview: ${result ? result.substring(0, 200) + '...' : 'no output'}` ); } // Helper to print debug information when tests fail export function printDebugInfo( rig: TestRig, result: string, context: Record = {}, ) { console.error('Test failed - Debug info:'); console.error('Result length:', result.length); console.error('Result (first 500 chars):', result.substring(0, 500)); console.error( 'Result (last 500 chars):', result.substring(result.length - 500), ); // Print any additional context provided Object.entries(context).forEach(([key, value]) => { console.error(`${key}:`, value); }); // Check what tools were actually called const allTools = rig.readToolLogs(); console.error( 'All tool calls found:', allTools.map((t) => t.toolRequest.name), ); return allTools; } // Helper to validate model output and warn about unexpected content export function validateModelOutput( result: string, expectedContent: string | (string | RegExp)[] | null = null, testName = '', ) { // First, check if there's any output at all (this should fail the test if missing) if (!result || result.trim().length === 0) { throw new Error('Expected LLM to return some output'); } // If expectedContent is provided, check for it and warn if missing if (expectedContent) { const contents = Array.isArray(expectedContent) ? expectedContent : [expectedContent]; const missingContent = contents.filter((content) => { if (typeof content === 'string') { return !result.toLowerCase().includes(content.toLowerCase()); } else if (content instanceof RegExp) { return !content.test(result); } return false; }); if (missingContent.length > 0) { console.warn( `Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`, 'This is not ideal but not a test failure.', ); console.warn( 'The tool was called successfully, which is the main requirement.', ); return false; } else if (process.env.VERBOSE === 'true') { console.log(`${testName}: Model output validated successfully.`); } return true; } return true; } export class TestRig { bundlePath: string; testDir: string | null; testName?: string; _lastRunStdout?: string; constructor() { this.bundlePath = join(__dirname, '..', 'bundle/gemini.js'); this.testDir = null; } // Get timeout based on environment getDefaultTimeout() { if (env.CI) return 60000; // 1 minute in CI if (env.GEMINI_SANDBOX) return 30000; // 30s in containers return 15000; // 15s locally } setup( testName: string, options: { settings?: Record } = {}, ) { this.testName = testName; const sanitizedName = sanitizeTestName(testName); this.testDir = join(env.INTEGRATION_TEST_FILE_DIR!, sanitizedName); mkdirSync(this.testDir, { recursive: true }); // Create a settings file to point the CLI to the local collector const geminiDir = join(this.testDir, '.gemini'); mkdirSync(geminiDir, { recursive: true }); // In sandbox mode, use an absolute path for telemetry inside the container // The container mounts the test directory at the same path as the host const telemetryPath = env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' ? join(this.testDir, 'telemetry.log') // Absolute path in test directory : env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox const settings = { telemetry: { enabled: true, target: 'local', otlpEndpoint: '', outfile: telemetryPath, }, sandbox: env.GEMINI_SANDBOX !== 'false' ? env.GEMINI_SANDBOX : false, ...options.settings, // Allow tests to override/add settings }; writeFileSync( join(geminiDir, 'settings.json'), JSON.stringify(settings, null, 2), ); } createFile(fileName: string, content: string) { const filePath = join(this.testDir!, fileName); writeFileSync(filePath, content); return filePath; } mkdir(dir: string) { mkdirSync(join(this.testDir!, dir), { recursive: true }); } sync() { // ensure file system is done before spawning execSync('sync', { cwd: this.testDir! }); } run( promptOrOptions: string | { prompt?: string; stdin?: string }, ...args: string[] ): Promise { let command = `node ${this.bundlePath} --yolo`; const execOptions: { cwd: string; encoding: 'utf-8'; input?: string; } = { cwd: this.testDir!, encoding: 'utf-8', }; if (typeof promptOrOptions === 'string') { command += ` --prompt ${JSON.stringify(promptOrOptions)}`; } else if ( typeof promptOrOptions === 'object' && promptOrOptions !== null ) { if (promptOrOptions.prompt) { command += ` --prompt ${JSON.stringify(promptOrOptions.prompt)}`; } if (promptOrOptions.stdin) { execOptions.input = promptOrOptions.stdin; } } command += ` ${args.join(' ')}`; const commandArgs = parse(command); const node = commandArgs.shift() as string; const child = spawn(node, commandArgs as string[], { cwd: this.testDir!, stdio: 'pipe', }); let stdout = ''; let stderr = ''; // Handle stdin if provided if (execOptions.input) { child.stdin!.write(execOptions.input); } child.stdin!.end(); child.stdout!.on('data', (data: Buffer) => { stdout += data; if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { process.stdout.write(data); } }); child.stderr!.on('data', (data: Buffer) => { stderr += data; if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { process.stderr.write(data); } }); const promise = new Promise((resolve, reject) => { child.on('close', (code: number) => { if (code === 0) { // Store the raw stdout for Podman telemetry parsing this._lastRunStdout = stdout; // Filter out telemetry output when running with Podman // Podman seems to output telemetry to stdout even when writing to file let result = stdout; if (env.GEMINI_SANDBOX === 'podman') { // Remove telemetry JSON objects from output // They are multi-line JSON objects that start with { and contain telemetry fields const lines = result.split('\n'); const filteredLines = []; let inTelemetryObject = false; let braceDepth = 0; for (const line of lines) { if (!inTelemetryObject && line.trim() === '{') { // Check if this might be start of telemetry object inTelemetryObject = true; braceDepth = 1; } else if (inTelemetryObject) { // Count braces to track nesting for (const char of line) { if (char === '{') braceDepth++; else if (char === '}') braceDepth--; } // Check if we've closed all braces if (braceDepth === 0) { inTelemetryObject = false; // Skip this line (the closing brace) continue; } } else { // Not in telemetry object, keep the line filteredLines.push(line); } } result = filteredLines.join('\n'); } // If we have stderr output, include that also if (stderr) { result += `\n\nStdErr:\n${stderr}`; } resolve(result); } else { reject(new Error(`Process exited with code ${code}:\n${stderr}`)); } }); }); return promise; } readFile(fileName: string) { const filePath = join(this.testDir!, fileName); const content = readFileSync(filePath, 'utf-8'); if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { console.log(`--- FILE: ${filePath} ---`); console.log(content); console.log(`--- END FILE: ${filePath} ---`); } return content; } async cleanup() { // Clean up test directory if (this.testDir && !env.KEEP_OUTPUT) { try { execSync(`rm -rf ${this.testDir}`); } catch (error) { // Ignore cleanup errors if (env.VERBOSE === 'true') { console.warn('Cleanup warning:', (error as Error).message); } } } } async waitForTelemetryReady() { // In sandbox mode, telemetry is written to a relative path in the test directory const logFilePath = env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' ? join(this.testDir!, 'telemetry.log') : env.TELEMETRY_LOG_FILE; if (!logFilePath) return; // Wait for telemetry file to exist and have content await this.poll( () => { if (!fs.existsSync(logFilePath)) return false; try { const content = readFileSync(logFilePath, 'utf-8'); // Check if file has meaningful content (at least one complete JSON object) return content.includes('"event.name"'); } catch { return false; } }, 2000, // 2 seconds max - reduced since telemetry should flush on exit now 100, // check every 100ms ); } async waitForToolCall(toolName: string, timeout?: number) { // Use environment-specific timeout if (!timeout) { timeout = this.getDefaultTimeout(); } // Wait for telemetry to be ready before polling for tool calls await this.waitForTelemetryReady(); return this.poll( () => { const toolLogs = this.readToolLogs(); return toolLogs.some((log) => log.toolRequest.name === toolName); }, timeout, 100, ); } async waitForAnyToolCall(toolNames: string[], timeout?: number) { // Use environment-specific timeout if (!timeout) { timeout = this.getDefaultTimeout(); } // Wait for telemetry to be ready before polling for tool calls await this.waitForTelemetryReady(); return this.poll( () => { const toolLogs = this.readToolLogs(); return toolNames.some((name) => toolLogs.some((log) => log.toolRequest.name === name), ); }, timeout, 100, ); } async poll( predicate: () => boolean, timeout: number, interval: number, ): Promise { const startTime = Date.now(); let attempts = 0; while (Date.now() - startTime < timeout) { attempts++; const result = predicate(); if (env.VERBOSE === 'true' && attempts % 5 === 0) { console.log( `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`, ); } if (result) { return true; } await new Promise((resolve) => setTimeout(resolve, interval)); } if (env.VERBOSE === 'true') { console.log(`Poll timed out after ${attempts} attempts`); } return false; } _parseToolLogsFromStdout(stdout: string) { const logs: { timestamp: number; toolRequest: { name: string; args: string; success: boolean; duration_ms: number; }; }[] = []; // The console output from Podman is JavaScript object notation, not JSON // Look for tool call events in the output // Updated regex to handle tool names with hyphens and underscores const toolCallPattern = /body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g; const matches = [...stdout.matchAll(toolCallPattern)]; for (const match of matches) { const toolName = match[1]; const success = match[2] === 'true'; const duration = parseInt(match[3], 10); // Try to find function_args nearby const matchIndex = match.index || 0; const contextStart = Math.max(0, matchIndex - 500); const contextEnd = Math.min(stdout.length, matchIndex + 500); const context = stdout.substring(contextStart, contextEnd); // Look for function_args in the context let args = '{}'; const argsMatch = context.match(/function_args:\s*'([^']+)'/); if (argsMatch) { args = argsMatch[1]; } // Also try to find function_name to double-check // Updated regex to handle tool names with hyphens and underscores const nameMatch = context.match(/function_name:\s*'([\w-]+)'/); const actualToolName = nameMatch ? nameMatch[1] : toolName; logs.push({ timestamp: Date.now(), toolRequest: { name: actualToolName, args: args, success: success, duration_ms: duration, }, }); } // If no matches found with the simple pattern, try the JSON parsing approach // in case the format changes if (logs.length === 0) { const lines = stdout.split('\n'); let currentObject = ''; let inObject = false; let braceDepth = 0; for (const line of lines) { if (!inObject && line.trim() === '{') { inObject = true; braceDepth = 1; currentObject = line + '\n'; } else if (inObject) { currentObject += line + '\n'; // Count braces for (const char of line) { if (char === '{') braceDepth++; else if (char === '}') braceDepth--; } // If we've closed all braces, try to parse the object if (braceDepth === 0) { inObject = false; try { const obj = JSON.parse(currentObject); // Check for tool call in different formats if ( obj.body && obj.body.includes('Tool call:') && obj.attributes ) { const bodyMatch = obj.body.match(/Tool call: (\w+)\./); if (bodyMatch) { logs.push({ timestamp: obj.timestamp || Date.now(), toolRequest: { name: bodyMatch[1], args: obj.attributes.function_args || '{}', success: obj.attributes.success !== false, duration_ms: obj.attributes.duration_ms || 0, }, }); } } else if ( obj.attributes && obj.attributes['event.name'] === 'gemini_cli.tool_call' ) { logs.push({ timestamp: obj.attributes['event.timestamp'], toolRequest: { name: obj.attributes.function_name, args: obj.attributes.function_args, success: obj.attributes.success, duration_ms: obj.attributes.duration_ms, }, }); } } catch { // Not valid JSON } currentObject = ''; } } } } return logs; } readToolLogs() { // For Podman, first check if telemetry file exists and has content // If not, fall back to parsing from stdout if (env.GEMINI_SANDBOX === 'podman') { // Try reading from file first const logFilePath = join(this.testDir!, 'telemetry.log'); if (fs.existsSync(logFilePath)) { try { const content = readFileSync(logFilePath, 'utf-8'); if (content && content.includes('"event.name"')) { // File has content, use normal file parsing // Continue to the normal file parsing logic below } else if (this._lastRunStdout) { // File exists but is empty or doesn't have events, parse from stdout return this._parseToolLogsFromStdout(this._lastRunStdout); } } catch { // Error reading file, fall back to stdout if (this._lastRunStdout) { return this._parseToolLogsFromStdout(this._lastRunStdout); } } } else if (this._lastRunStdout) { // No file exists, parse from stdout return this._parseToolLogsFromStdout(this._lastRunStdout); } } // In sandbox mode, telemetry is written to a relative path in the test directory const logFilePath = env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false' ? join(this.testDir!, 'telemetry.log') : env.TELEMETRY_LOG_FILE; if (!logFilePath) { console.warn(`TELEMETRY_LOG_FILE environment variable not set`); return []; } // Check if file exists, if not return empty array (file might not be created yet) if (!fs.existsSync(logFilePath)) { return []; } const content = readFileSync(logFilePath, 'utf-8'); // Split the content into individual JSON objects // They are separated by "}\n{" const jsonObjects = content .split(/}\s*\n\s*{/) .map((obj, index, array) => { // Add back the braces we removed during split if (index > 0) obj = '{' + obj; if (index < array.length - 1) obj = obj + '}'; return obj.trim(); }) .filter((obj) => obj); const logs: { toolRequest: { name: string; args: string; success: boolean; duration_ms: number; }; }[] = []; for (const jsonStr of jsonObjects) { try { const logData = JSON.parse(jsonStr); // Look for tool call logs if ( logData.attributes && logData.attributes['event.name'] === 'gemini_cli.tool_call' ) { const toolName = logData.attributes.function_name; logs.push({ toolRequest: { name: toolName, args: logData.attributes.function_args, success: logData.attributes.success, duration_ms: logData.attributes.duration_ms, }, }); } } catch (e) { // Skip objects that aren't valid JSON if (env.VERBOSE === 'true') { console.error( 'Failed to parse telemetry object:', (e as Error).message, ); } } } return logs; } }