gemini-cli/integration-tests/test-helper.ts

640 lines
19 KiB
TypeScript

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { execSync, spawn } from 'child_process';
import { parse } from 'shell-quote';
import { mkdirSync, writeFileSync, readFileSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { env } from 'process';
import fs from 'fs';
const __dirname = dirname(fileURLToPath(import.meta.url));
function sanitizeTestName(name: string) {
return name
.toLowerCase()
.replace(/[^a-z0-9]/g, '-')
.replace(/-+/g, '-');
}
// Helper to create detailed error messages
export function createToolCallErrorMessage(
expectedTools: string | string[],
foundTools: string[],
result: string,
) {
const expectedStr = Array.isArray(expectedTools)
? expectedTools.join(' or ')
: expectedTools;
return (
`Expected to find ${expectedStr} tool call(s). ` +
`Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` +
`Output preview: ${result ? result.substring(0, 200) + '...' : 'no output'}`
);
}
// Helper to print debug information when tests fail
export function printDebugInfo(
rig: TestRig,
result: string,
context: Record<string, unknown> = {},
) {
console.error('Test failed - Debug info:');
console.error('Result length:', result.length);
console.error('Result (first 500 chars):', result.substring(0, 500));
console.error(
'Result (last 500 chars):',
result.substring(result.length - 500),
);
// Print any additional context provided
Object.entries(context).forEach(([key, value]) => {
console.error(`${key}:`, value);
});
// Check what tools were actually called
const allTools = rig.readToolLogs();
console.error(
'All tool calls found:',
allTools.map((t) => t.toolRequest.name),
);
return allTools;
}
// Helper to validate model output and warn about unexpected content
export function validateModelOutput(
result: string,
expectedContent: string | (string | RegExp)[] | null = null,
testName = '',
) {
// First, check if there's any output at all (this should fail the test if missing)
if (!result || result.trim().length === 0) {
throw new Error('Expected LLM to return some output');
}
// If expectedContent is provided, check for it and warn if missing
if (expectedContent) {
const contents = Array.isArray(expectedContent)
? expectedContent
: [expectedContent];
const missingContent = contents.filter((content) => {
if (typeof content === 'string') {
return !result.toLowerCase().includes(content.toLowerCase());
} else if (content instanceof RegExp) {
return !content.test(result);
}
return false;
});
if (missingContent.length > 0) {
console.warn(
`Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`,
'This is not ideal but not a test failure.',
);
console.warn(
'The tool was called successfully, which is the main requirement.',
);
return false;
} else if (process.env.VERBOSE === 'true') {
console.log(`${testName}: Model output validated successfully.`);
}
return true;
}
return true;
}
export class TestRig {
bundlePath: string;
testDir: string | null;
testName?: string;
_lastRunStdout?: string;
constructor() {
this.bundlePath = join(__dirname, '..', 'bundle/gemini.js');
this.testDir = null;
}
// Get timeout based on environment
getDefaultTimeout() {
if (env.CI) return 60000; // 1 minute in CI
if (env.GEMINI_SANDBOX) return 30000; // 30s in containers
return 15000; // 15s locally
}
setup(
testName: string,
options: { settings?: Record<string, unknown> } = {},
) {
this.testName = testName;
const sanitizedName = sanitizeTestName(testName);
this.testDir = join(env.INTEGRATION_TEST_FILE_DIR!, sanitizedName);
mkdirSync(this.testDir, { recursive: true });
// Create a settings file to point the CLI to the local collector
const geminiDir = join(this.testDir, '.gemini');
mkdirSync(geminiDir, { recursive: true });
// In sandbox mode, use an absolute path for telemetry inside the container
// The container mounts the test directory at the same path as the host
const telemetryPath =
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
? join(this.testDir, 'telemetry.log') // Absolute path in test directory
: env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox
const settings = {
telemetry: {
enabled: true,
target: 'local',
otlpEndpoint: '',
outfile: telemetryPath,
},
sandbox: env.GEMINI_SANDBOX !== 'false' ? env.GEMINI_SANDBOX : false,
...options.settings, // Allow tests to override/add settings
};
writeFileSync(
join(geminiDir, 'settings.json'),
JSON.stringify(settings, null, 2),
);
}
createFile(fileName: string, content: string) {
const filePath = join(this.testDir!, fileName);
writeFileSync(filePath, content);
return filePath;
}
mkdir(dir: string) {
mkdirSync(join(this.testDir!, dir), { recursive: true });
}
sync() {
// ensure file system is done before spawning
execSync('sync', { cwd: this.testDir! });
}
run(
promptOrOptions: string | { prompt?: string; stdin?: string },
...args: string[]
): Promise<string> {
let command = `node ${this.bundlePath} --yolo`;
const execOptions: {
cwd: string;
encoding: 'utf-8';
input?: string;
} = {
cwd: this.testDir!,
encoding: 'utf-8',
};
if (typeof promptOrOptions === 'string') {
command += ` --prompt ${JSON.stringify(promptOrOptions)}`;
} else if (
typeof promptOrOptions === 'object' &&
promptOrOptions !== null
) {
if (promptOrOptions.prompt) {
command += ` --prompt ${JSON.stringify(promptOrOptions.prompt)}`;
}
if (promptOrOptions.stdin) {
execOptions.input = promptOrOptions.stdin;
}
}
command += ` ${args.join(' ')}`;
const commandArgs = parse(command);
const node = commandArgs.shift() as string;
const child = spawn(node, commandArgs as string[], {
cwd: this.testDir!,
stdio: 'pipe',
});
let stdout = '';
let stderr = '';
// Handle stdin if provided
if (execOptions.input) {
child.stdin!.write(execOptions.input);
}
child.stdin!.end();
child.stdout!.on('data', (data: Buffer) => {
stdout += data;
if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
process.stdout.write(data);
}
});
child.stderr!.on('data', (data: Buffer) => {
stderr += data;
if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
process.stderr.write(data);
}
});
const promise = new Promise<string>((resolve, reject) => {
child.on('close', (code: number) => {
if (code === 0) {
// Store the raw stdout for Podman telemetry parsing
this._lastRunStdout = stdout;
// Filter out telemetry output when running with Podman
// Podman seems to output telemetry to stdout even when writing to file
let result = stdout;
if (env.GEMINI_SANDBOX === 'podman') {
// Remove telemetry JSON objects from output
// They are multi-line JSON objects that start with { and contain telemetry fields
const lines = result.split('\n');
const filteredLines = [];
let inTelemetryObject = false;
let braceDepth = 0;
for (const line of lines) {
if (!inTelemetryObject && line.trim() === '{') {
// Check if this might be start of telemetry object
inTelemetryObject = true;
braceDepth = 1;
} else if (inTelemetryObject) {
// Count braces to track nesting
for (const char of line) {
if (char === '{') braceDepth++;
else if (char === '}') braceDepth--;
}
// Check if we've closed all braces
if (braceDepth === 0) {
inTelemetryObject = false;
// Skip this line (the closing brace)
continue;
}
} else {
// Not in telemetry object, keep the line
filteredLines.push(line);
}
}
result = filteredLines.join('\n');
}
// If we have stderr output, include that also
if (stderr) {
result += `\n\nStdErr:\n${stderr}`;
}
resolve(result);
} else {
reject(new Error(`Process exited with code ${code}:\n${stderr}`));
}
});
});
return promise;
}
readFile(fileName: string) {
const filePath = join(this.testDir!, fileName);
const content = readFileSync(filePath, 'utf-8');
if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
console.log(`--- FILE: ${filePath} ---`);
console.log(content);
console.log(`--- END FILE: ${filePath} ---`);
}
return content;
}
async cleanup() {
// Clean up test directory
if (this.testDir && !env.KEEP_OUTPUT) {
try {
execSync(`rm -rf ${this.testDir}`);
} catch (error) {
// Ignore cleanup errors
if (env.VERBOSE === 'true') {
console.warn('Cleanup warning:', (error as Error).message);
}
}
}
}
async waitForTelemetryReady() {
// In sandbox mode, telemetry is written to a relative path in the test directory
const logFilePath =
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
? join(this.testDir!, 'telemetry.log')
: env.TELEMETRY_LOG_FILE;
if (!logFilePath) return;
// Wait for telemetry file to exist and have content
await this.poll(
() => {
if (!fs.existsSync(logFilePath)) return false;
try {
const content = readFileSync(logFilePath, 'utf-8');
// Check if file has meaningful content (at least one complete JSON object)
return content.includes('"event.name"');
} catch {
return false;
}
},
2000, // 2 seconds max - reduced since telemetry should flush on exit now
100, // check every 100ms
);
}
async waitForToolCall(toolName: string, timeout?: number) {
// Use environment-specific timeout
if (!timeout) {
timeout = this.getDefaultTimeout();
}
// Wait for telemetry to be ready before polling for tool calls
await this.waitForTelemetryReady();
return this.poll(
() => {
const toolLogs = this.readToolLogs();
return toolLogs.some((log) => log.toolRequest.name === toolName);
},
timeout,
100,
);
}
async waitForAnyToolCall(toolNames: string[], timeout?: number) {
// Use environment-specific timeout
if (!timeout) {
timeout = this.getDefaultTimeout();
}
// Wait for telemetry to be ready before polling for tool calls
await this.waitForTelemetryReady();
return this.poll(
() => {
const toolLogs = this.readToolLogs();
return toolNames.some((name) =>
toolLogs.some((log) => log.toolRequest.name === name),
);
},
timeout,
100,
);
}
async poll(
predicate: () => boolean,
timeout: number,
interval: number,
): Promise<boolean> {
const startTime = Date.now();
let attempts = 0;
while (Date.now() - startTime < timeout) {
attempts++;
const result = predicate();
if (env.VERBOSE === 'true' && attempts % 5 === 0) {
console.log(
`Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`,
);
}
if (result) {
return true;
}
await new Promise((resolve) => setTimeout(resolve, interval));
}
if (env.VERBOSE === 'true') {
console.log(`Poll timed out after ${attempts} attempts`);
}
return false;
}
_parseToolLogsFromStdout(stdout: string) {
const logs: {
timestamp: number;
toolRequest: {
name: string;
args: string;
success: boolean;
duration_ms: number;
};
}[] = [];
// The console output from Podman is JavaScript object notation, not JSON
// Look for tool call events in the output
// Updated regex to handle tool names with hyphens and underscores
const toolCallPattern =
/body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g;
const matches = [...stdout.matchAll(toolCallPattern)];
for (const match of matches) {
const toolName = match[1];
const success = match[2] === 'true';
const duration = parseInt(match[3], 10);
// Try to find function_args nearby
const matchIndex = match.index || 0;
const contextStart = Math.max(0, matchIndex - 500);
const contextEnd = Math.min(stdout.length, matchIndex + 500);
const context = stdout.substring(contextStart, contextEnd);
// Look for function_args in the context
let args = '{}';
const argsMatch = context.match(/function_args:\s*'([^']+)'/);
if (argsMatch) {
args = argsMatch[1];
}
// Also try to find function_name to double-check
// Updated regex to handle tool names with hyphens and underscores
const nameMatch = context.match(/function_name:\s*'([\w-]+)'/);
const actualToolName = nameMatch ? nameMatch[1] : toolName;
logs.push({
timestamp: Date.now(),
toolRequest: {
name: actualToolName,
args: args,
success: success,
duration_ms: duration,
},
});
}
// If no matches found with the simple pattern, try the JSON parsing approach
// in case the format changes
if (logs.length === 0) {
const lines = stdout.split('\n');
let currentObject = '';
let inObject = false;
let braceDepth = 0;
for (const line of lines) {
if (!inObject && line.trim() === '{') {
inObject = true;
braceDepth = 1;
currentObject = line + '\n';
} else if (inObject) {
currentObject += line + '\n';
// Count braces
for (const char of line) {
if (char === '{') braceDepth++;
else if (char === '}') braceDepth--;
}
// If we've closed all braces, try to parse the object
if (braceDepth === 0) {
inObject = false;
try {
const obj = JSON.parse(currentObject);
// Check for tool call in different formats
if (
obj.body &&
obj.body.includes('Tool call:') &&
obj.attributes
) {
const bodyMatch = obj.body.match(/Tool call: (\w+)\./);
if (bodyMatch) {
logs.push({
timestamp: obj.timestamp || Date.now(),
toolRequest: {
name: bodyMatch[1],
args: obj.attributes.function_args || '{}',
success: obj.attributes.success !== false,
duration_ms: obj.attributes.duration_ms || 0,
},
});
}
} else if (
obj.attributes &&
obj.attributes['event.name'] === 'gemini_cli.tool_call'
) {
logs.push({
timestamp: obj.attributes['event.timestamp'],
toolRequest: {
name: obj.attributes.function_name,
args: obj.attributes.function_args,
success: obj.attributes.success,
duration_ms: obj.attributes.duration_ms,
},
});
}
} catch {
// Not valid JSON
}
currentObject = '';
}
}
}
}
return logs;
}
readToolLogs() {
// For Podman, first check if telemetry file exists and has content
// If not, fall back to parsing from stdout
if (env.GEMINI_SANDBOX === 'podman') {
// Try reading from file first
const logFilePath = join(this.testDir!, 'telemetry.log');
if (fs.existsSync(logFilePath)) {
try {
const content = readFileSync(logFilePath, 'utf-8');
if (content && content.includes('"event.name"')) {
// File has content, use normal file parsing
// Continue to the normal file parsing logic below
} else if (this._lastRunStdout) {
// File exists but is empty or doesn't have events, parse from stdout
return this._parseToolLogsFromStdout(this._lastRunStdout);
}
} catch {
// Error reading file, fall back to stdout
if (this._lastRunStdout) {
return this._parseToolLogsFromStdout(this._lastRunStdout);
}
}
} else if (this._lastRunStdout) {
// No file exists, parse from stdout
return this._parseToolLogsFromStdout(this._lastRunStdout);
}
}
// In sandbox mode, telemetry is written to a relative path in the test directory
const logFilePath =
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
? join(this.testDir!, 'telemetry.log')
: env.TELEMETRY_LOG_FILE;
if (!logFilePath) {
console.warn(`TELEMETRY_LOG_FILE environment variable not set`);
return [];
}
// Check if file exists, if not return empty array (file might not be created yet)
if (!fs.existsSync(logFilePath)) {
return [];
}
const content = readFileSync(logFilePath, 'utf-8');
// Split the content into individual JSON objects
// They are separated by "}\n{"
const jsonObjects = content
.split(/}\s*\n\s*{/)
.map((obj, index, array) => {
// Add back the braces we removed during split
if (index > 0) obj = '{' + obj;
if (index < array.length - 1) obj = obj + '}';
return obj.trim();
})
.filter((obj) => obj);
const logs: {
toolRequest: {
name: string;
args: string;
success: boolean;
duration_ms: number;
};
}[] = [];
for (const jsonStr of jsonObjects) {
try {
const logData = JSON.parse(jsonStr);
// Look for tool call logs
if (
logData.attributes &&
logData.attributes['event.name'] === 'gemini_cli.tool_call'
) {
const toolName = logData.attributes.function_name;
logs.push({
toolRequest: {
name: toolName,
args: logData.attributes.function_args,
success: logData.attributes.success,
duration_ms: logData.attributes.duration_ms,
},
});
}
} catch (e) {
// Skip objects that aren't valid JSON
if (env.VERBOSE === 'true') {
console.error(
'Failed to parse telemetry object:',
(e as Error).message,
);
}
}
}
return logs;
}
}