Upgrade integration tests to use Vitest (#6021)

Tommaso Sciortino 2025-08-12 15:57:27 -07:00 committed by GitHub
parent 8d6eb8c322
commit 9d023be1d1
18 changed files with 511 additions and 620 deletions


@ -46,7 +46,9 @@ jobs:
- name: Run E2E tests
env:
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
run: npm run test:integration:${{ matrix.sandbox }} -- --verbose --keep-output
VERBOSE: true
KEEP_OUTPUT: true
run: npm run test:integration:${{ matrix.sandbox }}
e2e-test-macos:
name: E2E Test - macOS

.vscode/launch.json vendored

@ -17,19 +17,6 @@
"GEMINI_SANDBOX": "false"
}
},
{
"type": "node",
"request": "launch",
"name": "Launch E2E",
"program": "${workspaceFolder}/integration-tests/run-tests.js",
"args": ["--verbose", "--keep-output", "list_directory"],
"skipFiles": ["<node_internals>/**"],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"env": {
"GEMINI_SANDBOX": "false"
}
},
{
"name": "Launch Companion VS Code Extension",
"type": "extensionHost",


@ -67,13 +67,9 @@ The integration test runner provides several options for diagnostics to help tra
You can preserve the temporary files created during a test run for inspection. This is useful for debugging issues with file system operations.
To keep the test output, you can either use the `--keep-output` flag or set the `KEEP_OUTPUT` environment variable to `true`.
To keep the test output, set the `KEEP_OUTPUT` environment variable to `true`.
```bash
# Using the flag
npm run test:integration:sandbox:none -- --keep-output
# Using the environment variable
KEEP_OUTPUT=true npm run test:integration:sandbox:none
```
@ -81,20 +77,20 @@ When output is kept, the test runner will print the path to the unique directory
### Verbose output
For more detailed debugging, the `--verbose` flag streams the real-time output from the `gemini` command to the console.
For more detailed debugging, set the `VERBOSE` environment variable to `true`.
```bash
npm run test:integration:sandbox:none -- --verbose
VERBOSE=true npm run test:integration:sandbox:none
```
When using `--verbose` and `--keep-output` in the same command, the output is streamed to the console and also saved to a log file within the test's temporary directory.
When using `VERBOSE=true` and `KEEP_OUTPUT=true` in the same command, the output is streamed to the console and also saved to a log file within the test's temporary directory.
The verbose output is formatted to clearly identify the source of the logs:
```
--- TEST: <file-name-without-js>:<test-name> ---
--- TEST: <log dir>:<test-name> ---
... output from the gemini command ...
--- END TEST: <file-name-without-js>:<test-name> ---
--- END TEST: <log dir>:<test-name> ---
```
## Linting and formatting


@ -4,86 +4,90 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { strict as assert } from 'assert';
import { test } from 'node:test';
import { describe, it, expect } from 'vitest';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
test('should be able to read a file', async () => {
const rig = new TestRig();
await rig.setup('should be able to read a file');
rig.createFile('test.txt', 'hello world');
describe('file-system', () => {
it('should be able to read a file', async () => {
const rig = new TestRig();
await rig.setup('should be able to read a file');
rig.createFile('test.txt', 'hello world');
const result = await rig.run(
`read the file test.txt and show me its contents`,
);
const result = await rig.run(
`read the file test.txt and show me its contents`,
);
const foundToolCall = await rig.waitForToolCall('read_file');
const foundToolCall = await rig.waitForToolCall('read_file');
// Add debugging information
if (!foundToolCall || !result.includes('hello world')) {
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Contains hello world': result.includes('hello world'),
});
}
// Add debugging information
if (!foundToolCall || !result.includes('hello world')) {
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Contains hello world': result.includes('hello world'),
});
}
assert.ok(foundToolCall, 'Expected to find a read_file tool call');
expect(
foundToolCall,
'Expected to find a read_file tool call',
).toBeTruthy();
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'hello world', 'File read test');
});
test('should be able to write a file', async () => {
const rig = new TestRig();
await rig.setup('should be able to write a file');
rig.createFile('test.txt', '');
const result = await rig.run(`edit test.txt to have a hello world message`);
// Accept multiple valid tools for editing files
const foundToolCall = await rig.waitForAnyToolCall([
'write_file',
'edit',
'replace',
]);
// Add debugging information
if (!foundToolCall) {
printDebugInfo(rig, result);
}
assert.ok(
foundToolCall,
'Expected to find a write_file, edit, or replace tool call',
);
// Validate model output - will throw if no output
validateModelOutput(result, null, 'File write test');
const fileContent = rig.readFile('test.txt');
// Add debugging for file content
if (!fileContent.toLowerCase().includes('hello')) {
const writeCalls = rig
.readToolLogs()
.filter((t) => t.toolRequest.name === 'write_file')
.map((t) => t.toolRequest.args);
printDebugInfo(rig, result, {
'File content mismatch': true,
'Expected to contain': 'hello',
'Actual content': fileContent,
'Write tool calls': JSON.stringify(writeCalls),
});
}
assert.ok(
fileContent.toLowerCase().includes('hello'),
'Expected file to contain hello',
);
// Log success info if verbose
if (process.env.VERBOSE === 'true') {
console.log('File written successfully with hello message.');
}
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'hello world', 'File read test');
});
it('should be able to write a file', async () => {
const rig = new TestRig();
await rig.setup('should be able to write a file');
rig.createFile('test.txt', '');
const result = await rig.run(`edit test.txt to have a hello world message`);
// Accept multiple valid tools for editing files
const foundToolCall = await rig.waitForAnyToolCall([
'write_file',
'edit',
'replace',
]);
// Add debugging information
if (!foundToolCall) {
printDebugInfo(rig, result);
}
expect(
foundToolCall,
'Expected to find a write_file, edit, or replace tool call',
).toBeTruthy();
// Validate model output - will throw if no output
validateModelOutput(result, null, 'File write test');
const fileContent = rig.readFile('test.txt');
// Add debugging for file content
if (!fileContent.toLowerCase().includes('hello')) {
const writeCalls = rig
.readToolLogs()
.filter((t) => t.toolRequest.name === 'write_file')
.map((t) => t.toolRequest.args);
printDebugInfo(rig, result, {
'File content mismatch': true,
'Expected to contain': 'hello',
'Actual content': fileContent,
'Write tool calls': JSON.stringify(writeCalls),
});
}
expect(
fileContent.toLowerCase().includes('hello'),
'Expected file to contain hello',
).toBeTruthy();
// Log success info if verbose
if (process.env.VERBOSE === 'true') {
console.log('File written successfully with hello message.');
}
});
});
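The block above establishes the migration pattern repeated in the remaining suites: bare node:test `test()` calls become `describe`/`it` blocks, and `assert.ok(value, message)` becomes `expect(value, message).toBeTruthy()`, which keeps the custom failure message. A stripped-down sketch of that idiom (illustrative only, not a file in this commit):

```ts
import { describe, it, expect } from 'vitest';

describe('migration pattern', () => {
  it('preserves custom failure messages', () => {
    // Stand-in for a value such as rig.waitForToolCall('read_file').
    const foundToolCall = true;
    // The second argument to expect() plays the role of the old
    // assert.ok(value, message) failure message.
    expect(foundToolCall, 'Expected to find a read_file tool call').toBeTruthy();
  });
});
```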


@ -0,0 +1,55 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { mkdir, readdir, rm } from 'fs/promises';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
const __dirname = dirname(fileURLToPath(import.meta.url));
const rootDir = join(__dirname, '..');
const integrationTestsDir = join(rootDir, '.integration-tests');
let runDir = ''; // Make runDir accessible in teardown
export async function setup() {
runDir = join(integrationTestsDir, `${Date.now()}`);
await mkdir(runDir, { recursive: true });
// Clean up old test runs, but keep the latest few for debugging
try {
const testRuns = await readdir(integrationTestsDir);
if (testRuns.length > 5) {
const oldRuns = testRuns.sort().slice(0, testRuns.length - 5);
await Promise.all(
oldRuns.map((oldRun) =>
rm(join(integrationTestsDir, oldRun), {
recursive: true,
force: true,
}),
),
);
}
} catch (e) {
console.error('Error cleaning up old test runs:', e);
}
process.env.INTEGRATION_TEST_FILE_DIR = runDir;
process.env.GEMINI_CLI_INTEGRATION_TEST = 'true';
process.env.TELEMETRY_LOG_FILE = join(runDir, 'telemetry.log');
if (process.env.KEEP_OUTPUT) {
console.log(`Keeping output for test run in: ${runDir}`);
}
process.env.VERBOSE = process.env.VERBOSE ?? 'false';
console.log(`\nIntegration test output directory: ${runDir}`);
}
export async function teardown() {
// Cleanup the test run directory unless KEEP_OUTPUT is set
if (process.env.KEEP_OUTPUT !== 'true' && runDir) {
await rm(runDir, { recursive: true, force: true });
}
}
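Vitest runs the exported `setup()` and `teardown()` above once per `vitest` invocation (wired up via the `globalSetup` entry in the new vitest.config.ts later in this commit), before the first test file starts and after the last one finishes. The worker processes inherit the environment prepared here, which is what `TestRig` relies on. A minimal sketch of a spec leaning on that wiring (hypothetical file, not part of this commit):

```ts
import { describe, it, expect } from 'vitest';

// By the time any test file runs, setup() above has already created the
// per-run directory and exported its location through the environment.
describe('global setup wiring', () => {
  it('sees the per-run output directory', () => {
    expect(process.env.INTEGRATION_TEST_FILE_DIR).toBeTruthy();
    expect(process.env.GEMINI_CLI_INTEGRATION_TEST).toBe('true');
    expect(process.env.TELEMETRY_LOG_FILE).toContain('telemetry.log');
  });
});
```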


@ -4,74 +4,78 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { test } from 'node:test';
import { strict as assert } from 'assert';
import { describe, it, expect } from 'vitest';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
test('should be able to search the web', async () => {
const rig = new TestRig();
await rig.setup('should be able to search the web');
describe('google_web_search', () => {
it('should be able to search the web', async () => {
const rig = new TestRig();
await rig.setup('should be able to search the web');
let result;
try {
result = await rig.run(`what is the weather in London`);
} catch (error) {
// Network errors can occur in CI environments
if (
error instanceof Error &&
(error.message.includes('network') || error.message.includes('timeout'))
) {
console.warn(
'Skipping test due to network error:',
(error as Error).message,
);
return; // Skip the test
let result;
try {
result = await rig.run(`what is the weather in London`);
} catch (error) {
// Network errors can occur in CI environments
if (
error instanceof Error &&
(error.message.includes('network') || error.message.includes('timeout'))
) {
console.warn(
'Skipping test due to network error:',
(error as Error).message,
);
return; // Skip the test
}
throw error; // Re-throw if not a network error
}
throw error; // Re-throw if not a network error
}
const foundToolCall = await rig.waitForToolCall('google_web_search');
const foundToolCall = await rig.waitForToolCall('google_web_search');
// Add debugging information
if (!foundToolCall) {
const allTools = printDebugInfo(rig, result);
// Add debugging information
if (!foundToolCall) {
const allTools = printDebugInfo(rig, result);
// Check if the tool call failed due to network issues
const failedSearchCalls = allTools.filter(
(t) =>
t.toolRequest.name === 'google_web_search' && !t.toolRequest.success,
// Check if the tool call failed due to network issues
const failedSearchCalls = allTools.filter(
(t) =>
t.toolRequest.name === 'google_web_search' && !t.toolRequest.success,
);
if (failedSearchCalls.length > 0) {
console.warn(
'google_web_search tool was called but failed, possibly due to network issues',
);
console.warn(
'Failed calls:',
failedSearchCalls.map((t) => t.toolRequest.args),
);
return; // Skip the test if network issues
}
}
expect(
foundToolCall,
'Expected to find a call to google_web_search',
).toBeTruthy();
// Validate model output - will throw if no output, warn if missing expected content
const hasExpectedContent = validateModelOutput(
result,
['weather', 'london'],
'Google web search test',
);
if (failedSearchCalls.length > 0) {
console.warn(
'google_web_search tool was called but failed, possibly due to network issues',
);
console.warn(
'Failed calls:',
failedSearchCalls.map((t) => t.toolRequest.args),
);
return; // Skip the test if network issues
// If content was missing, log the search queries used
if (!hasExpectedContent) {
const searchCalls = rig
.readToolLogs()
.filter((t) => t.toolRequest.name === 'google_web_search');
if (searchCalls.length > 0) {
console.warn(
'Search queries used:',
searchCalls.map((t) => t.toolRequest.args),
);
}
}
}
assert.ok(foundToolCall, 'Expected to find a call to google_web_search');
// Validate model output - will throw if no output, warn if missing expected content
const hasExpectedContent = validateModelOutput(
result,
['weather', 'london'],
'Google web search test',
);
// If content was missing, log the search queries used
if (!hasExpectedContent) {
const searchCalls = rig
.readToolLogs()
.filter((t) => t.toolRequest.name === 'google_web_search');
if (searchCalls.length > 0) {
console.warn(
'Search queries used:',
searchCalls.map((t) => t.toolRequest.args),
);
}
}
});
});


@ -4,59 +4,63 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { test } from 'node:test';
import { strict as assert } from 'assert';
import { describe, it, expect } from 'vitest';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
import { existsSync } from 'fs';
import { join } from 'path';
test('should be able to list a directory', async () => {
const rig = new TestRig();
await rig.setup('should be able to list a directory');
rig.createFile('file1.txt', 'file 1 content');
rig.mkdir('subdir');
rig.sync();
describe('list_directory', () => {
it('should be able to list a directory', async () => {
const rig = new TestRig();
await rig.setup('should be able to list a directory');
rig.createFile('file1.txt', 'file 1 content');
rig.mkdir('subdir');
rig.sync();
// Poll for filesystem changes to propagate in containers
await rig.poll(
() => {
// Check if the files exist in the test directory
const file1Path = join(rig.testDir!, 'file1.txt');
const subdirPath = join(rig.testDir!, 'subdir');
return existsSync(file1Path) && existsSync(subdirPath);
},
1000, // 1 second max wait
50, // check every 50ms
);
const prompt = `Can you list the files in the current directory. Display them in the style of 'ls'`;
const result = await rig.run(prompt);
const foundToolCall = await rig.waitForToolCall('list_directory');
// Add debugging information
if (
!foundToolCall ||
!result.includes('file1.txt') ||
!result.includes('subdir')
) {
const allTools = printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Contains file1.txt': result.includes('file1.txt'),
'Contains subdir': result.includes('subdir'),
});
console.error(
'List directory calls:',
allTools
.filter((t) => t.toolRequest.name === 'list_directory')
.map((t) => t.toolRequest.args),
// Poll for filesystem changes to propagate in containers
await rig.poll(
() => {
// Check if the files exist in the test directory
const file1Path = join(rig.testDir!, 'file1.txt');
const subdirPath = join(rig.testDir!, 'subdir');
return existsSync(file1Path) && existsSync(subdirPath);
},
1000, // 1 second max wait
50, // check every 50ms
);
}
assert.ok(foundToolCall, 'Expected to find a list_directory tool call');
const prompt = `Can you list the files in the current directory. Display them in the style of 'ls'`;
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, ['file1.txt', 'subdir'], 'List directory test');
const result = await rig.run(prompt);
const foundToolCall = await rig.waitForToolCall('list_directory');
// Add debugging information
if (
!foundToolCall ||
!result.includes('file1.txt') ||
!result.includes('subdir')
) {
const allTools = printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Contains file1.txt': result.includes('file1.txt'),
'Contains subdir': result.includes('subdir'),
});
console.error(
'List directory calls:',
allTools
.filter((t) => t.toolRequest.name === 'list_directory')
.map((t) => t.toolRequest.args),
);
}
expect(
foundToolCall,
'Expected to find a list_directory tool call',
).toBeTruthy();
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, ['file1.txt', 'subdir'], 'List directory test');
});
});


@ -9,15 +9,11 @@
* and then detect and warn about the potential tools that caused the error.
*/
import { test, describe, before } from 'node:test';
import { strict as assert } from 'node:assert';
import { describe, it, beforeAll, expect } from 'vitest';
import { TestRig } from './test-helper.js';
import { join } from 'path';
import { fileURLToPath } from 'url';
import { writeFileSync } from 'fs';
const __dirname = fileURLToPath(new URL('.', import.meta.url));
// Create a minimal MCP server that doesn't require external dependencies
// This implements the MCP protocol directly using Node.js built-ins
const serverScript = `#!/usr/bin/env node
@ -160,7 +156,7 @@ rpc.send({
describe('mcp server with cyclic tool schema is detected', () => {
const rig = new TestRig();
before(async () => {
beforeAll(async () => {
// Setup test directory with MCP server configuration
await rig.setup('cyclic-schema-mcp-server', {
settings: {
@ -174,7 +170,7 @@ describe('mcp server with cyclic tool schema is detected', () => {
});
// Create server script in the test directory
const testServerPath = join(rig.testDir, 'mcp-server.cjs');
const testServerPath = join(rig.testDir!, 'mcp-server.cjs');
writeFileSync(testServerPath, serverScript);
// Make the script executable (though running with 'node' should work anyway)
@ -184,15 +180,14 @@ describe('mcp server with cyclic tool schema is detected', () => {
}
});
test('should error and suggest disabling the cyclic tool', async () => {
it('should error and suggest disabling the cyclic tool', async () => {
// Just run any command to trigger the schema depth error.
// If this test starts failing, check `isSchemaDepthError` from
// geminiChat.ts to see if it needs to be updated.
// Or, possibly it could mean that gemini has fixed the issue.
const output = await rig.run('hello');
assert.match(
output,
expect(output).toMatch(
/Skipping tool 'tool_with_cyclic_schema' from MCP server 'cyclic-schema-server' because it has missing types in its parameter schema/,
);
});


@ -4,47 +4,48 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { test } from 'node:test';
import { strict as assert } from 'assert';
import { describe, it, expect } from 'vitest';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
test('should be able to read multiple files', async () => {
const rig = new TestRig();
await rig.setup('should be able to read multiple files');
rig.createFile('file1.txt', 'file 1 content');
rig.createFile('file2.txt', 'file 2 content');
describe('read_many_files', () => {
it('should be able to read multiple files', async () => {
const rig = new TestRig();
await rig.setup('should be able to read multiple files');
rig.createFile('file1.txt', 'file 1 content');
rig.createFile('file2.txt', 'file 2 content');
const prompt = `Please use read_many_files to read file1.txt and file2.txt and show me what's in them`;
const prompt = `Please use read_many_files to read file1.txt and file2.txt and show me what's in them`;
const result = await rig.run(prompt);
const result = await rig.run(prompt);
// Check for either read_many_files or multiple read_file calls
const allTools = rig.readToolLogs();
const readManyFilesCall = await rig.waitForToolCall('read_many_files');
const readFileCalls = allTools.filter(
(t) => t.toolRequest.name === 'read_file',
);
// Check for either read_many_files or multiple read_file calls
const allTools = rig.readToolLogs();
const readManyFilesCall = await rig.waitForToolCall('read_many_files');
const readFileCalls = allTools.filter(
(t) => t.toolRequest.name === 'read_file',
);
// Accept either read_many_files OR at least 2 read_file calls
const foundValidPattern = readManyFilesCall || readFileCalls.length >= 2;
// Accept either read_many_files OR at least 2 read_file calls
const foundValidPattern = readManyFilesCall || readFileCalls.length >= 2;
// Add debugging information
if (!foundValidPattern) {
printDebugInfo(rig, result, {
'read_many_files called': readManyFilesCall,
'read_file calls': readFileCalls.length,
});
}
// Add debugging information
if (!foundValidPattern) {
printDebugInfo(rig, result, {
'read_many_files called': readManyFilesCall,
'read_file calls': readFileCalls.length,
});
}
assert.ok(
foundValidPattern,
'Expected to find either read_many_files or multiple read_file tool calls',
);
expect(
foundValidPattern,
'Expected to find either read_many_files or multiple read_file tool calls',
).toBeTruthy();
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(
result,
['file 1 content', 'file 2 content'],
'Read many files test',
);
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(
result,
['file 1 content', 'file 2 content'],
'Read many files test',
);
});
});


@ -4,63 +4,60 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { test } from 'node:test';
import { strict as assert } from 'assert';
import { describe, it, expect } from 'vitest';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
test('should be able to replace content in a file', async () => {
const rig = new TestRig();
await rig.setup('should be able to replace content in a file');
describe('replace', () => {
it('should be able to replace content in a file', async () => {
const rig = new TestRig();
await rig.setup('should be able to replace content in a file');
const fileName = 'file_to_replace.txt';
const originalContent = 'original content';
const expectedContent = 'replaced content';
const fileName = 'file_to_replace.txt';
const originalContent = 'original content';
const expectedContent = 'replaced content';
rig.createFile(fileName, originalContent);
const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`;
rig.createFile(fileName, originalContent);
const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`;
const result = await rig.run(prompt);
const result = await rig.run(prompt);
const foundToolCall = await rig.waitForToolCall('replace');
const foundToolCall = await rig.waitForToolCall('replace');
// Add debugging information
if (!foundToolCall) {
printDebugInfo(rig, result);
}
// Add debugging information
if (!foundToolCall) {
printDebugInfo(rig, result);
}
assert.ok(foundToolCall, 'Expected to find a replace tool call');
expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy();
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(
result,
['replaced', 'file_to_replace.txt'],
'Replace content test',
);
const newFileContent = rig.readFile(fileName);
// Add debugging for file content
if (newFileContent !== expectedContent) {
console.error('File content mismatch - Debug info:');
console.error('Expected:', expectedContent);
console.error('Actual:', newFileContent);
console.error(
'Tool calls:',
rig.readToolLogs().map((t) => ({
name: t.toolRequest.name,
args: t.toolRequest.args,
})),
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(
result,
['replaced', 'file_to_replace.txt'],
'Replace content test',
);
}
assert.strictEqual(
newFileContent,
expectedContent,
'File content should be updated correctly',
);
const newFileContent = rig.readFile(fileName);
// Log success info if verbose
if (process.env.VERBOSE === 'true') {
console.log('File replaced successfully. New content:', newFileContent);
}
// Add debugging for file content
if (newFileContent !== expectedContent) {
console.error('File content mismatch - Debug info:');
console.error('Expected:', expectedContent);
console.error('Actual:', newFileContent);
console.error(
'Tool calls:',
rig.readToolLogs().map((t) => ({
name: t.toolRequest.name,
args: t.toolRequest.args,
})),
);
}
expect(newFileContent).toBe(expectedContent);
// Log success info if verbose
if (process.env.VERBOSE === 'true') {
console.log('File replaced successfully. New content:', newFileContent);
}
});
});


@ -1,182 +0,0 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { spawnSync, spawn } from 'child_process';
import { mkdirSync, rmSync, createWriteStream } from 'fs';
import { join, dirname, basename } from 'path';
import { fileURLToPath } from 'url';
import { glob } from 'glob';
async function main() {
const __dirname = dirname(fileURLToPath(import.meta.url));
const rootDir = join(__dirname, '..');
const integrationTestsDir = join(rootDir, '.integration-tests');
if (process.env.GEMINI_SANDBOX === 'docker' && !process.env.IS_DOCKER) {
console.log('Building sandbox for Docker...');
const buildResult = spawnSync('npm', ['run', 'build:all'], {
stdio: 'inherit',
});
if (buildResult.status !== 0) {
console.error('Sandbox build failed.');
process.exit(1);
}
}
const runId = `${Date.now()}`;
const runDir = join(integrationTestsDir, runId);
mkdirSync(runDir, { recursive: true });
const args = process.argv.slice(2);
const keepOutput =
process.env.KEEP_OUTPUT === 'true' || args.includes('--keep-output');
if (keepOutput) {
const keepOutputIndex = args.indexOf('--keep-output');
if (keepOutputIndex > -1) {
args.splice(keepOutputIndex, 1);
}
console.log(`Keeping output for test run in: ${runDir}`);
}
const verbose = args.includes('--verbose');
if (verbose) {
const verboseIndex = args.indexOf('--verbose');
if (verboseIndex > -1) {
args.splice(verboseIndex, 1);
}
}
const testPatterns =
args.length > 0
? args.map((arg) => `integration-tests/${arg}.test.ts`)
: ['integration-tests/*.test.ts'];
const testFiles = glob.sync(testPatterns, { cwd: rootDir, absolute: true });
for (const testFile of testFiles) {
const testFileName = basename(testFile);
console.log(` Found test file: ${testFileName}`);
}
const MAX_RETRIES = 3;
let allTestsPassed = true;
for (const testFile of testFiles) {
const testFileName = basename(testFile);
const testFileDir = join(runDir, testFileName);
mkdirSync(testFileDir, { recursive: true });
console.log(
`------------- Running test file: ${testFileName} ------------------------------`,
);
let attempt = 0;
let testFilePassed = false;
let lastStdout = [];
let lastStderr = [];
while (attempt < MAX_RETRIES && !testFilePassed) {
attempt++;
if (attempt > 1) {
console.log(
`--- Retrying ${testFileName} (attempt ${attempt} of ${MAX_RETRIES}) ---`,
);
}
const nodeArgs = ['--test'];
if (verbose) {
nodeArgs.push('--test-reporter=spec');
}
nodeArgs.push(testFile);
const child = spawn('npx', ['tsx', ...nodeArgs], {
stdio: 'pipe',
env: {
...process.env,
GEMINI_CLI_INTEGRATION_TEST: 'true',
INTEGRATION_TEST_FILE_DIR: testFileDir,
KEEP_OUTPUT: keepOutput.toString(),
VERBOSE: verbose.toString(),
TEST_FILE_NAME: testFileName,
TELEMETRY_LOG_FILE: join(testFileDir, 'telemetry.log'),
},
});
let outputStream;
if (keepOutput) {
const outputFile = join(testFileDir, `output-attempt-${attempt}.log`);
outputStream = createWriteStream(outputFile);
console.log(`Output for ${testFileName} written to: ${outputFile}`);
}
const stdout = [];
const stderr = [];
child.stdout.on('data', (data) => {
if (verbose) {
process.stdout.write(data);
} else {
stdout.push(data);
}
if (outputStream) {
outputStream.write(data);
}
});
child.stderr.on('data', (data) => {
if (verbose) {
process.stderr.write(data);
} else {
stderr.push(data);
}
if (outputStream) {
outputStream.write(data);
}
});
const exitCode = await new Promise((resolve) => {
child.on('close', (code) => {
if (outputStream) {
outputStream.end(() => {
resolve(code);
});
} else {
resolve(code);
}
});
});
if (exitCode === 0) {
testFilePassed = true;
} else {
lastStdout = stdout;
lastStderr = stderr;
}
}
if (!testFilePassed) {
console.error(
`Test file failed after ${MAX_RETRIES} attempts: ${testFileName}`,
);
if (!verbose) {
process.stdout.write(Buffer.concat(lastStdout).toString('utf8'));
process.stderr.write(Buffer.concat(lastStderr).toString('utf8'));
}
allTestsPassed = false;
}
}
if (!keepOutput) {
rmSync(runDir, { recursive: true, force: true });
}
if (!allTestsPassed) {
console.error('One or more test files failed.');
process.exit(1);
}
}
main();


@ -4,60 +4,67 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { test } from 'node:test';
import { strict as assert } from 'assert';
import { describe, it, expect } from 'vitest';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
test('should be able to run a shell command', async () => {
const rig = new TestRig();
await rig.setup('should be able to run a shell command');
describe('run_shell_command', () => {
it('should be able to run a shell command', async () => {
const rig = new TestRig();
await rig.setup('should be able to run a shell command');
const prompt = `Please run the command "echo hello-world" and show me the output`;
const prompt = `Please run the command "echo hello-world" and show me the output`;
const result = await rig.run(prompt);
const result = await rig.run(prompt);
const foundToolCall = await rig.waitForToolCall('run_shell_command');
const foundToolCall = await rig.waitForToolCall('run_shell_command');
// Add debugging information
if (!foundToolCall || !result.includes('hello-world')) {
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Contains hello-world': result.includes('hello-world'),
});
}
// Add debugging information
if (!foundToolCall || !result.includes('hello-world')) {
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Contains hello-world': result.includes('hello-world'),
});
}
assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call');
expect(
foundToolCall,
'Expected to find a run_shell_command tool call',
).toBeTruthy();
// Validate model output - will throw if no output, warn if missing expected content
// Model often reports exit code instead of showing output
validateModelOutput(
result,
['hello-world', 'exit code 0'],
'Shell command test',
);
});
test('should be able to run a shell command via stdin', async () => {
const rig = new TestRig();
await rig.setup('should be able to run a shell command via stdin');
const prompt = `Please run the command "echo test-stdin" and show me what it outputs`;
const result = await rig.run({ stdin: prompt });
const foundToolCall = await rig.waitForToolCall('run_shell_command');
// Add debugging information
if (!foundToolCall || !result.includes('test-stdin')) {
printDebugInfo(rig, result, {
'Test type': 'Stdin test',
'Found tool call': foundToolCall,
'Contains test-stdin': result.includes('test-stdin'),
});
}
assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call');
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'test-stdin', 'Shell command stdin test');
// Validate model output - will throw if no output, warn if missing expected content
// Model often reports exit code instead of showing output
validateModelOutput(
result,
['hello-world', 'exit code 0'],
'Shell command test',
);
});
it('should be able to run a shell command via stdin', async () => {
const rig = new TestRig();
await rig.setup('should be able to run a shell command via stdin');
const prompt = `Please run the command "echo test-stdin" and show me what it outputs`;
const result = await rig.run({ stdin: prompt });
const foundToolCall = await rig.waitForToolCall('run_shell_command');
// Add debugging information
if (!foundToolCall || !result.includes('test-stdin')) {
printDebugInfo(rig, result, {
'Test type': 'Stdin test',
'Found tool call': foundToolCall,
'Contains test-stdin': result.includes('test-stdin'),
});
}
expect(
foundToolCall,
'Expected to find a run_shell_command tool call',
).toBeTruthy();
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'test-stdin', 'Shell command stdin test');
});
});


@ -4,38 +4,42 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { test } from 'node:test';
import { strict as assert } from 'assert';
import { describe, it, expect } from 'vitest';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
test('should be able to save to memory', async () => {
const rig = new TestRig();
await rig.setup('should be able to save to memory');
describe('save_memory', () => {
it('should be able to save to memory', async () => {
const rig = new TestRig();
await rig.setup('should be able to save to memory');
const prompt = `remember that my favorite color is blue.
const prompt = `remember that my favorite color is blue.
what is my favorite color? tell me that and surround it with $ symbol`;
const result = await rig.run(prompt);
const result = await rig.run(prompt);
const foundToolCall = await rig.waitForToolCall('save_memory');
const foundToolCall = await rig.waitForToolCall('save_memory');
// Add debugging information
if (!foundToolCall || !result.toLowerCase().includes('blue')) {
const allTools = printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Contains blue': result.toLowerCase().includes('blue'),
});
// Add debugging information
if (!foundToolCall || !result.toLowerCase().includes('blue')) {
const allTools = printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Contains blue': result.toLowerCase().includes('blue'),
});
console.error(
'Memory tool calls:',
allTools
.filter((t) => t.toolRequest.name === 'save_memory')
.map((t) => t.toolRequest.args),
);
}
console.error(
'Memory tool calls:',
allTools
.filter((t) => t.toolRequest.name === 'save_memory')
.map((t) => t.toolRequest.args),
);
}
assert.ok(foundToolCall, 'Expected to find a save_memory tool call');
expect(
foundToolCall,
'Expected to find a save_memory tool call',
).toBeTruthy();
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'blue', 'Save memory test');
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'blue', 'Save memory test');
});
});


@ -10,8 +10,7 @@
* external dependencies, making it compatible with Docker sandbox mode.
*/
import { test, describe, before } from 'node:test';
import { strict as assert } from 'node:assert';
import { describe, it, beforeAll, expect } from 'vitest';
import { TestRig, validateModelOutput } from './test-helper.js';
import { join } from 'path';
import { writeFileSync } from 'fs';
@ -168,7 +167,7 @@ rpc.send({
describe('simple-mcp-server', () => {
const rig = new TestRig();
before(async () => {
beforeAll(async () => {
// Setup test directory with MCP server configuration
await rig.setup('simple-mcp-server', {
settings: {
@ -192,17 +191,20 @@ describe('simple-mcp-server', () => {
}
});
test('should add two numbers', async () => {
it('should add two numbers', async () => {
// Test directory is already set up in the beforeAll hook
// Just run the command - MCP server config is in settings.json
const output = await rig.run('add 5 and 10');
const foundToolCall = await rig.waitForToolCall('add');
assert.ok(foundToolCall, 'Expected to find an add tool call');
expect(foundToolCall, 'Expected to find an add tool call').toBeTruthy();
// Validate model output - will throw if no output, fail if missing expected content
validateModelOutput(output, '15', 'MCP server test');
assert.ok(output.includes('15'), 'Expected output to contain the sum (15)');
expect(
output.includes('15'),
'Expected output to contain the sum (15)',
).toBeTruthy();
});
});


@ -10,7 +10,7 @@ import { mkdirSync, writeFileSync, readFileSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { env } from 'process';
import { fileExists } from '../scripts/telemetry_utils.js';
import fs from 'fs';
const __dirname = dirname(fileURLToPath(import.meta.url));
@ -297,15 +297,12 @@ export class TestRig {
}
readFile(fileName: string) {
const content = readFileSync(join(this.testDir!, fileName), 'utf-8');
const filePath = join(this.testDir!, fileName);
const content = readFileSync(filePath, 'utf-8');
if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
const testId = `${env.TEST_FILE_NAME!.replace(
'.test.js',
'',
)}:${this.testName!.replace(/ /g, '-')}`;
console.log(`--- FILE: ${testId}/${fileName} ---`);
console.log(`--- FILE: ${filePath} ---`);
console.log(content);
console.log(`--- END FILE: ${testId}/${fileName} ---`);
console.log(`--- END FILE: ${filePath} ---`);
}
return content;
}
@ -336,7 +333,7 @@ export class TestRig {
// Wait for telemetry file to exist and have content
await this.poll(
() => {
if (!fileExists(logFilePath)) return false;
if (!fs.existsSync(logFilePath)) return false;
try {
const content = readFileSync(logFilePath, 'utf-8');
// Check if file has meaningful content (at least one complete JSON object)
@ -547,7 +544,7 @@ export class TestRig {
// Try reading from file first
const logFilePath = join(this.testDir!, 'telemetry.log');
if (fileExists(logFilePath)) {
if (fs.existsSync(logFilePath)) {
try {
const content = readFileSync(logFilePath, 'utf-8');
if (content && content.includes('"event.name"')) {
@ -581,7 +578,7 @@ export class TestRig {
}
// Check if file exists, if not return empty array (file might not be created yet)
if (!fileExists(logFilePath)) {
if (!fs.existsSync(logFilePath)) {
return [];
}


@ -0,0 +1,18 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { defineConfig } from 'vitest/config';
export default defineConfig({
test: {
testTimeout: 300000, // 5 minutes
globalSetup: './globalSetup.ts',
reporters: ['default'],
include: ['**/*.test.ts'],
retry: 2,
fileParallelism: false,
},
});
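This config gives every test a five-minute budget, two automatic retries, and serial file execution (`fileParallelism: false`), so long-running CLI invocations do not compete for resources. Individual specs can still tighten those limits; a hedged sketch (hypothetical test, assuming a Vitest version that accepts a per-test options object):

```ts
import { describe, it, expect } from 'vitest';
import { TestRig } from './test-helper.js';

describe('per-test overrides', () => {
  // retry: 0 and a shorter timeout override the defaults from vitest.config.ts.
  it('runs a quick prompt without retries', { retry: 0, timeout: 60_000 }, async () => {
    const rig = new TestRig();
    await rig.setup('runs a quick prompt without retries');
    const result = await rig.run('say hello');
    expect(result.length).toBeGreaterThan(0);
  });
});
```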


@ -4,8 +4,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { test } from 'node:test';
import { strict as assert } from 'assert';
import { describe, it, expect } from 'vitest';
import {
TestRig,
createToolCallErrorMessage,
@ -13,56 +12,57 @@ import {
validateModelOutput,
} from './test-helper.js';
test('should be able to write a file', async () => {
const rig = new TestRig();
await rig.setup('should be able to write a file');
const prompt = `show me an example of using the write tool. put a dad joke in dad.txt`;
describe('write_file', () => {
it('should be able to write a file', async () => {
const rig = new TestRig();
await rig.setup('should be able to write a file');
const prompt = `show me an example of using the write tool. put a dad joke in dad.txt`;
const result = await rig.run(prompt);
const result = await rig.run(prompt);
const foundToolCall = await rig.waitForToolCall('write_file');
const foundToolCall = await rig.waitForToolCall('write_file');
// Add debugging information
if (!foundToolCall) {
printDebugInfo(rig, result);
}
// Add debugging information
if (!foundToolCall) {
printDebugInfo(rig, result);
}
const allTools = rig.readToolLogs();
assert.ok(
foundToolCall,
createToolCallErrorMessage(
'write_file',
allTools.map((t) => t.toolRequest.name),
result,
),
);
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'dad.txt', 'Write file test');
const newFilePath = 'dad.txt';
const newFileContent = rig.readFile(newFilePath);
// Add debugging for file content
if (newFileContent === '') {
console.error('File was created but is empty');
console.error(
'Tool calls:',
rig.readToolLogs().map((t) => ({
name: t.toolRequest.name,
args: t.toolRequest.args,
})),
const allTools = rig.readToolLogs();
expect(
  foundToolCall,
  createToolCallErrorMessage(
    'write_file',
    allTools.map((t) => t.toolRequest.name),
    result,
  ),
).toBeTruthy();
}
assert.notEqual(newFileContent, '', 'Expected file to have content');
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'dad.txt', 'Write file test');
// Log success info if verbose
if (process.env.VERBOSE === 'true') {
console.log(
'File created successfully with content:',
newFileContent.substring(0, 100) + '...',
);
}
const newFilePath = 'dad.txt';
const newFileContent = rig.readFile(newFilePath);
// Add debugging for file content
if (newFileContent === '') {
console.error('File was created but is empty');
console.error(
'Tool calls:',
rig.readToolLogs().map((t) => ({
name: t.toolRequest.name,
args: t.toolRequest.args,
})),
);
}
expect(newFileContent).not.toBe('');
// Log success info if verbose
if (process.env.VERBOSE === 'true') {
console.log(
'File created successfully with content:',
newFileContent.substring(0, 100) + '...',
);
}
});
});


@ -33,11 +33,11 @@
"test": "npm run test --workspaces --if-present",
"test:ci": "npm run test:ci --workspaces --if-present && npm run test:scripts",
"test:scripts": "vitest run --config ./scripts/tests/vitest.config.ts",
"test:e2e": "npm run test:integration:sandbox:none -- --verbose --keep-output",
"test:e2e": "cross-env VERBOSE=true KEEP_OUTPUT=true npm run test:integration:sandbox:none",
"test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman",
"test:integration:sandbox:none": "GEMINI_SANDBOX=false node integration-tests/run-tests.js",
"test:integration:sandbox:docker": "GEMINI_SANDBOX=docker node integration-tests/run-tests.js",
"test:integration:sandbox:podman": "GEMINI_SANDBOX=podman node integration-tests/run-tests.js",
"test:integration:sandbox:none": "GEMINI_SANDBOX=false vitest run --root ./integration-tests",
"test:integration:sandbox:docker": "npm run build:sandbox && GEMINI_SANDBOX=docker vitest run --root ./integration-tests",
"test:integration:sandbox:podman": "GEMINI_SANDBOX=podman vitest run --root ./integration-tests",
"lint": "eslint . --ext .ts,.tsx && eslint integration-tests",
"lint:fix": "eslint . --fix && eslint integration-tests --fix",
"lint:ci": "eslint . --ext .ts,.tsx --max-warnings 0 && eslint integration-tests --max-warnings 0",