From 9d023be1d16a6bf7427569f863e6cfd2c3442d8b Mon Sep 17 00:00:00 2001 From: Tommaso Sciortino Date: Tue, 12 Aug 2025 15:57:27 -0700 Subject: [PATCH] Upgrade integration tests to use Vitest (#6021) --- .github/workflows/e2e.yml | 4 +- .vscode/launch.json | 13 -- docs/integration-tests.md | 16 +- integration-tests/file-system.test.ts | 154 +++++++-------- integration-tests/globalSetup.ts | 55 ++++++ integration-tests/google_web_search.test.ts | 126 ++++++------ integration-tests/list_directory.test.ts | 98 +++++----- ...st.js => mcp_server_cyclic_schema.test.ts} | 15 +- integration-tests/read_many_files.test.ts | 69 +++---- integration-tests/replace.test.ts | 91 +++++---- integration-tests/run-tests.js | 182 ------------------ integration-tests/run_shell_command.test.ts | 103 +++++----- integration-tests/save_memory.test.ts | 52 ++--- integration-tests/simple-mcp-server.test.ts | 14 +- integration-tests/test-helper.ts | 19 +- integration-tests/vitest.config.ts | 18 ++ integration-tests/write_file.test.ts | 94 ++++----- package.json | 8 +- 18 files changed, 511 insertions(+), 620 deletions(-) create mode 100644 integration-tests/globalSetup.ts rename integration-tests/{mcp_server_cyclic_schema.test.js => mcp_server_cyclic_schema.test.ts} (92%) delete mode 100644 integration-tests/run-tests.js create mode 100644 integration-tests/vitest.config.ts diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index f9f82422..a6f6ad5f 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -46,7 +46,9 @@ jobs: - name: Run E2E tests env: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} - run: npm run test:integration:${{ matrix.sandbox }} -- --verbose --keep-output + VERBOSE: true + KEEP_OUTPUT: true + run: npm run test:integration:${{ matrix.sandbox }} e2e-test-macos: name: E2E Test - macOS diff --git a/.vscode/launch.json b/.vscode/launch.json index 97c9eba5..6e4a7605 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -17,19 +17,6 @@ 
"GEMINI_SANDBOX": "false" } }, - { - "type": "node", - "request": "launch", - "name": "Launch E2E", - "program": "${workspaceFolder}/integration-tests/run-tests.js", - "args": ["--verbose", "--keep-output", "list_directory"], - "skipFiles": ["/**"], - "cwd": "${workspaceFolder}", - "console": "integratedTerminal", - "env": { - "GEMINI_SANDBOX": "false" - } - }, { "name": "Launch Companion VS Code Extension", "type": "extensionHost", diff --git a/docs/integration-tests.md b/docs/integration-tests.md index 7a4c8489..6289b7a7 100644 --- a/docs/integration-tests.md +++ b/docs/integration-tests.md @@ -67,13 +67,9 @@ The integration test runner provides several options for diagnostics to help tra You can preserve the temporary files created during a test run for inspection. This is useful for debugging issues with file system operations. -To keep the test output, you can either use the `--keep-output` flag or set the `KEEP_OUTPUT` environment variable to `true`. +To keep the test output set the `KEEP_OUTPUT` environment variable to `true`. ```bash -# Using the flag -npm run test:integration:sandbox:none -- --keep-output - -# Using the environment variable KEEP_OUTPUT=true npm run test:integration:sandbox:none ``` @@ -81,20 +77,20 @@ When output is kept, the test runner will print the path to the unique directory ### Verbose output -For more detailed debugging, the `--verbose` flag streams the real-time output from the `gemini` command to the console. +For more detailed debugging, set the `VERBOSE` environment variable to `true`. ```bash -npm run test:integration:sandbox:none -- --verbose +VERBOSE=true npm run test:integration:sandbox:none ``` -When using `--verbose` and `--keep-output` in the same command, the output is streamed to the console and also saved to a log file within the test's temporary directory. 
+When using `VERBOSE=true` and `KEEP_OUTPUT=true` in the same command, the output is streamed to the console and also saved to a log file within the test's temporary directory. The verbose output is formatted to clearly identify the source of the logs: ``` ---- TEST: : --- +--- TEST: : --- ... output from the gemini command ... ---- END TEST: : --- +--- END TEST: : --- ``` ## Linting and formatting diff --git a/integration-tests/file-system.test.ts b/integration-tests/file-system.test.ts index d43f047f..5a7028e0 100644 --- a/integration-tests/file-system.test.ts +++ b/integration-tests/file-system.test.ts @@ -4,86 +4,90 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { strict as assert } from 'assert'; -import { test } from 'node:test'; +import { describe, it, expect } from 'vitest'; import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; -test('should be able to read a file', async () => { - const rig = new TestRig(); - await rig.setup('should be able to read a file'); - rig.createFile('test.txt', 'hello world'); +describe('file-system', () => { + it('should be able to read a file', async () => { + const rig = new TestRig(); + await rig.setup('should be able to read a file'); + rig.createFile('test.txt', 'hello world'); - const result = await rig.run( - `read the file test.txt and show me its contents`, - ); + const result = await rig.run( + `read the file test.txt and show me its contents`, + ); - const foundToolCall = await rig.waitForToolCall('read_file'); + const foundToolCall = await rig.waitForToolCall('read_file'); - // Add debugging information - if (!foundToolCall || !result.includes('hello world')) { - printDebugInfo(rig, result, { - 'Found tool call': foundToolCall, - 'Contains hello world': result.includes('hello world'), - }); - } + // Add debugging information + if (!foundToolCall || !result.includes('hello world')) { + printDebugInfo(rig, result, { + 'Found tool call': foundToolCall, + 'Contains hello world': 
result.includes('hello world'), + }); + } - assert.ok(foundToolCall, 'Expected to find a read_file tool call'); + expect( + foundToolCall, + 'Expected to find a read_file tool call', + ).toBeTruthy(); - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput(result, 'hello world', 'File read test'); -}); - -test('should be able to write a file', async () => { - const rig = new TestRig(); - await rig.setup('should be able to write a file'); - rig.createFile('test.txt', ''); - - const result = await rig.run(`edit test.txt to have a hello world message`); - - // Accept multiple valid tools for editing files - const foundToolCall = await rig.waitForAnyToolCall([ - 'write_file', - 'edit', - 'replace', - ]); - - // Add debugging information - if (!foundToolCall) { - printDebugInfo(rig, result); - } - - assert.ok( - foundToolCall, - 'Expected to find a write_file, edit, or replace tool call', - ); - - // Validate model output - will throw if no output - validateModelOutput(result, null, 'File write test'); - - const fileContent = rig.readFile('test.txt'); - - // Add debugging for file content - if (!fileContent.toLowerCase().includes('hello')) { - const writeCalls = rig - .readToolLogs() - .filter((t) => t.toolRequest.name === 'write_file') - .map((t) => t.toolRequest.args); - - printDebugInfo(rig, result, { - 'File content mismatch': true, - 'Expected to contain': 'hello', - 'Actual content': fileContent, - 'Write tool calls': JSON.stringify(writeCalls), - }); - } - - assert.ok( - fileContent.toLowerCase().includes('hello'), - 'Expected file to contain hello', - ); - - // Log success info if verbose - if (process.env.VERBOSE === 'true') { - console.log('File written successfully with hello message.'); - } + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput(result, 'hello world', 'File read test'); + }); + + it('should be able to write a file', async () => { + const 
rig = new TestRig(); + await rig.setup('should be able to write a file'); + rig.createFile('test.txt', ''); + + const result = await rig.run(`edit test.txt to have a hello world message`); + + // Accept multiple valid tools for editing files + const foundToolCall = await rig.waitForAnyToolCall([ + 'write_file', + 'edit', + 'replace', + ]); + + // Add debugging information + if (!foundToolCall) { + printDebugInfo(rig, result); + } + + expect( + foundToolCall, + 'Expected to find a write_file, edit, or replace tool call', + ).toBeTruthy(); + + // Validate model output - will throw if no output + validateModelOutput(result, null, 'File write test'); + + const fileContent = rig.readFile('test.txt'); + + // Add debugging for file content + if (!fileContent.toLowerCase().includes('hello')) { + const writeCalls = rig + .readToolLogs() + .filter((t) => t.toolRequest.name === 'write_file') + .map((t) => t.toolRequest.args); + + printDebugInfo(rig, result, { + 'File content mismatch': true, + 'Expected to contain': 'hello', + 'Actual content': fileContent, + 'Write tool calls': JSON.stringify(writeCalls), + }); + } + + expect( + fileContent.toLowerCase().includes('hello'), + 'Expected file to contain hello', + ).toBeTruthy(); + + // Log success info if verbose + if (process.env.VERBOSE === 'true') { + console.log('File written successfully with hello message.'); + } + }); }); diff --git a/integration-tests/globalSetup.ts b/integration-tests/globalSetup.ts new file mode 100644 index 00000000..89ca203f --- /dev/null +++ b/integration-tests/globalSetup.ts @@ -0,0 +1,55 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { mkdir, readdir, rm } from 'fs/promises'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const rootDir = join(__dirname, '..'); +const integrationTestsDir = join(rootDir, '.integration-tests'); +let runDir = ''; // 
Make runDir accessible in teardown + +export async function setup() { + runDir = join(integrationTestsDir, `${Date.now()}`); + await mkdir(runDir, { recursive: true }); + + // Clean up old test runs, but keep the latest few for debugging + try { + const testRuns = await readdir(integrationTestsDir); + if (testRuns.length > 5) { + const oldRuns = testRuns.sort().slice(0, testRuns.length - 5); + await Promise.all( + oldRuns.map((oldRun) => + rm(join(integrationTestsDir, oldRun), { + recursive: true, + force: true, + }), + ), + ); + } + } catch (e) { + console.error('Error cleaning up old test runs:', e); + } + + process.env.INTEGRATION_TEST_FILE_DIR = runDir; + process.env.GEMINI_CLI_INTEGRATION_TEST = 'true'; + process.env.TELEMETRY_LOG_FILE = join(runDir, 'telemetry.log'); + + if (process.env.KEEP_OUTPUT) { + console.log(`Keeping output for test run in: ${runDir}`); + } + process.env.VERBOSE = process.env.VERBOSE ?? 'false'; + + console.log(`\nIntegration test output directory: ${runDir}`); +} + +export async function teardown() { + // Cleanup the test run directory unless KEEP_OUTPUT is set + if (process.env.KEEP_OUTPUT !== 'true' && runDir) { + await rm(runDir, { recursive: true, force: true }); + } +} diff --git a/integration-tests/google_web_search.test.ts b/integration-tests/google_web_search.test.ts index 6fb365a0..698edfe5 100644 --- a/integration-tests/google_web_search.test.ts +++ b/integration-tests/google_web_search.test.ts @@ -4,74 +4,78 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { test } from 'node:test'; -import { strict as assert } from 'assert'; +import { describe, it, expect } from 'vitest'; import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; -test('should be able to search the web', async () => { - const rig = new TestRig(); - await rig.setup('should be able to search the web'); +describe('google_web_search', () => { + it('should be able to search the web', async () => { + const rig = new TestRig(); + await 
rig.setup('should be able to search the web'); - let result; - try { - result = await rig.run(`what is the weather in London`); - } catch (error) { - // Network errors can occur in CI environments - if ( - error instanceof Error && - (error.message.includes('network') || error.message.includes('timeout')) - ) { - console.warn( - 'Skipping test due to network error:', - (error as Error).message, - ); - return; // Skip the test + let result; + try { + result = await rig.run(`what is the weather in London`); + } catch (error) { + // Network errors can occur in CI environments + if ( + error instanceof Error && + (error.message.includes('network') || error.message.includes('timeout')) + ) { + console.warn( + 'Skipping test due to network error:', + (error as Error).message, + ); + return; // Skip the test + } + throw error; // Re-throw if not a network error } - throw error; // Re-throw if not a network error - } - const foundToolCall = await rig.waitForToolCall('google_web_search'); + const foundToolCall = await rig.waitForToolCall('google_web_search'); - // Add debugging information - if (!foundToolCall) { - const allTools = printDebugInfo(rig, result); + // Add debugging information + if (!foundToolCall) { + const allTools = printDebugInfo(rig, result); - // Check if the tool call failed due to network issues - const failedSearchCalls = allTools.filter( - (t) => - t.toolRequest.name === 'google_web_search' && !t.toolRequest.success, + // Check if the tool call failed due to network issues + const failedSearchCalls = allTools.filter( + (t) => + t.toolRequest.name === 'google_web_search' && !t.toolRequest.success, + ); + if (failedSearchCalls.length > 0) { + console.warn( + 'google_web_search tool was called but failed, possibly due to network issues', + ); + console.warn( + 'Failed calls:', + failedSearchCalls.map((t) => t.toolRequest.args), + ); + return; // Skip the test if network issues + } + } + + expect( + foundToolCall, + 'Expected to find a call to 
google_web_search', + ).toBeTruthy(); + + // Validate model output - will throw if no output, warn if missing expected content + const hasExpectedContent = validateModelOutput( + result, + ['weather', 'london'], + 'Google web search test', ); - if (failedSearchCalls.length > 0) { - console.warn( - 'google_web_search tool was called but failed, possibly due to network issues', - ); - console.warn( - 'Failed calls:', - failedSearchCalls.map((t) => t.toolRequest.args), - ); - return; // Skip the test if network issues + + // If content was missing, log the search queries used + if (!hasExpectedContent) { + const searchCalls = rig + .readToolLogs() + .filter((t) => t.toolRequest.name === 'google_web_search'); + if (searchCalls.length > 0) { + console.warn( + 'Search queries used:', + searchCalls.map((t) => t.toolRequest.args), + ); + } } - } - - assert.ok(foundToolCall, 'Expected to find a call to google_web_search'); - - // Validate model output - will throw if no output, warn if missing expected content - const hasExpectedContent = validateModelOutput( - result, - ['weather', 'london'], - 'Google web search test', - ); - - // If content was missing, log the search queries used - if (!hasExpectedContent) { - const searchCalls = rig - .readToolLogs() - .filter((t) => t.toolRequest.name === 'google_web_search'); - if (searchCalls.length > 0) { - console.warn( - 'Search queries used:', - searchCalls.map((t) => t.toolRequest.args), - ); - } - } + }); }); diff --git a/integration-tests/list_directory.test.ts b/integration-tests/list_directory.test.ts index 023eca12..38416f4f 100644 --- a/integration-tests/list_directory.test.ts +++ b/integration-tests/list_directory.test.ts @@ -4,59 +4,63 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { test } from 'node:test'; -import { strict as assert } from 'assert'; +import { describe, it, expect } from 'vitest'; import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; import { existsSync } from 'fs'; 
import { join } from 'path'; -test('should be able to list a directory', async () => { - const rig = new TestRig(); - await rig.setup('should be able to list a directory'); - rig.createFile('file1.txt', 'file 1 content'); - rig.mkdir('subdir'); - rig.sync(); +describe('list_directory', () => { + it('should be able to list a directory', async () => { + const rig = new TestRig(); + await rig.setup('should be able to list a directory'); + rig.createFile('file1.txt', 'file 1 content'); + rig.mkdir('subdir'); + rig.sync(); - // Poll for filesystem changes to propagate in containers - await rig.poll( - () => { - // Check if the files exist in the test directory - const file1Path = join(rig.testDir!, 'file1.txt'); - const subdirPath = join(rig.testDir!, 'subdir'); - return existsSync(file1Path) && existsSync(subdirPath); - }, - 1000, // 1 second max wait - 50, // check every 50ms - ); - - const prompt = `Can you list the files in the current directory. Display them in the style of 'ls'`; - - const result = await rig.run(prompt); - - const foundToolCall = await rig.waitForToolCall('list_directory'); - - // Add debugging information - if ( - !foundToolCall || - !result.includes('file1.txt') || - !result.includes('subdir') - ) { - const allTools = printDebugInfo(rig, result, { - 'Found tool call': foundToolCall, - 'Contains file1.txt': result.includes('file1.txt'), - 'Contains subdir': result.includes('subdir'), - }); - - console.error( - 'List directory calls:', - allTools - .filter((t) => t.toolRequest.name === 'list_directory') - .map((t) => t.toolRequest.args), + // Poll for filesystem changes to propagate in containers + await rig.poll( + () => { + // Check if the files exist in the test directory + const file1Path = join(rig.testDir!, 'file1.txt'); + const subdirPath = join(rig.testDir!, 'subdir'); + return existsSync(file1Path) && existsSync(subdirPath); + }, + 1000, // 1 second max wait + 50, // check every 50ms ); - } - assert.ok(foundToolCall, 'Expected to find a 
list_directory tool call'); + const prompt = `Can you list the files in the current directory. Display them in the style of 'ls'`; - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput(result, ['file1.txt', 'subdir'], 'List directory test'); + const result = await rig.run(prompt); + + const foundToolCall = await rig.waitForToolCall('list_directory'); + + // Add debugging information + if ( + !foundToolCall || + !result.includes('file1.txt') || + !result.includes('subdir') + ) { + const allTools = printDebugInfo(rig, result, { + 'Found tool call': foundToolCall, + 'Contains file1.txt': result.includes('file1.txt'), + 'Contains subdir': result.includes('subdir'), + }); + + console.error( + 'List directory calls:', + allTools + .filter((t) => t.toolRequest.name === 'list_directory') + .map((t) => t.toolRequest.args), + ); + } + + expect( + foundToolCall, + 'Expected to find a list_directory tool call', + ).toBeTruthy(); + + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput(result, ['file1.txt', 'subdir'], 'List directory test'); + }); }); diff --git a/integration-tests/mcp_server_cyclic_schema.test.js b/integration-tests/mcp_server_cyclic_schema.test.ts similarity index 92% rename from integration-tests/mcp_server_cyclic_schema.test.js rename to integration-tests/mcp_server_cyclic_schema.test.ts index 1ace98f1..18c1bcde 100644 --- a/integration-tests/mcp_server_cyclic_schema.test.js +++ b/integration-tests/mcp_server_cyclic_schema.test.ts @@ -9,15 +9,11 @@ * and then detect and warn about the potential tools that caused the error. 
*/ -import { test, describe, before } from 'node:test'; -import { strict as assert } from 'node:assert'; +import { describe, it, beforeAll, expect } from 'vitest'; import { TestRig } from './test-helper.js'; import { join } from 'path'; -import { fileURLToPath } from 'url'; import { writeFileSync } from 'fs'; -const __dirname = fileURLToPath(new URL('.', import.meta.url)); - // Create a minimal MCP server that doesn't require external dependencies // This implements the MCP protocol directly using Node.js built-ins const serverScript = `#!/usr/bin/env node @@ -160,7 +156,7 @@ rpc.send({ describe('mcp server with cyclic tool schema is detected', () => { const rig = new TestRig(); - before(async () => { + beforeAll(async () => { // Setup test directory with MCP server configuration await rig.setup('cyclic-schema-mcp-server', { settings: { @@ -174,7 +170,7 @@ describe('mcp server with cyclic tool schema is detected', () => { }); // Create server script in the test directory - const testServerPath = join(rig.testDir, 'mcp-server.cjs'); + const testServerPath = join(rig.testDir!, 'mcp-server.cjs'); writeFileSync(testServerPath, serverScript); // Make the script executable (though running with 'node' should work anyway) @@ -184,15 +180,14 @@ describe('mcp server with cyclic tool schema is detected', () => { } }); - test('should error and suggest disabling the cyclic tool', async () => { + it('should error and suggest disabling the cyclic tool', async () => { // Just run any command to trigger the schema depth error. // If this test starts failing, check `isSchemaDepthError` from // geminiChat.ts to see if it needs to be updated. // Or, possibly it could mean that gemini has fixed the issue. 
const output = await rig.run('hello'); - assert.match( - output, + expect(output).toMatch( /Skipping tool 'tool_with_cyclic_schema' from MCP server 'cyclic-schema-server' because it has missing types in its parameter schema/, ); }); diff --git a/integration-tests/read_many_files.test.ts b/integration-tests/read_many_files.test.ts index 74d2f358..8e839a6a 100644 --- a/integration-tests/read_many_files.test.ts +++ b/integration-tests/read_many_files.test.ts @@ -4,47 +4,48 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { test } from 'node:test'; -import { strict as assert } from 'assert'; +import { describe, it, expect } from 'vitest'; import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; -test('should be able to read multiple files', async () => { - const rig = new TestRig(); - await rig.setup('should be able to read multiple files'); - rig.createFile('file1.txt', 'file 1 content'); - rig.createFile('file2.txt', 'file 2 content'); +describe('read_many_files', () => { + it('should be able to read multiple files', async () => { + const rig = new TestRig(); + await rig.setup('should be able to read multiple files'); + rig.createFile('file1.txt', 'file 1 content'); + rig.createFile('file2.txt', 'file 2 content'); - const prompt = `Please use read_many_files to read file1.txt and file2.txt and show me what's in them`; + const prompt = `Please use read_many_files to read file1.txt and file2.txt and show me what's in them`; - const result = await rig.run(prompt); + const result = await rig.run(prompt); - // Check for either read_many_files or multiple read_file calls - const allTools = rig.readToolLogs(); - const readManyFilesCall = await rig.waitForToolCall('read_many_files'); - const readFileCalls = allTools.filter( - (t) => t.toolRequest.name === 'read_file', - ); + // Check for either read_many_files or multiple read_file calls + const allTools = rig.readToolLogs(); + const readManyFilesCall = await rig.waitForToolCall('read_many_files'); 
+ const readFileCalls = allTools.filter( + (t) => t.toolRequest.name === 'read_file', + ); - // Accept either read_many_files OR at least 2 read_file calls - const foundValidPattern = readManyFilesCall || readFileCalls.length >= 2; + // Accept either read_many_files OR at least 2 read_file calls + const foundValidPattern = readManyFilesCall || readFileCalls.length >= 2; - // Add debugging information - if (!foundValidPattern) { - printDebugInfo(rig, result, { - 'read_many_files called': readManyFilesCall, - 'read_file calls': readFileCalls.length, - }); - } + // Add debugging information + if (!foundValidPattern) { + printDebugInfo(rig, result, { + 'read_many_files called': readManyFilesCall, + 'read_file calls': readFileCalls.length, + }); + } - assert.ok( - foundValidPattern, - 'Expected to find either read_many_files or multiple read_file tool calls', - ); + expect( + foundValidPattern, + 'Expected to find either read_many_files or multiple read_file tool calls', + ).toBeTruthy(); - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput( - result, - ['file 1 content', 'file 2 content'], - 'Read many files test', - ); + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput( + result, + ['file 1 content', 'file 2 content'], + 'Read many files test', + ); + }); }); diff --git a/integration-tests/replace.test.ts b/integration-tests/replace.test.ts index 1ac6f5a4..3a2d979b 100644 --- a/integration-tests/replace.test.ts +++ b/integration-tests/replace.test.ts @@ -4,63 +4,60 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { test } from 'node:test'; -import { strict as assert } from 'assert'; +import { describe, it, expect } from 'vitest'; import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; -test('should be able to replace content in a file', async () => { - const rig = new TestRig(); - await rig.setup('should be able to replace 
content in a file'); +describe('replace', () => { + it('should be able to replace content in a file', async () => { + const rig = new TestRig(); + await rig.setup('should be able to replace content in a file'); - const fileName = 'file_to_replace.txt'; - const originalContent = 'original content'; - const expectedContent = 'replaced content'; + const fileName = 'file_to_replace.txt'; + const originalContent = 'original content'; + const expectedContent = 'replaced content'; - rig.createFile(fileName, originalContent); - const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`; + rig.createFile(fileName, originalContent); + const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`; - const result = await rig.run(prompt); + const result = await rig.run(prompt); - const foundToolCall = await rig.waitForToolCall('replace'); + const foundToolCall = await rig.waitForToolCall('replace'); - // Add debugging information - if (!foundToolCall) { - printDebugInfo(rig, result); - } + // Add debugging information + if (!foundToolCall) { + printDebugInfo(rig, result); + } - assert.ok(foundToolCall, 'Expected to find a replace tool call'); + expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy(); - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput( - result, - ['replaced', 'file_to_replace.txt'], - 'Replace content test', - ); - - const newFileContent = rig.readFile(fileName); - - // Add debugging for file content - if (newFileContent !== expectedContent) { - console.error('File content mismatch - Debug info:'); - console.error('Expected:', expectedContent); - console.error('Actual:', newFileContent); - console.error( - 'Tool calls:', - rig.readToolLogs().map((t) => ({ - name: t.toolRequest.name, - args: t.toolRequest.args, - })), + // Validate model output - will throw if no output, warn if missing expected content + 
validateModelOutput( + result, + ['replaced', 'file_to_replace.txt'], + 'Replace content test', ); - } - assert.strictEqual( - newFileContent, - expectedContent, - 'File content should be updated correctly', - ); + const newFileContent = rig.readFile(fileName); - // Log success info if verbose - if (process.env.VERBOSE === 'true') { - console.log('File replaced successfully. New content:', newFileContent); - } + // Add debugging for file content + if (newFileContent !== expectedContent) { + console.error('File content mismatch - Debug info:'); + console.error('Expected:', expectedContent); + console.error('Actual:', newFileContent); + console.error( + 'Tool calls:', + rig.readToolLogs().map((t) => ({ + name: t.toolRequest.name, + args: t.toolRequest.args, + })), + ); + } + + expect(newFileContent).toBe(expectedContent); + + // Log success info if verbose + if (process.env.VERBOSE === 'true') { + console.log('File replaced successfully. New content:', newFileContent); + } + }); }); diff --git a/integration-tests/run-tests.js b/integration-tests/run-tests.js deleted file mode 100644 index b33e1afa..00000000 --- a/integration-tests/run-tests.js +++ /dev/null @@ -1,182 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { spawnSync, spawn } from 'child_process'; -import { mkdirSync, rmSync, createWriteStream } from 'fs'; -import { join, dirname, basename } from 'path'; -import { fileURLToPath } from 'url'; -import { glob } from 'glob'; - -async function main() { - const __dirname = dirname(fileURLToPath(import.meta.url)); - const rootDir = join(__dirname, '..'); - const integrationTestsDir = join(rootDir, '.integration-tests'); - - if (process.env.GEMINI_SANDBOX === 'docker' && !process.env.IS_DOCKER) { - console.log('Building sandbox for Docker...'); - const buildResult = spawnSync('npm', ['run', 'build:all'], { - stdio: 'inherit', - }); - if (buildResult.status !== 0) { - console.error('Sandbox build 
failed.'); - process.exit(1); - } - } - - const runId = `${Date.now()}`; - const runDir = join(integrationTestsDir, runId); - - mkdirSync(runDir, { recursive: true }); - - const args = process.argv.slice(2); - const keepOutput = - process.env.KEEP_OUTPUT === 'true' || args.includes('--keep-output'); - if (keepOutput) { - const keepOutputIndex = args.indexOf('--keep-output'); - if (keepOutputIndex > -1) { - args.splice(keepOutputIndex, 1); - } - console.log(`Keeping output for test run in: ${runDir}`); - } - - const verbose = args.includes('--verbose'); - if (verbose) { - const verboseIndex = args.indexOf('--verbose'); - if (verboseIndex > -1) { - args.splice(verboseIndex, 1); - } - } - - const testPatterns = - args.length > 0 - ? args.map((arg) => `integration-tests/${arg}.test.ts`) - : ['integration-tests/*.test.ts']; - const testFiles = glob.sync(testPatterns, { cwd: rootDir, absolute: true }); - - for (const testFile of testFiles) { - const testFileName = basename(testFile); - console.log(` Found test file: ${testFileName}`); - } - - const MAX_RETRIES = 3; - let allTestsPassed = true; - - for (const testFile of testFiles) { - const testFileName = basename(testFile); - const testFileDir = join(runDir, testFileName); - mkdirSync(testFileDir, { recursive: true }); - - console.log( - `------------- Running test file: ${testFileName} ------------------------------`, - ); - - let attempt = 0; - let testFilePassed = false; - let lastStdout = []; - let lastStderr = []; - - while (attempt < MAX_RETRIES && !testFilePassed) { - attempt++; - if (attempt > 1) { - console.log( - `--- Retrying ${testFileName} (attempt ${attempt} of ${MAX_RETRIES}) ---`, - ); - } - - const nodeArgs = ['--test']; - if (verbose) { - nodeArgs.push('--test-reporter=spec'); - } - nodeArgs.push(testFile); - - const child = spawn('npx', ['tsx', ...nodeArgs], { - stdio: 'pipe', - env: { - ...process.env, - GEMINI_CLI_INTEGRATION_TEST: 'true', - INTEGRATION_TEST_FILE_DIR: testFileDir, - KEEP_OUTPUT: 
keepOutput.toString(), - VERBOSE: verbose.toString(), - TEST_FILE_NAME: testFileName, - TELEMETRY_LOG_FILE: join(testFileDir, 'telemetry.log'), - }, - }); - - let outputStream; - if (keepOutput) { - const outputFile = join(testFileDir, `output-attempt-${attempt}.log`); - outputStream = createWriteStream(outputFile); - console.log(`Output for ${testFileName} written to: ${outputFile}`); - } - - const stdout = []; - const stderr = []; - - child.stdout.on('data', (data) => { - if (verbose) { - process.stdout.write(data); - } else { - stdout.push(data); - } - if (outputStream) { - outputStream.write(data); - } - }); - - child.stderr.on('data', (data) => { - if (verbose) { - process.stderr.write(data); - } else { - stderr.push(data); - } - if (outputStream) { - outputStream.write(data); - } - }); - - const exitCode = await new Promise((resolve) => { - child.on('close', (code) => { - if (outputStream) { - outputStream.end(() => { - resolve(code); - }); - } else { - resolve(code); - } - }); - }); - - if (exitCode === 0) { - testFilePassed = true; - } else { - lastStdout = stdout; - lastStderr = stderr; - } - } - - if (!testFilePassed) { - console.error( - `Test file failed after ${MAX_RETRIES} attempts: ${testFileName}`, - ); - if (!verbose) { - process.stdout.write(Buffer.concat(lastStdout).toString('utf8')); - process.stderr.write(Buffer.concat(lastStderr).toString('utf8')); - } - allTestsPassed = false; - } - } - - if (!keepOutput) { - rmSync(runDir, { recursive: true, force: true }); - } - - if (!allTestsPassed) { - console.error('One or more test files failed.'); - process.exit(1); - } -} - -main(); diff --git a/integration-tests/run_shell_command.test.ts b/integration-tests/run_shell_command.test.ts index 2a5f9ed4..a1aa08ae 100644 --- a/integration-tests/run_shell_command.test.ts +++ b/integration-tests/run_shell_command.test.ts @@ -4,60 +4,67 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { test } from 'node:test'; -import { strict as assert } from 'assert'; 
+import { describe, it, expect } from 'vitest'; import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; -test('should be able to run a shell command', async () => { - const rig = new TestRig(); - await rig.setup('should be able to run a shell command'); +describe('run_shell_command', () => { + it('should be able to run a shell command', async () => { + const rig = new TestRig(); + await rig.setup('should be able to run a shell command'); - const prompt = `Please run the command "echo hello-world" and show me the output`; + const prompt = `Please run the command "echo hello-world" and show me the output`; - const result = await rig.run(prompt); + const result = await rig.run(prompt); - const foundToolCall = await rig.waitForToolCall('run_shell_command'); + const foundToolCall = await rig.waitForToolCall('run_shell_command'); - // Add debugging information - if (!foundToolCall || !result.includes('hello-world')) { - printDebugInfo(rig, result, { - 'Found tool call': foundToolCall, - 'Contains hello-world': result.includes('hello-world'), - }); - } + // Add debugging information + if (!foundToolCall || !result.includes('hello-world')) { + printDebugInfo(rig, result, { + 'Found tool call': foundToolCall, + 'Contains hello-world': result.includes('hello-world'), + }); + } - assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call'); + expect( + foundToolCall, + 'Expected to find a run_shell_command tool call', + ).toBeTruthy(); - // Validate model output - will throw if no output, warn if missing expected content - // Model often reports exit code instead of showing output - validateModelOutput( - result, - ['hello-world', 'exit code 0'], - 'Shell command test', - ); -}); - -test('should be able to run a shell command via stdin', async () => { - const rig = new TestRig(); - await rig.setup('should be able to run a shell command via stdin'); - - const prompt = `Please run the command "echo test-stdin" and show me what it outputs`; 
- - const result = await rig.run({ stdin: prompt }); - - const foundToolCall = await rig.waitForToolCall('run_shell_command'); - - // Add debugging information - if (!foundToolCall || !result.includes('test-stdin')) { - printDebugInfo(rig, result, { - 'Test type': 'Stdin test', - 'Found tool call': foundToolCall, - 'Contains test-stdin': result.includes('test-stdin'), - }); - } - - assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call'); - - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput(result, 'test-stdin', 'Shell command stdin test'); + // Validate model output - will throw if no output, warn if missing expected content + // Model often reports exit code instead of showing output + validateModelOutput( + result, + ['hello-world', 'exit code 0'], + 'Shell command test', + ); + }); + + it('should be able to run a shell command via stdin', async () => { + const rig = new TestRig(); + await rig.setup('should be able to run a shell command via stdin'); + + const prompt = `Please run the command "echo test-stdin" and show me what it outputs`; + + const result = await rig.run({ stdin: prompt }); + + const foundToolCall = await rig.waitForToolCall('run_shell_command'); + + // Add debugging information + if (!foundToolCall || !result.includes('test-stdin')) { + printDebugInfo(rig, result, { + 'Test type': 'Stdin test', + 'Found tool call': foundToolCall, + 'Contains test-stdin': result.includes('test-stdin'), + }); + } + + expect( + foundToolCall, + 'Expected to find a run_shell_command tool call', + ).toBeTruthy(); + + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput(result, 'test-stdin', 'Shell command stdin test'); + }); }); diff --git a/integration-tests/save_memory.test.ts b/integration-tests/save_memory.test.ts index 3ec641d4..15b062e9 100644 --- a/integration-tests/save_memory.test.ts +++ b/integration-tests/save_memory.test.ts 
@@ -4,38 +4,42 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { test } from 'node:test'; -import { strict as assert } from 'assert'; +import { describe, it, expect } from 'vitest'; import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js'; -test('should be able to save to memory', async () => { - const rig = new TestRig(); - await rig.setup('should be able to save to memory'); +describe('save_memory', () => { + it('should be able to save to memory', async () => { + const rig = new TestRig(); + await rig.setup('should be able to save to memory'); - const prompt = `remember that my favorite color is blue. + const prompt = `remember that my favorite color is blue. what is my favorite color? tell me that and surround it with $ symbol`; - const result = await rig.run(prompt); + const result = await rig.run(prompt); - const foundToolCall = await rig.waitForToolCall('save_memory'); + const foundToolCall = await rig.waitForToolCall('save_memory'); - // Add debugging information - if (!foundToolCall || !result.toLowerCase().includes('blue')) { - const allTools = printDebugInfo(rig, result, { - 'Found tool call': foundToolCall, - 'Contains blue': result.toLowerCase().includes('blue'), - }); + // Add debugging information + if (!foundToolCall || !result.toLowerCase().includes('blue')) { + const allTools = printDebugInfo(rig, result, { + 'Found tool call': foundToolCall, + 'Contains blue': result.toLowerCase().includes('blue'), + }); - console.error( - 'Memory tool calls:', - allTools - .filter((t) => t.toolRequest.name === 'save_memory') - .map((t) => t.toolRequest.args), - ); - } + console.error( + 'Memory tool calls:', + allTools + .filter((t) => t.toolRequest.name === 'save_memory') + .map((t) => t.toolRequest.args), + ); + } - assert.ok(foundToolCall, 'Expected to find a save_memory tool call'); + expect( + foundToolCall, + 'Expected to find a save_memory tool call', + ).toBeTruthy(); - // Validate model output - will throw if no output, warn if 
missing expected content - validateModelOutput(result, 'blue', 'Save memory test'); + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput(result, 'blue', 'Save memory test'); + }); }); diff --git a/integration-tests/simple-mcp-server.test.ts b/integration-tests/simple-mcp-server.test.ts index c4191078..98c81f16 100644 --- a/integration-tests/simple-mcp-server.test.ts +++ b/integration-tests/simple-mcp-server.test.ts @@ -10,8 +10,7 @@ * external dependencies, making it compatible with Docker sandbox mode. */ -import { test, describe, before } from 'node:test'; -import { strict as assert } from 'node:assert'; +import { describe, it, beforeAll, expect } from 'vitest'; import { TestRig, validateModelOutput } from './test-helper.js'; import { join } from 'path'; import { writeFileSync } from 'fs'; @@ -168,7 +167,7 @@ rpc.send({ describe('simple-mcp-server', () => { const rig = new TestRig(); - before(async () => { + beforeAll(async () => { // Setup test directory with MCP server configuration await rig.setup('simple-mcp-server', { settings: { @@ -192,17 +191,20 @@ describe('simple-mcp-server', () => { } }); - test('should add two numbers', async () => { + it('should add two numbers', async () => { // Test directory is already set up in before hook // Just run the command - MCP server config is in settings.json const output = await rig.run('add 5 and 10'); const foundToolCall = await rig.waitForToolCall('add'); - assert.ok(foundToolCall, 'Expected to find an add tool call'); + expect(foundToolCall, 'Expected to find an add tool call').toBeTruthy(); // Validate model output - will throw if no output, fail if missing expected content validateModelOutput(output, '15', 'MCP server test'); - assert.ok(output.includes('15'), 'Expected output to contain the sum (15)'); + expect( + output.includes('15'), + 'Expected output to contain the sum (15)', + ).toBeTruthy(); }); }); diff --git a/integration-tests/test-helper.ts 
b/integration-tests/test-helper.ts index 33443aaf..ac7fec6f 100644 --- a/integration-tests/test-helper.ts +++ b/integration-tests/test-helper.ts @@ -10,7 +10,7 @@ import { mkdirSync, writeFileSync, readFileSync } from 'fs'; import { join, dirname } from 'path'; import { fileURLToPath } from 'url'; import { env } from 'process'; -import { fileExists } from '../scripts/telemetry_utils.js'; +import fs from 'fs'; const __dirname = dirname(fileURLToPath(import.meta.url)); @@ -297,15 +297,12 @@ export class TestRig { } readFile(fileName: string) { - const content = readFileSync(join(this.testDir!, fileName), 'utf-8'); + const filePath = join(this.testDir!, fileName); + const content = readFileSync(filePath, 'utf-8'); if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') { - const testId = `${env.TEST_FILE_NAME!.replace( - '.test.js', - '', - )}:${this.testName!.replace(/ /g, '-')}`; - console.log(`--- FILE: ${testId}/${fileName} ---`); + console.log(`--- FILE: ${filePath} ---`); console.log(content); - console.log(`--- END FILE: ${testId}/${fileName} ---`); + console.log(`--- END FILE: ${filePath} ---`); } return content; } @@ -336,7 +333,7 @@ export class TestRig { // Wait for telemetry file to exist and have content await this.poll( () => { - if (!fileExists(logFilePath)) return false; + if (!fs.existsSync(logFilePath)) return false; try { const content = readFileSync(logFilePath, 'utf-8'); // Check if file has meaningful content (at least one complete JSON object) @@ -547,7 +544,7 @@ export class TestRig { // Try reading from file first const logFilePath = join(this.testDir!, 'telemetry.log'); - if (fileExists(logFilePath)) { + if (fs.existsSync(logFilePath)) { try { const content = readFileSync(logFilePath, 'utf-8'); if (content && content.includes('"event.name"')) { @@ -581,7 +578,7 @@ export class TestRig { } // Check if file exists, if not return empty array (file might not be created yet) - if (!fileExists(logFilePath)) { + if (!fs.existsSync(logFilePath)) { 
return []; } diff --git a/integration-tests/vitest.config.ts b/integration-tests/vitest.config.ts new file mode 100644 index 00000000..e0c6b848 --- /dev/null +++ b/integration-tests/vitest.config.ts @@ -0,0 +1,18 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + testTimeout: 300000, // 5 minutes + globalSetup: './globalSetup.ts', + reporters: ['default'], + include: ['**/*.test.ts'], + retry: 2, + fileParallelism: false, + }, +}); diff --git a/integration-tests/write_file.test.ts b/integration-tests/write_file.test.ts index 7809161e..3fe26af6 100644 --- a/integration-tests/write_file.test.ts +++ b/integration-tests/write_file.test.ts @@ -4,8 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { test } from 'node:test'; -import { strict as assert } from 'assert'; +import { describe, it, expect } from 'vitest'; import { TestRig, createToolCallErrorMessage, @@ -13,56 +12,59 @@ import { validateModelOutput, } from './test-helper.js'; -test('should be able to write a file', async () => { - const rig = new TestRig(); - await rig.setup('should be able to write a file'); - const prompt = `show me an example of using the write tool. 
put a dad joke in dad.txt`; - const result = await rig.run(prompt); + const result = await rig.run(prompt); - const foundToolCall = await rig.waitForToolCall('write_file'); + const foundToolCall = await rig.waitForToolCall('write_file'); - // Add debugging information - if (!foundToolCall) { - printDebugInfo(rig, result); - } + // Add debugging information + if (!foundToolCall) { + printDebugInfo(rig, result); + } - const allTools = rig.readToolLogs(); - assert.ok( - foundToolCall, - createToolCallErrorMessage( - 'write_file', - allTools.map((t) => t.toolRequest.name), - result, - ), - ); - - // Validate model output - will throw if no output, warn if missing expected content - validateModelOutput(result, 'dad.txt', 'Write file test'); - - const newFilePath = 'dad.txt'; - - const newFileContent = rig.readFile(newFilePath); - - // Add debugging for file content - if (newFileContent === '') { - console.error('File was created but is empty'); - console.error( - 'Tool calls:', - rig.readToolLogs().map((t) => ({ - name: t.toolRequest.name, - args: t.toolRequest.args, - })), - ); + const allTools = rig.readToolLogs(); + // Failure message goes to expect(); toBeTruthy() takes no arguments. + expect( + foundToolCall, + createToolCallErrorMessage( + 'write_file', + allTools.map((t) => t.toolRequest.name), + result, + ), + ).toBeTruthy(); - } - assert.notEqual(newFileContent, '', 'Expected file to have content'); + // Validate model output - will throw if no output, warn if missing expected content + validateModelOutput(result, 'dad.txt', 'Write file test'); - // Log success info if verbose - if (process.env.VERBOSE === 'true') { - console.log( - 'File created successfully with content:', - newFileContent.substring(0, 100) + '...', - ); - } + const newFilePath = 'dad.txt'; + + const newFileContent = rig.readFile(newFilePath); + + // Add debugging for file content + if (newFileContent === '') { + console.error('File was created but is empty'); + console.error( + 'Tool calls:', + rig.readToolLogs().map((t) => ({ + 
name: t.toolRequest.name, + args: t.toolRequest.args, + })), + ); + } + + expect(newFileContent, 'Expected file to have content').not.toBe(''); + + // Log success info if verbose + if (process.env.VERBOSE === 'true') { + console.log( + 'File created successfully with content:', + newFileContent.substring(0, 100) + '...', + ); + } + }); }); diff --git a/package.json b/package.json index e5a14de5..8b6d7295 100644 --- a/package.json +++ b/package.json @@ -33,11 +33,11 @@ "test": "npm run test --workspaces --if-present", "test:ci": "npm run test:ci --workspaces --if-present && npm run test:scripts", "test:scripts": "vitest run --config ./scripts/tests/vitest.config.ts", - "test:e2e": "npm run test:integration:sandbox:none -- --verbose --keep-output", + "test:e2e": "cross-env VERBOSE=true KEEP_OUTPUT=true npm run test:integration:sandbox:none", "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman", - "test:integration:sandbox:none": "GEMINI_SANDBOX=false node integration-tests/run-tests.js", - "test:integration:sandbox:docker": "GEMINI_SANDBOX=docker node integration-tests/run-tests.js", - "test:integration:sandbox:podman": "GEMINI_SANDBOX=podman node integration-tests/run-tests.js", + "test:integration:sandbox:none": "GEMINI_SANDBOX=false vitest run --root ./integration-tests", + "test:integration:sandbox:docker": "npm run build:sandbox && GEMINI_SANDBOX=docker vitest run --root ./integration-tests", + "test:integration:sandbox:podman": "GEMINI_SANDBOX=podman vitest run --root ./integration-tests", "lint": "eslint . --ext .ts,.tsx && eslint integration-tests", "lint:fix": "eslint . --fix && eslint integration-tests --fix", "lint:ci": "eslint . --ext .ts,.tsx --max-warnings 0 && eslint integration-tests --max-warnings 0",