Upgrade integration tests to use Vitest (#6021)
This commit is contained in:
parent
8d6eb8c322
commit
9d023be1d1
|
@ -46,7 +46,9 @@ jobs:
|
|||
- name: Run E2E tests
|
||||
env:
|
||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
run: npm run test:integration:${{ matrix.sandbox }} -- --verbose --keep-output
|
||||
VERBOSE: true
|
||||
KEEP_OUTPUT: true
|
||||
run: npm run test:integration:${{ matrix.sandbox }}
|
||||
|
||||
e2e-test-macos:
|
||||
name: E2E Test - macOS
|
||||
|
|
|
@ -17,19 +17,6 @@
|
|||
"GEMINI_SANDBOX": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"name": "Launch E2E",
|
||||
"program": "${workspaceFolder}/integration-tests/run-tests.js",
|
||||
"args": ["--verbose", "--keep-output", "list_directory"],
|
||||
"skipFiles": ["<node_internals>/**"],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"env": {
|
||||
"GEMINI_SANDBOX": "false"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Launch Companion VS Code Extension",
|
||||
"type": "extensionHost",
|
||||
|
|
|
@ -67,13 +67,9 @@ The integration test runner provides several options for diagnostics to help tra
|
|||
|
||||
You can preserve the temporary files created during a test run for inspection. This is useful for debugging issues with file system operations.
|
||||
|
||||
To keep the test output, you can either use the `--keep-output` flag or set the `KEEP_OUTPUT` environment variable to `true`.
|
||||
To keep the test output, set the `KEEP_OUTPUT` environment variable to `true`.
|
||||
|
||||
```bash
|
||||
# Using the flag
|
||||
npm run test:integration:sandbox:none -- --keep-output
|
||||
|
||||
# Using the environment variable
|
||||
KEEP_OUTPUT=true npm run test:integration:sandbox:none
|
||||
```
|
||||
|
||||
|
@ -81,20 +77,20 @@ When output is kept, the test runner will print the path to the unique directory
|
|||
|
||||
### Verbose output
|
||||
|
||||
For more detailed debugging, the `--verbose` flag streams the real-time output from the `gemini` command to the console.
|
||||
For more detailed debugging, set the `VERBOSE` environment variable to `true` to stream the real-time output from the `gemini` command to the console.
|
||||
|
||||
```bash
|
||||
npm run test:integration:sandbox:none -- --verbose
|
||||
VERBOSE=true npm run test:integration:sandbox:none
|
||||
```
|
||||
|
||||
When using `--verbose` and `--keep-output` in the same command, the output is streamed to the console and also saved to a log file within the test's temporary directory.
|
||||
When using `VERBOSE=true` and `KEEP_OUTPUT=true` in the same command, the output is streamed to the console and also saved to a log file within the test's temporary directory.
|
||||
|
||||
The verbose output is formatted to clearly identify the source of the logs:
|
||||
|
||||
```
|
||||
--- TEST: <file-name-without-js>:<test-name> ---
|
||||
--- TEST: <log dir>:<test-name> ---
|
||||
... output from the gemini command ...
|
||||
--- END TEST: <file-name-without-js>:<test-name> ---
|
||||
--- END TEST: <log dir>:<test-name> ---
|
||||
```
|
||||
|
||||
## Linting and formatting
|
||||
|
|
|
@ -4,86 +4,90 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { strict as assert } from 'assert';
|
||||
import { test } from 'node:test';
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
|
||||
test('should be able to read a file', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to read a file');
|
||||
rig.createFile('test.txt', 'hello world');
|
||||
describe('file-system', () => {
|
||||
it('should be able to read a file', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to read a file');
|
||||
rig.createFile('test.txt', 'hello world');
|
||||
|
||||
const result = await rig.run(
|
||||
`read the file test.txt and show me its contents`,
|
||||
);
|
||||
const result = await rig.run(
|
||||
`read the file test.txt and show me its contents`,
|
||||
);
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('read_file');
|
||||
const foundToolCall = await rig.waitForToolCall('read_file');
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall || !result.includes('hello world')) {
|
||||
printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
'Contains hello world': result.includes('hello world'),
|
||||
});
|
||||
}
|
||||
// Add debugging information
|
||||
if (!foundToolCall || !result.includes('hello world')) {
|
||||
printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
'Contains hello world': result.includes('hello world'),
|
||||
});
|
||||
}
|
||||
|
||||
assert.ok(foundToolCall, 'Expected to find a read_file tool call');
|
||||
expect(
|
||||
foundToolCall,
|
||||
'Expected to find a read_file tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'hello world', 'File read test');
|
||||
});
|
||||
|
||||
test('should be able to write a file', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to write a file');
|
||||
rig.createFile('test.txt', '');
|
||||
|
||||
const result = await rig.run(`edit test.txt to have a hello world message`);
|
||||
|
||||
// Accept multiple valid tools for editing files
|
||||
const foundToolCall = await rig.waitForAnyToolCall([
|
||||
'write_file',
|
||||
'edit',
|
||||
'replace',
|
||||
]);
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall) {
|
||||
printDebugInfo(rig, result);
|
||||
}
|
||||
|
||||
assert.ok(
|
||||
foundToolCall,
|
||||
'Expected to find a write_file, edit, or replace tool call',
|
||||
);
|
||||
|
||||
// Validate model output - will throw if no output
|
||||
validateModelOutput(result, null, 'File write test');
|
||||
|
||||
const fileContent = rig.readFile('test.txt');
|
||||
|
||||
// Add debugging for file content
|
||||
if (!fileContent.toLowerCase().includes('hello')) {
|
||||
const writeCalls = rig
|
||||
.readToolLogs()
|
||||
.filter((t) => t.toolRequest.name === 'write_file')
|
||||
.map((t) => t.toolRequest.args);
|
||||
|
||||
printDebugInfo(rig, result, {
|
||||
'File content mismatch': true,
|
||||
'Expected to contain': 'hello',
|
||||
'Actual content': fileContent,
|
||||
'Write tool calls': JSON.stringify(writeCalls),
|
||||
});
|
||||
}
|
||||
|
||||
assert.ok(
|
||||
fileContent.toLowerCase().includes('hello'),
|
||||
'Expected file to contain hello',
|
||||
);
|
||||
|
||||
// Log success info if verbose
|
||||
if (process.env.VERBOSE === 'true') {
|
||||
console.log('File written successfully with hello message.');
|
||||
}
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'hello world', 'File read test');
|
||||
});
|
||||
|
||||
it('should be able to write a file', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to write a file');
|
||||
rig.createFile('test.txt', '');
|
||||
|
||||
const result = await rig.run(`edit test.txt to have a hello world message`);
|
||||
|
||||
// Accept multiple valid tools for editing files
|
||||
const foundToolCall = await rig.waitForAnyToolCall([
|
||||
'write_file',
|
||||
'edit',
|
||||
'replace',
|
||||
]);
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall) {
|
||||
printDebugInfo(rig, result);
|
||||
}
|
||||
|
||||
expect(
|
||||
foundToolCall,
|
||||
'Expected to find a write_file, edit, or replace tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output
|
||||
validateModelOutput(result, null, 'File write test');
|
||||
|
||||
const fileContent = rig.readFile('test.txt');
|
||||
|
||||
// Add debugging for file content
|
||||
if (!fileContent.toLowerCase().includes('hello')) {
|
||||
const writeCalls = rig
|
||||
.readToolLogs()
|
||||
.filter((t) => t.toolRequest.name === 'write_file')
|
||||
.map((t) => t.toolRequest.args);
|
||||
|
||||
printDebugInfo(rig, result, {
|
||||
'File content mismatch': true,
|
||||
'Expected to contain': 'hello',
|
||||
'Actual content': fileContent,
|
||||
'Write tool calls': JSON.stringify(writeCalls),
|
||||
});
|
||||
}
|
||||
|
||||
expect(
|
||||
fileContent.toLowerCase().includes('hello'),
|
||||
'Expected file to contain hello',
|
||||
).toBeTruthy();
|
||||
|
||||
// Log success info if verbose
|
||||
if (process.env.VERBOSE === 'true') {
|
||||
console.log('File written successfully with hello message.');
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { mkdir, readdir, rm } from 'fs/promises';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const rootDir = join(__dirname, '..');
|
||||
const integrationTestsDir = join(rootDir, '.integration-tests');
|
||||
let runDir = ''; // Make runDir accessible in teardown
|
||||
|
||||
/**
 * Prepares the environment for an integration test run.
 *
 * Creates a fresh, timestamp-named run directory under `integrationTestsDir`,
 * prunes older run directories (keeping the most recent few for debugging),
 * and publishes the run's locations to child code via environment variables:
 * `INTEGRATION_TEST_FILE_DIR`, `GEMINI_CLI_INTEGRATION_TEST`,
 * `TELEMETRY_LOG_FILE`, and a defaulted `VERBOSE`.
 *
 * Pruning is best-effort: failures are logged and never abort the run.
 *
 * @returns {Promise<void>} Resolves once the run directory exists and the
 *   environment has been populated.
 */
export async function setup() {
  // Stored at module level so teardown() can find the directory later.
  runDir = join(integrationTestsDir, `${Date.now()}`);
  await mkdir(runDir, { recursive: true });

  // Clean up old test runs, but keep the latest few for debugging.
  const maxRunsToKeep = 5;
  try {
    const entries = await readdir(integrationTestsDir);
    // Run directories are named with Date.now(); ignore anything else so that
    // unrelated files are never deleted, and order numerically so the result
    // does not depend on every name having the same number of digits.
    const runs = entries
      .filter((entry) => /^\d+$/.test(entry))
      .sort((a, b) => Number(a) - Number(b));
    const staleRuns = runs.slice(0, Math.max(0, runs.length - maxRunsToKeep));
    await Promise.all(
      staleRuns.map((staleRun) =>
        rm(join(integrationTestsDir, staleRun), {
          recursive: true,
          force: true,
        }),
      ),
    );
  } catch (e) {
    console.error('Error cleaning up old test runs:', e);
  }

  process.env.INTEGRATION_TEST_FILE_DIR = runDir;
  process.env.GEMINI_CLI_INTEGRATION_TEST = 'true';
  process.env.TELEMETRY_LOG_FILE = join(runDir, 'telemetry.log');

  // Must match the check in teardown(): only the literal string 'true' keeps
  // the output, so e.g. KEEP_OUTPUT=false does not claim output is kept.
  if (process.env.KEEP_OUTPUT === 'true') {
    console.log(`Keeping output for test run in: ${runDir}`);
  }
  process.env.VERBOSE = process.env.VERBOSE ?? 'false';

  console.log(`\nIntegration test output directory: ${runDir}`);
}
|
||||
|
||||
/**
 * Removes the directory created for this test run.
 *
 * The directory is left in place when `KEEP_OUTPUT` is exactly `'true'`, and
 * nothing is attempted when no run directory was recorded.
 *
 * @returns {Promise<void>} Resolves once cleanup has finished or been skipped.
 */
export async function teardown() {
  // No run directory recorded, so there is nothing to clean up.
  if (!runDir) {
    return;
  }

  // The caller asked to preserve the run's files for inspection.
  if (process.env.KEEP_OUTPUT === 'true') {
    return;
  }

  await rm(runDir, { recursive: true, force: true });
}
|
|
@ -4,74 +4,78 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'assert';
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
|
||||
test('should be able to search the web', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to search the web');
|
||||
describe('google_web_search', () => {
|
||||
it('should be able to search the web', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to search the web');
|
||||
|
||||
let result;
|
||||
try {
|
||||
result = await rig.run(`what is the weather in London`);
|
||||
} catch (error) {
|
||||
// Network errors can occur in CI environments
|
||||
if (
|
||||
error instanceof Error &&
|
||||
(error.message.includes('network') || error.message.includes('timeout'))
|
||||
) {
|
||||
console.warn(
|
||||
'Skipping test due to network error:',
|
||||
(error as Error).message,
|
||||
);
|
||||
return; // Skip the test
|
||||
let result;
|
||||
try {
|
||||
result = await rig.run(`what is the weather in London`);
|
||||
} catch (error) {
|
||||
// Network errors can occur in CI environments
|
||||
if (
|
||||
error instanceof Error &&
|
||||
(error.message.includes('network') || error.message.includes('timeout'))
|
||||
) {
|
||||
console.warn(
|
||||
'Skipping test due to network error:',
|
||||
(error as Error).message,
|
||||
);
|
||||
return; // Skip the test
|
||||
}
|
||||
throw error; // Re-throw if not a network error
|
||||
}
|
||||
throw error; // Re-throw if not a network error
|
||||
}
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('google_web_search');
|
||||
const foundToolCall = await rig.waitForToolCall('google_web_search');
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall) {
|
||||
const allTools = printDebugInfo(rig, result);
|
||||
// Add debugging information
|
||||
if (!foundToolCall) {
|
||||
const allTools = printDebugInfo(rig, result);
|
||||
|
||||
// Check if the tool call failed due to network issues
|
||||
const failedSearchCalls = allTools.filter(
|
||||
(t) =>
|
||||
t.toolRequest.name === 'google_web_search' && !t.toolRequest.success,
|
||||
// Check if the tool call failed due to network issues
|
||||
const failedSearchCalls = allTools.filter(
|
||||
(t) =>
|
||||
t.toolRequest.name === 'google_web_search' && !t.toolRequest.success,
|
||||
);
|
||||
if (failedSearchCalls.length > 0) {
|
||||
console.warn(
|
||||
'google_web_search tool was called but failed, possibly due to network issues',
|
||||
);
|
||||
console.warn(
|
||||
'Failed calls:',
|
||||
failedSearchCalls.map((t) => t.toolRequest.args),
|
||||
);
|
||||
return; // Skip the test if network issues
|
||||
}
|
||||
}
|
||||
|
||||
expect(
|
||||
foundToolCall,
|
||||
'Expected to find a call to google_web_search',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
const hasExpectedContent = validateModelOutput(
|
||||
result,
|
||||
['weather', 'london'],
|
||||
'Google web search test',
|
||||
);
|
||||
if (failedSearchCalls.length > 0) {
|
||||
console.warn(
|
||||
'google_web_search tool was called but failed, possibly due to network issues',
|
||||
);
|
||||
console.warn(
|
||||
'Failed calls:',
|
||||
failedSearchCalls.map((t) => t.toolRequest.args),
|
||||
);
|
||||
return; // Skip the test if network issues
|
||||
|
||||
// If content was missing, log the search queries used
|
||||
if (!hasExpectedContent) {
|
||||
const searchCalls = rig
|
||||
.readToolLogs()
|
||||
.filter((t) => t.toolRequest.name === 'google_web_search');
|
||||
if (searchCalls.length > 0) {
|
||||
console.warn(
|
||||
'Search queries used:',
|
||||
searchCalls.map((t) => t.toolRequest.args),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert.ok(foundToolCall, 'Expected to find a call to google_web_search');
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
const hasExpectedContent = validateModelOutput(
|
||||
result,
|
||||
['weather', 'london'],
|
||||
'Google web search test',
|
||||
);
|
||||
|
||||
// If content was missing, log the search queries used
|
||||
if (!hasExpectedContent) {
|
||||
const searchCalls = rig
|
||||
.readToolLogs()
|
||||
.filter((t) => t.toolRequest.name === 'google_web_search');
|
||||
if (searchCalls.length > 0) {
|
||||
console.warn(
|
||||
'Search queries used:',
|
||||
searchCalls.map((t) => t.toolRequest.args),
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
|
@ -4,59 +4,63 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'assert';
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
import { existsSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
|
||||
test('should be able to list a directory', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to list a directory');
|
||||
rig.createFile('file1.txt', 'file 1 content');
|
||||
rig.mkdir('subdir');
|
||||
rig.sync();
|
||||
describe('list_directory', () => {
|
||||
it('should be able to list a directory', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to list a directory');
|
||||
rig.createFile('file1.txt', 'file 1 content');
|
||||
rig.mkdir('subdir');
|
||||
rig.sync();
|
||||
|
||||
// Poll for filesystem changes to propagate in containers
|
||||
await rig.poll(
|
||||
() => {
|
||||
// Check if the files exist in the test directory
|
||||
const file1Path = join(rig.testDir!, 'file1.txt');
|
||||
const subdirPath = join(rig.testDir!, 'subdir');
|
||||
return existsSync(file1Path) && existsSync(subdirPath);
|
||||
},
|
||||
1000, // 1 second max wait
|
||||
50, // check every 50ms
|
||||
);
|
||||
|
||||
const prompt = `Can you list the files in the current directory. Display them in the style of 'ls'`;
|
||||
|
||||
const result = await rig.run(prompt);
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('list_directory');
|
||||
|
||||
// Add debugging information
|
||||
if (
|
||||
!foundToolCall ||
|
||||
!result.includes('file1.txt') ||
|
||||
!result.includes('subdir')
|
||||
) {
|
||||
const allTools = printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
'Contains file1.txt': result.includes('file1.txt'),
|
||||
'Contains subdir': result.includes('subdir'),
|
||||
});
|
||||
|
||||
console.error(
|
||||
'List directory calls:',
|
||||
allTools
|
||||
.filter((t) => t.toolRequest.name === 'list_directory')
|
||||
.map((t) => t.toolRequest.args),
|
||||
// Poll for filesystem changes to propagate in containers
|
||||
await rig.poll(
|
||||
() => {
|
||||
// Check if the files exist in the test directory
|
||||
const file1Path = join(rig.testDir!, 'file1.txt');
|
||||
const subdirPath = join(rig.testDir!, 'subdir');
|
||||
return existsSync(file1Path) && existsSync(subdirPath);
|
||||
},
|
||||
1000, // 1 second max wait
|
||||
50, // check every 50ms
|
||||
);
|
||||
}
|
||||
|
||||
assert.ok(foundToolCall, 'Expected to find a list_directory tool call');
|
||||
const prompt = `Can you list the files in the current directory. Display them in the style of 'ls'`;
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, ['file1.txt', 'subdir'], 'List directory test');
|
||||
const result = await rig.run(prompt);
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('list_directory');
|
||||
|
||||
// Add debugging information
|
||||
if (
|
||||
!foundToolCall ||
|
||||
!result.includes('file1.txt') ||
|
||||
!result.includes('subdir')
|
||||
) {
|
||||
const allTools = printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
'Contains file1.txt': result.includes('file1.txt'),
|
||||
'Contains subdir': result.includes('subdir'),
|
||||
});
|
||||
|
||||
console.error(
|
||||
'List directory calls:',
|
||||
allTools
|
||||
.filter((t) => t.toolRequest.name === 'list_directory')
|
||||
.map((t) => t.toolRequest.args),
|
||||
);
|
||||
}
|
||||
|
||||
expect(
|
||||
foundToolCall,
|
||||
'Expected to find a list_directory tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, ['file1.txt', 'subdir'], 'List directory test');
|
||||
});
|
||||
});
|
||||
|
|
|
@ -9,15 +9,11 @@
|
|||
* and then detect and warn about the potential tools that caused the error.
|
||||
*/
|
||||
|
||||
import { test, describe, before } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { describe, it, beforeAll, expect } from 'vitest';
|
||||
import { TestRig } from './test-helper.js';
|
||||
import { join } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { writeFileSync } from 'fs';
|
||||
|
||||
const __dirname = fileURLToPath(new URL('.', import.meta.url));
|
||||
|
||||
// Create a minimal MCP server that doesn't require external dependencies
|
||||
// This implements the MCP protocol directly using Node.js built-ins
|
||||
const serverScript = `#!/usr/bin/env node
|
||||
|
@ -160,7 +156,7 @@ rpc.send({
|
|||
describe('mcp server with cyclic tool schema is detected', () => {
|
||||
const rig = new TestRig();
|
||||
|
||||
before(async () => {
|
||||
beforeAll(async () => {
|
||||
// Setup test directory with MCP server configuration
|
||||
await rig.setup('cyclic-schema-mcp-server', {
|
||||
settings: {
|
||||
|
@ -174,7 +170,7 @@ describe('mcp server with cyclic tool schema is detected', () => {
|
|||
});
|
||||
|
||||
// Create server script in the test directory
|
||||
const testServerPath = join(rig.testDir, 'mcp-server.cjs');
|
||||
const testServerPath = join(rig.testDir!, 'mcp-server.cjs');
|
||||
writeFileSync(testServerPath, serverScript);
|
||||
|
||||
// Make the script executable (though running with 'node' should work anyway)
|
||||
|
@ -184,15 +180,14 @@ describe('mcp server with cyclic tool schema is detected', () => {
|
|||
}
|
||||
});
|
||||
|
||||
test('should error and suggest disabling the cyclic tool', async () => {
|
||||
it('should error and suggest disabling the cyclic tool', async () => {
|
||||
// Just run any command to trigger the schema depth error.
|
||||
// If this test starts failing, check `isSchemaDepthError` from
|
||||
// geminiChat.ts to see if it needs to be updated.
|
||||
// Or, possibly it could mean that gemini has fixed the issue.
|
||||
const output = await rig.run('hello');
|
||||
|
||||
assert.match(
|
||||
output,
|
||||
expect(output).toMatch(
|
||||
/Skipping tool 'tool_with_cyclic_schema' from MCP server 'cyclic-schema-server' because it has missing types in its parameter schema/,
|
||||
);
|
||||
});
|
|
@ -4,47 +4,48 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'assert';
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
|
||||
test('should be able to read multiple files', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to read multiple files');
|
||||
rig.createFile('file1.txt', 'file 1 content');
|
||||
rig.createFile('file2.txt', 'file 2 content');
|
||||
describe('read_many_files', () => {
|
||||
it('should be able to read multiple files', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to read multiple files');
|
||||
rig.createFile('file1.txt', 'file 1 content');
|
||||
rig.createFile('file2.txt', 'file 2 content');
|
||||
|
||||
const prompt = `Please use read_many_files to read file1.txt and file2.txt and show me what's in them`;
|
||||
const prompt = `Please use read_many_files to read file1.txt and file2.txt and show me what's in them`;
|
||||
|
||||
const result = await rig.run(prompt);
|
||||
const result = await rig.run(prompt);
|
||||
|
||||
// Check for either read_many_files or multiple read_file calls
|
||||
const allTools = rig.readToolLogs();
|
||||
const readManyFilesCall = await rig.waitForToolCall('read_many_files');
|
||||
const readFileCalls = allTools.filter(
|
||||
(t) => t.toolRequest.name === 'read_file',
|
||||
);
|
||||
// Check for either read_many_files or multiple read_file calls
|
||||
const allTools = rig.readToolLogs();
|
||||
const readManyFilesCall = await rig.waitForToolCall('read_many_files');
|
||||
const readFileCalls = allTools.filter(
|
||||
(t) => t.toolRequest.name === 'read_file',
|
||||
);
|
||||
|
||||
// Accept either read_many_files OR at least 2 read_file calls
|
||||
const foundValidPattern = readManyFilesCall || readFileCalls.length >= 2;
|
||||
// Accept either read_many_files OR at least 2 read_file calls
|
||||
const foundValidPattern = readManyFilesCall || readFileCalls.length >= 2;
|
||||
|
||||
// Add debugging information
|
||||
if (!foundValidPattern) {
|
||||
printDebugInfo(rig, result, {
|
||||
'read_many_files called': readManyFilesCall,
|
||||
'read_file calls': readFileCalls.length,
|
||||
});
|
||||
}
|
||||
// Add debugging information
|
||||
if (!foundValidPattern) {
|
||||
printDebugInfo(rig, result, {
|
||||
'read_many_files called': readManyFilesCall,
|
||||
'read_file calls': readFileCalls.length,
|
||||
});
|
||||
}
|
||||
|
||||
assert.ok(
|
||||
foundValidPattern,
|
||||
'Expected to find either read_many_files or multiple read_file tool calls',
|
||||
);
|
||||
expect(
|
||||
foundValidPattern,
|
||||
'Expected to find either read_many_files or multiple read_file tool calls',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(
|
||||
result,
|
||||
['file 1 content', 'file 2 content'],
|
||||
'Read many files test',
|
||||
);
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(
|
||||
result,
|
||||
['file 1 content', 'file 2 content'],
|
||||
'Read many files test',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
|
@ -4,63 +4,60 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'assert';
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
|
||||
test('should be able to replace content in a file', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to replace content in a file');
|
||||
describe('replace', () => {
|
||||
it('should be able to replace content in a file', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to replace content in a file');
|
||||
|
||||
const fileName = 'file_to_replace.txt';
|
||||
const originalContent = 'original content';
|
||||
const expectedContent = 'replaced content';
|
||||
const fileName = 'file_to_replace.txt';
|
||||
const originalContent = 'original content';
|
||||
const expectedContent = 'replaced content';
|
||||
|
||||
rig.createFile(fileName, originalContent);
|
||||
const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`;
|
||||
rig.createFile(fileName, originalContent);
|
||||
const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`;
|
||||
|
||||
const result = await rig.run(prompt);
|
||||
const result = await rig.run(prompt);
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('replace');
|
||||
const foundToolCall = await rig.waitForToolCall('replace');
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall) {
|
||||
printDebugInfo(rig, result);
|
||||
}
|
||||
// Add debugging information
|
||||
if (!foundToolCall) {
|
||||
printDebugInfo(rig, result);
|
||||
}
|
||||
|
||||
assert.ok(foundToolCall, 'Expected to find a replace tool call');
|
||||
expect(foundToolCall, 'Expected to find a replace tool call').toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(
|
||||
result,
|
||||
['replaced', 'file_to_replace.txt'],
|
||||
'Replace content test',
|
||||
);
|
||||
|
||||
const newFileContent = rig.readFile(fileName);
|
||||
|
||||
// Add debugging for file content
|
||||
if (newFileContent !== expectedContent) {
|
||||
console.error('File content mismatch - Debug info:');
|
||||
console.error('Expected:', expectedContent);
|
||||
console.error('Actual:', newFileContent);
|
||||
console.error(
|
||||
'Tool calls:',
|
||||
rig.readToolLogs().map((t) => ({
|
||||
name: t.toolRequest.name,
|
||||
args: t.toolRequest.args,
|
||||
})),
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(
|
||||
result,
|
||||
['replaced', 'file_to_replace.txt'],
|
||||
'Replace content test',
|
||||
);
|
||||
}
|
||||
|
||||
assert.strictEqual(
|
||||
newFileContent,
|
||||
expectedContent,
|
||||
'File content should be updated correctly',
|
||||
);
|
||||
const newFileContent = rig.readFile(fileName);
|
||||
|
||||
// Log success info if verbose
|
||||
if (process.env.VERBOSE === 'true') {
|
||||
console.log('File replaced successfully. New content:', newFileContent);
|
||||
}
|
||||
// Add debugging for file content
|
||||
if (newFileContent !== expectedContent) {
|
||||
console.error('File content mismatch - Debug info:');
|
||||
console.error('Expected:', expectedContent);
|
||||
console.error('Actual:', newFileContent);
|
||||
console.error(
|
||||
'Tool calls:',
|
||||
rig.readToolLogs().map((t) => ({
|
||||
name: t.toolRequest.name,
|
||||
args: t.toolRequest.args,
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
expect(newFileContent).toBe(expectedContent);
|
||||
|
||||
// Log success info if verbose
|
||||
if (process.env.VERBOSE === 'true') {
|
||||
console.log('File replaced successfully. New content:', newFileContent);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
|
@ -1,182 +0,0 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { spawnSync, spawn } from 'child_process';
|
||||
import { mkdirSync, rmSync, createWriteStream } from 'fs';
|
||||
import { join, dirname, basename } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { glob } from 'glob';
|
||||
|
||||
async function main() {
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const rootDir = join(__dirname, '..');
|
||||
const integrationTestsDir = join(rootDir, '.integration-tests');
|
||||
|
||||
if (process.env.GEMINI_SANDBOX === 'docker' && !process.env.IS_DOCKER) {
|
||||
console.log('Building sandbox for Docker...');
|
||||
const buildResult = spawnSync('npm', ['run', 'build:all'], {
|
||||
stdio: 'inherit',
|
||||
});
|
||||
if (buildResult.status !== 0) {
|
||||
console.error('Sandbox build failed.');
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
const runId = `${Date.now()}`;
|
||||
const runDir = join(integrationTestsDir, runId);
|
||||
|
||||
mkdirSync(runDir, { recursive: true });
|
||||
|
||||
const args = process.argv.slice(2);
|
||||
const keepOutput =
|
||||
process.env.KEEP_OUTPUT === 'true' || args.includes('--keep-output');
|
||||
if (keepOutput) {
|
||||
const keepOutputIndex = args.indexOf('--keep-output');
|
||||
if (keepOutputIndex > -1) {
|
||||
args.splice(keepOutputIndex, 1);
|
||||
}
|
||||
console.log(`Keeping output for test run in: ${runDir}`);
|
||||
}
|
||||
|
||||
const verbose = args.includes('--verbose');
|
||||
if (verbose) {
|
||||
const verboseIndex = args.indexOf('--verbose');
|
||||
if (verboseIndex > -1) {
|
||||
args.splice(verboseIndex, 1);
|
||||
}
|
||||
}
|
||||
|
||||
const testPatterns =
|
||||
args.length > 0
|
||||
? args.map((arg) => `integration-tests/${arg}.test.ts`)
|
||||
: ['integration-tests/*.test.ts'];
|
||||
const testFiles = glob.sync(testPatterns, { cwd: rootDir, absolute: true });
|
||||
|
||||
for (const testFile of testFiles) {
|
||||
const testFileName = basename(testFile);
|
||||
console.log(` Found test file: ${testFileName}`);
|
||||
}
|
||||
|
||||
const MAX_RETRIES = 3;
|
||||
let allTestsPassed = true;
|
||||
|
||||
for (const testFile of testFiles) {
|
||||
const testFileName = basename(testFile);
|
||||
const testFileDir = join(runDir, testFileName);
|
||||
mkdirSync(testFileDir, { recursive: true });
|
||||
|
||||
console.log(
|
||||
`------------- Running test file: ${testFileName} ------------------------------`,
|
||||
);
|
||||
|
||||
let attempt = 0;
|
||||
let testFilePassed = false;
|
||||
let lastStdout = [];
|
||||
let lastStderr = [];
|
||||
|
||||
while (attempt < MAX_RETRIES && !testFilePassed) {
|
||||
attempt++;
|
||||
if (attempt > 1) {
|
||||
console.log(
|
||||
`--- Retrying ${testFileName} (attempt ${attempt} of ${MAX_RETRIES}) ---`,
|
||||
);
|
||||
}
|
||||
|
||||
const nodeArgs = ['--test'];
|
||||
if (verbose) {
|
||||
nodeArgs.push('--test-reporter=spec');
|
||||
}
|
||||
nodeArgs.push(testFile);
|
||||
|
||||
const child = spawn('npx', ['tsx', ...nodeArgs], {
|
||||
stdio: 'pipe',
|
||||
env: {
|
||||
...process.env,
|
||||
GEMINI_CLI_INTEGRATION_TEST: 'true',
|
||||
INTEGRATION_TEST_FILE_DIR: testFileDir,
|
||||
KEEP_OUTPUT: keepOutput.toString(),
|
||||
VERBOSE: verbose.toString(),
|
||||
TEST_FILE_NAME: testFileName,
|
||||
TELEMETRY_LOG_FILE: join(testFileDir, 'telemetry.log'),
|
||||
},
|
||||
});
|
||||
|
||||
let outputStream;
|
||||
if (keepOutput) {
|
||||
const outputFile = join(testFileDir, `output-attempt-${attempt}.log`);
|
||||
outputStream = createWriteStream(outputFile);
|
||||
console.log(`Output for ${testFileName} written to: ${outputFile}`);
|
||||
}
|
||||
|
||||
const stdout = [];
|
||||
const stderr = [];
|
||||
|
||||
child.stdout.on('data', (data) => {
|
||||
if (verbose) {
|
||||
process.stdout.write(data);
|
||||
} else {
|
||||
stdout.push(data);
|
||||
}
|
||||
if (outputStream) {
|
||||
outputStream.write(data);
|
||||
}
|
||||
});
|
||||
|
||||
child.stderr.on('data', (data) => {
|
||||
if (verbose) {
|
||||
process.stderr.write(data);
|
||||
} else {
|
||||
stderr.push(data);
|
||||
}
|
||||
if (outputStream) {
|
||||
outputStream.write(data);
|
||||
}
|
||||
});
|
||||
|
||||
const exitCode = await new Promise((resolve) => {
|
||||
child.on('close', (code) => {
|
||||
if (outputStream) {
|
||||
outputStream.end(() => {
|
||||
resolve(code);
|
||||
});
|
||||
} else {
|
||||
resolve(code);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if (exitCode === 0) {
|
||||
testFilePassed = true;
|
||||
} else {
|
||||
lastStdout = stdout;
|
||||
lastStderr = stderr;
|
||||
}
|
||||
}
|
||||
|
||||
if (!testFilePassed) {
|
||||
console.error(
|
||||
`Test file failed after ${MAX_RETRIES} attempts: ${testFileName}`,
|
||||
);
|
||||
if (!verbose) {
|
||||
process.stdout.write(Buffer.concat(lastStdout).toString('utf8'));
|
||||
process.stderr.write(Buffer.concat(lastStderr).toString('utf8'));
|
||||
}
|
||||
allTestsPassed = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!keepOutput) {
|
||||
rmSync(runDir, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
if (!allTestsPassed) {
|
||||
console.error('One or more test files failed.');
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
main();
|
|
@ -4,60 +4,67 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'assert';
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
|
||||
test('should be able to run a shell command', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to run a shell command');
|
||||
describe('run_shell_command', () => {
|
||||
it('should be able to run a shell command', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to run a shell command');
|
||||
|
||||
const prompt = `Please run the command "echo hello-world" and show me the output`;
|
||||
const prompt = `Please run the command "echo hello-world" and show me the output`;
|
||||
|
||||
const result = await rig.run(prompt);
|
||||
const result = await rig.run(prompt);
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command');
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command');
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall || !result.includes('hello-world')) {
|
||||
printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
'Contains hello-world': result.includes('hello-world'),
|
||||
});
|
||||
}
|
||||
// Add debugging information
|
||||
if (!foundToolCall || !result.includes('hello-world')) {
|
||||
printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
'Contains hello-world': result.includes('hello-world'),
|
||||
});
|
||||
}
|
||||
|
||||
assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call');
|
||||
expect(
|
||||
foundToolCall,
|
||||
'Expected to find a run_shell_command tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
// Model often reports exit code instead of showing output
|
||||
validateModelOutput(
|
||||
result,
|
||||
['hello-world', 'exit code 0'],
|
||||
'Shell command test',
|
||||
);
|
||||
});
|
||||
|
||||
test('should be able to run a shell command via stdin', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to run a shell command via stdin');
|
||||
|
||||
const prompt = `Please run the command "echo test-stdin" and show me what it outputs`;
|
||||
|
||||
const result = await rig.run({ stdin: prompt });
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command');
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall || !result.includes('test-stdin')) {
|
||||
printDebugInfo(rig, result, {
|
||||
'Test type': 'Stdin test',
|
||||
'Found tool call': foundToolCall,
|
||||
'Contains test-stdin': result.includes('test-stdin'),
|
||||
});
|
||||
}
|
||||
|
||||
assert.ok(foundToolCall, 'Expected to find a run_shell_command tool call');
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'test-stdin', 'Shell command stdin test');
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
// Model often reports exit code instead of showing output
|
||||
validateModelOutput(
|
||||
result,
|
||||
['hello-world', 'exit code 0'],
|
||||
'Shell command test',
|
||||
);
|
||||
});
|
||||
|
||||
it('should be able to run a shell command via stdin', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to run a shell command via stdin');
|
||||
|
||||
const prompt = `Please run the command "echo test-stdin" and show me what it outputs`;
|
||||
|
||||
const result = await rig.run({ stdin: prompt });
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('run_shell_command');
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall || !result.includes('test-stdin')) {
|
||||
printDebugInfo(rig, result, {
|
||||
'Test type': 'Stdin test',
|
||||
'Found tool call': foundToolCall,
|
||||
'Contains test-stdin': result.includes('test-stdin'),
|
||||
});
|
||||
}
|
||||
|
||||
expect(
|
||||
foundToolCall,
|
||||
'Expected to find a run_shell_command tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'test-stdin', 'Shell command stdin test');
|
||||
});
|
||||
});
|
||||
|
|
|
@ -4,38 +4,42 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'assert';
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
|
||||
test('should be able to save to memory', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to save to memory');
|
||||
describe('save_memory', () => {
|
||||
it('should be able to save to memory', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to save to memory');
|
||||
|
||||
const prompt = `remember that my favorite color is blue.
|
||||
const prompt = `remember that my favorite color is blue.
|
||||
|
||||
what is my favorite color? tell me that and surround it with $ symbol`;
|
||||
const result = await rig.run(prompt);
|
||||
const result = await rig.run(prompt);
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('save_memory');
|
||||
const foundToolCall = await rig.waitForToolCall('save_memory');
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall || !result.toLowerCase().includes('blue')) {
|
||||
const allTools = printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
'Contains blue': result.toLowerCase().includes('blue'),
|
||||
});
|
||||
// Add debugging information
|
||||
if (!foundToolCall || !result.toLowerCase().includes('blue')) {
|
||||
const allTools = printDebugInfo(rig, result, {
|
||||
'Found tool call': foundToolCall,
|
||||
'Contains blue': result.toLowerCase().includes('blue'),
|
||||
});
|
||||
|
||||
console.error(
|
||||
'Memory tool calls:',
|
||||
allTools
|
||||
.filter((t) => t.toolRequest.name === 'save_memory')
|
||||
.map((t) => t.toolRequest.args),
|
||||
);
|
||||
}
|
||||
console.error(
|
||||
'Memory tool calls:',
|
||||
allTools
|
||||
.filter((t) => t.toolRequest.name === 'save_memory')
|
||||
.map((t) => t.toolRequest.args),
|
||||
);
|
||||
}
|
||||
|
||||
assert.ok(foundToolCall, 'Expected to find a save_memory tool call');
|
||||
expect(
|
||||
foundToolCall,
|
||||
'Expected to find a save_memory tool call',
|
||||
).toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'blue', 'Save memory test');
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'blue', 'Save memory test');
|
||||
});
|
||||
});
|
||||
|
|
|
@ -10,8 +10,7 @@
|
|||
* external dependencies, making it compatible with Docker sandbox mode.
|
||||
*/
|
||||
|
||||
import { test, describe, before } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { describe, it, beforeAll, expect } from 'vitest';
|
||||
import { TestRig, validateModelOutput } from './test-helper.js';
|
||||
import { join } from 'path';
|
||||
import { writeFileSync } from 'fs';
|
||||
|
@ -168,7 +167,7 @@ rpc.send({
|
|||
describe('simple-mcp-server', () => {
|
||||
const rig = new TestRig();
|
||||
|
||||
before(async () => {
|
||||
beforeAll(async () => {
|
||||
// Setup test directory with MCP server configuration
|
||||
await rig.setup('simple-mcp-server', {
|
||||
settings: {
|
||||
|
@ -192,17 +191,20 @@ describe('simple-mcp-server', () => {
|
|||
}
|
||||
});
|
||||
|
||||
test('should add two numbers', async () => {
|
||||
it('should add two numbers', async () => {
|
||||
// Test directory is already set up in the beforeAll hook
|
||||
// Just run the command - MCP server config is in settings.json
|
||||
const output = await rig.run('add 5 and 10');
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('add');
|
||||
|
||||
assert.ok(foundToolCall, 'Expected to find an add tool call');
|
||||
expect(foundToolCall, 'Expected to find an add tool call').toBeTruthy();
|
||||
|
||||
// Validate model output - will throw if no output, fail if missing expected content
|
||||
validateModelOutput(output, '15', 'MCP server test');
|
||||
assert.ok(output.includes('15'), 'Expected output to contain the sum (15)');
|
||||
expect(
|
||||
output.includes('15'),
|
||||
'Expected output to contain the sum (15)',
|
||||
).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
|
|
@ -10,7 +10,7 @@ import { mkdirSync, writeFileSync, readFileSync } from 'fs';
|
|||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { env } from 'process';
|
||||
import { fileExists } from '../scripts/telemetry_utils.js';
|
||||
import fs from 'fs';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
|
@ -297,15 +297,12 @@ export class TestRig {
|
|||
}
|
||||
|
||||
readFile(fileName: string) {
|
||||
const content = readFileSync(join(this.testDir!, fileName), 'utf-8');
|
||||
const filePath = join(this.testDir!, fileName);
|
||||
const content = readFileSync(filePath, 'utf-8');
|
||||
if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
|
||||
const testId = `${env.TEST_FILE_NAME!.replace(
|
||||
'.test.js',
|
||||
'',
|
||||
)}:${this.testName!.replace(/ /g, '-')}`;
|
||||
console.log(`--- FILE: ${testId}/${fileName} ---`);
|
||||
console.log(`--- FILE: ${filePath} ---`);
|
||||
console.log(content);
|
||||
console.log(`--- END FILE: ${testId}/${fileName} ---`);
|
||||
console.log(`--- END FILE: ${filePath} ---`);
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
@ -336,7 +333,7 @@ export class TestRig {
|
|||
// Wait for telemetry file to exist and have content
|
||||
await this.poll(
|
||||
() => {
|
||||
if (!fileExists(logFilePath)) return false;
|
||||
if (!fs.existsSync(logFilePath)) return false;
|
||||
try {
|
||||
const content = readFileSync(logFilePath, 'utf-8');
|
||||
// Check if file has meaningful content (at least one complete JSON object)
|
||||
|
@ -547,7 +544,7 @@ export class TestRig {
|
|||
// Try reading from file first
|
||||
const logFilePath = join(this.testDir!, 'telemetry.log');
|
||||
|
||||
if (fileExists(logFilePath)) {
|
||||
if (fs.existsSync(logFilePath)) {
|
||||
try {
|
||||
const content = readFileSync(logFilePath, 'utf-8');
|
||||
if (content && content.includes('"event.name"')) {
|
||||
|
@ -581,7 +578,7 @@ export class TestRig {
|
|||
}
|
||||
|
||||
// Check if file exists, if not return empty array (file might not be created yet)
|
||||
if (!fileExists(logFilePath)) {
|
||||
if (!fs.existsSync(logFilePath)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
// Per-test timeout in milliseconds (5 minutes).
const TEST_TIMEOUT_MS = 5 * 60 * 1000;

/**
 * Vitest configuration for the integration test suite.
 *
 * Paths are relative to this config's root directory (the suite is launched
 * with `vitest run --root ./integration-tests`).
 */
export default defineConfig({
  test: {
    // Which files are treated as tests.
    include: ['**/*.test.ts'],
    // Setup module for the run; its contents are not visible from this file.
    globalSetup: './globalSetup.ts',
    reporters: ['default'],
    testTimeout: TEST_TIMEOUT_MS,
    // A failing test is retried up to 2 more times before being reported.
    retry: 2,
    // Do not execute test files in parallel with each other.
    fileParallelism: false,
  },
});
|
|
@ -4,8 +4,7 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'assert';
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
TestRig,
|
||||
createToolCallErrorMessage,
|
||||
|
@ -13,56 +12,57 @@ import {
|
|||
validateModelOutput,
|
||||
} from './test-helper.js';
|
||||
|
||||
test('should be able to write a file', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to write a file');
|
||||
const prompt = `show me an example of using the write tool. put a dad joke in dad.txt`;
|
||||
describe('write_file', () => {
|
||||
it('should be able to write a file', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to write a file');
|
||||
const prompt = `show me an example of using the write tool. put a dad joke in dad.txt`;
|
||||
|
||||
const result = await rig.run(prompt);
|
||||
const result = await rig.run(prompt);
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('write_file');
|
||||
const foundToolCall = await rig.waitForToolCall('write_file');
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall) {
|
||||
printDebugInfo(rig, result);
|
||||
}
|
||||
// Add debugging information
|
||||
if (!foundToolCall) {
|
||||
printDebugInfo(rig, result);
|
||||
}
|
||||
|
||||
const allTools = rig.readToolLogs();
|
||||
assert.ok(
|
||||
foundToolCall,
|
||||
createToolCallErrorMessage(
|
||||
'write_file',
|
||||
allTools.map((t) => t.toolRequest.name),
|
||||
result,
|
||||
),
|
||||
);
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'dad.txt', 'Write file test');
|
||||
|
||||
const newFilePath = 'dad.txt';
|
||||
|
||||
const newFileContent = rig.readFile(newFilePath);
|
||||
|
||||
// Add debugging for file content
|
||||
if (newFileContent === '') {
|
||||
console.error('File was created but is empty');
|
||||
console.error(
|
||||
'Tool calls:',
|
||||
rig.readToolLogs().map((t) => ({
|
||||
name: t.toolRequest.name,
|
||||
args: t.toolRequest.args,
|
||||
})),
|
||||
const allTools = rig.readToolLogs();
// Vitest matchers such as `toBeTruthy()` take no arguments; a custom failure
// message must be supplied as the second argument to `expect()`. Passing the
// detailed message here (rather than to the matcher, where it was silently
// ignored) means a failure reports which tools were actually called.
expect(
  foundToolCall,
  createToolCallErrorMessage(
    'write_file',
    allTools.map((t) => t.toolRequest.name),
    result,
  ),
).toBeTruthy();
|
||||
}
|
||||
|
||||
assert.notEqual(newFileContent, '', 'Expected file to have content');
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(result, 'dad.txt', 'Write file test');
|
||||
|
||||
// Log success info if verbose
|
||||
if (process.env.VERBOSE === 'true') {
|
||||
console.log(
|
||||
'File created successfully with content:',
|
||||
newFileContent.substring(0, 100) + '...',
|
||||
);
|
||||
}
|
||||
const newFilePath = 'dad.txt';
|
||||
|
||||
const newFileContent = rig.readFile(newFilePath);
|
||||
|
||||
// Add debugging for file content
|
||||
if (newFileContent === '') {
|
||||
console.error('File was created but is empty');
|
||||
console.error(
|
||||
'Tool calls:',
|
||||
rig.readToolLogs().map((t) => ({
|
||||
name: t.toolRequest.name,
|
||||
args: t.toolRequest.args,
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
expect(newFileContent).not.toBe('');
|
||||
|
||||
// Log success info if verbose
|
||||
if (process.env.VERBOSE === 'true') {
|
||||
console.log(
|
||||
'File created successfully with content:',
|
||||
newFileContent.substring(0, 100) + '...',
|
||||
);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
|
@ -33,11 +33,11 @@
|
|||
"test": "npm run test --workspaces --if-present",
|
||||
"test:ci": "npm run test:ci --workspaces --if-present && npm run test:scripts",
|
||||
"test:scripts": "vitest run --config ./scripts/tests/vitest.config.ts",
|
||||
"test:e2e": "npm run test:integration:sandbox:none -- --verbose --keep-output",
|
||||
"test:e2e": "cross-env VERBOSE=true KEEP_OUTPUT=true npm run test:integration:sandbox:none",
|
||||
"test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman",
|
||||
"test:integration:sandbox:none": "GEMINI_SANDBOX=false node integration-tests/run-tests.js",
|
||||
"test:integration:sandbox:docker": "GEMINI_SANDBOX=docker node integration-tests/run-tests.js",
|
||||
"test:integration:sandbox:podman": "GEMINI_SANDBOX=podman node integration-tests/run-tests.js",
|
||||
"test:integration:sandbox:none": "GEMINI_SANDBOX=false vitest run --root ./integration-tests",
|
||||
"test:integration:sandbox:docker": "npm run build:sandbox && GEMINI_SANDBOX=docker vitest run --root ./integration-tests",
|
||||
"test:integration:sandbox:podman": "GEMINI_SANDBOX=podman vitest run --root ./integration-tests",
|
||||
"lint": "eslint . --ext .ts,.tsx && eslint integration-tests",
|
||||
"lint:fix": "eslint . --fix && eslint integration-tests --fix",
|
||||
"lint:ci": "eslint . --ext .ts,.tsx --max-warnings 0 && eslint integration-tests --max-warnings 0",
|
||||
|
|
Loading…
Reference in New Issue