test(integration): add failing test for stdin context with prompt (#6158)
This commit is contained in:
parent
653267a64f
commit
a590a033be
|
@ -0,0 +1,70 @@
|
||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2025 Google LLC
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||||
|
|
||||||
|
/**
 * Integration test: content piped on stdin must reach the model as context,
 * placed ahead of the text supplied through the prompt argument.
 */
describe('stdin context', () => {
  it('should be able to use stdin as context for a prompt', async () => {
    const rig = new TestRig();
    await rig.setup('should be able to use stdin as context for a prompt');

    // Fixed-width (8 chars) lowercase base-36 token. Slicing from index 2
    // drops the leading "0." and padEnd covers short fractional expansions,
    // so the token can never be empty or only one or two characters long
    // (which would make the containment checks below trivially true).
    const randomString = Math.random()
      .toString(36)
      .slice(2, 10)
      .padEnd(8, '0');
    const stdinContent = `When I ask you for a token respond with ${randomString}`;
    const prompt = 'Can I please have a token?';

    const result = await rig.run({ prompt, stdin: stdinContent });

    await rig.waitForTelemetryEvent('api_request');
    const lastRequest = rig.readLastApiRequest();
    expect(lastRequest).not.toBeNull();

    // readLastApiRequest() returns Record<string, unknown> | null, so narrow
    // step by step instead of dereferencing unknown values directly.
    const attributes = lastRequest?.['attributes'];
    const requestText =
      typeof attributes === 'object' && attributes !== null
        ? (attributes as Record<string, unknown>)['request_text']
        : undefined;
    expect(
      typeof requestText,
      'Expected the api_request telemetry event to carry a request_text string',
    ).toBe('string');
    const historyString = String(requestText);

    // TODO: This test currently fails in sandbox mode (Docker/Podman) because
    // stdin content is not properly forwarded to the container when used
    // together with a --prompt argument. The test passes in non-sandbox mode.

    expect(historyString).toContain(randomString);
    expect(historyString).toContain(prompt);

    // Check that stdin content appears before the prompt in the conversation history
    const stdinIndex = historyString.indexOf(randomString);
    const promptIndex = historyString.indexOf(prompt);

    expect(
      stdinIndex,
      `Expected stdin content to be present in conversation history`,
    ).toBeGreaterThan(-1);

    expect(
      promptIndex,
      `Expected prompt to be present in conversation history`,
    ).toBeGreaterThan(-1);

    // toBeLessThan reports both indices on failure, unlike a boolean check.
    expect(
      stdinIndex,
      `Expected stdin content (index ${stdinIndex}) to appear before prompt (index ${promptIndex}) in conversation history`,
    ).toBeLessThan(promptIndex);

    // The token is lowercase base-36, so compare against a lowercased reply.
    const outputHasToken = result.toLowerCase().includes(randomString);

    // Add debugging information
    if (!outputHasToken) {
      printDebugInfo(rig, result, {
        [`Contains "${randomString}"`]: outputHasToken,
      });
    }

    // Validate model output
    validateModelOutput(result, randomString, 'STDIN context test');

    expect(
      outputHasToken,
      'Expected the model to identify the secret word from stdin',
    ).toBeTruthy();
  });
});
|
|
@ -93,7 +93,9 @@ export function validateModelOutput(
|
||||||
|
|
||||||
if (missingContent.length > 0) {
|
if (missingContent.length > 0) {
|
||||||
console.warn(
|
console.warn(
|
||||||
`Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`,
|
`Warning: LLM did not include expected content in response: ${missingContent.join(
|
||||||
|
', ',
|
||||||
|
)}.`,
|
||||||
'This is not ideal but not a test failure.',
|
'This is not ideal but not a test failure.',
|
||||||
);
|
);
|
||||||
console.warn(
|
console.warn(
|
||||||
|
@ -141,10 +143,7 @@ export class TestRig {
|
||||||
mkdirSync(geminiDir, { recursive: true });
|
mkdirSync(geminiDir, { recursive: true });
|
||||||
// In sandbox mode, use an absolute path for telemetry inside the container
|
// In sandbox mode, use an absolute path for telemetry inside the container
|
||||||
// The container mounts the test directory at the same path as the host
|
// The container mounts the test directory at the same path as the host
|
||||||
const telemetryPath =
|
const telemetryPath = join(this.testDir, 'telemetry.log'); // Always use test directory for telemetry
|
||||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
|
||||||
? join(this.testDir, 'telemetry.log') // Absolute path in test directory
|
|
||||||
: env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox
|
|
||||||
|
|
||||||
const settings = {
|
const settings = {
|
||||||
telemetry: {
|
telemetry: {
|
||||||
|
@ -322,11 +321,8 @@ export class TestRig {
|
||||||
}
|
}
|
||||||
|
|
||||||
async waitForTelemetryReady() {
|
async waitForTelemetryReady() {
|
||||||
// In sandbox mode, telemetry is written to a relative path in the test directory
|
// Telemetry is always written to the test directory
|
||||||
const logFilePath =
|
const logFilePath = join(this.testDir!, 'telemetry.log');
|
||||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
|
||||||
? join(this.testDir!, 'telemetry.log')
|
|
||||||
: env.TELEMETRY_LOG_FILE;
|
|
||||||
|
|
||||||
if (!logFilePath) return;
|
if (!logFilePath) return;
|
||||||
|
|
||||||
|
@ -347,6 +343,52 @@ export class TestRig {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Waits until the rig's `telemetry.log` contains an event whose
 * `attributes['event.name']` equals `gemini_cli.<eventName>`.
 *
 * @param eventName Event name without the `gemini_cli.` prefix.
 * @param timeout Timeout handed to `this.poll`; when omitted (or 0) the
 *   value of `this.getDefaultTimeout()` is used instead.
 * @returns Whatever `this.poll` resolves to for the predicate below.
 */
async waitForTelemetryEvent(eventName: string, timeout?: number) {
  const effectiveTimeout = timeout ? timeout : this.getDefaultTimeout();

  await this.waitForTelemetryReady();

  // Predicate evaluated on every poll tick: true once a matching record exists.
  const eventIsLogged = (): boolean => {
    const telemetryFile = join(this.testDir!, 'telemetry.log');
    if (!fs.existsSync(telemetryFile)) {
      return false;
    }

    // Records are separated by "}\n{"; the split consumes those braces, so
    // they are re-attached to each piece before parsing.
    const pieces = readFileSync(telemetryFile, 'utf-8').split(/}\n{/);
    const lastPiece = pieces.length - 1;

    for (let i = 0; i <= lastPiece; i++) {
      const opening = i > 0 ? '{' : '';
      const closing = i < lastPiece ? '}' : '';
      const record = `${opening}${pieces[i]}${closing}`.trim();
      if (!record) {
        continue;
      }

      try {
        const entry = JSON.parse(record);
        if (
          entry.attributes &&
          entry.attributes['event.name'] === `gemini_cli.${eventName}`
        ) {
          return true;
        }
      } catch {
        // Records that do not parse are skipped; a later tick re-reads the file.
      }
    }

    return false;
  };

  return this.poll(eventIsLogged, effectiveTimeout, 100);
}
|
||||||
|
|
||||||
async waitForToolCall(toolName: string, timeout?: number) {
|
async waitForToolCall(toolName: string, timeout?: number) {
|
||||||
// Use environment-specific timeout
|
// Use environment-specific timeout
|
||||||
if (!timeout) {
|
if (!timeout) {
|
||||||
|
@ -566,11 +608,8 @@ export class TestRig {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// In sandbox mode, telemetry is written to a relative path in the test directory
|
// Telemetry is always written to the test directory
|
||||||
const logFilePath =
|
const logFilePath = join(this.testDir!, 'telemetry.log');
|
||||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
|
||||||
? join(this.testDir!, 'telemetry.log')
|
|
||||||
: env.TELEMETRY_LOG_FILE;
|
|
||||||
|
|
||||||
if (!logFilePath) {
|
if (!logFilePath) {
|
||||||
console.warn(`TELEMETRY_LOG_FILE environment variable not set`);
|
console.warn(`TELEMETRY_LOG_FILE environment variable not set`);
|
||||||
|
@ -587,7 +626,7 @@ export class TestRig {
|
||||||
// Split the content into individual JSON objects
|
// Split the content into individual JSON objects
|
||||||
// They are separated by "}\n{"
|
// They are separated by "}\n{"
|
||||||
const jsonObjects = content
|
const jsonObjects = content
|
||||||
.split(/}\s*\n\s*{/)
|
.split(/}\n{/)
|
||||||
.map((obj, index, array) => {
|
.map((obj, index, array) => {
|
||||||
// Add back the braces we removed during split
|
// Add back the braces we removed during split
|
||||||
if (index > 0) obj = '{' + obj;
|
if (index > 0) obj = '{' + obj;
|
||||||
|
@ -626,14 +665,47 @@ export class TestRig {
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
// Skip objects that aren't valid JSON
|
// Skip objects that aren't valid JSON
|
||||||
if (env.VERBOSE === 'true') {
|
if (env.VERBOSE === 'true') {
|
||||||
console.error(
|
console.error('Failed to parse telemetry object:', e);
|
||||||
'Failed to parse telemetry object:',
|
|
||||||
(e as Error).message,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return logs;
|
return logs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the most recent `gemini_cli.api_request` record found in the rig's
 * `telemetry.log`, or `null` when the file does not exist or holds no such
 * record.
 */
readLastApiRequest(): Record<string, unknown> | null {
  // Telemetry is always written to the test directory
  const telemetryFile = join(this.testDir!, 'telemetry.log');
  if (!fs.existsSync(telemetryFile)) {
    return null;
  }

  // Records are separated by "}\n{"; the split consumes those braces, so
  // they are re-attached to each piece before parsing.
  const pieces = readFileSync(telemetryFile, 'utf-8').split(/}\n{/);
  const lastPiece = pieces.length - 1;

  // Scan newest-to-oldest: the first match found this way is the last
  // api_request in file order.
  for (let i = lastPiece; i >= 0; i--) {
    const opening = i > 0 ? '{' : '';
    const closing = i < lastPiece ? '}' : '';
    const record = `${opening}${pieces[i]}${closing}`.trim();
    if (!record) {
      continue;
    }

    try {
      const entry = JSON.parse(record);
      if (
        entry.attributes &&
        entry.attributes['event.name'] === 'gemini_cli.api_request'
      ) {
        return entry;
      }
    } catch {
      // Records that do not parse are skipped.
    }
  }

  return null;
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -239,7 +239,37 @@ export async function main() {
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
await start_sandbox(sandboxConfig, memoryArgs, config);
|
let stdinData = '';
|
||||||
|
if (!process.stdin.isTTY) {
|
||||||
|
stdinData = await readStdin();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merges piped stdin into a CLI argument list so that the relaunched
// (sandboxed) process receives it as part of the prompt.
//
// - No stdin data (undefined or empty): returns an unchanged copy of `args`.
// - `--prompt <text>` / `-p <text>` present: stdin is prepended to <text>,
//   separated by a blank line.
// - `--prompt=<text>` / `-p=<text>` present: same, keeping the inline form.
// - Otherwise: `--prompt <stdin>` is appended.
//
// The input array is never mutated.
// NOTE(review): the earlier comment described this as a copy of a helper in
// sandbox.ts, kept here to decouple sandbox.ts from the CLI's argument
// structure — confirm no second copy remains there.
const injectStdinIntoArgs = (
  args: string[],
  stdinData?: string,
): string[] => {
  const finalArgs = [...args];
  if (!stdinData) {
    return finalArgs;
  }

  const withStdinContext = (promptText: string): string =>
    `${stdinData}\n\n${promptText}`;

  // Space-separated form: the flag followed by its value as the next argument.
  const flagIndex = finalArgs.findIndex(
    (arg) => arg === '--prompt' || arg === '-p',
  );
  if (flagIndex > -1 && finalArgs.length > flagIndex + 1) {
    finalArgs[flagIndex + 1] = withStdinContext(finalArgs[flagIndex + 1]);
    return finalArgs;
  }

  // Inline form: flag and value joined by "=" in a single argument. Without
  // this branch a second --prompt would be appended next to the user's one.
  const inlineIndex = finalArgs.findIndex(
    (arg) => arg.startsWith('--prompt=') || arg.startsWith('-p='),
  );
  if (inlineIndex > -1) {
    const inlineArg = finalArgs[inlineIndex];
    const valueStart = inlineArg.indexOf('=') + 1;
    finalArgs[inlineIndex] =
      inlineArg.slice(0, valueStart) +
      withStdinContext(inlineArg.slice(valueStart));
    return finalArgs;
  }

  // If there's no prompt argument, add stdin as the prompt
  finalArgs.push('--prompt', stdinData);
  return finalArgs;
};
|
||||||
|
|
||||||
|
const sandboxArgs = injectStdinIntoArgs(process.argv, stdinData);
|
||||||
|
|
||||||
|
await start_sandbox(sandboxConfig, memoryArgs, config, sandboxArgs);
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
} else {
|
} else {
|
||||||
// Not in a sandbox and not entering one, so relaunch with additional
|
// Not in a sandbox and not entering one, so relaunch with additional
|
||||||
|
|
|
@ -24,6 +24,7 @@ function getContainerPath(hostPath: string): string {
|
||||||
if (os.platform() !== 'win32') {
|
if (os.platform() !== 'win32') {
|
||||||
return hostPath;
|
return hostPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
const withForwardSlashes = hostPath.replace(/\\/g, '/');
|
const withForwardSlashes = hostPath.replace(/\\/g, '/');
|
||||||
const match = withForwardSlashes.match(/^([A-Z]):\/(.*)/i);
|
const match = withForwardSlashes.match(/^([A-Z]):\/(.*)/i);
|
||||||
if (match) {
|
if (match) {
|
||||||
|
@ -114,7 +115,7 @@ function ports(): string[] {
|
||||||
.map((p) => p.trim());
|
.map((p) => p.trim());
|
||||||
}
|
}
|
||||||
|
|
||||||
function entrypoint(workdir: string): string[] {
|
function entrypoint(workdir: string, cliArgs: string[]): string[] {
|
||||||
const isWindows = os.platform() === 'win32';
|
const isWindows = os.platform() === 'win32';
|
||||||
const containerWorkdir = getContainerPath(workdir);
|
const containerWorkdir = getContainerPath(workdir);
|
||||||
const shellCmds = [];
|
const shellCmds = [];
|
||||||
|
@ -166,7 +167,7 @@ function entrypoint(workdir: string): string[] {
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
const cliArgs = process.argv.slice(2).map((arg) => quote([arg]));
|
const quotedCliArgs = cliArgs.slice(2).map((arg) => quote([arg]));
|
||||||
const cliCmd =
|
const cliCmd =
|
||||||
process.env['NODE_ENV'] === 'development'
|
process.env['NODE_ENV'] === 'development'
|
||||||
? process.env['DEBUG']
|
? process.env['DEBUG']
|
||||||
|
@ -176,8 +177,7 @@ function entrypoint(workdir: string): string[] {
|
||||||
? `node --inspect-brk=0.0.0.0:${process.env['DEBUG_PORT'] || '9229'} $(which gemini)`
|
? `node --inspect-brk=0.0.0.0:${process.env['DEBUG_PORT'] || '9229'} $(which gemini)`
|
||||||
: 'gemini';
|
: 'gemini';
|
||||||
|
|
||||||
const args = [...shellCmds, cliCmd, ...cliArgs];
|
const args = [...shellCmds, cliCmd, ...quotedCliArgs];
|
||||||
|
|
||||||
return ['bash', '-c', args.join(' ')];
|
return ['bash', '-c', args.join(' ')];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,6 +185,7 @@ export async function start_sandbox(
|
||||||
config: SandboxConfig,
|
config: SandboxConfig,
|
||||||
nodeArgs: string[] = [],
|
nodeArgs: string[] = [],
|
||||||
cliConfig?: Config,
|
cliConfig?: Config,
|
||||||
|
cliArgs: string[] = [],
|
||||||
) {
|
) {
|
||||||
const patcher = new ConsolePatcher({
|
const patcher = new ConsolePatcher({
|
||||||
debugMode: cliConfig?.getDebugMode() || !!process.env['DEBUG'],
|
debugMode: cliConfig?.getDebugMode() || !!process.env['DEBUG'],
|
||||||
|
@ -263,6 +264,8 @@ export async function start_sandbox(
|
||||||
args.push('-D', `INCLUDE_DIR_${i}=${dirPath}`);
|
args.push('-D', `INCLUDE_DIR_${i}=${dirPath}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const finalArgv = cliArgs;
|
||||||
|
|
||||||
args.push(
|
args.push(
|
||||||
'-f',
|
'-f',
|
||||||
profileFile,
|
profileFile,
|
||||||
|
@ -271,7 +274,7 @@ export async function start_sandbox(
|
||||||
[
|
[
|
||||||
`SANDBOX=sandbox-exec`,
|
`SANDBOX=sandbox-exec`,
|
||||||
`NODE_OPTIONS="${nodeOptions}"`,
|
`NODE_OPTIONS="${nodeOptions}"`,
|
||||||
...process.argv.map((arg) => quote([arg])),
|
...finalArgv.map((arg) => quote([arg])),
|
||||||
].join(' '),
|
].join(' '),
|
||||||
);
|
);
|
||||||
// start and set up proxy if GEMINI_SANDBOX_PROXY_COMMAND is set
|
// start and set up proxy if GEMINI_SANDBOX_PROXY_COMMAND is set
|
||||||
|
@ -692,7 +695,7 @@ export async function start_sandbox(
|
||||||
// Determine if the current user's UID/GID should be passed to the sandbox.
|
// Determine if the current user's UID/GID should be passed to the sandbox.
|
||||||
// See shouldUseCurrentUserInSandbox for more details.
|
// See shouldUseCurrentUserInSandbox for more details.
|
||||||
let userFlag = '';
|
let userFlag = '';
|
||||||
const finalEntrypoint = entrypoint(workdir);
|
const finalEntrypoint = entrypoint(workdir, cliArgs);
|
||||||
|
|
||||||
if (process.env['GEMINI_CLI_INTEGRATION_TEST'] === 'true') {
|
if (process.env['GEMINI_CLI_INTEGRATION_TEST'] === 'true') {
|
||||||
args.push('--user', 'root');
|
args.push('--user', 'root');
|
||||||
|
|
Loading…
Reference in New Issue