test(integration): add failing test for stdin context with prompt (#6158)

This commit is contained in:
Allen Hutchison 2025-08-20 16:52:27 -07:00 committed by GitHub
parent 653267a64f
commit a590a033be
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 202 additions and 27 deletions

View File

@ -0,0 +1,70 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
/**
 * Integration test: content piped on stdin must be used as context for a
 * prompt passed on the command line, and must precede that prompt in the
 * request text recorded by telemetry.
 */
describe('stdin context', () => {
  it('should be able to use stdin as context for a prompt', async () => {
    const rig = new TestRig();
    await rig.setup('should be able to use stdin as context for a prompt');

    // Build a token that is always non-empty and of fixed length.
    // `Math.random().toString(36)` has a variable number of digits, so slicing
    // from a fixed offset can yield an empty or 1-2 character string, which
    // would make the `toContain` / `indexOf` checks below pass trivially.
    // The token stays lowercase so the case-insensitive output check works.
    const randomString = `tok${Math.random()
      .toString(36)
      .slice(2, 10)
      .padEnd(8, '0')}`;
    const stdinContent = `When I ask you for a token respond with ${randomString}`;
    const prompt = 'Can I please have a token?';

    const result = await rig.run({ prompt, stdin: stdinContent });

    await rig.waitForTelemetryEvent('api_request');
    const lastRequest = rig.readLastApiRequest();
    expect(lastRequest).not.toBeNull();

    // `readLastApiRequest` returns loosely typed JSON; narrow it before use
    // so a missing field fails with a clear message instead of a TypeError.
    const requestText = (
      lastRequest?.attributes as Record<string, unknown> | undefined
    )?.['request_text'];
    expect(
      typeof requestText,
      'Expected the last api_request telemetry entry to carry request_text',
    ).toBe('string');
    const historyString = requestText as string;

    // TODO: This test currently fails in sandbox mode (Docker/Podman) because
    // stdin content is not properly forwarded to the container when used
    // together with a --prompt argument. The test passes in non-sandbox mode.
    expect(historyString).toContain(randomString);
    expect(historyString).toContain(prompt);

    // Check that stdin content appears before the prompt in the conversation history
    const stdinIndex = historyString.indexOf(randomString);
    const promptIndex = historyString.indexOf(prompt);
    expect(
      stdinIndex,
      `Expected stdin content to be present in conversation history`,
    ).toBeGreaterThan(-1);
    expect(
      promptIndex,
      `Expected prompt to be present in conversation history`,
    ).toBeGreaterThan(-1);
    expect(
      stdinIndex,
      `Expected stdin content (index ${stdinIndex}) to appear before prompt (index ${promptIndex}) in conversation history`,
    ).toBeLessThan(promptIndex);

    // Evaluate the output check once and reuse it for debugging and asserting.
    const outputHasToken = result.toLowerCase().includes(randomString);

    // Add debugging information
    if (!outputHasToken) {
      printDebugInfo(rig, result, {
        [`Contains "${randomString}"`]: outputHasToken,
      });
    }

    // Validate model output
    validateModelOutput(result, randomString, 'STDIN context test');
    expect(
      outputHasToken,
      'Expected the model to identify the secret word from stdin',
    ).toBeTruthy();
  });
});

View File

@ -93,7 +93,9 @@ export function validateModelOutput(
if (missingContent.length > 0) { if (missingContent.length > 0) {
console.warn( console.warn(
`Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`, `Warning: LLM did not include expected content in response: ${missingContent.join(
', ',
)}.`,
'This is not ideal but not a test failure.', 'This is not ideal but not a test failure.',
); );
console.warn( console.warn(
@ -141,10 +143,7 @@ export class TestRig {
mkdirSync(geminiDir, { recursive: true }); mkdirSync(geminiDir, { recursive: true });
// In sandbox mode, use an absolute path for telemetry inside the container // In sandbox mode, use an absolute path for telemetry inside the container
// The container mounts the test directory at the same path as the host // The container mounts the test directory at the same path as the host
const telemetryPath = const telemetryPath = join(this.testDir, 'telemetry.log'); // Always use test directory for telemetry
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
? join(this.testDir, 'telemetry.log') // Absolute path in test directory
: env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox
const settings = { const settings = {
telemetry: { telemetry: {
@ -322,11 +321,8 @@ export class TestRig {
} }
async waitForTelemetryReady() { async waitForTelemetryReady() {
// In sandbox mode, telemetry is written to a relative path in the test directory // Telemetry is always written to the test directory
const logFilePath = const logFilePath = join(this.testDir!, 'telemetry.log');
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
? join(this.testDir!, 'telemetry.log')
: env.TELEMETRY_LOG_FILE;
if (!logFilePath) return; if (!logFilePath) return;
@ -347,6 +343,52 @@ export class TestRig {
); );
} }
/**
 * Waits until the telemetry log in the test directory contains an entry
 * whose `event.name` attribute equals `gemini_cli.<eventName>`.
 *
 * @param eventName Event name without the `gemini_cli.` prefix.
 * @param timeout Passed to `poll`; when omitted (or otherwise falsy) the
 *   value from `getDefaultTimeout()` is used instead.
 * @returns Whatever `poll` resolves to for the given predicate.
 */
async waitForTelemetryEvent(eventName: string, timeout?: number) {
  const effectiveTimeout = timeout ? timeout : this.getDefaultTimeout();
  const wantedEventName = `gemini_cli.${eventName}`;

  // Predicate evaluated on every poll tick: re-reads the whole log and
  // reports whether the wanted event has been written yet.
  const eventIsLogged = (): boolean => {
    const telemetryFile = join(this.testDir!, 'telemetry.log');
    if (!(telemetryFile && fs.existsSync(telemetryFile))) {
      return false;
    }

    // Entries are split on "}\n{"; the split consumes the braces on both
    // sides of each boundary, so they are restored before parsing.
    const pieces = readFileSync(telemetryFile, 'utf-8').split(/}\n{/);
    const lastPiece = pieces.length - 1;

    return pieces
      .map((piece, position) => {
        const opening = position > 0 ? '{' : '';
        const closing = position < lastPiece ? '}' : '';
        return `${opening}${piece}${closing}`.trim();
      })
      .filter(Boolean)
      .some((candidate) => {
        try {
          const entry = JSON.parse(candidate);
          return Boolean(
            entry.attributes &&
              entry.attributes['event.name'] === wantedEventName,
          );
        } catch {
          // ignore
          return false;
        }
      });
  };

  await this.waitForTelemetryReady();

  // NOTE(review): the trailing 100 is presumably the poll interval in ms;
  // confirm against `poll`'s signature.
  return this.poll(eventIsLogged, effectiveTimeout, 100);
}
async waitForToolCall(toolName: string, timeout?: number) { async waitForToolCall(toolName: string, timeout?: number) {
// Use environment-specific timeout // Use environment-specific timeout
if (!timeout) { if (!timeout) {
@ -566,11 +608,8 @@ export class TestRig {
} }
} }
// In sandbox mode, telemetry is written to a relative path in the test directory // Telemetry is always written to the test directory
const logFilePath = const logFilePath = join(this.testDir!, 'telemetry.log');
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
? join(this.testDir!, 'telemetry.log')
: env.TELEMETRY_LOG_FILE;
if (!logFilePath) { if (!logFilePath) {
console.warn(`TELEMETRY_LOG_FILE environment variable not set`); console.warn(`TELEMETRY_LOG_FILE environment variable not set`);
@ -587,7 +626,7 @@ export class TestRig {
// Split the content into individual JSON objects // Split the content into individual JSON objects
// They are separated by "}\n{" // They are separated by "}\n{"
const jsonObjects = content const jsonObjects = content
.split(/}\s*\n\s*{/) .split(/}\n{/)
.map((obj, index, array) => { .map((obj, index, array) => {
// Add back the braces we removed during split // Add back the braces we removed during split
if (index > 0) obj = '{' + obj; if (index > 0) obj = '{' + obj;
@ -626,14 +665,47 @@ export class TestRig {
} catch (e) { } catch (e) {
// Skip objects that aren't valid JSON // Skip objects that aren't valid JSON
if (env.VERBOSE === 'true') { if (env.VERBOSE === 'true') {
console.error( console.error('Failed to parse telemetry object:', e);
'Failed to parse telemetry object:',
(e as Error).message,
);
} }
} }
} }
return logs; return logs;
} }
/**
 * Reads the telemetry log in the test directory and returns the last entry
 * whose `event.name` attribute is `gemini_cli.api_request`.
 *
 * @returns The parsed entry, or `null` when the log file does not exist or
 *   contains no such entry. Fragments that fail to parse are skipped.
 */
readLastApiRequest(): Record<string, unknown> | null {
  // Telemetry is always written to the test directory
  const telemetryFile = join(this.testDir!, 'telemetry.log');
  if (!(telemetryFile && fs.existsSync(telemetryFile))) {
    return null;
  }

  // Entries are split on "}\n{"; put back the braces the split removed.
  const pieces = readFileSync(telemetryFile, 'utf-8').split(/}\n{/);
  const lastPiece = pieces.length - 1;
  const candidates = pieces
    .map((piece, position) => {
      const opening = position > 0 ? '{' : '';
      const closing = position < lastPiece ? '}' : '';
      return `${opening}${piece}${closing}`.trim();
    })
    .filter(Boolean);

  // Walk from newest to oldest: the first match found this way is the last
  // matching entry in file order.
  for (const candidate of [...candidates].reverse()) {
    try {
      const entry = JSON.parse(candidate);
      if (
        entry.attributes &&
        entry.attributes['event.name'] === 'gemini_cli.api_request'
      ) {
        return entry;
      }
    } catch {
      // ignore
    }
  }

  return null;
}
} }

View File

@ -239,7 +239,37 @@ export async function main() {
process.exit(1); process.exit(1);
} }
} }
await start_sandbox(sandboxConfig, memoryArgs, config); let stdinData = '';
if (!process.stdin.isTTY) {
stdinData = await readStdin();
}
/**
 * Folds data read from stdin into a copy of the CLI argument list so it can
 * be forwarded to the sandboxed process as part of the prompt.
 *
 * This function is a copy of the one from sandbox.ts. It is moved here to
 * decouple sandbox.ts from the CLI's argument structure.
 *
 * Behavior:
 * - No stdin data (undefined or empty): returns an unmodified copy of `args`.
 * - `--prompt <value>` / `-p <value>`: stdin is prepended to `<value>`,
 *   separated by a blank line.
 * - `--prompt=<value>` / `-p=<value>`: stdin is prepended to `<value>` inside
 *   the same argument. Without this, a second `--prompt` would be appended
 *   and the CLI would receive two prompt arguments.
 * - Otherwise (no prompt flag, or a prompt flag with nothing after it):
 *   `--prompt <stdin>` is appended.
 *
 * @param args Argument vector to copy; it is never mutated.
 * @param stdinData Text read from stdin, if any.
 * @returns A new argument array with stdin merged in.
 */
const injectStdinIntoArgs = (
  args: string[],
  stdinData?: string,
): string[] => {
  const finalArgs = [...args];
  if (!stdinData) {
    return finalArgs;
  }

  // Locate the first prompt flag in either the separated or the `=` form.
  const promptIndex = finalArgs.findIndex(
    (arg) =>
      arg === '--prompt' ||
      arg === '-p' ||
      arg.startsWith('--prompt=') ||
      arg.startsWith('-p='),
  );
  const promptArg = promptIndex > -1 ? finalArgs[promptIndex] : undefined;

  if (promptArg !== undefined && promptArg.includes('=')) {
    // `--prompt=value`: keep the flag and `=`, prepend stdin to the value.
    const valueStart = promptArg.indexOf('=') + 1;
    finalArgs[promptIndex] =
      `${promptArg.slice(0, valueStart)}${stdinData}\n\n${promptArg.slice(valueStart)}`;
  } else if (promptArg !== undefined && finalArgs.length > promptIndex + 1) {
    // If there's a prompt argument, prepend stdin to it
    finalArgs[promptIndex + 1] =
      `${stdinData}\n\n${finalArgs[promptIndex + 1]}`;
  } else {
    // If there's no prompt argument, add stdin as the prompt
    finalArgs.push('--prompt', stdinData);
  }

  return finalArgs;
};
const sandboxArgs = injectStdinIntoArgs(process.argv, stdinData);
await start_sandbox(sandboxConfig, memoryArgs, config, sandboxArgs);
process.exit(0); process.exit(0);
} else { } else {
// Not in a sandbox and not entering one, so relaunch with additional // Not in a sandbox and not entering one, so relaunch with additional

View File

@ -24,6 +24,7 @@ function getContainerPath(hostPath: string): string {
if (os.platform() !== 'win32') { if (os.platform() !== 'win32') {
return hostPath; return hostPath;
} }
const withForwardSlashes = hostPath.replace(/\\/g, '/'); const withForwardSlashes = hostPath.replace(/\\/g, '/');
const match = withForwardSlashes.match(/^([A-Z]):\/(.*)/i); const match = withForwardSlashes.match(/^([A-Z]):\/(.*)/i);
if (match) { if (match) {
@ -114,7 +115,7 @@ function ports(): string[] {
.map((p) => p.trim()); .map((p) => p.trim());
} }
function entrypoint(workdir: string): string[] { function entrypoint(workdir: string, cliArgs: string[]): string[] {
const isWindows = os.platform() === 'win32'; const isWindows = os.platform() === 'win32';
const containerWorkdir = getContainerPath(workdir); const containerWorkdir = getContainerPath(workdir);
const shellCmds = []; const shellCmds = [];
@ -166,7 +167,7 @@ function entrypoint(workdir: string): string[] {
), ),
); );
const cliArgs = process.argv.slice(2).map((arg) => quote([arg])); const quotedCliArgs = cliArgs.slice(2).map((arg) => quote([arg]));
const cliCmd = const cliCmd =
process.env['NODE_ENV'] === 'development' process.env['NODE_ENV'] === 'development'
? process.env['DEBUG'] ? process.env['DEBUG']
@ -176,8 +177,7 @@ function entrypoint(workdir: string): string[] {
? `node --inspect-brk=0.0.0.0:${process.env['DEBUG_PORT'] || '9229'} $(which gemini)` ? `node --inspect-brk=0.0.0.0:${process.env['DEBUG_PORT'] || '9229'} $(which gemini)`
: 'gemini'; : 'gemini';
const args = [...shellCmds, cliCmd, ...cliArgs]; const args = [...shellCmds, cliCmd, ...quotedCliArgs];
return ['bash', '-c', args.join(' ')]; return ['bash', '-c', args.join(' ')];
} }
@ -185,6 +185,7 @@ export async function start_sandbox(
config: SandboxConfig, config: SandboxConfig,
nodeArgs: string[] = [], nodeArgs: string[] = [],
cliConfig?: Config, cliConfig?: Config,
cliArgs: string[] = [],
) { ) {
const patcher = new ConsolePatcher({ const patcher = new ConsolePatcher({
debugMode: cliConfig?.getDebugMode() || !!process.env['DEBUG'], debugMode: cliConfig?.getDebugMode() || !!process.env['DEBUG'],
@ -263,6 +264,8 @@ export async function start_sandbox(
args.push('-D', `INCLUDE_DIR_${i}=${dirPath}`); args.push('-D', `INCLUDE_DIR_${i}=${dirPath}`);
} }
const finalArgv = cliArgs;
args.push( args.push(
'-f', '-f',
profileFile, profileFile,
@ -271,7 +274,7 @@ export async function start_sandbox(
[ [
`SANDBOX=sandbox-exec`, `SANDBOX=sandbox-exec`,
`NODE_OPTIONS="${nodeOptions}"`, `NODE_OPTIONS="${nodeOptions}"`,
...process.argv.map((arg) => quote([arg])), ...finalArgv.map((arg) => quote([arg])),
].join(' '), ].join(' '),
); );
// start and set up proxy if GEMINI_SANDBOX_PROXY_COMMAND is set // start and set up proxy if GEMINI_SANDBOX_PROXY_COMMAND is set
@ -692,7 +695,7 @@ export async function start_sandbox(
// Determine if the current user's UID/GID should be passed to the sandbox. // Determine if the current user's UID/GID should be passed to the sandbox.
// See shouldUseCurrentUserInSandbox for more details. // See shouldUseCurrentUserInSandbox for more details.
let userFlag = ''; let userFlag = '';
const finalEntrypoint = entrypoint(workdir); const finalEntrypoint = entrypoint(workdir, cliArgs);
if (process.env['GEMINI_CLI_INTEGRATION_TEST'] === 'true') { if (process.env['GEMINI_CLI_INTEGRATION_TEST'] === 'true') {
args.push('--user', 'root'); args.push('--user', 'root');