test(integration): add failing test for stdin context with prompt (#6158)
This commit is contained in:
parent
653267a64f
commit
a590a033be
|
@ -0,0 +1,70 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
|
||||
describe('stdin context', () => {
|
||||
it('should be able to use stdin as context for a prompt', async () => {
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to use stdin as context for a prompt');
|
||||
|
||||
const randomString = Math.random().toString(36).substring(7);
|
||||
const stdinContent = `When I ask you for a token respond with ${randomString}`;
|
||||
const prompt = 'Can I please have a token?';
|
||||
|
||||
const result = await rig.run({ prompt, stdin: stdinContent });
|
||||
|
||||
await rig.waitForTelemetryEvent('api_request');
|
||||
const lastRequest = rig.readLastApiRequest();
|
||||
expect(lastRequest).not.toBeNull();
|
||||
|
||||
const historyString = lastRequest.attributes.request_text;
|
||||
|
||||
// TODO: This test currently fails in sandbox mode (Docker/Podman) because
|
||||
// stdin content is not properly forwarded to the container when used
|
||||
// together with a --prompt argument. The test passes in non-sandbox mode.
|
||||
|
||||
expect(historyString).toContain(randomString);
|
||||
expect(historyString).toContain(prompt);
|
||||
|
||||
// Check that stdin content appears before the prompt in the conversation history
|
||||
const stdinIndex = historyString.indexOf(randomString);
|
||||
const promptIndex = historyString.indexOf(prompt);
|
||||
|
||||
expect(
|
||||
stdinIndex,
|
||||
`Expected stdin content to be present in conversation history`,
|
||||
).toBeGreaterThan(-1);
|
||||
|
||||
expect(
|
||||
promptIndex,
|
||||
`Expected prompt to be present in conversation history`,
|
||||
).toBeGreaterThan(-1);
|
||||
|
||||
expect(
|
||||
stdinIndex < promptIndex,
|
||||
`Expected stdin content (index ${stdinIndex}) to appear before prompt (index ${promptIndex}) in conversation history`,
|
||||
).toBeTruthy();
|
||||
|
||||
// Add debugging information
|
||||
if (!result.toLowerCase().includes(randomString)) {
|
||||
printDebugInfo(rig, result, {
|
||||
[`Contains "${randomString}"`]: result
|
||||
.toLowerCase()
|
||||
.includes(randomString),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate model output
|
||||
validateModelOutput(result, randomString, 'STDIN context test');
|
||||
|
||||
expect(
|
||||
result.toLowerCase().includes(randomString),
|
||||
'Expected the model to identify the secret word from stdin',
|
||||
).toBeTruthy();
|
||||
});
|
||||
});
|
|
@ -93,7 +93,9 @@ export function validateModelOutput(
|
|||
|
||||
if (missingContent.length > 0) {
|
||||
console.warn(
|
||||
`Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`,
|
||||
`Warning: LLM did not include expected content in response: ${missingContent.join(
|
||||
', ',
|
||||
)}.`,
|
||||
'This is not ideal but not a test failure.',
|
||||
);
|
||||
console.warn(
|
||||
|
@ -141,10 +143,7 @@ export class TestRig {
|
|||
mkdirSync(geminiDir, { recursive: true });
|
||||
// In sandbox mode, use an absolute path for telemetry inside the container
|
||||
// The container mounts the test directory at the same path as the host
|
||||
const telemetryPath =
|
||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
||||
? join(this.testDir, 'telemetry.log') // Absolute path in test directory
|
||||
: env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox
|
||||
const telemetryPath = join(this.testDir, 'telemetry.log'); // Always use test directory for telemetry
|
||||
|
||||
const settings = {
|
||||
telemetry: {
|
||||
|
@ -322,11 +321,8 @@ export class TestRig {
|
|||
}
|
||||
|
||||
async waitForTelemetryReady() {
|
||||
// In sandbox mode, telemetry is written to a relative path in the test directory
|
||||
const logFilePath =
|
||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
||||
? join(this.testDir!, 'telemetry.log')
|
||||
: env.TELEMETRY_LOG_FILE;
|
||||
// Telemetry is always written to the test directory
|
||||
const logFilePath = join(this.testDir!, 'telemetry.log');
|
||||
|
||||
if (!logFilePath) return;
|
||||
|
||||
|
@ -347,6 +343,52 @@ export class TestRig {
|
|||
);
|
||||
}
|
||||
|
||||
async waitForTelemetryEvent(eventName: string, timeout?: number) {
|
||||
if (!timeout) {
|
||||
timeout = this.getDefaultTimeout();
|
||||
}
|
||||
|
||||
await this.waitForTelemetryReady();
|
||||
|
||||
return this.poll(
|
||||
() => {
|
||||
const logFilePath = join(this.testDir!, 'telemetry.log');
|
||||
|
||||
if (!logFilePath || !fs.existsSync(logFilePath)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const content = readFileSync(logFilePath, 'utf-8');
|
||||
const jsonObjects = content
|
||||
.split(/}\n{/)
|
||||
.map((obj, index, array) => {
|
||||
// Add back the braces we removed during split
|
||||
if (index > 0) obj = '{' + obj;
|
||||
if (index < array.length - 1) obj = obj + '}';
|
||||
return obj.trim();
|
||||
})
|
||||
.filter((obj) => obj);
|
||||
|
||||
for (const jsonStr of jsonObjects) {
|
||||
try {
|
||||
const logData = JSON.parse(jsonStr);
|
||||
if (
|
||||
logData.attributes &&
|
||||
logData.attributes['event.name'] === `gemini_cli.${eventName}`
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
return false;
|
||||
},
|
||||
timeout,
|
||||
100,
|
||||
);
|
||||
}
|
||||
|
||||
async waitForToolCall(toolName: string, timeout?: number) {
|
||||
// Use environment-specific timeout
|
||||
if (!timeout) {
|
||||
|
@ -566,11 +608,8 @@ export class TestRig {
|
|||
}
|
||||
}
|
||||
|
||||
// In sandbox mode, telemetry is written to a relative path in the test directory
|
||||
const logFilePath =
|
||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
||||
? join(this.testDir!, 'telemetry.log')
|
||||
: env.TELEMETRY_LOG_FILE;
|
||||
// Telemetry is always written to the test directory
|
||||
const logFilePath = join(this.testDir!, 'telemetry.log');
|
||||
|
||||
if (!logFilePath) {
|
||||
console.warn(`TELEMETRY_LOG_FILE environment variable not set`);
|
||||
|
@ -587,7 +626,7 @@ export class TestRig {
|
|||
// Split the content into individual JSON objects
|
||||
// They are separated by "}\n{"
|
||||
const jsonObjects = content
|
||||
.split(/}\s*\n\s*{/)
|
||||
.split(/}\n{/)
|
||||
.map((obj, index, array) => {
|
||||
// Add back the braces we removed during split
|
||||
if (index > 0) obj = '{' + obj;
|
||||
|
@ -626,14 +665,47 @@ export class TestRig {
|
|||
} catch (e) {
|
||||
// Skip objects that aren't valid JSON
|
||||
if (env.VERBOSE === 'true') {
|
||||
console.error(
|
||||
'Failed to parse telemetry object:',
|
||||
(e as Error).message,
|
||||
);
|
||||
console.error('Failed to parse telemetry object:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return logs;
|
||||
}
|
||||
|
||||
readLastApiRequest(): Record<string, unknown> | null {
|
||||
// Telemetry is always written to the test directory
|
||||
const logFilePath = join(this.testDir!, 'telemetry.log');
|
||||
|
||||
if (!logFilePath || !fs.existsSync(logFilePath)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const content = readFileSync(logFilePath, 'utf-8');
|
||||
const jsonObjects = content
|
||||
.split(/}\n{/)
|
||||
.map((obj, index, array) => {
|
||||
if (index > 0) obj = '{' + obj;
|
||||
if (index < array.length - 1) obj = obj + '}';
|
||||
return obj.trim();
|
||||
})
|
||||
.filter((obj) => obj);
|
||||
|
||||
let lastApiRequest = null;
|
||||
|
||||
for (const jsonStr of jsonObjects) {
|
||||
try {
|
||||
const logData = JSON.parse(jsonStr);
|
||||
if (
|
||||
logData.attributes &&
|
||||
logData.attributes['event.name'] === 'gemini_cli.api_request'
|
||||
) {
|
||||
lastApiRequest = logData;
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
return lastApiRequest;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -239,7 +239,37 @@ export async function main() {
|
|||
process.exit(1);
|
||||
}
|
||||
}
|
||||
await start_sandbox(sandboxConfig, memoryArgs, config);
|
||||
let stdinData = '';
|
||||
if (!process.stdin.isTTY) {
|
||||
stdinData = await readStdin();
|
||||
}
|
||||
|
||||
// This function is a copy of the one from sandbox.ts
|
||||
// It is moved here to decouple sandbox.ts from the CLI's argument structure.
|
||||
const injectStdinIntoArgs = (
|
||||
args: string[],
|
||||
stdinData?: string,
|
||||
): string[] => {
|
||||
const finalArgs = [...args];
|
||||
if (stdinData) {
|
||||
const promptIndex = finalArgs.findIndex(
|
||||
(arg) => arg === '--prompt' || arg === '-p',
|
||||
);
|
||||
if (promptIndex > -1 && finalArgs.length > promptIndex + 1) {
|
||||
// If there's a prompt argument, prepend stdin to it
|
||||
finalArgs[promptIndex + 1] =
|
||||
`${stdinData}\n\n${finalArgs[promptIndex + 1]}`;
|
||||
} else {
|
||||
// If there's no prompt argument, add stdin as the prompt
|
||||
finalArgs.push('--prompt', stdinData);
|
||||
}
|
||||
}
|
||||
return finalArgs;
|
||||
};
|
||||
|
||||
const sandboxArgs = injectStdinIntoArgs(process.argv, stdinData);
|
||||
|
||||
await start_sandbox(sandboxConfig, memoryArgs, config, sandboxArgs);
|
||||
process.exit(0);
|
||||
} else {
|
||||
// Not in a sandbox and not entering one, so relaunch with additional
|
||||
|
|
|
@ -24,6 +24,7 @@ function getContainerPath(hostPath: string): string {
|
|||
if (os.platform() !== 'win32') {
|
||||
return hostPath;
|
||||
}
|
||||
|
||||
const withForwardSlashes = hostPath.replace(/\\/g, '/');
|
||||
const match = withForwardSlashes.match(/^([A-Z]):\/(.*)/i);
|
||||
if (match) {
|
||||
|
@ -114,7 +115,7 @@ function ports(): string[] {
|
|||
.map((p) => p.trim());
|
||||
}
|
||||
|
||||
function entrypoint(workdir: string): string[] {
|
||||
function entrypoint(workdir: string, cliArgs: string[]): string[] {
|
||||
const isWindows = os.platform() === 'win32';
|
||||
const containerWorkdir = getContainerPath(workdir);
|
||||
const shellCmds = [];
|
||||
|
@ -166,7 +167,7 @@ function entrypoint(workdir: string): string[] {
|
|||
),
|
||||
);
|
||||
|
||||
const cliArgs = process.argv.slice(2).map((arg) => quote([arg]));
|
||||
const quotedCliArgs = cliArgs.slice(2).map((arg) => quote([arg]));
|
||||
const cliCmd =
|
||||
process.env['NODE_ENV'] === 'development'
|
||||
? process.env['DEBUG']
|
||||
|
@ -176,8 +177,7 @@ function entrypoint(workdir: string): string[] {
|
|||
? `node --inspect-brk=0.0.0.0:${process.env['DEBUG_PORT'] || '9229'} $(which gemini)`
|
||||
: 'gemini';
|
||||
|
||||
const args = [...shellCmds, cliCmd, ...cliArgs];
|
||||
|
||||
const args = [...shellCmds, cliCmd, ...quotedCliArgs];
|
||||
return ['bash', '-c', args.join(' ')];
|
||||
}
|
||||
|
||||
|
@ -185,6 +185,7 @@ export async function start_sandbox(
|
|||
config: SandboxConfig,
|
||||
nodeArgs: string[] = [],
|
||||
cliConfig?: Config,
|
||||
cliArgs: string[] = [],
|
||||
) {
|
||||
const patcher = new ConsolePatcher({
|
||||
debugMode: cliConfig?.getDebugMode() || !!process.env['DEBUG'],
|
||||
|
@ -263,6 +264,8 @@ export async function start_sandbox(
|
|||
args.push('-D', `INCLUDE_DIR_${i}=${dirPath}`);
|
||||
}
|
||||
|
||||
const finalArgv = cliArgs;
|
||||
|
||||
args.push(
|
||||
'-f',
|
||||
profileFile,
|
||||
|
@ -271,7 +274,7 @@ export async function start_sandbox(
|
|||
[
|
||||
`SANDBOX=sandbox-exec`,
|
||||
`NODE_OPTIONS="${nodeOptions}"`,
|
||||
...process.argv.map((arg) => quote([arg])),
|
||||
...finalArgv.map((arg) => quote([arg])),
|
||||
].join(' '),
|
||||
);
|
||||
// start and set up proxy if GEMINI_SANDBOX_PROXY_COMMAND is set
|
||||
|
@ -692,7 +695,7 @@ export async function start_sandbox(
|
|||
// Determine if the current user's UID/GID should be passed to the sandbox.
|
||||
// See shouldUseCurrentUserInSandbox for more details.
|
||||
let userFlag = '';
|
||||
const finalEntrypoint = entrypoint(workdir);
|
||||
const finalEntrypoint = entrypoint(workdir, cliArgs);
|
||||
|
||||
if (process.env['GEMINI_CLI_INTEGRATION_TEST'] === 'true') {
|
||||
args.push('--user', 'root');
|
||||
|
|
Loading…
Reference in New Issue