gemini-cli/integration-tests/test-helper.js

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { execSync, spawn } from 'child_process';
import { parse } from 'shell-quote';
import { mkdirSync, writeFileSync, readFileSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { env } from 'process';
import { fileExists } from '../scripts/telemetry_utils.js';

const __dirname = dirname(fileURLToPath(import.meta.url));

function sanitizeTestName(name) {
  return name
    .toLowerCase()
    .replace(/[^a-z0-9]/g, '-')
    .replace(/-+/g, '-');
}

// Helper to create detailed error messages
export function createToolCallErrorMessage(expectedTools, foundTools, result) {
  const expectedStr = Array.isArray(expectedTools)
    ? expectedTools.join(' or ')
    : expectedTools;
  return (
    `Expected to find ${expectedStr} tool call(s). ` +
    `Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` +
    `Output preview: ${result ? result.substring(0, 200) + '...' : 'no output'}`
  );
}

// Helper to print debug information when tests fail
export function printDebugInfo(rig, result, context = {}) {
  console.error('Test failed - Debug info:');
  console.error('Result length:', result.length);
  console.error('Result (first 500 chars):', result.substring(0, 500));
  console.error(
    'Result (last 500 chars):',
    result.substring(result.length - 500),
  );

  // Print any additional context provided
  Object.entries(context).forEach(([key, value]) => {
    console.error(`${key}:`, value);
  });

  // Check what tools were actually called
  const allTools = rig.readToolLogs();
  console.error(
    'All tool calls found:',
    allTools.map((t) => t.toolRequest.name),
  );

  return allTools;
}

// Helper to validate model output and warn about unexpected content
export function validateModelOutput(
  result,
  expectedContent = null,
  testName = '',
) {
  // First, check if there's any output at all (this should fail the test if missing)
  if (!result || result.trim().length === 0) {
    throw new Error('Expected LLM to return some output');
  }

  // If expectedContent is provided, check for it and warn if missing
  if (expectedContent) {
    const contents = Array.isArray(expectedContent)
      ? expectedContent
      : [expectedContent];
    const missingContent = contents.filter((content) => {
      if (typeof content === 'string') {
        return !result.toLowerCase().includes(content.toLowerCase());
      } else if (content instanceof RegExp) {
        return !content.test(result);
      }
      return false;
    });

    if (missingContent.length > 0) {
      console.warn(
        `Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`,
        'This is not ideal but not a test failure.',
      );
      console.warn(
        'The tool was called successfully, which is the main requirement.',
      );
      return false;
    } else if (process.env.VERBOSE === 'true') {
      console.log(`${testName}: Model output validated successfully.`);
    }
    return true;
  }

  return true;
}

export class TestRig {
  constructor() {
    this.bundlePath = join(__dirname, '..', 'bundle/gemini.js');
    this.testDir = null;
  }

  // Get timeout based on environment
  getDefaultTimeout() {
    if (env.CI) return 60000; // 1 minute in CI
    if (env.GEMINI_SANDBOX) return 30000; // 30s in containers
    return 15000; // 15s locally
  }

  setup(testName, options = {}) {
    this.testName = testName;
    const sanitizedName = sanitizeTestName(testName);
    this.testDir = join(env.INTEGRATION_TEST_FILE_DIR, sanitizedName);
    mkdirSync(this.testDir, { recursive: true });

    // Create a settings file to point the CLI to the local collector
    const geminiDir = join(this.testDir, '.gemini');
    mkdirSync(geminiDir, { recursive: true });
    // In sandbox mode, use an absolute path for telemetry inside the container
    // The container mounts the test directory at the same path as the host
    const telemetryPath =
      env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
        ? join(this.testDir, 'telemetry.log') // Absolute path in test directory
        : env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox

    const settings = {
      telemetry: {
        enabled: true,
        target: 'local',
        otlpEndpoint: '',
        outfile: telemetryPath,
      },
      sandbox: env.GEMINI_SANDBOX !== 'false' ? env.GEMINI_SANDBOX : false,
      ...options.settings, // Allow tests to override/add settings
    };
    writeFileSync(
      join(geminiDir, 'settings.json'),
      JSON.stringify(settings, null, 2),
    );
  }

  createFile(fileName, content) {
    const filePath = join(this.testDir, fileName);
    writeFileSync(filePath, content);
    return filePath;
  }

  mkdir(dir) {
    mkdirSync(join(this.testDir, dir), { recursive: true });
  }

  sync() {
    // ensure file system is done before spawning
    execSync('sync', { cwd: this.testDir });
  }

  run(promptOrOptions, ...args) {
    let command = `node ${this.bundlePath} --yolo`;
    const execOptions = {
      cwd: this.testDir,
      encoding: 'utf-8',
    };

    if (typeof promptOrOptions === 'string') {
      command += ` --prompt "${promptOrOptions}"`;
    } else if (
      typeof promptOrOptions === 'object' &&
      promptOrOptions !== null
    ) {
      if (promptOrOptions.prompt) {
        command += ` --prompt "${promptOrOptions.prompt}"`;
      }
      if (promptOrOptions.stdin) {
        execOptions.input = promptOrOptions.stdin;
      }
    }

    command += ` ${args.join(' ')}`;

    const commandArgs = parse(command);
    const node = commandArgs.shift();

    const child = spawn(node, commandArgs, {
      cwd: this.testDir,
      stdio: 'pipe',
    });

    let stdout = '';
    let stderr = '';

    // Handle stdin if provided
    if (execOptions.input) {
      child.stdin.write(execOptions.input);
      child.stdin.end();
    }

    child.stdout.on('data', (data) => {
      stdout += data;
      if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
        process.stdout.write(data);
      }
    });

    child.stderr.on('data', (data) => {
      stderr += data;
      if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
        process.stderr.write(data);
      }
    });

    const promise = new Promise((resolve, reject) => {
      child.on('close', (code) => {
        if (code === 0) {
          // Store the raw stdout for Podman telemetry parsing
          this._lastRunStdout = stdout;

          // Filter out telemetry output when running with Podman
          // Podman seems to output telemetry to stdout even when writing to file
          let result = stdout;
          if (env.GEMINI_SANDBOX === 'podman') {
            // Remove telemetry JSON objects from output
            // They are multi-line JSON objects that start with { and contain telemetry fields
            const lines = result.split('\n');
            const filteredLines = [];
            let inTelemetryObject = false;
            let braceDepth = 0;

            for (const line of lines) {
              if (!inTelemetryObject && line.trim() === '{') {
                // Check if this might be start of telemetry object
                inTelemetryObject = true;
                braceDepth = 1;
              } else if (inTelemetryObject) {
                // Count braces to track nesting
                for (const char of line) {
                  if (char === '{') braceDepth++;
                  else if (char === '}') braceDepth--;
                }

                // Check if we've closed all braces
                if (braceDepth === 0) {
                  inTelemetryObject = false;
                  // Skip this line (the closing brace)
                  continue;
                }
              } else {
                // Not in telemetry object, keep the line
                filteredLines.push(line);
              }
            }

            result = filteredLines.join('\n');
          }
          resolve(result);
        } else {
          reject(new Error(`Process exited with code ${code}:\n${stderr}`));
        }
      });
    });

    return promise;
  }

  readFile(fileName) {
    const content = readFileSync(join(this.testDir, fileName), 'utf-8');
    if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
      const testId = `${env.TEST_FILE_NAME.replace(
        '.test.js',
        '',
      )}:${this.testName.replace(/ /g, '-')}`;
      console.log(`--- FILE: ${testId}/${fileName} ---`);
      console.log(content);
      console.log(`--- END FILE: ${testId}/${fileName} ---`);
    }
    return content;
  }

  async cleanup() {
    // Clean up test directory
    if (this.testDir && !env.KEEP_OUTPUT) {
      try {
        execSync(`rm -rf ${this.testDir}`);
      } catch (error) {
        // Ignore cleanup errors
        if (env.VERBOSE === 'true') {
          console.warn('Cleanup warning:', error.message);
        }
      }
    }
  }

  async waitForTelemetryReady() {
    // In sandbox mode, telemetry is written to a relative path in the test directory
    const logFilePath =
      env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
        ? join(this.testDir, 'telemetry.log')
        : env.TELEMETRY_LOG_FILE;

    if (!logFilePath) return;

    // Wait for telemetry file to exist and have content
    await this.poll(
      () => {
        if (!fileExists(logFilePath)) return false;
        try {
          const content = readFileSync(logFilePath, 'utf-8');
          // Check if file has meaningful content (at least one complete JSON object)
          return content.includes('"event.name"');
        } catch (_e) {
          return false;
        }
      },
      2000, // 2 seconds max - reduced since telemetry should flush on exit now
      100, // check every 100ms
    );
  }

  async waitForToolCall(toolName, timeout) {
    // Use environment-specific timeout
    if (!timeout) {
      timeout = this.getDefaultTimeout();
    }

    // Wait for telemetry to be ready before polling for tool calls
    await this.waitForTelemetryReady();

    return this.poll(
      () => {
        const toolLogs = this.readToolLogs();
        return toolLogs.some((log) => log.toolRequest.name === toolName);
      },
      timeout,
      100,
    );
  }

  async waitForAnyToolCall(toolNames, timeout) {
    // Use environment-specific timeout
    if (!timeout) {
      timeout = this.getDefaultTimeout();
    }

    // Wait for telemetry to be ready before polling for tool calls
    await this.waitForTelemetryReady();

    return this.poll(
      () => {
        const toolLogs = this.readToolLogs();
        return toolNames.some((name) =>
          toolLogs.some((log) => log.toolRequest.name === name),
        );
      },
      timeout,
      100,
    );
  }

  async poll(predicate, timeout, interval) {
    const startTime = Date.now();
    let attempts = 0;
    while (Date.now() - startTime < timeout) {
      attempts++;
      const result = predicate();
      if (env.VERBOSE === 'true' && attempts % 5 === 0) {
        console.log(
          `Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`,
        );
      }
      if (result) {
        return true;
      }
      await new Promise((resolve) => setTimeout(resolve, interval));
    }
    if (env.VERBOSE === 'true') {
      console.log(`Poll timed out after ${attempts} attempts`);
    }
    return false;
  }

  _parseToolLogsFromStdout(stdout) {
    const logs = [];

    // The console output from Podman is JavaScript object notation, not JSON
    // Look for tool call events in the output
    // Updated regex to handle tool names with hyphens and underscores
    const toolCallPattern =
      /body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g;
    const matches = [...stdout.matchAll(toolCallPattern)];

    for (const match of matches) {
      const toolName = match[1];
      const success = match[2] === 'true';
      const duration = parseInt(match[3], 10);

      // Try to find function_args nearby
      const matchIndex = match.index || 0;
      const contextStart = Math.max(0, matchIndex - 500);
      const contextEnd = Math.min(stdout.length, matchIndex + 500);
      const context = stdout.substring(contextStart, contextEnd);

      // Look for function_args in the context
      let args = '{}';
      const argsMatch = context.match(/function_args:\s*'([^']+)'/);
      if (argsMatch) {
        args = argsMatch[1];
      }

      // Also try to find function_name to double-check
      // Updated regex to handle tool names with hyphens and underscores
      const nameMatch = context.match(/function_name:\s*'([\w-]+)'/);
      const actualToolName = nameMatch ? nameMatch[1] : toolName;

      logs.push({
        timestamp: Date.now(),
        toolRequest: {
          name: actualToolName,
          args: args,
          success: success,
          duration_ms: duration,
        },
      });
    }

    // If no matches found with the simple pattern, try the JSON parsing approach
    // in case the format changes
    if (logs.length === 0) {
      const lines = stdout.split('\n');
      let currentObject = '';
      let inObject = false;
      let braceDepth = 0;

      for (const line of lines) {
        if (!inObject && line.trim() === '{') {
          inObject = true;
          braceDepth = 1;
          currentObject = line + '\n';
        } else if (inObject) {
          currentObject += line + '\n';

          // Count braces
          for (const char of line) {
            if (char === '{') braceDepth++;
            else if (char === '}') braceDepth--;
          }

          // If we've closed all braces, try to parse the object
          if (braceDepth === 0) {
            inObject = false;
            try {
              const obj = JSON.parse(currentObject);

              // Check for tool call in different formats
              if (
                obj.body &&
                obj.body.includes('Tool call:') &&
                obj.attributes
              ) {
                const bodyMatch = obj.body.match(/Tool call: (\w+)\./);
                if (bodyMatch) {
                  logs.push({
                    timestamp: obj.timestamp || Date.now(),
                    toolRequest: {
                      name: bodyMatch[1],
                      args: obj.attributes.function_args || '{}',
                      success: obj.attributes.success !== false,
                      duration_ms: obj.attributes.duration_ms || 0,
                    },
                  });
                }
              } else if (
                obj.attributes &&
                obj.attributes['event.name'] === 'gemini_cli.tool_call'
              ) {
                logs.push({
                  timestamp: obj.attributes['event.timestamp'],
                  toolRequest: {
                    name: obj.attributes.function_name,
                    args: obj.attributes.function_args,
                    success: obj.attributes.success,
                    duration_ms: obj.attributes.duration_ms,
                  },
                });
              }
            } catch (_e) {
              // Not valid JSON
            }
            currentObject = '';
          }
        }
      }
    }

    return logs;
  }

  readToolLogs() {
    // For Podman, first check if telemetry file exists and has content
    // If not, fall back to parsing from stdout
    if (env.GEMINI_SANDBOX === 'podman') {
      // Try reading from file first
      const logFilePath = join(this.testDir, 'telemetry.log');

      if (fileExists(logFilePath)) {
        try {
          const content = readFileSync(logFilePath, 'utf-8');
          if (content && content.includes('"event.name"')) {
            // File has content, use normal file parsing
            // Continue to the normal file parsing logic below
          } else if (this._lastRunStdout) {
            // File exists but is empty or doesn't have events, parse from stdout
            return this._parseToolLogsFromStdout(this._lastRunStdout);
          }
        } catch (_e) {
          // Error reading file, fall back to stdout
          if (this._lastRunStdout) {
            return this._parseToolLogsFromStdout(this._lastRunStdout);
          }
        }
      } else if (this._lastRunStdout) {
        // No file exists, parse from stdout
        return this._parseToolLogsFromStdout(this._lastRunStdout);
      }
    }

    // In sandbox mode, telemetry is written to a relative path in the test directory
    const logFilePath =
      env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
        ? join(this.testDir, 'telemetry.log')
        : env.TELEMETRY_LOG_FILE;

    if (!logFilePath) {
      console.warn(`TELEMETRY_LOG_FILE environment variable not set`);
      return [];
    }

    // Check if file exists, if not return empty array (file might not be created yet)
    if (!fileExists(logFilePath)) {
      return [];
    }

    const content = readFileSync(logFilePath, 'utf-8');

    // Split the content into individual JSON objects
    // They are separated by "}\n{" pattern
    const jsonObjects = content
      .split(/}\s*\n\s*{/)
      .map((obj, index, array) => {
        // Add back the braces we removed during split
        if (index > 0) obj = '{' + obj;
        if (index < array.length - 1) obj = obj + '}';
        return obj.trim();
      })
      .filter((obj) => obj);

    const logs = [];

    for (const jsonStr of jsonObjects) {
      try {
        const logData = JSON.parse(jsonStr);
        // Look for tool call logs
        if (
          logData.attributes &&
          logData.attributes['event.name'] === 'gemini_cli.tool_call'
        ) {
          const toolName = logData.attributes.function_name;
          logs.push({
            toolRequest: {
              name: toolName,
              args: logData.attributes.function_args,
              success: logData.attributes.success,
              duration_ms: logData.attributes.duration_ms,
            },
          });
        }
      } catch (_e) {
        // Skip objects that aren't valid JSON
        if (env.VERBOSE === 'true') {
          console.error('Failed to parse telemetry object:', _e.message);
        }
      }
    }

    return logs;
  }
}