switch to shell tool, deprecating terminal (#203)

* switch to shell tool, deprecating terminal
* Merge remote-tracking branch 'origin/main' into deprecate_terminal
2025-04-28 15:05:36 -07:00 · 2025-04-28 15:05:36 -07:00 · 57ceadb7d8
parent 30b04295d2
commit 57ceadb7d8
6 changed files with 53 additions and 36 deletions
--- a/packages/cli/bin/sandbox.sh
+++ b/packages/cli/bin/sandbox.sh
@ -38,8 +38,8 @@ if [ -n "${GEMINI_API_KEY:-}" ]; then run_args+=(--env GEMINI_API_KEY="$GEMINI_A
 # copy GEMINI_CODE_MODEL
 if [ -n "${GEMINI_CODE_MODEL:-}" ]; then run_args+=(--env GEMINI_CODE_MODEL="$GEMINI_CODE_MODEL"); fi
-# copy SHELL_TOOL to optionally enable shell tool
+# copy TERMINAL_TOOL to optionally revert to old terminal tool
-if [ -n "${SHELL_TOOL:-}" ]; then run_args+=(--env SHELL_TOOL="$SHELL_TOOL"); fi
+if [ -n "${TERMINAL_TOOL:-}" ]; then run_args+=(--env TERMINAL_TOOL="$TERMINAL_TOOL"); fi
 # copy TERM and COLORTERM to try to maintain terminal setup
 if [ -n "${TERM:-}" ]; then run_args+=(--env TERM="$TERM"); fi
@ -51,4 +51,4 @@ run_args+=(--env "SANDBOX=$IMAGE:$TAG-$INDEX")
 node_args=("$CLI_PATH" "$@")
-docker run "${run_args[@]}" --init --workdir "$WORKDIR" "$REGISTRY/$IMAGE:$TAG" node "${node_args[@]}"
+docker run "${run_args[@]}" --init --workdir "$WORKDIR" "$REGISTRY/$IMAGE:$TAG" node "${node_args[@]}"
--- a/packages/cli/src/gemini.ts
+++ b/packages/cli/src/gemini.ts
@ -157,9 +157,9 @@ async function start_sandbox(sandbox: string) {
    args.push('--env', `GEMINI_CODE_MODEL=${process.env.GEMINI_CODE_MODEL}`);
  }
-  // copy SHELL_TOOL to optionally enable shell tool
+  // copy TERMINAL_TOOL to optionally revert to old terminal tool
-  if (process.env.SHELL_TOOL) {
+  if (process.env.TERMINAL_TOOL) {
-    args.push('--env', `SHELL_TOOL=${process.env.SHELL_TOOL}`);
+    args.push('--env', `TERMINAL_TOOL=${process.env.TERMINAL_TOOL}`);
  }
  // copy TERM and COLORTERM to try to maintain terminal setup
--- a/packages/server/src/config/config.ts
+++ b/packages/server/src/config/config.ts
@ -145,11 +145,11 @@ function createToolRegistry(config: Config): ToolRegistry {
    new ReadManyFilesTool(targetDir),
  ];
-  // use ShellTool (next-gen TerminalTool) if environment variable is set
+  // if TERMINAL_TOOL is set, revert to deprecated TerminalTool
-  if (process.env.SHELL_TOOL) {
+  if (process.env.TERMINAL_TOOL) {
-    tools.push(new ShellTool(config));
-  } else {
    tools.push(new TerminalTool(targetDir, config));
+  } else {
+    tools.push(new ShellTool(config));
  }
  for (const tool of tools) {
--- a/packages/server/src/core/prompts.ts
+++ b/packages/server/src/core/prompts.ts
@ -4,12 +4,13 @@
 * SPDX-License-Identifier: Apache-2.0
 */
+import { LSTool } from '../tools/ls.js';
 import { EditTool } from '../tools/edit.js';
 import { GlobTool } from '../tools/glob.js';
 import { GrepTool } from '../tools/grep.js';
 import { ReadFileTool } from '../tools/read-file.js';
 import { ReadManyFilesTool } from '../tools/read-many-files.js';
-import { TerminalTool } from '../tools/terminal.js';
+import { ShellTool } from '../tools/shell.js';
 import { WebFetchTool } from '../tools/web-fetch.js';
 import { WriteFileTool } from '../tools/write-file.js';
@ -25,13 +26,13 @@ You are an interactive CLI agent specializing in software engineering tasks. You
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
 1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GrepTool.Name}' and '${GlobTool.Name}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use '${ReadFileTool.Name}' and '${ReadManyFilesTool.Name}' to understand context and validate any assumptions you may have.
 2. **Plan:** Build a coherent and grounded (based off of the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process.
-3. **Implement:** Use the available tools (e.g., '${EditTool.Name}', '${WriteFileTool.Name}' '${TerminalTool.Name}' ...) to act on the plan, strictly adhering to the project's established conventions (see 'Following Conventions' below).
+3. **Implement:** Use the available tools (e.g., '${EditTool.Name}', '${WriteFileTool.Name}' '${ShellTool.Name}' ...) to act on the plan, strictly adhering to the project's established conventions (see 'Following Conventions' below).
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
-## New Application
+## New Applications
-**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are '${WriteFileTool.Name}', '${EditTool.Name}' and '${TerminalTool.Name}'.
+**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are '${WriteFileTool.Name}', '${EditTool.Name}' and '${ShellTool.Name}'.
 1.  **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions.
 2.  **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern and polished, especially for UI-based applications. Ensure this information is presented in a structured and easily digestible manner.
@ -42,7 +43,7 @@ When requested to perform tasks like fixing bugs, adding features, refactoring,
      - **CLIs:** Python or Go.
      - **Mobile App:** Flutter (Dart) which inherently uses Material Design, or React Native (JavaScript/TypeScript) with styling libraries that support Bootstrap CSS concepts and Material Design components.
 3.  **User Approval:** Obtain user approval for the proposed plan.
-4.  **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using '${TerminalTool.Name}' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Use placeholders only when essential for progress, intending to replace them before or during polishing.
+4.  **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using '${ShellTool.Name}' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Use placeholders only when essential for progress, intending to replace them before or during polishing.
 5.  **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders. Ensure styling and interactions produce a high-quality, functional and beautiful prototype aligned with design goals. Before continuing verify the apps functionality by:
    - Build the application and ensure no compile errors.
    - If the application is a web app start the application and use the '${WebFetchTool.Name}' tool to double check everything is running properly.
@ -67,7 +68,7 @@ Rigorously adhere to existing project conventions when reading or modifying code
 -   **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate.
 ## Security and Safety Rules
-1.  **Explain Critical Commands:** Before executing commands with '${TerminalTool.Name}' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this).
+1.  **Explain Critical Commands:** Before executing commands with '${ShellTool.Name}' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this).
 2.  **NEVER Commit Changes:** Unless explicitly instructed by the user to do so, you MUST NOT commit changes to version control (e.g., git commit). This is critical for user control over their repository.
 3.  **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information.
@ -78,7 +79,8 @@ Rigorously adhere to existing project conventions when reading or modifying code
 ## Tool Usage
 -   **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase).
--   **Command Execution:** Use the '${TerminalTool.Name}' tool for running shell commands, remembering the safety rule to explain modifying commands first.
+-   **Command Execution:** Use the '${ShellTool.Name}' tool for running shell commands, remembering the safety rule to explain modifying commands first.
+-   **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user.
 ## Interaction Details
 -   **Help Command:** The user can use '/help' to display help information.
@ -96,8 +98,23 @@ assistant: true
 </example>
 <example>
-user: List files here.
+user: list files here.
-assistant: [tool_call: ${TerminalTool.Name} for 'ls -la']
+assistant: [tool_call: ${LSTool.Name} for path '.']
 </example>
+<example>
+user: what time is it?
+assistant: [tool_call: ${ShellTool.Name} for 'date']
+</example>
+<example>
+user: sleep for 10 seconds in background
+assistant: [tool_call: ${ShellTool.Name} for 'sleep 10 &']
+</example>
+<example>
+user: start the server implemented in server.js
+assistant: [tool_call: ${ShellTool.Name} for 'node server.js &']
+</example>
 <example>
@ -112,7 +129,7 @@ Okay, 'requests' is available. Let me double check how it's used across the code
 I will now refactor src/auth.py.
 [tool_call: Uses ${EditTool.Name} or ${WriteFileTool.Name} edit tools following conventions]
 (After editing)
-[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., ${TerminalTool.Name} for 'npm run build', 'ruff', 'check', 'src/auth.py']
+[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., ${ShellTool.Name} for 'npm run build', 'ruff', 'check', 'src/auth.py']
 </example>
 <example>
--- a/packages/server/src/tools/shell.ts
+++ b/packages/server/src/tools/shell.ts
@ -24,7 +24,6 @@ export interface ShellToolParams {
 import { spawn } from 'child_process';
 export class ShellTool extends BaseTool<ShellToolParams, ToolResult> {
-  // name should match TerminalTool.Name used in prompts.ts for now
  static Name: string = 'execute_bash_command';
  private readonly config: Config;
  private whitelist: Set<string> = new Set();
@ -193,18 +192,19 @@ export class ShellTool extends BaseTool<ShellToolParams, ToolResult> {
    // wait for the shell to exit
    await new Promise((resolve) => shell.on('close', resolve));
-    return {
+    const llmContent = [
-      llmContent: [
+      `Command: ${params.command}`,
-        `Command: ${params.command}`,
+      `Directory: ${params.directory || '(root)'}`,
-        `Directory: ${params.directory || '(root)'}`,
+      `Stdout: ${stdout || '(empty)'}`,
-        `Stdout: ${stdout || '(empty)'}`,
+      `Stderr: ${stderr || '(empty)'}`,
-        `Stderr: ${stderr || '(empty)'}`,
+      `Error: ${error ?? '(none)'}`,
-        `Error: ${error ?? '(none)'}`,
+      `Exit Code: ${code ?? '(none)'}`,
-        `Exit Code: ${code ?? '(none)'}`,
+      `Signal: ${signal ?? '(none)'}`,
-        `Signal: ${signal ?? '(none)'}`,
+      `Background PIDs: ${backgroundPIDs.length ? backgroundPIDs.join(', ') : '(none)'}`,
-        `Background PIDs: ${backgroundPIDs.length ? backgroundPIDs.join(', ') : '(none)'}`,
+    ].join('\n');
-      ].join('\n'),
+
-      returnDisplay: output,
+    const returnDisplay = this.config.getDebugMode() ? llmContent : output;
-    };
+
+    return { llmContent, returnDisplay };
  }
 }
--- a/scripts/start_sandbox.sh
+++ b/scripts/start_sandbox.sh
@ -98,8 +98,8 @@ if [ -n "${GEMINI_API_KEY:-}" ]; then run_args+=(--env GEMINI_API_KEY="$GEMINI_A
 # copy GEMINI_CODE_MODEL
 if [ -n "${GEMINI_CODE_MODEL:-}" ]; then run_args+=(--env GEMINI_CODE_MODEL="$GEMINI_CODE_MODEL"); fi
-# copy SHELL_TOOL to optionally enable shell tool
+# copy TERMINAL_TOOL to optionally revert to old terminal tool
-if [ -n "${SHELL_TOOL:-}" ]; then run_args+=(--env SHELL_TOOL="$SHELL_TOOL"); fi
+if [ -n "${TERMINAL_TOOL:-}" ]; then run_args+=(--env TERMINAL_TOOL="$TERMINAL_TOOL"); fi
 # copy TERM and COLORTERM to try to maintain terminal setup
 if [ -n "${TERM:-}" ]; then run_args+=(--env TERM="$TERM"); fi