From 87474e52d7e0f3abcaf74272b11c7b9244d98af5 Mon Sep 17 00:00:00 2001 From: Olcan Date: Mon, 9 Jun 2025 08:57:30 -0700 Subject: [PATCH] rename shell tool more intuitively as run_shell_command (from historical name of execute_bash_command inherited from terminal tool) (#869) --- docs/architecture.md | 2 +- docs/cli/commands.md | 2 +- docs/cli/configuration.md | 2 +- docs/core/index.md | 2 +- docs/index.md | 2 +- docs/tools/index.md | 4 +- docs/tools/shell.md | 8 +- docs/troubleshooting.md | 2 +- .../core/__snapshots__/prompts.test.ts.snap | 112 +++++++++--------- packages/core/src/core/prompts.test.ts | 2 +- packages/core/src/tools/shell.ts | 2 +- 11 files changed, 70 insertions(+), 70 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index c061fb4e..bd33a42b 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -44,7 +44,7 @@ A typical interaction with the Gemini CLI follows this general flow: 5. **Tool Execution (if applicable):** - If the Gemini API requests a tool, the Core package prepares to execute it. - **User Confirmation for Potentially Impactful Tools:** If the requested tool can modify the file system (e.g., file edits, writes) or execute shell commands, the CLI (`packages/cli`) displays a confirmation prompt to the user. This prompt details the tool and its arguments, and the user must approve the execution. Read-only operations (e.g., reading files, listing directories) may not always require this explicit confirmation step. - - If confirmed (or if confirmation is not required for the specific tool), the Core package identifies and executes the relevant tool (e.g., `read_file`, `execute_bash_command`). + - If confirmed (or if confirmation is not required for the specific tool), the Core package identifies and executes the relevant tool (e.g., `read_file`, `run_shell_command`). - The tool performs its action (e.g., reads a file from the disk). - The result of the tool execution is sent back to the Gemini API by the Core. - The Gemini API processes the tool result and generates a final response. diff --git a/docs/cli/commands.md b/docs/cli/commands.md index 859ebd3c..ad091afc 100644 --- a/docs/cli/commands.md +++ b/docs/cli/commands.md @@ -114,6 +114,6 @@ The `!` prefix provides a powerful way to interact with your system's shell dire - Type your shell commands (e.g., `cd my_project`, `npm run dev`, `cat file.txt`). - Type `!` and press Enter again to exit Shell Mode. -- **Caution for all `!` usage:** Be mindful of the commands you execute, as they have the same permissions and impact as if you ran them directly in your terminal. The Shell Mode feature does not inherently add extra sandboxing beyond what's already configured for the underlying `execute_bash_command` tool. +- **Caution for all `!` usage:** Be mindful of the commands you execute, as they have the same permissions and impact as if you ran them directly in your terminal. The Shell Mode feature does not inherently add extra sandboxing beyond what's already configured for the underlying `run_shell_command` tool. This integrated shell capability allows for seamless switching between AI-assisted tasks and direct system interaction. diff --git a/docs/cli/configuration.md b/docs/cli/configuration.md index 703cf2c4..7c8ff81e 100644 --- a/docs/cli/configuration.md +++ b/docs/cli/configuration.md @@ -70,7 +70,7 @@ When you create a `.gemini/settings.json` file for project-specific settings, or - **Default:** `false` (users will be prompted for most tool calls). - **Behavior:** - If set to `true`, the CLI will bypass the confirmation prompt for tools deemed safe. An indicator may be shown in the UI when auto-accept is active. - - Potentially destructive or system-modifying tools (like `execute_bash_command` or `write_file`) will likely still require confirmation regardless of this setting. + - Potentially destructive or system-modifying tools (like `run_shell_command` or `write_file`) will likely still require confirmation regardless of this setting. - **Example:** `"autoAccept": true` - **`theme`** (string): diff --git a/docs/core/index.md b/docs/core/index.md index da7e8c3c..7811633c 100644 --- a/docs/core/index.md +++ b/docs/core/index.md @@ -47,7 +47,7 @@ The CLI and Core typically communicate over a local interface (e.g., standard in The core plays a vital role in security: - **API Key Management:** It handles the `GEMINI_API_KEY` and ensures it is used securely when communicating with the Gemini API. -- **Tool Execution:** When tools interact with the local system (e.g., `execute_bash_command`), the core (and its underlying tool implementations) must do so with appropriate caution, often involving sandboxing mechanisms to prevent unintended side effects. +- **Tool Execution:** When tools interact with the local system (e.g., `run_shell_command`), the core (and its underlying tool implementations) must do so with appropriate caution, often involving sandboxing mechanisms to prevent unintended side effects. ## Chat History Compression diff --git a/docs/index.md b/docs/index.md index ea0c31b4..1745d288 100644 --- a/docs/index.md +++ b/docs/index.md @@ -26,7 +26,7 @@ This documentation is organized into the following sections: - **Tools:** - **[Tools Overview](./tools/index.md):** A general look at the available tools. - **[File System Tools](./tools/file-system.md):** Documentation for tools like `read_file`, `write_file`, etc. - - **[Shell Tool](./tools/shell.md):** Using the `execute_bash_command` tool. + - **[Shell Tool](./tools/shell.md):** Using the `run_shell_command` tool. - **[Web Fetch Tool](./tools/web-fetch.md):** Using the `web_fetch` tool. - **[Web Search Tool](./tools/web-search.md):** Using the `google_web_search` tool. - **[Multi-File Read Tool](./tools/multi-file.md):** Using the `read_many_files` tool. diff --git a/docs/tools/index.md b/docs/tools/index.md index 2e3b7c26..2efce3bd 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -11,7 +11,7 @@ The core component (`packages/core`) manages these tools, presents their definit ## Why are Tools Important? - **Access to Local Information:** Tools allow Gemini to access your local file system, read file contents, list directories, etc. -- **Execution of Commands:** With tools like `execute_bash_command`, Gemini can run shell commands (with appropriate safety measures and user confirmation). +- **Execution of Commands:** With tools like `run_shell_command`, Gemini can run shell commands (with appropriate safety measures and user confirmation). - **Interaction with the Web:** Tools can fetch content from URLs. - **Action Taking:** Tools can modify files, write new files, or perform other actions on your system (again, typically with safeguards). - **Grounding Responses:** By using tools to fetch real-time or specific local data, Gemini's responses can be more accurate, relevant, and grounded in your actual context. @@ -30,7 +30,7 @@ You will typically see messages in the CLI indicating when a tool is being calle ## Security and Confirmation -Many tools, especially those that can modify your file system or execute commands (`write_file`, `edit`, `execute_bash_command`), are designed with safety in mind. The Gemini CLI will typically: +Many tools, especially those that can modify your file system or execute commands (`write_file`, `edit`, `run_shell_command`), are designed with safety in mind. The Gemini CLI will typically: - **Require Confirmation:** Prompt you before executing potentially sensitive operations, showing you what action is about to be taken. - **Utilize Sandboxing:** All tools are subject to restrictions enforced by sandboxing (see [README](../../README.md#sandboxing)). diff --git a/docs/tools/shell.md b/docs/tools/shell.md index 3c14c657..d8055deb 100644 --- a/docs/tools/shell.md +++ b/docs/tools/shell.md @@ -2,7 +2,7 @@ This document provides details on the shell tool. -## `execute_bash_command` +## `run_shell_command` - **Purpose:** Executes a given shell command using `bash -c `. This tool is essential for interacting with the underlying operating system, running scripts, or performing command-line operations. - **Arguments:** @@ -24,15 +24,15 @@ This document provides details on the shell tool. - **Examples:** - Listing files in the current directory: ``` - execute_bash_command(command="ls -la") + run_shell_command(command="ls -la") ``` - Running a script in a specific directory: ``` - execute_bash_command(command="./my_script.sh", directory="scripts", description="Run my custom script") + run_shell_command(command="./my_script.sh", directory="scripts", description="Run my custom script") ``` - Starting a background server: ``` - execute_bash_command(command="npm run dev &", description="Start development server in background") + run_shell_command(command="npm run dev &", description="Start development server in background") ``` - **Important Notes:** - **Security:** Be cautious when executing commands, especially those constructed from user input, to prevent security vulnerabilities. diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index c1b65f45..190b24dd 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -59,7 +59,7 @@ This guide provides solutions to common issues and debugging tips. - **Tool Issues:** - If a specific tool is failing, try to isolate the issue by running the simplest possible version of the command or operation the tool performs. - - For `execute_bash_command`, ensure the command works directly in your shell first. + - For `run_shell_command`, ensure the command works directly in your shell first. - For file system tools, double-check paths and permissions. - **Pre-flight Checks:** diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 2f2abb95..729b316a 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -20,13 +20,13 @@ exports[`Core System Prompt (prompts.ts) > should append userMemory with separat When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: 1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have. 2. **Plan:** Build a coherent and grounded (based off of the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'execute_bash_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. ## New Applications -**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'execute_bash_command'. +**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'. 1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2d or 3d game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions. 2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner. @@ -39,7 +39,7 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **3d Games:** HTML/CSS/JavaScript with Three.js. - **2d Games:** HTML/CSS/JavaScript. 3. **User Approval:** Obtain user approval for the proposed plan. -4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'execute_bash_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. +4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. 5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors. 6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype. @@ -55,12 +55,12 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with 'execute_bash_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage - **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase). -- **Command Execution:** Use the 'execute_bash_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. +- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until cancelled by the user. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information that belongs in project-specific \`GEMINI.md\` files. If unsure whether to save something, you can ask the user, "Should I remember that for you?" @@ -110,7 +110,7 @@ model: [tool_call: list_directory for path '.'] user: start the server implemented in server.js -model: [tool_call: execute_bash_command for 'node server.js &' because it must run in the background] +model: [tool_call: run_shell_command for 'node server.js &' because it must run in the background] @@ -125,7 +125,7 @@ Okay, 'requests' is available. Let me double check how it's used across the code I will now refactor src/auth.py. [tool_call: Uses replace or write_file edit tools following conventions] (After editing) -[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., execute_bash_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] +[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., run_shell_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] @@ -143,7 +143,7 @@ Now I'll look for existing or related test files to understand current testing c (After reviewing existing tests and the file content) [tool_call: write_file to create /path/to/someFile.test.ts with the test code] I've written the tests. Now I'll run the project's test command to verify them. -[tool_call: execute_bash_command for 'npm run test'] +[tool_call: run_shell_command for 'npm run test'] @@ -198,13 +198,13 @@ exports[`Core System Prompt (prompts.ts) > should include non-sandbox instructio When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: 1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have. 2. **Plan:** Build a coherent and grounded (based off of the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'execute_bash_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. ## New Applications -**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'execute_bash_command'. +**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'. 1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2d or 3d game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions. 2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner. @@ -217,7 +217,7 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **3d Games:** HTML/CSS/JavaScript with Three.js. - **2d Games:** HTML/CSS/JavaScript. 3. **User Approval:** Obtain user approval for the proposed plan. -4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'execute_bash_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. +4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. 5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors. 6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype. @@ -233,12 +233,12 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with 'execute_bash_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage - **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase). -- **Command Execution:** Use the 'execute_bash_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. +- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until cancelled by the user. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information that belongs in project-specific \`GEMINI.md\` files. If unsure whether to save something, you can ask the user, "Should I remember that for you?" @@ -288,7 +288,7 @@ model: [tool_call: list_directory for path '.'] user: start the server implemented in server.js -model: [tool_call: execute_bash_command for 'node server.js &' because it must run in the background] +model: [tool_call: run_shell_command for 'node server.js &' because it must run in the background] @@ -303,7 +303,7 @@ Okay, 'requests' is available. Let me double check how it's used across the code I will now refactor src/auth.py. [tool_call: Uses replace or write_file edit tools following conventions] (After editing) -[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., execute_bash_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] +[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., run_shell_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] @@ -321,7 +321,7 @@ Now I'll look for existing or related test files to understand current testing c (After reviewing existing tests and the file content) [tool_call: write_file to create /path/to/someFile.test.ts with the test code] I've written the tests. Now I'll run the project's test command to verify them. -[tool_call: execute_bash_command for 'npm run test'] +[tool_call: run_shell_command for 'npm run test'] @@ -371,13 +371,13 @@ exports[`Core System Prompt (prompts.ts) > should include sandbox-specific instr When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: 1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have. 2. **Plan:** Build a coherent and grounded (based off of the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'execute_bash_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. ## New Applications -**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'execute_bash_command'. +**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'. 1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2d or 3d game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions. 2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner. @@ -390,7 +390,7 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **3d Games:** HTML/CSS/JavaScript with Three.js. - **2d Games:** HTML/CSS/JavaScript. 3. **User Approval:** Obtain user approval for the proposed plan. -4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'execute_bash_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. +4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. 5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors. 6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype. @@ -406,12 +406,12 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with 'execute_bash_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage - **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase). -- **Command Execution:** Use the 'execute_bash_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. +- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until cancelled by the user. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information that belongs in project-specific \`GEMINI.md\` files. If unsure whether to save something, you can ask the user, "Should I remember that for you?" @@ -461,7 +461,7 @@ model: [tool_call: list_directory for path '.'] user: start the server implemented in server.js -model: [tool_call: execute_bash_command for 'node server.js &' because it must run in the background] +model: [tool_call: run_shell_command for 'node server.js &' because it must run in the background] @@ -476,7 +476,7 @@ Okay, 'requests' is available. Let me double check how it's used across the code I will now refactor src/auth.py. [tool_call: Uses replace or write_file edit tools following conventions] (After editing) -[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., execute_bash_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] +[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., run_shell_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] @@ -494,7 +494,7 @@ Now I'll look for existing or related test files to understand current testing c (After reviewing existing tests and the file content) [tool_call: write_file to create /path/to/someFile.test.ts with the test code] I've written the tests. Now I'll run the project's test command to verify them. -[tool_call: execute_bash_command for 'npm run test'] +[tool_call: run_shell_command for 'npm run test'] @@ -544,13 +544,13 @@ exports[`Core System Prompt (prompts.ts) > should include seatbelt-specific inst When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: 1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have. 2. **Plan:** Build a coherent and grounded (based off of the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'execute_bash_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. ## New Applications -**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'execute_bash_command'. +**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'. 1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2d or 3d game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions. 2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner. @@ -563,7 +563,7 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **3d Games:** HTML/CSS/JavaScript with Three.js. - **2d Games:** HTML/CSS/JavaScript. 3. **User Approval:** Obtain user approval for the proposed plan. -4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'execute_bash_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. +4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. 5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors. 6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype. @@ -579,12 +579,12 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with 'execute_bash_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage - **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase). -- **Command Execution:** Use the 'execute_bash_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. +- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until cancelled by the user. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information that belongs in project-specific \`GEMINI.md\` files. If unsure whether to save something, you can ask the user, "Should I remember that for you?" @@ -634,7 +634,7 @@ model: [tool_call: list_directory for path '.'] user: start the server implemented in server.js -model: [tool_call: execute_bash_command for 'node server.js &' because it must run in the background] +model: [tool_call: run_shell_command for 'node server.js &' because it must run in the background] @@ -649,7 +649,7 @@ Okay, 'requests' is available. Let me double check how it's used across the code I will now refactor src/auth.py. [tool_call: Uses replace or write_file edit tools following conventions] (After editing) -[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., execute_bash_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] +[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., run_shell_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] @@ -667,7 +667,7 @@ Now I'll look for existing or related test files to understand current testing c (After reviewing existing tests and the file content) [tool_call: write_file to create /path/to/someFile.test.ts with the test code] I've written the tests. Now I'll run the project's test command to verify them. -[tool_call: execute_bash_command for 'npm run test'] +[tool_call: run_shell_command for 'npm run test'] @@ -717,13 +717,13 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when no When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: 1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have. 2. **Plan:** Build a coherent and grounded (based off of the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'execute_bash_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. ## New Applications -**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'execute_bash_command'. +**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'. 1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2d or 3d game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions. 2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner. @@ -736,7 +736,7 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **3d Games:** HTML/CSS/JavaScript with Three.js. - **2d Games:** HTML/CSS/JavaScript. 3. **User Approval:** Obtain user approval for the proposed plan. -4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'execute_bash_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. +4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. 5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors. 6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype. @@ -752,12 +752,12 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with 'execute_bash_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage - **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase). -- **Command Execution:** Use the 'execute_bash_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. +- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until cancelled by the user. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information that belongs in project-specific \`GEMINI.md\` files. If unsure whether to save something, you can ask the user, "Should I remember that for you?" @@ -807,7 +807,7 @@ model: [tool_call: list_directory for path '.'] user: start the server implemented in server.js -model: [tool_call: execute_bash_command for 'node server.js &' because it must run in the background] +model: [tool_call: run_shell_command for 'node server.js &' because it must run in the background] @@ -822,7 +822,7 @@ Okay, 'requests' is available. Let me double check how it's used across the code I will now refactor src/auth.py. [tool_call: Uses replace or write_file edit tools following conventions] (After editing) -[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., execute_bash_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] +[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., run_shell_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] @@ -840,7 +840,7 @@ Now I'll look for existing or related test files to understand current testing c (After reviewing existing tests and the file content) [tool_call: write_file to create /path/to/someFile.test.ts with the test code] I've written the tests. Now I'll run the project's test command to verify them. -[tool_call: execute_bash_command for 'npm run test'] +[tool_call: run_shell_command for 'npm run test'] @@ -890,13 +890,13 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: 1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have. 2. **Plan:** Build a coherent and grounded (based off of the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'execute_bash_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. ## New Applications -**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'execute_bash_command'. +**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'. 1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2d or 3d game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions. 2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner. @@ -909,7 +909,7 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **3d Games:** HTML/CSS/JavaScript with Three.js. - **2d Games:** HTML/CSS/JavaScript. 3. **User Approval:** Obtain user approval for the proposed plan. -4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'execute_bash_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. +4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. 5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors. 6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype. @@ -925,12 +925,12 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with 'execute_bash_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage - **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase). -- **Command Execution:** Use the 'execute_bash_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. +- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until cancelled by the user. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information that belongs in project-specific \`GEMINI.md\` files. If unsure whether to save something, you can ask the user, "Should I remember that for you?" @@ -980,7 +980,7 @@ model: [tool_call: list_directory for path '.'] user: start the server implemented in server.js -model: [tool_call: execute_bash_command for 'node server.js &' because it must run in the background] +model: [tool_call: run_shell_command for 'node server.js &' because it must run in the background] @@ -995,7 +995,7 @@ Okay, 'requests' is available. Let me double check how it's used across the code I will now refactor src/auth.py. [tool_call: Uses replace or write_file edit tools following conventions] (After editing) -[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., execute_bash_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] +[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., run_shell_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] @@ -1013,7 +1013,7 @@ Now I'll look for existing or related test files to understand current testing c (After reviewing existing tests and the file content) [tool_call: write_file to create /path/to/someFile.test.ts with the test code] I've written the tests. Now I'll run the project's test command to verify them. -[tool_call: execute_bash_command for 'npm run test'] +[tool_call: run_shell_command for 'npm run test'] @@ -1063,13 +1063,13 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence: 1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use 'read_file' and 'read_many_files' to understand context and validate any assumptions you may have. 2. **Plan:** Build a coherent and grounded (based off of the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. -3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'execute_bash_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). +3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates'). 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands. 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to. ## New Applications -**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'execute_bash_command'. +**Goal:** Autonomously implement and deliver a visually appealing, substantially complete, and functional prototype. Utilize all tools at your disposal to implement the application. Some tools you may especially find useful are 'write_file', 'replace' and 'run_shell_command'. 1. **Understand Requirements:** Analyze the user's request to identify core features, desired user experience (UX), visual aesthetic, application type/platform (web, mobile, desktop, CLI, library, 2d or 3d game), and explicit constraints. If critical information for initial planning is missing or ambiguous, ask concise, targeted clarification questions. 2. **Propose Plan:** Formulate an internal development plan. Present a clear, concise, high-level summary to the user. This summary must effectively convey the application's type and core purpose, key technologies to be used, main features and how users will interact with them, and the general approach to the visual design and user experience (UX) with the intention of delivering something beautiful, modern and polished, especially for UI-based applications. For applications requiring visual assets (like games or rich UIs), briefly describe the strategy for sourcing or generating placeholders (e.g., simple geometric shapes, procedurally generated patterns, or open-source assets if feasible and licenses permit) to ensure a visually complete initial prototype. Ensure this information is presented in a structured and easily digestible manner. @@ -1082,7 +1082,7 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **3d Games:** HTML/CSS/JavaScript with Three.js. - **2d Games:** HTML/CSS/JavaScript. 3. **User Approval:** Obtain user approval for the proposed plan. -4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'execute_bash_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. +4. **Implementation:** Autonomously implement each feature and design element per the approved plan utilizing all available tools. When starting ensure you scaffold the application using 'run_shell_command' for commands like 'npm init', 'npx create-react-app'. Aim for full scope completion. Proactively create or source necessary placeholder assets (e.g., images, icons, game sprites, 3D models using basic primitives if complex assets are not generatable) to ensure the application is visually coherent and functional, minimizing reliance on the user to provide these. If the model can generate simple assets (e.g., a uniformly colored square sprite, a simple 3D cube), it should do so. Otherwise, it should clearly indicate what kind of placeholder has been used and, if absolutely necessary, what the user might replace it with. Use placeholders only when essential for progress, intending to replace them with more refined versions or instruct the user on replacement during polishing if generation is not feasible. 5. **Verify:** Review work against the original request, the approved plan. Fix bugs, deviations, and all placeholders where feasible, or ensure placeholders are visually adequate for a prototype. Ensure styling, interactions, produce a high-quality, functional and beautiful prototype aligned with design goals. Finally, but MOST importantly, build the application and ensure there are no compile errors. 6. **Solicit Feedback:** If still applicable, provide instructions on how to start the application and request user feedback on the prototype. @@ -1098,12 +1098,12 @@ When requested to perform tasks like fixing bugs, adding features, refactoring, - **Handling Inability:** If unable/unwilling to fulfill a request, state so briefly (1-2 sentences) without excessive justification. Offer alternatives if appropriate. ## Security and Safety Rules -- **Explain Critical Commands:** Before executing commands with 'execute_bash_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). +- **Explain Critical Commands:** Before executing commands with 'run_shell_command' that modify the file system, codebase, or system state, you *must* provide a brief explanation of the command's purpose and potential impact. Prioritize user understanding and safety. You should not ask permission to use the tool; the user will be presented with a confirmation dialogue upon use (you do not need to tell them this). - **Security First:** Always apply security best practices. Never introduce code that exposes, logs, or commits secrets, API keys, or other sensitive information. ## Tool Usage - **Parallelism:** Execute multiple independent tool calls in parallel when feasible (i.e. searching the codebase). -- **Command Execution:** Use the 'execute_bash_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. +- **Command Execution:** Use the 'run_shell_command' tool for running shell commands, remembering the safety rule to explain modifying commands first. - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until cancelled by the user. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information that belongs in project-specific \`GEMINI.md\` files. If unsure whether to save something, you can ask the user, "Should I remember that for you?" @@ -1153,7 +1153,7 @@ model: [tool_call: list_directory for path '.'] user: start the server implemented in server.js -model: [tool_call: execute_bash_command for 'node server.js &' because it must run in the background] +model: [tool_call: run_shell_command for 'node server.js &' because it must run in the background] @@ -1168,7 +1168,7 @@ Okay, 'requests' is available. Let me double check how it's used across the code I will now refactor src/auth.py. [tool_call: Uses replace or write_file edit tools following conventions] (After editing) -[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., execute_bash_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] +[tool_call: Runs project-specific lint/typecheck commands found previously, e.g., run_shell_command for 'npm run build', 'ruff', 'check', 'src/auth.py'] @@ -1186,7 +1186,7 @@ Now I'll look for existing or related test files to understand current testing c (After reviewing existing tests and the file content) [tool_call: write_file to create /path/to/someFile.test.ts with the test code] I've written the tests. Now I'll run the project's test command to verify them. -[tool_call: execute_bash_command for 'npm run test'] +[tool_call: run_shell_command for 'npm run test'] diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index 49502f92..902ac708 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -18,7 +18,7 @@ vi.mock('../tools/read-many-files', () => ({ ReadManyFilesTool: { Name: 'read_many_files' }, })); vi.mock('../tools/shell', () => ({ - ShellTool: { Name: 'execute_bash_command' }, + ShellTool: { Name: 'run_shell_command' }, })); vi.mock('../tools/write-file', () => ({ WriteFileTool: { Name: 'write_file' }, diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index fea276ad..e1cde43b 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -30,7 +30,7 @@ import { spawn } from 'child_process'; const OUTPUT_UPDATE_INTERVAL_MS = 1000; export class ShellTool extends BaseTool { - static Name: string = 'execute_bash_command'; + static Name: string = 'run_shell_command'; private whitelist: Set = new Set(); constructor(private readonly config: Config) {