Preflight and integration npx (#1096)

2025-06-16 08:27:29 -07:00 · 2025-06-16 08:27:29 -07:00 · df938d6ee8
parent a600588c20
commit df938d6ee8
24 changed files with 703 additions and 73 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -51,7 +51,6 @@ jobs:
          path: |
            packages/*/dist
            package-lock.json # Only upload dist and lockfile
-
  test:
    name: Test
    runs-on: ubuntu-latest
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@ -0,0 +1,49 @@
+# .github/workflows/e2e.yml
+
+name: E2E Tests
+
+on:
+  push:
+    branches: [main, release]
+  pull_request:
+    branches: [main, release]
+
+jobs:
+  e2e-test:
+    name: E2E Test - ${{ matrix.sandbox }}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        sandbox: [sandbox:none, sandbox:docker]
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20.x
+          cache: 'npm'
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Build project
+        run: npm run build
+
+      - name: Set up Docker
+        if: matrix.sandbox == 'sandbox:docker'
+        uses: docker/setup-buildx-action@v3
+
+      - name: Set up Podman
+        if: matrix.sandbox == 'sandbox:podman'
+        uses: redhat-actions/podman-login@v1
+        with:
+          registry: docker.io
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Run E2E tests
+        env:
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+        run: npm run test:integration:${{ matrix.sandbox }} -- --verbose --keep-output
--- a/.gitignore
+++ b/.gitignore
@ -35,3 +35,4 @@ packages/*/coverage/

 # Generated files
 packages/cli/src/generated/
+.integration-tests/
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -4,6 +4,24 @@
  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
  "version": "0.2.0",
  "configurations": [
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Launch CLI",
+      "runtimeExecutable": "npm",
+      "runtimeArgs": ["run", "start"],
+      "skipFiles": ["<node_internals>/**"],
+      "cwd": "${workspaceFolder}"
+    },
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Launch E2E",
+      "runtimeExecutable": "npm",
+      "runtimeArgs": ["run", "test:e2e", "read_many_files"],
+      "skipFiles": ["<node_internals>/**"],
+      "cwd": "${workspaceFolder}"
+    },
    {
      "name": "Attach",
      "port": 9229,
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -59,10 +59,7 @@ If you'd like to get early feedback on your work, please use GitHub's **Draft Pu

 #### 4. Ensure All Checks Pass

-Before submitting your PR (and before marking a draft as "Ready for Review"), please ensure that all automated checks are passing. This includes:
-
- **Tests:** All existing tests must pass, and new code should be accompanied by new tests. Run `npm run test`.
- **Linting and Style:** Your code must adhere to our project's style guidelines. Run `npm run preflight` to check everything.
+Before submitting your PR, ensure that all automated checks are passing by running `npm run preflight`. This command runs all tests, linting, and other style checks.

 #### 5. Update Documentation

@ -109,7 +106,7 @@ To build the entire project (all packages):
 npm run build
 ```

-This command typically compiles TypeScript to JavaScript, bundles assets, and prepares the packages for execution. Refer to `scripts/build.sh` and `package.json` scripts for more details on what happens during the build.
+This command typically compiles TypeScript to JavaScript, bundles assets, and prepares the packages for execution. Refer to `scripts/build.js` and `package.json` scripts for more details on what happens during the build.

 ### Enabling Sandboxing

@ -135,46 +132,53 @@ If you’d like to run the source build outside of the gemini-cli folder you can

 ### Running Tests

-To execute the test suite for the project:
+This project contains two types of tests: unit tests and integration tests.
+
+#### Unit Tests
+
+To execute the unit test suite for the project:

 ```bash
 npm run test
 ```

-This will run tests located in the `packages/core` and `packages/cli` directories. Ensure tests pass before submitting any changes.
+This will run tests located in the `packages/core` and `packages/cli` directories. Ensure tests pass before submitting any changes. For a more comprehensive check, it is recommended to run `npm run preflight`.

-#### Important Note for Sandbox Users on macOS/Windows
+#### Integration Tests

-This project uses native dependencies (e.g., `tree-sitter`) that are compiled for a specific operating system.
+The integration tests are designed to validate the end-to-end functionality of the Gemini CLI. They are not run as part of the default `npm run test` command.

-When you run the application in the development sandbox via `npm start`, these dependencies are automatically rebuilt for the container's Linux environment.
-
-Because of this, if you then try to run `npm run test` directly on your host machine (e.g., macOS), the tests will fail with an error similar to `dlopen` or `not a valid mach-o file`. This is because the test runner on your Mac cannot load the Linux-compiled dependencies from your `node_modules` folder.
-
-#### The Solution:
-
-To fix this, you must rebuild the native dependencies for your host machine's architecture before running the tests.
+To run the integration tests, use the following command:

 ```bash
-npm rebuild
+npm run test:e2e
 ```

-#### Recommended Workflow:
-
-1. After using the sandboxed `npm start`, and before you want to run tests locally, run `npm rebuild` in your terminal.
-2. Then, run `npm run test` as usual.
-
-You will need to repeat the npm rebuild step any time you switch from running the sandboxed application back to running local tests.
+For more detailed information on the integration testing framework, please see the [Integration Tests documentation](./docs/integration-tests.md).

 ### Linting and Preflight Checks

-To ensure code quality, formatting consistency, and run final checks before committing:
+To ensure code quality and formatting consistency, run the preflight check:

 ```bash
 npm run preflight
 ```

-This command usually runs ESLint, Prettier, and potentially other checks as defined in the project's `package.json`.
+This command will run ESLint, Prettier, all tests, and other checks as defined in the project's `package.json`.
+
+_ProTip_
+
+After cloning, create a Git pre-commit hook file to ensure your commits are always clean.
+
+```bash
+echo '
+# Run the preflight checks and abort the commit if they fail
+if ! npm run preflight; then
+  echo "Preflight checks failed. Commit aborted."
+  exit 1
+fi
+' > .git/hooks/pre-commit && chmod +x .git/hooks/pre-commit
+```

 #### Formatting

--- a/GEMINI.md
+++ b/GEMINI.md
@ -1,18 +1,14 @@
 ## Building and running

-Every time we make a set of changes you should run the following commands:
+Before submitting any changes, it is crucial to validate them by running the full preflight check. This command will build the repository, run all tests, check for type errors, and lint the code.

-Build the repo:
-npm run build
+To run the full suite of checks, execute the following command:

-Run tests:
-npm run test
-
-Type Check:
-npm run typecheck
-
-Lint and final checks:
+```bash
 npm run preflight
+```
+
+This single command ensures that your changes meet all the quality gates of the project. While you can run the individual steps (`build`, `test`, `typecheck`, `lint`) separately, it is highly recommended to use `npm run preflight` to ensure a comprehensive validation.

 ## Writing Tests

--- a/docs/integration-tests.md
+++ b/docs/integration-tests.md
@ -0,0 +1,149 @@
+# Integration Tests
+
+This document provides a detailed overview of the integration testing framework used in this project.
+
+## Overview
+
+The integration tests are designed to validate the end-to-end functionality of the Gemini CLI. They execute the built binary in a controlled environment and verify that it behaves as expected when interacting with the file system.
+
+These tests are located in the `integration-tests` directory and are run using a custom test runner that provides a consistent and configurable testing environment.
+
+## Running the Tests
+
+The integration tests are not run as part of the default `npm run test` command. They must be run explicitly using the `npm run test:integration:sandbox:none` script.
+
+As a developer, you can also use the following shortcut, which runs the same script with `--verbose --keep-output`:
+
+```bash
+npm run test:e2e
+```
+
+## Running a specific set of tests
+
+To run one or more test files, use `npm run <integration test command> <file_name1> ...`, where `<integration test command>` is `test:e2e` or one of the `test:integration*` scripts and `<file_name>` is the name of a file in `integration-tests/<file_name>.test.js`.
+
+```bash
+npm run test:e2e write_file
+```
+
+### Running a Single Test by Name
+
+To run a single test by its name, use the `--test-name-pattern` flag:
+
+```bash
+npm run test:e2e -- --test-name-pattern "reads a file"
+```
+
+### Running All Tests
+
+To run the entire suite of integration tests, use the following command:
+
+```bash
+npm run test:integration:all
+```
+
+### Sandbox Matrix
+
+The `all` command will run tests for `no sandboxing`, `docker` and `podman`.
+Each individual type can be run as
+
+```bash
+npm run test:integration:all
+```
+
+```bash
+npm run test:integration:sandbox:none
+```
+
+```bash
+npm run test:integration:sandbox:docker
+```
+
+```bash
+npm run test:integration:sandbox:podman
+```
+
+## Diagnostics
+
+The integration test runner provides several options for diagnostics to help track down test failures.
+
+### Keeping Test Output
+
+You can preserve the temporary files created during a test run for inspection. This is useful for debugging issues with file system operations.
+
+To keep the test output, you can either use the `--keep-output` flag or set the `KEEP_OUTPUT` environment variable to `true`.
+
+```bash
+# Using the flag
+npm run test:integration:sandbox:none -- --keep-output
+
+# Using the environment variable
+KEEP_OUTPUT=true npm run test:integration:sandbox:none
+```
+
+When output is kept, the test runner will print the path to the unique directory for the test run.
+
+### Verbose Output
+
+For more detailed debugging, the `--verbose` flag will stream the real-time output from the `gemini` command to the console. This is useful for observing the command's behavior as it runs.
+
+```bash
+npm run test:integration:sandbox:none -- --verbose
+```
+
+When using `--verbose` with `--keep-output`, the output is streamed to the console and also saved to a log file within the test's temporary directory.
+
+The verbose output is formatted to clearly identify the source of the logs:
+
+```
+--- TEST: <file-name-without-js>:<test-name> ---
+... output from the gemini command ...
+--- END TEST: <file-name-without-js>:<test-name> ---
+```
+
+## Linting and Formatting
+
+To ensure code quality and consistency, the integration test files are linted as part of the main build process. You can also manually run the linter and auto-fixer.
+
+### Running the Linter
+
+To check for linting errors, run the following command:
+
+```bash
+npm run lint
+```
+
+### Automatically Fixing Issues
+
+To automatically fix any fixable linting errors, run:
+
+```bash
+npm run lint:fix
+```
+
+## Directory Structure
+
+The integration tests create a unique directory for each test run inside the `.integration-tests` directory. Within this directory, a subdirectory is created for each test file, and within that, a subdirectory is created for each individual test case.
+
+This structure makes it easy to locate the artifacts for a specific test run, file, or case.
+
+```
+.integration-tests/
+└── <run-id>/
+    └── <test-file-name>.test.js/
+        └── <test-case-name>/
+            ├── output.log
+            └── ...other test artifacts...
+```
+
+## Continuous Integration
+
+To ensure the integration tests are always run, a GitHub Actions workflow is defined in `.github/workflows/e2e.yml`. This workflow automatically runs the integration tests on every push and pull request targeting the `main` or `release` branch.
+
+The workflow uses a matrix strategy to run the tests in different sandboxing environments:
+
+- `sandbox:none`: Runs the tests without any sandboxing.
+- `sandbox:docker`: Runs the tests in a Docker container.
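+- `sandbox:podman`: Runs the tests in a Podman container. (The workflow contains a Podman setup step, but `sandbox:podman` is not currently listed in the matrix, so this variant does not run in CI yet.)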
+
+This ensures that the Gemini CLI is tested across a variety of environments, improving its robustness and reliability.
--- a/eslint.config.js
+++ b/eslint.config.js
@ -177,6 +177,27 @@ export default tseslint.config(
  },
  // Prettier config must be last
  prettierConfig,
+  // extra settings for scripts that we run directly with node
+  {
+    files: ['./integration-tests/**/*.js'],
+    languageOptions: {
+      globals: {
+        ...globals.node,
+        process: 'readonly',
+        console: 'readonly',
+      },
+    },
+    rules: {
+      '@typescript-eslint/no-unused-vars': [
+        'error',
+        {
+          argsIgnorePattern: '^_',
+          varsIgnorePattern: '^_',
+          caughtErrorsIgnorePattern: '^_',
+        },
+      ],
+    },
+  },
  // Custom eslint rules for this repo
  {
    files: ['packages/**/*.{js,jsx,ts,tsx}'],
--- a/integration-tests/file-system.test.js
+++ b/integration-tests/file-system.test.js
@ -0,0 +1,30 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { strict as assert } from 'assert';
+import { test } from 'node:test';
+import { TestRig } from './test-helper.js';
+
+test('reads a file', (t) => {
+  const rig = new TestRig();
+  rig.setup(t.name);
+  rig.createFile('test.txt', 'hello world');
+
+  const output = rig.run(`read the file name test.txt`);
+
+  assert.ok(output.toLowerCase().includes('hello'));
+});
+
+test('writes a file', (t) => {
+  const rig = new TestRig();
+  rig.setup(t.name);
+  rig.createFile('test.txt', '');
+
+  rig.run(`edit test.txt to have a hello world message`);
+
+  const fileContent = rig.readFile('test.txt');
+  assert.ok(fileContent.toLowerCase().includes('hello'));
+});
--- a/integration-tests/google_web_search.test.js
+++ b/integration-tests/google_web_search.test.js
@ -0,0 +1,19 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { test } from 'node:test';
+import { strict as assert } from 'assert';
+import { TestRig } from './test-helper.js';
+
+test('should be able to search the web', async (t) => {
+  const rig = new TestRig();
+  rig.setup(t.name);
+
+  const prompt = `what planet do we live on`;
+  const result = await rig.run(prompt);
+
+  assert.ok(result.toLowerCase().includes('earth'));
+});
--- a/integration-tests/list_directory.test.js
+++ b/integration-tests/list_directory.test.js
@ -0,0 +1,22 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { test } from 'node:test';
+import { strict as assert } from 'assert';
+import { TestRig } from './test-helper.js';
+
+test('should be able to list a directory', async (t) => {
+  const rig = new TestRig();
+  rig.setup(t.name);
+  rig.createFile('file1.txt', 'file 1 content');
+  rig.mkdir('subdir');
+
+  const prompt = `Can you list the files in the current directory`;
+  const result = await rig.run(prompt);
+
+  assert.ok(result.includes('file1.txt'));
+  assert.ok(result.includes('subdir'));
+});
--- a/integration-tests/read_many_files.test.js
+++ b/integration-tests/read_many_files.test.js
@ -0,0 +1,22 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { test } from 'node:test';
+import { strict as assert } from 'assert';
+import { TestRig } from './test-helper.js';
+
+test.skip('should be able to read multiple files', async (t) => {
+  const rig = new TestRig();
+  rig.setup(t.name);
+  rig.createFile('file1.txt', 'file 1 content');
+  rig.createFile('file2.txt', 'file 2 content');
+
+  const prompt = `Read the files in this directory, list them and print them to the screen`;
+  const result = await rig.run(prompt);
+
+  assert.ok(result.includes('file 1 content'));
+  assert.ok(result.includes('file 2 content'));
+});
--- a/integration-tests/replace.test.js
+++ b/integration-tests/replace.test.js
@ -0,0 +1,22 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { test } from 'node:test';
+import { strict as assert } from 'assert';
+import { TestRig } from './test-helper.js';
+
+test('should be able to replace content in a file', async (t) => {
+  const rig = new TestRig();
+  rig.setup(t.name);
+
+  const fileName = 'file_to_replace.txt';
+  rig.createFile(fileName, 'original content');
+  const prompt = `Can you replace 'original' with 'replaced' in the file 'file_to_replace.txt'`;
+
+  await rig.run(prompt);
+  const newFileContent = rig.readFile(fileName);
+  assert.strictEqual(newFileContent, 'replaced content');
+});
--- a/integration-tests/run-tests.js
+++ b/integration-tests/run-tests.js
@ -0,0 +1,122 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { spawnSync } from 'child_process';
+import { spawn } from 'child_process';
+import { mkdirSync, rmSync, createWriteStream } from 'fs';
+import { join, dirname, basename } from 'path';
+import { fileURLToPath } from 'url';
+import { glob } from 'glob';
+
+async function main() {
+  const __dirname = dirname(fileURLToPath(import.meta.url));
+  const rootDir = join(__dirname, '..');
+  const integrationTestsDir = join(rootDir, '.integration-tests');
+
+  if (process.env.GEMINI_SANDBOX === 'docker' && !process.env.IS_DOCKER) {
+    console.log('Building sandbox for Docker...');
+    const buildResult = spawnSync('npm', ['run', 'build:all'], {
+      stdio: 'inherit',
+    });
+    if (buildResult.status !== 0) {
+      console.error('Sandbox build failed.');
+      process.exit(1);
+    }
+  }
+
+  const runId = `${Date.now()}`;
+  const runDir = join(integrationTestsDir, runId);
+
+  mkdirSync(runDir, { recursive: true });
+
+  const args = process.argv.slice(2);
+  const keepOutput =
+    process.env.KEEP_OUTPUT === 'true' || args.includes('--keep-output');
+  if (keepOutput) {
+    const keepOutputIndex = args.indexOf('--keep-output');
+    if (keepOutputIndex > -1) {
+      args.splice(keepOutputIndex, 1);
+    }
+    console.log(`Keeping output for test run in: ${runDir}`);
+  }
+
+  const verbose = args.includes('--verbose');
+  if (verbose) {
+    const verboseIndex = args.indexOf('--verbose');
+    if (verboseIndex > -1) {
+      args.splice(verboseIndex, 1);
+    }
+  }
+
+  const testPatterns =
+    args.length > 0
+      ? args.map((arg) => `integration-tests/${arg}.test.js`)
+      : ['integration-tests/*.test.js'];
+  const testFiles = glob.sync(testPatterns, { cwd: rootDir, absolute: true });
+
+  for (const testFile of testFiles) {
+    const testFileName = basename(testFile);
+    console.log(`\tFound test file: ${testFileName}`);
+  }
+
+  let allTestsPassed = true;
+
+  for (const testFile of testFiles) {
+    const testFileName = basename(testFile);
+    const testFileDir = join(runDir, testFileName);
+    mkdirSync(testFileDir, { recursive: true });
+
+    console.log(
+      `------------- Running test file: ${testFileName} ------------------------------`,
+    );
+
+    const child = spawn('node', ['--test', testFile], {
+      stdio: 'pipe',
+      env: {
+        ...process.env,
+        INTEGRATION_TEST_FILE_DIR: testFileDir,
+        KEEP_OUTPUT: keepOutput.toString(),
+        TEST_FILE_NAME: testFileName,
+      },
+    });
+
+    if (verbose) {
+      child.stdout.pipe(process.stdout);
+      child.stderr.pipe(process.stderr);
+    }
+
+    if (keepOutput) {
+      const outputFile = join(testFileDir, 'output.log');
+      const outputStream = createWriteStream(outputFile);
+      child.stdout.pipe(outputStream);
+      child.stderr.pipe(outputStream);
+      console.log(`Output for ${testFileName} written to: ${outputFile}`);
+    } else if (!verbose) {
+      child.stdout.pipe(process.stdout);
+      child.stderr.pipe(process.stderr);
+    }
+
+    const exitCode = await new Promise((resolve) => {
+      child.on('close', resolve);
+    });
+
+    if (exitCode !== 0) {
+      console.error(`Test file failed: ${testFileName}`);
+      allTestsPassed = false;
+    }
+  }
+
+  if (!keepOutput) {
+    rmSync(runDir, { recursive: true, force: true });
+  }
+
+  if (!allTestsPassed) {
+    console.error('One or more test files failed.');
+    process.exit(1);
+  }
+}
+
+main();
--- a/integration-tests/run_shell_command.test.js
+++ b/integration-tests/run_shell_command.test.js
@ -0,0 +1,20 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { test } from 'node:test';
+import { strict as assert } from 'assert';
+import { TestRig } from './test-helper.js';
+
+test('should be able to run a shell command', async (t) => {
+  const rig = new TestRig();
+  rig.setup(t.name);
+  rig.createFile('blah.txt', 'some content');
+
+  const prompt = `Can you use ls to list the contexts of the current folder`;
+  const result = await rig.run(prompt);
+
+  assert.ok(result.includes('blah.txt'));
+});
--- a/integration-tests/save_memory.test.js
+++ b/integration-tests/save_memory.test.js
@ -0,0 +1,22 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { test } from 'node:test';
+import { strict as assert } from 'assert';
+import { TestRig } from './test-helper.js';
+
+test('should be able to save to memory', async (t) => {
+  const rig = new TestRig();
+  rig.setup(t.name);
+
+  const prompt = `remember that my favorite color is  blue`;
+  await rig.run(prompt);
+  const result = await rig.run(
+    'what is my favorite color? tell me that and surround it with $ symbol',
+  );
+
+  assert.ok(result.toLowerCase().includes('$blue$'));
+});
--- a/integration-tests/test-helper.js
+++ b/integration-tests/test-helper.js
@ -0,0 +1,80 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { execSync } from 'child_process';
+import { mkdirSync, writeFileSync, readFileSync } from 'fs';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
+import { env } from 'process';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+function sanitizeTestName(name) { // Turns a test name into a lowercase, dash-separated slug (used as a directory name by TestRig.setup).
+  return name
+    .toLowerCase()
+    .replace(/[^a-z0-9]/g, '-') // every character outside a-z / 0-9 becomes '-'
+    .replace(/-+/g, '-'); // collapse consecutive dashes into a single one
+}
+
+export class TestRig {
+  constructor() {
+    this.bundlePath = join(__dirname, '..', 'bundle/gemini.js');
+    this.testDir = null;
+  }
+
+  setup(testName) {
+    this.testName = testName;
+    const sanitizedName = sanitizeTestName(testName);
+    this.testDir = join(env.INTEGRATION_TEST_FILE_DIR, sanitizedName);
+    mkdirSync(this.testDir, { recursive: true });
+  }
+
+  createFile(fileName, content) {
+    const filePath = join(this.testDir, fileName);
+    writeFileSync(filePath, content);
+    return filePath;
+  }
+
+  mkdir(dir) {
+    mkdirSync(join(this.testDir, dir));
+  }
+
+  run(prompt, ...args) {
+    const output = execSync(
+      `node ${this.bundlePath} --yolo --prompt "${prompt}" ${args.join(' ')}`,
+      {
+        cwd: this.testDir,
+        encoding: 'utf-8',
+      },
+    );
+
+    if (env.KEEP_OUTPUT === 'true') {
+      const testId = `${env.TEST_FILE_NAME.replace(
+        '.test.js',
+        '',
+      )}:${this.testName.replace(/ /g, '-')}`;
+      console.log(`--- TEST: ${testId} ---`);
+      console.log(output);
+      console.log(`--- END TEST: ${testId} ---`);
+    }
+
+    return output;
+  }
+
+  readFile(fileName) {
+    const content = readFileSync(join(this.testDir, fileName), 'utf-8');
+    if (env.KEEP_OUTPUT === 'true') {
+      const testId = `${env.TEST_FILE_NAME.replace(
+        '.test.js',
+        '',
+      )}:${this.testName.replace(/ /g, '-')}`;
+      console.log(`--- FILE: ${testId}/${fileName} ---`);
+      console.log(content);
+      console.log(`--- END FILE: ${testId}/${fileName} ---`);
+    }
+    return content;
+  }
+}
--- a/integration-tests/write_file.test.js
+++ b/integration-tests/write_file.test.js
@ -0,0 +1,21 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { test } from 'node:test';
+import { strict as assert } from 'assert';
+import { TestRig } from './test-helper.js';
+
+test('should be able to write a file', async (t) => {
+  const rig = new TestRig();
+  rig.setup(t.name);
+  const prompt = `show me an example of using the write tool. put a dad joke in dad.txt`;
+
+  await rig.run(prompt);
+  const newFilePath = 'dad.txt';
+
+  const newFileContent = rig.readFile(newFilePath);
+  assert.notEqual(newFileContent, '');
+});
--- a/package-lock.json
+++ b/package-lock.json
@ -26,7 +26,7 @@
        "eslint-plugin-license-header": "^0.8.0",
        "eslint-plugin-react": "^7.37.5",
        "eslint-plugin-react-hooks": "^5.2.0",
-        "glob": "^10.4.2",
+        "glob": "^10.4.5",
        "globals": "^16.0.0",
        "json": "^11.0.0",
        "lodash": "^4.17.21",
--- a/package.json
+++ b/package.json
@ -14,13 +14,18 @@
    "prepare": "npm run bundle",
    "test": "npm run test --workspaces",
    "test:ci": "npm run test:ci --workspaces --if-present",
+    "test:e2e": "npm run test:integration:sandbox:none -- --verbose --keep-output",
+    "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman",
+    "test:integration:sandbox:none": "GEMINI_SANDBOX=false node integration-tests/run-tests.js",
+    "test:integration:sandbox:docker": "GEMINI_SANDBOX=docker node integration-tests/run-tests.js",
+    "test:integration:sandbox:podman": "GEMINI_SANDBOX=podman node integration-tests/run-tests.js",
    "start": "node scripts/start.js",
    "debug": "cross-env DEBUG=1 node scripts/start.js",
-    "lint:fix": "eslint . --fix",
-    "lint": "eslint . --ext .ts,.tsx",
+    "lint:fix": "eslint . --fix && eslint integration-tests --fix",
+    "lint": "eslint . --ext .ts,.tsx && eslint integration-tests",
    "typecheck": "npm run typecheck --workspaces --if-present",
    "format": "prettier --write .",
-    "preflight": "npm run format --workspaces --if-present && npm run lint && npm run test --workspaces --if-present",
+    "preflight": "npm ci && npm run format && npm run lint:fix && npm run build && npm run typecheck && npm run test:ci",
    "auth:npm": "npx google-artifactregistry-auth",
    "auth:docker": "gcloud auth configure-docker us-west1-docker.pkg.dev",
    "auth": "npm run auth:npm && npm run auth:docker",
@ -59,7 +64,7 @@
    "eslint-plugin-license-header": "^0.8.0",
    "eslint-plugin-react": "^7.37.5",
    "eslint-plugin-react-hooks": "^5.2.0",
-    "glob": "^10.4.2",
+    "glob": "^10.4.5",
    "globals": "^16.0.0",
    "json": "^11.0.0",
    "lodash": "^4.17.21",
--- a/packages/cli/src/nonInteractiveCli.ts
+++ b/packages/cli/src/nonInteractiveCli.ts
@ -97,6 +97,7 @@ export async function runNonInteractive(
            console.error(
              `Error executing tool ${fc.name}: ${toolResponse.resultDisplay || toolResponse.error.message}`,
            );
+            process.exit(1);
          }

          if (toolResponse.responseParts) {
--- a/packages/cli/src/utils/sandbox.ts
+++ b/packages/cli/src/utils/sandbox.ts
@ -342,7 +342,6 @@ export async function start_sandbox(sandbox: string) {
    // spawn child and let it inherit stdio
    sandboxProcess = spawn(sandbox, args, {
      stdio: 'inherit',
-      env: sandboxEnv,
    });
    await new Promise((resolve) => sandboxProcess?.on('close', resolve));
    return;
@ -506,38 +505,41 @@ export async function start_sandbox(sandbox: string) {
  // copy as both upper-case and lower-case as is required by some utilities
  // GEMINI_SANDBOX_PROXY_COMMAND implies HTTPS_PROXY unless HTTP_PROXY is set
  const proxyCommand = process.env.GEMINI_SANDBOX_PROXY_COMMAND;
-  let proxy =
-    process.env.HTTPS_PROXY ||
-    process.env.https_proxy ||
-    process.env.HTTP_PROXY ||
-    process.env.http_proxy ||
-    'http://localhost:8877';
-  proxy = proxy.replace('localhost', SANDBOX_PROXY_NAME);
-  if (proxy) {
-    args.push('--env', `HTTPS_PROXY=${proxy}`);
-    args.push('--env', `https_proxy=${proxy}`); // lower-case can be required, e.g. for curl
-    args.push('--env', `HTTP_PROXY=${proxy}`);
-    args.push('--env', `http_proxy=${proxy}`);
-  }
-  const noProxy = process.env.NO_PROXY || process.env.no_proxy;
-  if (noProxy) {
-    args.push('--env', `NO_PROXY=${noProxy}`);
-    args.push('--env', `no_proxy=${noProxy}`);
-  }

-  // if using proxy, switch to internal networking through proxy
-  if (proxy) {
-    execSync(
-      `${sandbox} network inspect ${SANDBOX_NETWORK_NAME} || ${sandbox} network create --internal ${SANDBOX_NETWORK_NAME}`,
-    );
-    args.push('--network', SANDBOX_NETWORK_NAME);
-    // if proxy command is set, create a separate network w/ host access (i.e. non-internal)
-    // we will run proxy in its own container connected to both host network and internal network
-    // this allows proxy to work even on rootless podman on macos with host<->vm<->container isolation
-    if (proxyCommand) {
+  if (proxyCommand) {
+    let proxy =
+      process.env.HTTPS_PROXY ||
+      process.env.https_proxy ||
+      process.env.HTTP_PROXY ||
+      process.env.http_proxy ||
+      'http://localhost:8877';
+    proxy = proxy.replace('localhost', SANDBOX_PROXY_NAME);
+    if (proxy) {
+      args.push('--env', `HTTPS_PROXY=${proxy}`);
+      args.push('--env', `https_proxy=${proxy}`); // lower-case can be required, e.g. for curl
+      args.push('--env', `HTTP_PROXY=${proxy}`);
+      args.push('--env', `http_proxy=${proxy}`);
+    }
+    const noProxy = process.env.NO_PROXY || process.env.no_proxy;
+    if (noProxy) {
+      args.push('--env', `NO_PROXY=${noProxy}`);
+      args.push('--env', `no_proxy=${noProxy}`);
+    }
+
+    // if using proxy, switch to internal networking through proxy
+    if (proxy) {
      execSync(
-        `${sandbox} network inspect ${SANDBOX_PROXY_NAME} || ${sandbox} network create ${SANDBOX_PROXY_NAME}`,
+        `${sandbox} network inspect ${SANDBOX_NETWORK_NAME} || ${sandbox} network create --internal ${SANDBOX_NETWORK_NAME}`,
      );
+      args.push('--network', SANDBOX_NETWORK_NAME);
+      // if proxy command is set, create a separate network w/ host access (i.e. non-internal)
+      // we will run proxy in its own container connected to both host network and internal network
+      // this allows proxy to work even on rootless podman on macos with host<->vm<->container isolation
+      if (proxyCommand) {
+        execSync(
+          `${sandbox} network inspect ${SANDBOX_PROXY_NAME} || ${sandbox} network create ${SANDBOX_PROXY_NAME}`,
+        );
+      }
    }
  }

@ -675,6 +677,7 @@ export async function start_sandbox(sandbox: string) {
  // start and set up proxy if GEMINI_SANDBOX_PROXY_COMMAND is set
  let proxyProcess: ChildProcess | undefined = undefined;
  let sandboxProcess: ChildProcess | undefined = undefined;
+
  if (proxyCommand) {
    // run proxyCommand in its own container
    const proxyContainerCommand = `${sandbox} run --rm --init ${userFlag} --name ${SANDBOX_PROXY_NAME} --network ${SANDBOX_PROXY_NAME} -p 8877:8877 -v ${process.cwd()}:${workdir} --workdir ${workdir} ${image} ${proxyCommand}`;
--- a/scripts/build_sandbox.js
+++ b/scripts/build_sandbox.js
@ -107,7 +107,7 @@ function buildImage(imageName, dockerfile) {
  const buildCommand =
    sandboxCommand === 'podman'
      ? `${sandboxCommand} build --authfile=<(echo '{}')`
-      : `${sandboxCommand} --config=".docker" buildx build`;
+      : `${sandboxCommand} build`;

  const npmPackageVersion = JSON.parse(
    readFileSync(join(process.cwd(), 'package.json'), 'utf-8'),
--- a/scripts/start.js
+++ b/scripts/start.js
@ -64,4 +64,8 @@ const env = {
  DEV: 'true',
 };

-spawn('node', nodeArgs, { stdio: 'inherit', env });
+const child = spawn('node', nodeArgs, { stdio: 'inherit', env });
+
+child.on('close', (code) => {
+  process.exit(code);
+});