fix(glob): Improve glob tool accuracy and output

This commit enhances the glob tool by:

- Ensuring that glob patterns are used effectively. Previously, simple file names without glob syntax (e.g., "file.ts") would only search the root directory. This change encourages more precise glob patterns (e.g., "**/file.ts") for broader searches.
- Returning absolute file paths instead of relative paths. This provides clearer, less ambiguous output and avoids encouraging the use of relative paths in subsequent operations.
- Adding comprehensive tests for various globbing scenarios, including case sensitivity and path specifications.

These changes address an issue where the glob tool could not find an expected item when a specific path was provided without appropriate glob syntax, and improve the overall reliability and usability of the tool.

Fixes https://b.corp.google.com/issues/418486553
This commit is contained in:
Taylor Mullen 2025-05-18 00:04:32 -07:00 committed by N. Taylor Mullen
parent f0b9199a77
commit a0eb8e67c7
2 changed files with 263 additions and 22 deletions

View File

@ -0,0 +1,241 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { GlobTool, GlobToolParams } from './glob.js';
// import { ToolResult } from './tools.js'; // ToolResult is implicitly used by execute
import path from 'path';
import fs from 'fs/promises';
import os from 'os';
import { describe, it, expect, beforeEach, afterEach } from 'vitest'; // Removed vi
describe('GlobTool', () => {
let tempRootDir: string; // This will be the rootDirectory for the GlobTool instance
let globTool: GlobTool;
const abortSignal = new AbortController().signal;
beforeEach(async () => {
  // Each test gets its own throwaway root directory, which is also the
  // rootDirectory handed to the GlobTool instance under test.
  tempRootDir = await fs.mkdtemp(path.join(os.tmpdir(), 'glob-tool-root-'));
  globTool = new GlobTool(tempRootDir);

  // Top-level files; the second one uses different casing on purpose so the
  // case-sensitivity tests have something to distinguish.
  await fs.writeFile(path.join(tempRootDir, 'fileA.txt'), 'contentA');
  await fs.writeFile(path.join(tempRootDir, 'FileB.TXT'), 'contentB');

  // Subdirectory with the same mixed-case arrangement.
  await fs.mkdir(path.join(tempRootDir, 'sub'));
  await fs.writeFile(path.join(tempRootDir, 'sub', 'fileC.md'), 'contentC');
  await fs.writeFile(path.join(tempRootDir, 'sub', 'FileD.MD'), 'contentD');

  // Deeper subdirectory, used by the globstar test.
  await fs.mkdir(path.join(tempRootDir, 'sub', 'deep'));
  await fs.writeFile(
    path.join(tempRootDir, 'sub', 'deep', 'fileE.log'),
    'contentE',
  );

  // Files for the mtime sorting test. Their modification times are pinned
  // explicitly instead of sleeping between writes: a sleep slows down every
  // test and can still produce identical timestamps on filesystems with
  // coarse mtime resolution.
  const olderFile = path.join(tempRootDir, 'older.sortme');
  const newerFile = path.join(tempRootDir, 'newer.sortme');
  await fs.writeFile(olderFile, 'older_content');
  await fs.writeFile(newerFile, 'newer_content');
  const newerTime = new Date();
  const olderTime = new Date(newerTime.getTime() - 60 * 1000);
  await fs.utimes(olderFile, olderTime, olderTime);
  await fs.utimes(newerFile, newerTime, newerTime);
});
// Tear down the per-test temporary root directory and everything beneath it.
afterEach(() => fs.rm(tempRootDir, { recursive: true, force: true }));
describe('execute', () => {
  // Runs the tool under test with the suite-wide abort signal. `globTool` is
  // read at call time, so this always uses the instance built in beforeEach.
  const run = (params: GlobToolParams) => globTool.execute(params, abortSignal);

  it('should find files matching a simple pattern in the root', async () => {
    const result = await run({ pattern: '*.txt' });
    // Matching is case-insensitive unless requested otherwise, so both
    // fileA.txt and FileB.TXT are expected, reported as absolute paths.
    expect(result.llmContent).toContain('Found 2 file(s)');
    expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt'));
    expect(result.llmContent).toContain(path.join(tempRootDir, 'FileB.TXT'));
    expect(result.returnDisplay).toBe('Found 2 matching file(s)');
  });

  it('should find files case-sensitively when case_sensitive is true', async () => {
    const result = await run({ pattern: '*.txt', case_sensitive: true });
    expect(result.llmContent).toContain('Found 1 file(s)');
    expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt'));
    expect(result.llmContent).not.toContain(
      path.join(tempRootDir, 'FileB.TXT'),
    );
  });

  it('should find files case-insensitively by default (pattern: *.TXT)', async () => {
    const result = await run({ pattern: '*.TXT' });
    expect(result.llmContent).toContain('Found 2 file(s)');
    expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt'));
    expect(result.llmContent).toContain(path.join(tempRootDir, 'FileB.TXT'));
  });

  it('should find files case-insensitively when case_sensitive is false (pattern: *.TXT)', async () => {
    const result = await run({ pattern: '*.TXT', case_sensitive: false });
    expect(result.llmContent).toContain('Found 2 file(s)');
    expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt'));
    expect(result.llmContent).toContain(path.join(tempRootDir, 'FileB.TXT'));
  });

  it('should find files using a pattern that includes a subdirectory', async () => {
    const result = await run({ pattern: 'sub/*.md' });
    expect(result.llmContent).toContain('Found 2 file(s)');
    expect(result.llmContent).toContain(
      path.join(tempRootDir, 'sub', 'fileC.md'),
    );
    expect(result.llmContent).toContain(
      path.join(tempRootDir, 'sub', 'FileD.MD'),
    );
  });

  it('should find files in a specified relative path (relative to rootDir)', async () => {
    const result = await run({ pattern: '*.md', path: 'sub' });
    expect(result.llmContent).toContain('Found 2 file(s)');
    expect(result.llmContent).toContain(
      path.join(tempRootDir, 'sub', 'fileC.md'),
    );
    expect(result.llmContent).toContain(
      path.join(tempRootDir, 'sub', 'FileD.MD'),
    );
  });

  it('should find files using a deep globstar pattern (e.g., **/*.log)', async () => {
    const result = await run({ pattern: '**/*.log' });
    expect(result.llmContent).toContain('Found 1 file(s)');
    expect(result.llmContent).toContain(
      path.join(tempRootDir, 'sub', 'deep', 'fileE.log'),
    );
  });

  it('should return "No files found" message when pattern matches nothing', async () => {
    const result = await run({ pattern: '*.nonexistent' });
    expect(result.llmContent).toContain(
      'No files found matching pattern "*.nonexistent"',
    );
    expect(result.returnDisplay).toBe('No files found');
  });

  it('should correctly sort files by modification time (newest first)', async () => {
    const result = await run({ pattern: '*.sortme' });
    expect(result.llmContent).toContain('Found 2 file(s)');
    // The first line is the summary header and the matches follow, one per
    // line. Split on newlines rather than on the first ':' because the header
    // embeds the absolute search directory, which itself contains a colon on
    // Windows (drive letter).
    const filesListed = result.llmContent.trim().split('\n').slice(1);
    expect(filesListed).toHaveLength(2);
    expect(filesListed[0]).toContain(path.join(tempRootDir, 'newer.sortme'));
    expect(filesListed[1]).toContain(path.join(tempRootDir, 'older.sortme'));
  });
});
describe('validateToolParams', () => {
  // Message fragments the validator is expected to produce.
  const SCHEMA_ERROR = 'Parameters failed schema validation';
  const EMPTY_PATTERN_ERROR = "The 'pattern' parameter cannot be empty.";

  // Validates against the tool instance created for the current test.
  const validate = (candidate: GlobToolParams) =>
    globTool.validateToolParams(candidate);

  it('should return null for valid parameters (pattern only)', () => {
    const patternOnly: GlobToolParams = { pattern: '*.js' };
    expect(validate(patternOnly)).toBeNull();
  });

  it('should return null for valid parameters (pattern and path)', () => {
    const withPath: GlobToolParams = { pattern: '*.js', path: 'sub' };
    expect(validate(withPath)).toBeNull();
  });

  it('should return null for valid parameters (pattern, path, and case_sensitive)', () => {
    const allFields: GlobToolParams = {
      pattern: '*.js',
      path: 'sub',
      case_sensitive: true,
    };
    expect(validate(allFields)).toBeNull();
  });

  it('should return error if pattern is missing (schema validation)', () => {
    // Deliberately malformed input, so the cast bypasses the compiler.
    const missingPattern = { path: '.' } as unknown as GlobToolParams;
    expect(validate(missingPattern)).toContain(SCHEMA_ERROR);
  });

  it('should return error if pattern is an empty string', () => {
    const emptyPattern: GlobToolParams = { pattern: '' };
    expect(validate(emptyPattern)).toContain(EMPTY_PATTERN_ERROR);
  });

  it('should return error if pattern is only whitespace', () => {
    const blankPattern: GlobToolParams = { pattern: ' ' };
    expect(validate(blankPattern)).toContain(EMPTY_PATTERN_ERROR);
  });

  it('should return error if path is provided but is not a string (schema validation)', () => {
    const numericPath = {
      pattern: '*.ts',
      path: 123,
    } as unknown as GlobToolParams;
    expect(validate(numericPath)).toContain(SCHEMA_ERROR);
  });

  it('should return error if case_sensitive is provided but is not a boolean (schema validation)', () => {
    const stringFlag = {
      pattern: '*.ts',
      case_sensitive: 'true',
    } as unknown as GlobToolParams;
    expect(validate(stringFlag)).toContain(SCHEMA_ERROR);
  });

  it("should return error if search path resolves outside the tool's root directory", () => {
    // Use a dedicated tool rooted at <tempRootDir>/sub, then request a search
    // path that climbs far above that root so it lands outside it.
    const nestedRoot = path.join(tempRootDir, 'sub');
    const nestedTool = new GlobTool(nestedRoot);
    const escapingParams: GlobToolParams = {
      pattern: '*.txt',
      path: '../../../../../../../../../../tmp',
    };
    expect(nestedTool.validateToolParams(escapingParams)).toContain(
      "resolves outside the tool's root directory",
    );
  });

  it('should return error if specified search path does not exist', async () => {
    const missingDir: GlobToolParams = {
      pattern: '*.txt',
      path: 'nonexistent_subdir',
    };
    expect(validate(missingDir)).toContain('Search path does not exist');
  });

  it('should return error if specified search path is a file, not a directory', async () => {
    // fileA.txt is created as a regular file by the suite's setup.
    const fileAsDir: GlobToolParams = { pattern: '*.txt', path: 'fileA.txt' };
    expect(validate(fileAsDir)).toContain('Search path is not a directory');
  });
});
});

View File

@ -24,6 +24,11 @@ export interface GlobToolParams {
* The directory to search in (optional, defaults to current directory)
*/
path?: string;
/**
* Whether the search should be case-sensitive (optional, defaults to false)
*/
case_sensitive?: boolean;
}
/**
@ -44,7 +49,7 @@ export class GlobTool extends BaseTool<GlobToolParams, ToolResult> {
properties: {
pattern: {
description:
"The glob pattern to match against (e.g., '*.py', 'src/**/*.js', 'docs/*.md').",
"The glob pattern to match against (e.g., '**/*.py', 'docs/*.md').",
type: 'string',
},
path: {
@ -52,6 +57,11 @@ export class GlobTool extends BaseTool<GlobToolParams, ToolResult> {
'Optional: The absolute path to the directory to search within. If omitted, searches the root directory.',
type: 'string',
},
case_sensitive: {
description:
'Optional: Whether the search should be case-sensitive. Defaults to false.',
type: 'boolean',
},
},
required: ['pattern'],
type: 'object',
@ -88,7 +98,7 @@ export class GlobTool extends BaseTool<GlobToolParams, ToolResult> {
params,
)
) {
return "Parameters failed schema validation. Ensure 'pattern' is a string and 'path' (if provided) is a string.";
return "Parameters failed schema validation. Ensure 'pattern' is a string, 'path' (if provided) is a string, and 'case_sensitive' (if provided) is a boolean.";
}
const searchDirAbsolute = path.resolve(
@ -100,12 +110,13 @@ export class GlobTool extends BaseTool<GlobToolParams, ToolResult> {
return `Search path ("${searchDirAbsolute}") resolves outside the tool's root directory ("${this.rootDirectory}").`;
}
const targetDir = searchDirAbsolute || this.rootDirectory;
try {
if (!fs.existsSync(searchDirAbsolute)) {
return `Search path does not exist: ${shortenPath(makeRelative(searchDirAbsolute, this.rootDirectory))} (absolute: ${searchDirAbsolute})`;
if (!fs.existsSync(targetDir)) {
return `Search path does not exist ${targetDir}`;
}
if (!fs.statSync(searchDirAbsolute).isDirectory()) {
return `Search path is not a directory: ${shortenPath(makeRelative(searchDirAbsolute, this.rootDirectory))} (absolute: ${searchDirAbsolute})`;
if (!fs.statSync(targetDir).isDirectory()) {
return `Search path is not a directory: ${targetDir}`;
}
} catch (e: unknown) {
return `Error accessing search path: ${e}`;
@ -162,15 +173,15 @@ export class GlobTool extends BaseTool<GlobToolParams, ToolResult> {
onlyFiles: true,
stats: true,
dot: true,
caseSensitiveMatch: params.case_sensitive ?? false,
ignore: ['**/node_modules/**', '**/.git/**'],
followSymbolicLinks: false,
suppressErrors: true,
});
if (!entries || entries.length === 0) {
const displayPath = makeRelative(searchDirAbsolute, this.rootDirectory);
return {
llmContent: `No files found matching pattern "${params.pattern}" within ${displayPath || '.'}.`,
llmContent: `No files found matching pattern "${params.pattern}" within ${searchDirAbsolute}.`,
returnDisplay: `No files found`,
};
}
@ -182,22 +193,11 @@ export class GlobTool extends BaseTool<GlobToolParams, ToolResult> {
});
const sortedAbsolutePaths = entries.map((entry) => entry.path);
const sortedRelativePaths = sortedAbsolutePaths.map((absPath) =>
makeRelative(absPath, this.rootDirectory),
);
const fileListDescription = sortedRelativePaths.join('\n');
const fileCount = sortedRelativePaths.length;
const relativeSearchDir = makeRelative(
searchDirAbsolute,
this.rootDirectory,
);
const displayPath = shortenPath(
relativeSearchDir === '.' ? 'root directory' : relativeSearchDir,
);
const fileListDescription = sortedAbsolutePaths.join('\n');
const fileCount = sortedAbsolutePaths.length;
return {
llmContent: `Found ${fileCount} file(s) matching "${params.pattern}" within ${displayPath}, sorted by modification time (newest first):\n${fileListDescription}`,
llmContent: `Found ${fileCount} file(s) matching "${params.pattern}" within ${searchDirAbsolute}, sorted by modification time (newest first):\n${fileListDescription}`,
returnDisplay: `Found ${fileCount} matching file(s)`,
};
} catch (error) {