/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { BaseTool, ToolResult } from './tools.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { getErrorMessage } from '../utils/errors.js';
import * as fs from 'fs/promises';
import * as path from 'path';
import fg from 'fast-glob';

/**
 * Parameters for the ReadManyFilesTool.
 *
 * `paths` and `include` are concatenated into a single list of glob patterns;
 * `exclude` (plus, by default, the built-in exclusion list) is passed to the
 * glob search as its ignore list.
 */
export interface ReadManyFilesParams {
  /**
   * An array of file paths or directory paths to search within.
   * Paths are relative to the tool's configured target directory.
   * Glob patterns can be used directly in these paths.
   */
  paths: string[];

  /**
   * Optional. Glob patterns for files to include.
   * These are effectively combined with the `paths`: both lists are merged
   * into one set of search patterns.
   * Example: ["*.ts", "src/** /*.md"]
   * (The space in the second example only keeps this comment from being
   * terminated early; a real pattern has no space.)
   */
  include?: string[];

  /**
   * Optional. Glob patterns for files/directories to exclude.
   * Applied as ignore patterns, in addition to the default excludes unless
   * `useDefaultExcludes` is false.
   * Example: ["*.log", "dist/**"]
   */
  exclude?: string[];

  /**
   * Optional. Search directories recursively.
   * This is generally controlled by glob patterns (e.g., `**`).
   * The glob implementation is recursive by default for `**`.
   * For simplicity, we'll rely on `**` for recursion.
   * NOTE(review): `execute()` in this file does not read this flag; it is
   * accepted by the schema only.
   */
  recursive?: boolean;

  /**
   * Optional. Apply default exclusion patterns. Defaults to true.
   */
  useDefaultExcludes?: boolean;
}

/**
 * Default exclusion patterns for commonly ignored directories and binary file types.
 * These are compatible with glob ignore patterns.
 * TODO(adh): Consider making this configurable or extendable through a command line argument.
 * TODO(adh): Look into sharing this list with the glob tool.
*/
const DEFAULT_EXCLUDES: string[] = [
  '**/node_modules/**',
  '**/.git/**',
  '**/.vscode/**',
  '**/.idea/**',
  '**/dist/**',
  '**/build/**',
  '**/coverage/**',
  '**/__pycache__/**',
  '**/*.pyc',
  '**/*.pyo',
  '**/*.bin',
  '**/*.exe',
  '**/*.dll',
  '**/*.so',
  '**/*.dylib',
  '**/*.class',
  '**/*.jar',
  '**/*.war',
  '**/*.zip',
  '**/*.tar',
  '**/*.gz',
  '**/*.bz2',
  '**/*.rar',
  '**/*.7z',
  '**/*.png',
  '**/*.jpg',
  '**/*.jpeg',
  '**/*.gif',
  '**/*.bmp',
  '**/*.tiff',
  '**/*.ico',
  '**/*.pdf',
  '**/*.doc',
  '**/*.docx',
  '**/*.xls',
  '**/*.xlsx',
  '**/*.ppt',
  '**/*.pptx',
  '**/*.odt',
  '**/*.ods',
  '**/*.odp',
  '**/*.DS_Store',
  '**/.env',
];

// Default values for encoding and separator format
const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
const DEFAULT_OUTPUT_SEPARATOR_FORMAT: string = '--- {filePath} ---';

/**
 * Reports whether `candidatePath` is located inside `rootDir` (or is `rootDir`
 * itself), judged purely from the path strings.
 *
 * A raw `startsWith` comparison is not sufficient: it accepts sibling
 * directories that merely share a prefix (root `/work/app` vs.
 * `/work/app-secrets/x`) and it breaks when the two paths use different
 * separators. `path.relative` normalises both sides first.
 *
 * This is a lexical check only; it does not resolve symbolic links.
 *
 * @param rootDir Absolute directory that acts as the boundary.
 * @param candidatePath Absolute path to test.
 * @returns `true` when the relative path from root to candidate never climbs
 *   above the root and does not land on a different drive/root.
 */
function isPathWithinRoot(rootDir: string, candidatePath: string): boolean {
  const relative = path.relative(rootDir, candidatePath);
  // Compare against '..' as a whole segment so that a file literally named
  // '..foo' inside the root is not mistaken for a parent reference.
  const climbsAboveRoot =
    relative === '..' || relative.startsWith(`..${path.sep}`);
  // On Windows, a path on another drive yields an absolute "relative" path.
  return !climbsAboveRoot && !path.isAbsolute(relative);
}

/**
 * Tool implementation for finding and reading multiple text files from the local filesystem
 * within a specified target directory. The content is concatenated.
 * It is intended to run in an environment with access to the local file system (e.g., a Node.js backend).
 */
export class ReadManyFilesTool extends BaseTool<
  ReadManyFilesParams,
  ToolResult
> {
  /** Name under which the tool is registered with the base class. */
  static readonly Name: string = 'readManyFiles';

  /** Absolute, resolved root directory that all searches are scoped to. */
  readonly targetDir: string;

  /**
   * Creates an instance of ReadManyFilesTool.
   * @param targetDir The absolute root directory within which this tool is allowed to operate.
   * All paths provided in `params` will be resolved relative to this directory.
   */
  constructor(targetDir: string) {
    const parameterSchema: Record<string, unknown> = {
      type: 'object',
      properties: {
        paths: {
          type: 'array',
          items: { type: 'string' },
          description:
            "Required. An array of glob patterns or paths relative to the tool's target directory. Examples: ['src/**/*.ts'], ['README.md', 'docs/']",
        },
        include: {
          type: 'array',
          items: { type: 'string' },
          description:
            'Optional. Additional glob patterns to include. These are merged with `paths`. Example: ["*.test.ts"] to specifically add test files if they were broadly excluded.',
          default: [],
        },
        exclude: {
          type: 'array',
          items: { type: 'string' },
          description:
            'Optional. Glob patterns for files/directories to exclude. Added to default excludes if useDefaultExcludes is true. Example: ["**/*.log", "temp/"]',
          default: [],
        },
        recursive: {
          type: 'boolean',
          description:
            'Optional. Whether to search recursively (primarily controlled by `**` in glob patterns). Defaults to true.',
          default: true,
        },
        useDefaultExcludes: {
          type: 'boolean',
          description:
            'Optional. Whether to apply a list of default exclusion patterns (e.g., node_modules, .git, binary files). Defaults to true.',
          default: true,
        },
      },
      required: ['paths'],
    };

    super(
      ReadManyFilesTool.Name,
      'Read Many Files',
      `Reads content from multiple text files specified by paths or glob patterns within a configured target directory and concatenates them into a single string. This tool is useful when you need to understand or analyze a collection of files, such as: - Getting an overview of a codebase or parts of it (e.g., all TypeScript files in the 'src' directory). - Finding where specific functionality is implemented if the user asks broad questions about code. - Reviewing documentation files (e.g., all Markdown files in the 'docs' directory). - Gathering context from multiple configuration files. - When the user asks to "read all files in X directory" or "show me the content of all Y files". Use this tool when the user's query implies needing the content of several files simultaneously for context, analysis, or summarization. It uses default UTF-8 encoding and a '--- {filePath} ---' separator between file contents. Ensure paths are relative to the target directory. Glob patterns like 'src/**/*.js' are supported. Avoid using for single files if a more specific single-file reading tool is available, unless the user specifically requests to process a list containing just one file via this tool.
This tool should NOT be used for binary files; it attempts to skip them. Default excludes apply to common non-text files and large dependency directories unless 'useDefaultExcludes' is false.`,
      parameterSchema,
    );
    this.targetDir = path.resolve(targetDir);
  }

  /**
   * Validates the raw tool parameters.
   *
   * Runs the JSON-schema check first (when a schema is present), then
   * verifies that every entry of `paths` is a non-blank string and that
   * `include` / `exclude`, when supplied, are arrays of strings.
   *
   * @param params Parameters supplied by the caller.
   * @returns A human-readable error message, or `null` when the parameters
   *   are acceptable.
   */
  validateParams(params: ReadManyFilesParams): string | null {
    if (
      this.schema.parameters &&
      !SchemaValidator.validate(
        this.schema.parameters as Record<string, unknown>,
        params,
      )
    ) {
      // Give a more specific message for the most common mistake.
      if (
        !params.paths ||
        !Array.isArray(params.paths) ||
        params.paths.length === 0
      ) {
        return 'The "paths" parameter is required and must be a non-empty array of strings/glob patterns.';
      }
      return 'Parameters failed schema validation. Ensure "paths" is a non-empty array and other parameters match their expected types.';
    }

    for (const p of params.paths) {
      if (typeof p !== 'string' || p.trim() === '') {
        return 'Each item in "paths" must be a non-empty string/glob pattern.';
      }
    }

    if (
      params.include &&
      (!Array.isArray(params.include) ||
        !params.include.every((item) => typeof item === 'string'))
    ) {
      return 'If provided, "include" must be an array of strings/glob patterns.';
    }

    if (
      params.exclude &&
      (!Array.isArray(params.exclude) ||
        !params.exclude.every((item) => typeof item === 'string'))
    ) {
      return 'If provided, "exclude" must be an array of strings/glob patterns.';
    }

    return null;
  }

  /**
   * Builds a one-paragraph, human-readable summary of what `execute` will do
   * for the given parameters: the search patterns, a preview of the first two
   * exclusion patterns, the encoding and the separator format.
   *
   * @param params Parameters the tool would be executed with.
   * @returns The description text.
   */
  getDescription(params: ReadManyFilesParams): string {
    const allPatterns = [...params.paths, ...(params.include || [])];
    const pathDesc = `using patterns: \`${allPatterns.join('`, `')}\` (within target directory: \`${this.targetDir}\`)`;

    let effectiveExcludes =
      params.useDefaultExcludes !== false ? [...DEFAULT_EXCLUDES] : [];
    if (params.exclude && params.exclude.length > 0) {
      effectiveExcludes = [...effectiveExcludes, ...params.exclude];
    }
    const excludeDesc = `Excluding: ${effectiveExcludes.length > 0 ? `patterns like \`${effectiveExcludes.slice(0, 2).join('`, `')}${effectiveExcludes.length > 2 ? '...`' : '`'}` : 'none explicitly (beyond default non-text file avoidance).'}`;

    return `Will attempt to read and concatenate files ${pathDesc}. ${excludeDesc}. File encoding: ${DEFAULT_ENCODING}. Separator: "${DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace('{filePath}', 'path/to/file.ext')}".`;
  }

  /**
   * Searches the target directory for files matching `paths` + `include`
   * (minus the exclusion patterns), reads each match, and concatenates the
   * text content with a `--- <relative path> ---` header per file.
   *
   * Files whose first 1 KiB contains a NUL byte are treated as binary and
   * skipped; files that cannot be read are skipped with the error recorded.
   * Failures are reported through the returned result rather than thrown.
   *
   * @param params Tool parameters; validated with `validateParams` first.
   * @returns `llmContent` holding the concatenated text (or an explanatory
   *   message) and `returnDisplay` holding a Markdown summary of processed
   *   and skipped files.
   */
  async execute(params: ReadManyFilesParams): Promise<ToolResult> {
    const validationError = this.validateParams(params);
    if (validationError) {
      return {
        llmContent: `Error: Invalid parameters for ${this.displayName}. Reason: ${validationError}`,
        returnDisplay: `## Parameter Error\n\n${validationError}`,
      };
    }

    const {
      paths: inputPatterns,
      include = [],
      exclude = [],
      useDefaultExcludes = true,
    } = params;

    const toolBaseDir = this.targetDir;
    const filesToConsider = new Set<string>();
    const skippedFiles: { path: string; reason: string }[] = [];
    const processedFilesRelativePaths: string[] = [];
    let concatenatedContent = '';

    const effectiveExcludes = useDefaultExcludes
      ? [...DEFAULT_EXCLUDES, ...exclude]
      : [...exclude];

    const searchPatterns = [...inputPatterns, ...include];
    if (searchPatterns.length === 0) {
      return {
        llmContent: 'No search paths or include patterns provided.',
        returnDisplay: `## Information\n\nNo search paths or include patterns were specified. Nothing to read or concatenate.`,
      };
    }

    try {
      // Using fast-glob (fg) for file searching based on patterns.
      // The `cwd` option scopes the search to the toolBaseDir.
      // `ignore` handles exclusions.
      // `onlyFiles` ensures only files are returned.
      // `dot` allows matching dotfiles (which can still be excluded by patterns).
      // `absolute` returns absolute paths for consistent handling.
      const entries = await fg(searchPatterns, {
        cwd: toolBaseDir,
        ignore: effectiveExcludes,
        onlyFiles: true,
        dot: true,
        absolute: true,
        caseSensitiveMatch: false,
      });

      for (const absoluteFilePath of entries) {
        // Security check: patterns containing `..` can make the glob library
        // return entries outside targetDir, so every result is verified.
        // The comparison is done on normalised relative paths rather than by
        // string prefix, so sibling directories sharing a prefix are rejected
        // and separator differences do not cause false rejections.
        if (!isPathWithinRoot(toolBaseDir, absoluteFilePath)) {
          skippedFiles.push({
            path: absoluteFilePath,
            reason: `Security: Glob library returned path outside target directory. Base: ${toolBaseDir}, Path: ${absoluteFilePath}`,
          });
          continue;
        }
        filesToConsider.add(absoluteFilePath);
      }
    } catch (error) {
      return {
        llmContent: `Error during file search: ${getErrorMessage(error)}`,
        returnDisplay: `## File Search Error\n\nAn error occurred while searching for files:\n\`\`\`\n${getErrorMessage(error)}\n\`\`\``,
      };
    }

    // Sort for a deterministic output order.
    const sortedFiles = Array.from(filesToConsider).sort();

    for (const filePath of sortedFiles) {
      const relativePathForDisplay = path
        .relative(toolBaseDir, filePath)
        .replace(/\\/g, '/');
      try {
        const contentBuffer = await fs.readFile(filePath);

        // Basic binary detection: check for null bytes in the first 1KB
        const sample = contentBuffer.subarray(
          0,
          Math.min(contentBuffer.length, 1024),
        );
        if (sample.includes(0)) {
          skippedFiles.push({
            path: relativePathForDisplay,
            reason: 'Skipped (appears to be binary)',
          });
          continue;
        }

        // Using default encoding
        const fileContent = contentBuffer.toString(DEFAULT_ENCODING);

        // Using default separator format. A function replacer is used so that
        // `$`-sequences in the file path (`$$`, `$&`, ...) are inserted
        // literally instead of being interpreted as replacement patterns.
        const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
          '{filePath}',
          () => relativePathForDisplay,
        );
        concatenatedContent += `${separator}\n\n${fileContent}\n\n`;
        processedFilesRelativePaths.push(relativePathForDisplay);
      } catch (error) {
        // Best effort: one unreadable file must not abort the whole batch.
        skippedFiles.push({
          path: relativePathForDisplay,
          reason: `Read error: ${getErrorMessage(error)}`,
        });
      }
    }

    let displayMessage = `### Read Many Files Result (Target Dir: \`${this.targetDir}\`)\n\n`;

    if (processedFilesRelativePaths.length > 0) {
      displayMessage += `Successfully read and concatenated content from **${processedFilesRelativePaths.length} file(s)**.\n`;
      displayMessage += `\n**Processed Files (up to 10 shown):**\n`;
      for (const p of processedFilesRelativePaths.slice(0, 10)) {
        displayMessage += `- \`${p}\`\n`;
      }
      if (processedFilesRelativePaths.length > 10) {
        displayMessage += `- ...and ${processedFilesRelativePaths.length - 10} more.\n`;
      }
    } else {
      displayMessage += `No files were read and concatenated based on the criteria.\n`;
    }

    if (skippedFiles.length > 0) {
      displayMessage += `\n**Skipped ${skippedFiles.length} item(s) (up to 5 shown):**\n`;
      for (const f of skippedFiles.slice(0, 5)) {
        displayMessage += `- \`${f.path}\` (Reason: ${f.reason})\n`;
      }
      if (skippedFiles.length > 5) {
        displayMessage += `- ...and ${skippedFiles.length - 5} more.\n`;
      }
    }

    if (
      concatenatedContent.length === 0 &&
      processedFilesRelativePaths.length === 0
    ) {
      concatenatedContent =
        'No files matching the criteria were found or all were skipped.';
    }

    return {
      llmContent: concatenatedContent,
      returnDisplay: displayMessage,
    };
  }
}