gemini-cli/packages/core/src/utils/filesearch/fileSearch.ts

292 lines
8.8 KiB
TypeScript

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import path from 'node:path';
import fs from 'node:fs';
import { fdir } from 'fdir';
import picomatch from 'picomatch';
import { Ignore } from './ignore.js';
import { ResultCache } from './result-cache.js';
import * as cache from './crawlCache.js';
import { Fzf, FzfResultItem } from 'fzf';
export type FileSearchOptions = {
projectRoot: string;
ignoreDirs: string[];
useGitignore: boolean;
useGeminiignore: boolean;
cache: boolean;
cacheTtl: number;
maxDepth?: number;
};
export class AbortError extends Error {
constructor(message = 'Search aborted') {
super(message);
this.name = 'AbortError';
}
}
/**
* Filters a list of paths based on a given pattern.
* @param allPaths The list of all paths to filter.
* @param pattern The picomatch pattern to filter by.
* @param signal An AbortSignal to cancel the operation.
* @returns A promise that resolves to the filtered and sorted list of paths.
*/
export async function filter(
allPaths: string[],
pattern: string,
signal: AbortSignal | undefined,
): Promise<string[]> {
const patternFilter = picomatch(pattern, {
dot: true,
contains: true,
nocase: true,
});
const results: string[] = [];
for (const [i, p] of allPaths.entries()) {
// Yield control to the event loop periodically to prevent blocking.
if (i % 1000 === 0) {
await new Promise((resolve) => setImmediate(resolve));
if (signal?.aborted) {
throw new AbortError();
}
}
if (patternFilter(p)) {
results.push(p);
}
}
results.sort((a, b) => {
const aIsDir = a.endsWith('/');
const bIsDir = b.endsWith('/');
if (aIsDir && !bIsDir) return -1;
if (!aIsDir && bIsDir) return 1;
// This is 40% faster than localeCompare and the only thing we would really
// gain from localeCompare is case-sensitive sort
return a < b ? -1 : a > b ? 1 : 0;
});
return results;
}
/**
* Filters a list of paths based on a given pattern using fzf.
* @param allPaths The list of all paths to filter.
* @param pattern The fzf pattern to filter by.
* @returns The filtered and sorted list of paths.
*/
function filterByFzf(allPaths: string[], pattern: string) {
return new Fzf(allPaths)
.find(pattern)
.map((entry: FzfResultItem) => entry.item);
}
export type SearchOptions = {
signal?: AbortSignal;
maxResults?: number;
};
/**
* Provides a fast and efficient way to search for files within a project,
* respecting .gitignore and .geminiignore rules, and utilizing caching
* for improved performance.
*/
export class FileSearch {
private readonly absoluteDir: string;
private readonly ignore: Ignore = new Ignore();
private resultCache: ResultCache | undefined;
private allFiles: string[] = [];
/**
* Constructs a new `FileSearch` instance.
* @param options Configuration options for the file search.
*/
constructor(private readonly options: FileSearchOptions) {
this.absoluteDir = path.resolve(options.projectRoot);
}
/**
* Initializes the file search engine by loading ignore rules, crawling the
* file system, and building the in-memory cache. This method must be called
* before performing any searches.
*/
async initialize(): Promise<void> {
this.loadIgnoreRules();
await this.crawlFiles();
this.buildResultCache();
}
/**
* Searches for files matching a given pattern.
* @param pattern The picomatch pattern to search for (e.g., '*.js', 'src/**').
* @param options Search options, including an AbortSignal and maxResults.
* @returns A promise that resolves to a list of matching file paths, relative
* to the project root.
*/
async search(
pattern: string,
options: SearchOptions = {},
): Promise<string[]> {
if (!this.resultCache) {
throw new Error('Engine not initialized. Call initialize() first.');
}
pattern = pattern || '*';
const { files: candidates, isExactMatch } =
await this.resultCache!.get(pattern);
let filteredCandidates;
if (isExactMatch) {
filteredCandidates = candidates;
} else {
// Apply the user's picomatch pattern filter
filteredCandidates = pattern.includes('*')
? await filter(candidates, pattern, options.signal)
: filterByFzf(this.allFiles, pattern);
this.resultCache!.set(pattern, filteredCandidates);
}
// Trade-off: We apply a two-stage filtering process.
// 1. During the file system crawl (`performCrawl`), we only apply directory-level
// ignore rules (e.g., `node_modules/`, `dist/`). This is because applying
// a full ignore filter (which includes file-specific patterns like `*.log`)
// during the crawl can significantly slow down `fdir`.
// 2. Here, in the `search` method, we apply the full ignore filter
// (including file patterns) to the `filteredCandidates` (which have already
// been filtered by the user's search pattern and sorted). For autocomplete,
// the number of displayed results is small (MAX_SUGGESTIONS_TO_SHOW),
// so applying the full filter to this truncated list is much more efficient
// than applying it to every file during the initial crawl.
const fileFilter = this.ignore.getFileFilter();
const results: string[] = [];
for (const [i, candidate] of filteredCandidates.entries()) {
// Yield to the event loop to avoid blocking on large result sets.
if (i % 1000 === 0) {
await new Promise((resolve) => setImmediate(resolve));
if (options.signal?.aborted) {
throw new AbortError();
}
}
if (results.length >= (options.maxResults ?? Infinity)) {
break;
}
// The `ignore` library throws an error if the path is '.', so we skip it.
if (candidate === '.') {
continue;
}
if (!fileFilter(candidate)) {
results.push(candidate);
}
}
return results;
}
/**
* Loads ignore rules from .gitignore and .geminiignore files, and applies
* any additional ignore directories specified in the options.
*/
private loadIgnoreRules(): void {
if (this.options.useGitignore) {
const gitignorePath = path.join(this.absoluteDir, '.gitignore');
if (fs.existsSync(gitignorePath)) {
this.ignore.add(fs.readFileSync(gitignorePath, 'utf8'));
}
}
if (this.options.useGeminiignore) {
const geminiignorePath = path.join(this.absoluteDir, '.geminiignore');
if (fs.existsSync(geminiignorePath)) {
this.ignore.add(fs.readFileSync(geminiignorePath, 'utf8'));
}
}
const ignoreDirs = ['.git', ...this.options.ignoreDirs];
this.ignore.add(
ignoreDirs.map((dir) => {
if (dir.endsWith('/')) {
return dir;
}
return `${dir}/`;
}),
);
}
/**
* Crawls the file system to get a list of all files and directories,
* optionally using a cache for faster initialization.
*/
private async crawlFiles(): Promise<void> {
if (this.options.cache) {
const cacheKey = cache.getCacheKey(
this.absoluteDir,
this.ignore.getFingerprint(),
this.options.maxDepth,
);
const cachedResults = cache.read(cacheKey);
if (cachedResults) {
this.allFiles = cachedResults;
return;
}
}
this.allFiles = await this.performCrawl();
if (this.options.cache) {
const cacheKey = cache.getCacheKey(
this.absoluteDir,
this.ignore.getFingerprint(),
this.options.maxDepth,
);
cache.write(cacheKey, this.allFiles, this.options.cacheTtl * 1000);
}
}
/**
* Performs the actual file system crawl using `fdir`, applying directory
* ignore rules.
* @returns A promise that resolves to a list of all files and directories.
*/
private async performCrawl(): Promise<string[]> {
const dirFilter = this.ignore.getDirectoryFilter();
// We use `fdir` for fast file system traversal. A key performance
// optimization for large workspaces is to exclude entire directories
// early in the traversal process. This is why we apply directory-specific
// ignore rules (e.g., `node_modules/`, `dist/`) directly to `fdir`'s
// exclude filter.
const api = new fdir()
.withRelativePaths()
.withDirs()
.withPathSeparator('/') // Always use unix style paths
.exclude((_, dirPath) => {
const relativePath = path.relative(this.absoluteDir, dirPath);
return dirFilter(`${relativePath}/`);
});
if (this.options.maxDepth !== undefined) {
api.withMaxDepth(this.options.maxDepth);
}
return api.crawl(this.absoluteDir).withPromise();
}
/**
* Builds the in-memory cache for fast pattern matching.
*/
private buildResultCache(): void {
this.resultCache = new ResultCache(this.allFiles, this.absoluteDir);
}
}