292 lines
8.8 KiB
TypeScript
292 lines
8.8 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2025 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import path from 'node:path';
|
|
import fs from 'node:fs';
|
|
import { fdir } from 'fdir';
|
|
import picomatch from 'picomatch';
|
|
import { Ignore } from './ignore.js';
|
|
import { ResultCache } from './result-cache.js';
|
|
import * as cache from './crawlCache.js';
|
|
import { Fzf, FzfResultItem } from 'fzf';
|
|
|
|
/**
 * Configuration accepted by the `FileSearch` constructor.
 */
export type FileSearchOptions = {
  /** Directory to search. It is resolved to an absolute path with `path.resolve`. */
  projectRoot: string;
  /**
   * Extra directories to ignore. Each entry is registered as a directory
   * ignore rule (a trailing `/` is appended when missing); `.git` is always
   * added in addition to these.
   */
  ignoreDirs: string[];
  /** When true, rules are loaded from `<projectRoot>/.gitignore` if it exists. */
  useGitignore: boolean;
  /** When true, rules are loaded from `<projectRoot>/.geminiignore` if it exists. */
  useGeminiignore: boolean;
  /** When true, crawl results are read from and written to the crawl cache. */
  cache: boolean;
  /**
   * Lifetime of a cached crawl. The value is multiplied by 1000 before being
   * handed to the crawl cache (presumably seconds converted to milliseconds;
   * confirm against `crawlCache`).
   */
  cacheTtl: number;
  /** Optional maximum directory depth passed to the crawler. */
  maxDepth?: number;
};
|
|
|
|
/**
 * Error thrown when a search is cancelled through an `AbortSignal`.
 * Its `name` is set to `'AbortError'` so callers can distinguish it from
 * other failures.
 */
export class AbortError extends Error {
  /**
   * @param message Human-readable description; defaults to `'Search aborted'`.
   */
  constructor(message = 'Search aborted') {
    super(message);
    this.name = 'AbortError';
  }
}
|
|
|
|
/**
 * Filters a list of paths based on a given pattern.
 *
 * Matching uses picomatch with `dot`, `contains` and `nocase` enabled, so
 * dotfiles are included, the pattern may match anywhere inside a path, and
 * matching is case-insensitive. The input array is not modified.
 *
 * @param allPaths The list of all paths to filter.
 * @param pattern The picomatch pattern to filter by.
 * @param signal An AbortSignal to cancel the operation.
 * @returns A promise that resolves to the filtered and sorted list of paths:
 * entries ending in `/` (directories) come first, then plain string order.
 * @throws {AbortError} If `signal` is aborted when it is checked, which
 * happens once every 1000 paths (including before the first one).
 */
export async function filter(
  allPaths: string[],
  pattern: string,
  signal: AbortSignal | undefined,
): Promise<string[]> {
  const patternFilter = picomatch(pattern, {
    dot: true,
    contains: true,
    nocase: true,
  });

  const results: string[] = [];
  for (const [i, p] of allPaths.entries()) {
    // Yield control to the event loop periodically to prevent blocking, and
    // use the same checkpoint to honor cancellation.
    if (i % 1000 === 0) {
      await new Promise((resolve) => setImmediate(resolve));
      if (signal?.aborted) {
        throw new AbortError();
      }
    }

    if (patternFilter(p)) {
      results.push(p);
    }
  }

  results.sort((a, b) => {
    const aIsDir = a.endsWith('/');
    const bIsDir = b.endsWith('/');

    // Directories sort ahead of files.
    if (aIsDir && !bIsDir) return -1;
    if (!aIsDir && bIsDir) return 1;

    // Plain code-unit comparison (case-sensitive). This is 40% faster than
    // localeCompare, and the only thing localeCompare would really add is
    // locale-aware (e.g. case-insensitive) ordering.
    return a < b ? -1 : a > b ? 1 : 0;
  });

  return results;
}
|
|
|
|
/**
 * Filters a list of paths based on a given pattern using fzf.
 *
 * A new `Fzf` finder is built over `allPaths` on every call; the order of
 * the returned items is whatever `Fzf.find` produces.
 *
 * @param allPaths The list of all paths to filter.
 * @param pattern The fzf pattern to filter by.
 * @returns The matching paths, extracted from the fzf result entries.
 */
function filterByFzf(allPaths: string[], pattern: string): string[] {
  const finder = new Fzf(allPaths);
  return finder.find(pattern).map((entry: FzfResultItem) => entry.item);
}
|
|
|
|
/**
 * Per-call options for `FileSearch.search`.
 */
export type SearchOptions = {
  /** Signal used to cancel an in-flight search; cancellation surfaces as an `AbortError`. */
  signal?: AbortSignal;
  /** Upper bound on the number of returned paths; unlimited when omitted. */
  maxResults?: number;
};
|
|
|
|
/**
 * Provides a fast and efficient way to search for files within a project,
 * respecting .gitignore and .geminiignore rules, and utilizing caching
 * for improved performance.
 */
export class FileSearch {
  private readonly absoluteDir: string;
  private readonly ignore: Ignore = new Ignore();
  private resultCache: ResultCache | undefined;
  private allFiles: string[] = [];

  /**
   * Constructs a new `FileSearch` instance.
   * @param options Configuration options for the file search.
   */
  constructor(private readonly options: FileSearchOptions) {
    this.absoluteDir = path.resolve(options.projectRoot);
  }

  /**
   * Initializes the file search engine by loading ignore rules, crawling the
   * file system, and building the in-memory cache. This method must be called
   * before performing any searches.
   */
  async initialize(): Promise<void> {
    this.loadIgnoreRules();
    await this.crawlFiles();
    this.buildResultCache();
  }

  /**
   * Searches for files matching a given pattern.
   *
   * Patterns containing `*` are matched with picomatch against the candidates
   * supplied by the result cache; any other pattern is matched with fzf
   * against the full crawl. An empty pattern is treated as `'*'`.
   *
   * @param pattern The picomatch pattern to search for (e.g., '*.js', 'src/**').
   * @param options Search options, including an AbortSignal and maxResults.
   * @returns A promise that resolves to a list of matching file paths, relative
   * to the project root.
   * @throws {Error} If `initialize()` has not completed.
   * @throws {AbortError} If `options.signal` is already aborted when the search
   * starts, or is aborted while results are being processed.
   */
  async search(
    pattern: string,
    options: SearchOptions = {},
  ): Promise<string[]> {
    // Capture the cache once so it is narrowed for the rest of the method
    // without needing non-null assertions.
    const resultCache = this.resultCache;
    if (!resultCache) {
      throw new Error('Engine not initialized. Call initialize() first.');
    }

    const { signal } = options;
    const maxResults = options.maxResults ?? Infinity;

    // The fzf branch below is synchronous and cannot be interrupted, so honor
    // an already-aborted signal before doing any matching work.
    if (signal?.aborted) {
      throw new AbortError();
    }

    const effectivePattern = pattern || '*';

    const { files: candidates, isExactMatch } =
      await resultCache.get(effectivePattern);

    let filteredCandidates: string[];
    if (isExactMatch) {
      // The cache already holds the result for this exact pattern.
      filteredCandidates = candidates;
    } else {
      // Apply the user's pattern: picomatch for globs, fzf otherwise.
      filteredCandidates = effectivePattern.includes('*')
        ? await filter(candidates, effectivePattern, signal)
        : filterByFzf(this.allFiles, effectivePattern);
      resultCache.set(effectivePattern, filteredCandidates);
    }

    // Trade-off: We apply a two-stage filtering process.
    // 1. During the file system crawl (`performCrawl`), we only apply
    //    directory-level ignore rules (e.g., `node_modules/`, `dist/`). This is
    //    because applying a full ignore filter (which includes file-specific
    //    patterns like `*.log`) during the crawl can significantly slow down
    //    `fdir`.
    // 2. Here, in the `search` method, we apply the full ignore filter
    //    (including file patterns) to the `filteredCandidates` (which have
    //    already been filtered by the user's search pattern and sorted). For
    //    autocomplete, the number of displayed results is small
    //    (MAX_SUGGESTIONS_TO_SHOW), so applying the full filter to this
    //    truncated list is much more efficient than applying it to every file
    //    during the initial crawl.
    const fileFilter = this.ignore.getFileFilter();
    const results: string[] = [];
    for (const [i, candidate] of filteredCandidates.entries()) {
      // Yield to the event loop to avoid blocking on large result sets.
      if (i % 1000 === 0) {
        await new Promise((resolve) => setImmediate(resolve));
        if (signal?.aborted) {
          throw new AbortError();
        }
      }

      if (results.length >= maxResults) {
        break;
      }
      // The `ignore` library throws an error if the path is '.', so we skip it.
      if (candidate === '.') {
        continue;
      }
      // `fileFilter` returns true for paths that should be ignored.
      if (!fileFilter(candidate)) {
        results.push(candidate);
      }
    }
    return results;
  }

  /**
   * Loads ignore rules from .gitignore and .geminiignore files, and applies
   * any additional ignore directories specified in the options.
   */
  private loadIgnoreRules(): void {
    if (this.options.useGitignore) {
      this.addIgnoreFileIfExists('.gitignore');
    }

    if (this.options.useGeminiignore) {
      this.addIgnoreFileIfExists('.geminiignore');
    }

    // `.git` is always ignored; every entry is registered as a directory rule
    // by making sure it ends with '/'.
    const ignoreDirs = ['.git', ...this.options.ignoreDirs];
    this.ignore.add(
      ignoreDirs.map((dir) => (dir.endsWith('/') ? dir : `${dir}/`)),
    );
  }

  /**
   * Adds the contents of an ignore file located directly in the project root
   * to the ignore rules. Does nothing when the file does not exist.
   * @param fileName Name of the ignore file, relative to the project root.
   */
  private addIgnoreFileIfExists(fileName: string): void {
    const ignoreFilePath = path.join(this.absoluteDir, fileName);
    if (fs.existsSync(ignoreFilePath)) {
      this.ignore.add(fs.readFileSync(ignoreFilePath, 'utf8'));
    }
  }

  /**
   * Crawls the file system to get a list of all files and directories,
   * optionally using a cache for faster initialization.
   */
  private async crawlFiles(): Promise<void> {
    if (!this.options.cache) {
      this.allFiles = await this.performCrawl();
      return;
    }

    // The key covers the root directory, the ignore-rule fingerprint and the
    // depth limit; it is computed once and reused for both read and write.
    const cacheKey = cache.getCacheKey(
      this.absoluteDir,
      this.ignore.getFingerprint(),
      this.options.maxDepth,
    );

    const cachedResults = cache.read(cacheKey);
    if (cachedResults) {
      this.allFiles = cachedResults;
      return;
    }

    this.allFiles = await this.performCrawl();
    cache.write(cacheKey, this.allFiles, this.options.cacheTtl * 1000);
  }

  /**
   * Performs the actual file system crawl using `fdir`, applying directory
   * ignore rules.
   * @returns A promise that resolves to a list of all files and directories.
   */
  private async performCrawl(): Promise<string[]> {
    const dirFilter = this.ignore.getDirectoryFilter();

    // We use `fdir` for fast file system traversal. A key performance
    // optimization for large workspaces is to exclude entire directories
    // early in the traversal process. This is why we apply directory-specific
    // ignore rules (e.g., `node_modules/`, `dist/`) directly to `fdir`'s
    // exclude filter.
    const api = new fdir()
      .withRelativePaths()
      .withDirs()
      .withPathSeparator('/') // Always use unix style paths
      .exclude((_, dirPath) => {
        // `path.relative` returns platform separators, so normalize to '/'
        // to keep the filter input consistent with the crawled paths.
        const relativePath = path
          .relative(this.absoluteDir, dirPath)
          .split(path.sep)
          .join('/');
        return dirFilter(`${relativePath}/`);
      });

    if (this.options.maxDepth !== undefined) {
      api.withMaxDepth(this.options.maxDepth);
    }

    return api.crawl(this.absoluteDir).withPromise();
  }

  /**
   * Builds the in-memory cache for fast pattern matching.
   */
  private buildResultCache(): void {
    this.resultCache = new ResultCache(this.allFiles, this.absoluteDir);
  }
}
|