feat(core): Create BFS file search utility (#903)

This commit is contained in:
Allen Hutchison 2025-06-11 09:21:23 -07:00 committed by GitHub
parent e2d689ff2f
commit 1d7090b8ac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 256 additions and 105 deletions

View File

@ -0,0 +1,145 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { Dirent, PathLike } from 'fs';
import { vi, describe, it, expect, beforeEach } from 'vitest';
import * as fs from 'fs/promises';
import * as gitUtils from './gitUtils.js';
import { bfsFileSearch } from './bfsFileSearch.js';
vi.mock('fs/promises');
vi.mock('./gitUtils.js');
// Builds a fake fs.Dirent whose type predicates report a file or a directory.
const createMockDirent = (name: string, isFile: boolean): Dirent =>
  Object.assign(new Dirent(), {
    name,
    isFile: () => isFile,
    isDirectory: () => !isFile,
  });
// Narrows fs.readdir to the `{ withFileTypes: true }` overload used in these
// tests, so the mock can be typed without handling readdir's full overload set.
type ReaddirWithFileTypes = (
  path: PathLike,
  options: { withFileTypes: true },
) => Promise<Dirent[]>;
describe('bfsFileSearch', () => {
  beforeEach(() => {
    vi.resetAllMocks();
  });

  /**
   * Wires the mocked fs.readdir to serve listings from a path -> entries map;
   * any path not present in the map resolves to an empty listing.
   */
  const stubDirectoryTree = (tree: Record<string, Dirent[]>) => {
    const readdirMock = vi.mocked(fs).readdir as unknown as ReaddirWithFileTypes;
    vi.mocked(readdirMock).mockImplementation(
      async (dirPath) => tree[dirPath.toString()] ?? [],
    );
  };

  // Shorthand factories for mock directory entries.
  const file = (name: string) => createMockDirent(name, true);
  const dir = (name: string) => createMockDirent(name, false);

  it('should find a file in the root directory', async () => {
    stubDirectoryTree({
      '/test': [file('file1.txt'), file('file2.txt')],
    });

    const result = await bfsFileSearch('/test', { fileName: 'file1.txt' });

    expect(result).toEqual(['/test/file1.txt']);
  });

  it('should find a file in a subdirectory', async () => {
    stubDirectoryTree({
      '/test': [dir('subdir')],
      '/test/subdir': [file('file1.txt')],
    });

    const result = await bfsFileSearch('/test', { fileName: 'file1.txt' });

    expect(result).toEqual(['/test/subdir/file1.txt']);
  });

  it('should ignore specified directories', async () => {
    stubDirectoryTree({
      '/test': [dir('subdir1'), dir('subdir2')],
      '/test/subdir1': [file('file1.txt')],
      '/test/subdir2': [file('file1.txt')],
    });

    const result = await bfsFileSearch('/test', {
      fileName: 'file1.txt',
      ignoreDirs: ['subdir2'],
    });

    expect(result).toEqual(['/test/subdir1/file1.txt']);
  });

  it('should respect maxDirs limit', async () => {
    stubDirectoryTree({
      '/test': [dir('subdir1'), dir('subdir2')],
      '/test/subdir1': [file('file1.txt')],
      '/test/subdir2': [file('file1.txt')],
    });

    // maxDirs: 2 scans '/test' and '/test/subdir1' only; subdir2 is queued
    // but never visited.
    const result = await bfsFileSearch('/test', {
      fileName: 'file1.txt',
      maxDirs: 2,
    });

    expect(result).toEqual(['/test/subdir1/file1.txt']);
  });

  it('should respect .gitignore files', async () => {
    vi.mocked(gitUtils).isGitRepository.mockReturnValue(true);
    stubDirectoryTree({
      '/test': [file('.gitignore'), dir('subdir1'), dir('subdir2')],
      '/test/subdir1': [file('file1.txt')],
      '/test/subdir2': [file('file1.txt')],
    });
    // The .gitignore content the parser will read: exclude subdir2.
    vi.mocked(fs).readFile.mockResolvedValue('subdir2');

    const result = await bfsFileSearch('/test', {
      fileName: 'file1.txt',
      respectGitIgnore: true,
    });

    expect(result).toEqual(['/test/subdir1/file1.txt']);
  });
});

View File

@ -0,0 +1,97 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { GitIgnoreParser, GitIgnoreFilter } from './gitIgnoreParser.js';
import { isGitRepository } from './gitUtils.js';
import * as fs from 'fs/promises';
import * as path from 'path';
import { Dirent } from 'fs';
// Simple console logger for now.
// TODO: Integrate with a more robust server-side logger.
const logger = {
  // Variadic `any[]` mirrors console.debug's own signature.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  debug: (...args: any[]) => console.debug('[DEBUG] [BfsFileSearch]', ...args),
};
/** Options controlling {@link bfsFileSearch}. */
interface BfsFileSearchOptions {
  /** Exact file name to match (compared with `===`; no glob support). */
  fileName: string;
  /** Directory names (not full paths) to skip entirely. */
  ignoreDirs?: string[];
  /** Upper bound on directories scanned; defaults to unlimited. */
  maxDirs?: number;
  /** When true, log each directory as it is scanned. */
  debug?: boolean;
  /** Honor .gitignore rules when inside a git repository (default: true). */
  respectGitIgnore?: boolean;
  /** Root used to locate .gitignore files; defaults to the search rootDir. */
  projectRoot?: string;
}
/**
 * Performs a breadth-first search for a specific file within a directory structure.
 *
 * Directories that cannot be read (e.g. due to permissions) are silently
 * skipped. When `respectGitIgnore` is enabled and `projectRoot` is a git
 * repository, git-ignored paths are excluded from both results and traversal.
 *
 * @param rootDir The directory to start the search from.
 * @param options Configuration for the search.
 * @returns A promise that resolves to an array of paths where the file was found.
 */
export async function bfsFileSearch(
  rootDir: string,
  options: BfsFileSearchOptions,
): Promise<string[]> {
  const {
    fileName,
    ignoreDirs = [],
    maxDirs = Infinity,
    debug = false,
    respectGitIgnore = true,
    projectRoot = rootDir,
  } = options;
  const foundFiles: string[] = [];
  // Index-based queue head instead of Array.shift(): shift() is O(n) per
  // call, which made large scans accidentally quadratic.
  const queue: string[] = [rootDir];
  let head = 0;
  const visited = new Set<string>();
  // Set lookup keeps the per-entry ignore check O(1).
  const ignoredDirNames = new Set(ignoreDirs);
  let scannedDirCount = 0;
  // Only pay for .gitignore parsing when requested and inside a git repo.
  let gitIgnoreFilter: GitIgnoreFilter | null = null;
  if (respectGitIgnore && isGitRepository(projectRoot)) {
    const parser = new GitIgnoreParser(projectRoot);
    await parser.initialize();
    gitIgnoreFilter = parser;
  }
  while (head < queue.length && scannedDirCount < maxDirs) {
    const currentDir = queue[head++];
    // A directory can be enqueued more than once via duplicate paths.
    if (visited.has(currentDir)) {
      continue;
    }
    visited.add(currentDir);
    scannedDirCount++;
    if (debug) {
      logger.debug(`Scanning [${scannedDirCount}/${maxDirs}]: ${currentDir}`);
    }
    let entries: Dirent[];
    try {
      entries = await fs.readdir(currentDir, { withFileTypes: true });
    } catch {
      // Ignore errors for directories we can't read (e.g., permissions)
      continue;
    }
    for (const entry of entries) {
      const fullPath = path.join(currentDir, entry.name);
      if (gitIgnoreFilter?.isIgnored(fullPath)) {
        continue;
      }
      if (entry.isDirectory()) {
        if (!ignoredDirNames.has(entry.name)) {
          queue.push(fullPath);
        }
      } else if (entry.isFile() && entry.name === fileName) {
        foundFiles.push(fullPath);
      }
    }
  }
  return foundFiles;
}

View File

@ -512,13 +512,7 @@ describe('loadServerHierarchicalMemory', () => {
] as Dirent[];
}
if (p === ignoredDir) {
return [
{
name: ORIGINAL_GEMINI_MD_FILENAME_CONST_FOR_TEST,
isFile: () => true,
isDirectory: () => false,
} as Dirent,
] as Dirent[];
return [] as Dirent[];
}
return [] as Dirent[];
}) as unknown as typeof fsPromises.readdir);
@ -565,10 +559,8 @@ describe('loadServerHierarchicalMemory', () => {
await loadServerHierarchicalMemory(CWD, true);
expect(consoleDebugSpy).toHaveBeenCalledWith(
expect.stringContaining('[DEBUG] [MemoryDiscovery]'),
expect.stringContaining(
'Max directory scan limit (200) reached. Stopping downward scan at:',
),
expect.stringContaining('[DEBUG] [BfsFileSearch]'),
expect.stringContaining('Scanning [200/200]:'),
);
consoleDebugSpy.mockRestore();
});

View File

@ -8,6 +8,7 @@ import * as fs from 'fs/promises';
import * as fsSync from 'fs';
import * as path from 'path';
import { homedir } from 'os';
import { bfsFileSearch } from './bfsFileSearch.js';
import {
GEMINI_CONFIG_DIR,
getCurrentGeminiMdFilename,
@ -26,19 +27,6 @@ const logger = {
console.error('[ERROR] [MemoryDiscovery]', ...args),
};
// TODO(adh): Refactor to use a shared ignore list with other tools like glob and read-many-files.
const DEFAULT_IGNORE_DIRECTORIES = [
'node_modules',
'.git',
'dist',
'build',
'out',
'coverage',
'.vscode',
'.idea',
'.DS_Store',
];
const MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY = 200;
interface GeminiFileContent {
@ -89,76 +77,6 @@ async function findProjectRoot(startDir: string): Promise<string | null> {
}
}
async function collectDownwardGeminiFiles(
directory: string,
debugMode: boolean,
ignoreDirs: string[],
scannedDirCount: { count: number },
maxScanDirs: number,
): Promise<string[]> {
if (scannedDirCount.count >= maxScanDirs) {
if (debugMode)
logger.debug(
`Max directory scan limit (${maxScanDirs}) reached. Stopping downward scan at: ${directory}`,
);
return [];
}
scannedDirCount.count++;
if (debugMode)
logger.debug(
`Scanning downward for ${getCurrentGeminiMdFilename()} files in: ${directory} (scanned: ${scannedDirCount.count}/${maxScanDirs})`,
);
const collectedPaths: string[] = [];
try {
const entries = await fs.readdir(directory, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(directory, entry.name);
if (entry.isDirectory()) {
if (ignoreDirs.includes(entry.name)) {
if (debugMode)
logger.debug(`Skipping ignored directory: ${fullPath}`);
continue;
}
const subDirPaths = await collectDownwardGeminiFiles(
fullPath,
debugMode,
ignoreDirs,
scannedDirCount,
maxScanDirs,
);
collectedPaths.push(...subDirPaths);
} else if (
entry.isFile() &&
entry.name === getCurrentGeminiMdFilename()
) {
try {
await fs.access(fullPath, fsSync.constants.R_OK);
collectedPaths.push(fullPath);
if (debugMode)
logger.debug(
`Found readable downward ${getCurrentGeminiMdFilename()}: ${fullPath}`,
);
} catch {
if (debugMode)
logger.debug(
`Downward ${getCurrentGeminiMdFilename()} not readable, skipping: ${fullPath}`,
);
}
}
}
} catch (error) {
// Only log warnings in non-test environments
const isTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;
if (!isTestEnv) {
const message = error instanceof Error ? error.message : String(error);
logger.warn(`Error scanning directory ${directory}: ${message}`);
}
if (debugMode) logger.debug(`Failed to scan directory: ${directory}`);
}
return collectedPaths;
}
async function getGeminiMdFilePathsInternal(
currentWorkingDirectory: string,
userHomePath: string,
@ -256,20 +174,19 @@ async function getGeminiMdFilePathsInternal(
}
paths.push(...upwardPaths);
if (debugMode)
logger.debug(`Starting downward scan from CWD: ${resolvedCwd}`);
const scannedDirCount = { count: 0 };
const downwardPaths = await collectDownwardGeminiFiles(
resolvedCwd,
debugMode,
DEFAULT_IGNORE_DIRECTORIES,
scannedDirCount,
MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
);
const downwardPaths = await bfsFileSearch(resolvedCwd, {
fileName: getCurrentGeminiMdFilename(),
maxDirs: MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
debug: debugMode,
respectGitIgnore: true,
projectRoot: projectRoot || resolvedCwd,
});
downwardPaths.sort(); // Sort for consistent ordering, though hierarchy might be more complex
if (debugMode && downwardPaths.length > 0)
logger.debug(
`Found downward ${getCurrentGeminiMdFilename()} files (sorted): ${JSON.stringify(downwardPaths)}`,
`Found downward ${getCurrentGeminiMdFilename()} files (sorted): ${JSON.stringify(
downwardPaths,
)}`,
);
// Add downward paths only if they haven't been included already (e.g. from upward scan)
for (const dPath of downwardPaths) {