From 1d7090b8ac9396a652eedf4fe7744111f81cfe7e Mon Sep 17 00:00:00 2001
From: Allen Hutchison
Date: Wed, 11 Jun 2025 09:21:23 -0700
Subject: [PATCH] feat(core): Create BFS file search utility (#903)

---
 packages/core/src/utils/bfsFileSearch.test.ts | 145 ++++++++++++++++++
 packages/core/src/utils/bfsFileSearch.ts      |  97 ++++++++++++
 .../core/src/utils/memoryDiscovery.test.ts    |  14 +-
 packages/core/src/utils/memoryDiscovery.ts    | 105 ++-----------
 4 files changed, 256 insertions(+), 105 deletions(-)
 create mode 100644 packages/core/src/utils/bfsFileSearch.test.ts
 create mode 100644 packages/core/src/utils/bfsFileSearch.ts

diff --git a/packages/core/src/utils/bfsFileSearch.test.ts b/packages/core/src/utils/bfsFileSearch.test.ts
new file mode 100644
index 00000000..679700ca
--- /dev/null
+++ b/packages/core/src/utils/bfsFileSearch.test.ts
@@ -0,0 +1,145 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { Dirent, PathLike } from 'fs';
+import { vi, describe, it, expect, beforeEach } from 'vitest';
+import * as fs from 'fs/promises';
+import * as gitUtils from './gitUtils.js';
+import { bfsFileSearch } from './bfsFileSearch.js';
+
+vi.mock('fs/promises');
+vi.mock('./gitUtils.js');
+
+const createMockDirent = (name: string, isFile: boolean): Dirent => {
+  const dirent = new Dirent();
+  dirent.name = name;
+  dirent.isFile = () => isFile;
+  dirent.isDirectory = () => !isFile;
+  return dirent;
+};
+
+// Type for the specific overload we're using
+type ReaddirWithFileTypes = (
+  path: PathLike,
+  options: { withFileTypes: true },
+) => Promise<Dirent[]>;
+
+describe('bfsFileSearch', () => {
+  beforeEach(() => {
+    vi.resetAllMocks();
+  });
+
+  it('should find a file in the root directory', async () => {
+    const mockFs = vi.mocked(fs);
+    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
+    vi.mocked(mockReaddir).mockResolvedValue([
+      createMockDirent('file1.txt', true),
+      createMockDirent('file2.txt', true),
+    ]);
+
+    const result = await bfsFileSearch('/test', { fileName: 'file1.txt' });
+    expect(result).toEqual(['/test/file1.txt']);
+  });
+
+  it('should find a file in a subdirectory', async () => {
+    const mockFs = vi.mocked(fs);
+    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
+    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
+      if (dir === '/test') {
+        return [createMockDirent('subdir', false)];
+      }
+      if (dir === '/test/subdir') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      return [];
+    });
+
+    const result = await bfsFileSearch('/test', { fileName: 'file1.txt' });
+    expect(result).toEqual(['/test/subdir/file1.txt']);
+  });
+
+  it('should ignore specified directories', async () => {
+    const mockFs = vi.mocked(fs);
+    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
+    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
+      if (dir === '/test') {
+        return [
+          createMockDirent('subdir1', false),
+          createMockDirent('subdir2', false),
+        ];
+      }
+      if (dir === '/test/subdir1') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      if (dir === '/test/subdir2') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      return [];
+    });
+
+    const result = await bfsFileSearch('/test', {
+      fileName: 'file1.txt',
+      ignoreDirs: ['subdir2'],
+    });
+    expect(result).toEqual(['/test/subdir1/file1.txt']);
+  });
+
+  it('should respect maxDirs limit', async () => {
+    const mockFs = vi.mocked(fs);
+    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
+    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
+      if (dir === '/test') {
+        return [
+          createMockDirent('subdir1', false),
+          createMockDirent('subdir2', false),
+        ];
+      }
+      if (dir === '/test/subdir1') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      if (dir === '/test/subdir2') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      return [];
+    });
+
+    const result = await bfsFileSearch('/test', {
+      fileName: 'file1.txt',
+      maxDirs: 2,
+    });
+    expect(result).toEqual(['/test/subdir1/file1.txt']);
+  });
+
+  it('should respect .gitignore files', async () => {
+    const mockFs = vi.mocked(fs);
+    const mockGitUtils = vi.mocked(gitUtils);
+    mockGitUtils.isGitRepository.mockReturnValue(true);
+    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
+    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
+      if (dir === '/test') {
+        return [
+          createMockDirent('.gitignore', true),
+          createMockDirent('subdir1', false),
+          createMockDirent('subdir2', false),
+        ];
+      }
+      if (dir === '/test/subdir1') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      if (dir === '/test/subdir2') {
+        return [createMockDirent('file1.txt', true)];
+      }
+      return [];
+    });
+    mockFs.readFile.mockResolvedValue('subdir2');
+
+    const result = await bfsFileSearch('/test', {
+      fileName: 'file1.txt',
+      respectGitIgnore: true,
+    });
+    expect(result).toEqual(['/test/subdir1/file1.txt']);
+  });
+});
diff --git a/packages/core/src/utils/bfsFileSearch.ts b/packages/core/src/utils/bfsFileSearch.ts
new file mode 100644
index 00000000..6b05526f
--- /dev/null
+++ b/packages/core/src/utils/bfsFileSearch.ts
@@ -0,0 +1,97 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { GitIgnoreParser, GitIgnoreFilter } from './gitIgnoreParser.js';
+import { isGitRepository } from './gitUtils.js';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import { Dirent } from 'fs';
+
+// Simple console logger for now.
+// TODO: Integrate with a more robust server-side logger.
+const logger = {
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  debug: (...args: any[]) => console.debug('[DEBUG] [BfsFileSearch]', ...args),
+};
+
+interface BfsFileSearchOptions {
+  fileName: string;
+  ignoreDirs?: string[];
+  maxDirs?: number;
+  debug?: boolean;
+  respectGitIgnore?: boolean;
+  projectRoot?: string;
+}
+
+/**
+ * Performs a breadth-first search for a specific file within a directory structure.
+ *
+ * @param rootDir The directory to start the search from.
+ * @param options Configuration for the search.
+ * @returns A promise that resolves to an array of paths where the file was found.
+ */
+export async function bfsFileSearch(
+  rootDir: string,
+  options: BfsFileSearchOptions,
+): Promise<string[]> {
+  const {
+    fileName,
+    ignoreDirs = [],
+    maxDirs = Infinity,
+    debug = false,
+    respectGitIgnore = true,
+    projectRoot = rootDir,
+  } = options;
+  const foundFiles: string[] = [];
+  const queue: string[] = [rootDir];
+  const visited = new Set<string>();
+  let scannedDirCount = 0;
+
+  let gitIgnoreFilter: GitIgnoreFilter | null = null;
+  if (respectGitIgnore && isGitRepository(projectRoot)) {
+    const parser = new GitIgnoreParser(projectRoot);
+    await parser.initialize();
+    gitIgnoreFilter = parser;
+  }
+
+  while (queue.length > 0 && scannedDirCount < maxDirs) {
+    const currentDir = queue.shift()!;
+    if (visited.has(currentDir)) {
+      continue;
+    }
+    visited.add(currentDir);
+    scannedDirCount++;
+
+    if (debug) {
+      logger.debug(`Scanning [${scannedDirCount}/${maxDirs}]: ${currentDir}`);
+    }
+
+    let entries: Dirent[];
+    try {
+      entries = await fs.readdir(currentDir, { withFileTypes: true });
+    } catch {
+      // Ignore errors for directories we can't read (e.g., permissions)
+      continue;
+    }
+
+    for (const entry of entries) {
+      const fullPath = path.join(currentDir, entry.name);
+      if (gitIgnoreFilter?.isIgnored(fullPath)) {
+        continue;
+      }
+
+      if (entry.isDirectory()) {
+        if (!ignoreDirs.includes(entry.name)) {
+          queue.push(fullPath);
+        }
+      } else if (entry.isFile() && entry.name === fileName) {
+        foundFiles.push(fullPath);
+      }
+    }
+  }
+
+  return foundFiles;
+}
diff --git a/packages/core/src/utils/memoryDiscovery.test.ts b/packages/core/src/utils/memoryDiscovery.test.ts
index a9d34bf3..5329a15b 100644
--- a/packages/core/src/utils/memoryDiscovery.test.ts
+++ b/packages/core/src/utils/memoryDiscovery.test.ts
@@ -512,13 +512,7 @@
         ] as Dirent[];
       }
       if (p === ignoredDir) {
-        return [
-          {
-            name: ORIGINAL_GEMINI_MD_FILENAME_CONST_FOR_TEST,
-            isFile: () => true,
-            isDirectory: () => false,
-          } as Dirent,
-        ] as Dirent[];
+        return [] as Dirent[];
       }
       return [] as Dirent[];
     }) as unknown as typeof fsPromises.readdir);
@@ -565,10 +559,8 @@
     await loadServerHierarchicalMemory(CWD, true);
 
     expect(consoleDebugSpy).toHaveBeenCalledWith(
-      expect.stringContaining('[DEBUG] [MemoryDiscovery]'),
-      expect.stringContaining(
-        'Max directory scan limit (200) reached. Stopping downward scan at:',
-      ),
+      expect.stringContaining('[DEBUG] [BfsFileSearch]'),
+      expect.stringContaining('Scanning [200/200]:'),
     );
     consoleDebugSpy.mockRestore();
   });
diff --git a/packages/core/src/utils/memoryDiscovery.ts b/packages/core/src/utils/memoryDiscovery.ts
index 221bf2c6..6e822145 100644
--- a/packages/core/src/utils/memoryDiscovery.ts
+++ b/packages/core/src/utils/memoryDiscovery.ts
@@ -8,6 +8,7 @@ import * as fs from 'fs/promises';
 import * as fsSync from 'fs';
 import * as path from 'path';
 import { homedir } from 'os';
+import { bfsFileSearch } from './bfsFileSearch.js';
 import {
   GEMINI_CONFIG_DIR,
   getCurrentGeminiMdFilename,
@@ -26,19 +27,6 @@ const logger = {
     console.error('[ERROR] [MemoryDiscovery]', ...args),
 };
 
-// TODO(adh): Refactor to use a shared ignore list with other tools like glob and read-many-files.
-const DEFAULT_IGNORE_DIRECTORIES = [
-  'node_modules',
-  '.git',
-  'dist',
-  'build',
-  'out',
-  'coverage',
-  '.vscode',
-  '.idea',
-  '.DS_Store',
-];
-
 const MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY = 200;
 
 interface GeminiFileContent {
@@ -89,76 +77,6 @@ async function findProjectRoot(startDir: string): Promise<string | null> {
   }
 }
 
-async function collectDownwardGeminiFiles(
-  directory: string,
-  debugMode: boolean,
-  ignoreDirs: string[],
-  scannedDirCount: { count: number },
-  maxScanDirs: number,
-): Promise<string[]> {
-  if (scannedDirCount.count >= maxScanDirs) {
-    if (debugMode)
-      logger.debug(
-        `Max directory scan limit (${maxScanDirs}) reached. Stopping downward scan at: ${directory}`,
-      );
-    return [];
-  }
-  scannedDirCount.count++;
-
-  if (debugMode)
-    logger.debug(
-      `Scanning downward for ${getCurrentGeminiMdFilename()} files in: ${directory} (scanned: ${scannedDirCount.count}/${maxScanDirs})`,
-    );
-  const collectedPaths: string[] = [];
-  try {
-    const entries = await fs.readdir(directory, { withFileTypes: true });
-    for (const entry of entries) {
-      const fullPath = path.join(directory, entry.name);
-      if (entry.isDirectory()) {
-        if (ignoreDirs.includes(entry.name)) {
-          if (debugMode)
-            logger.debug(`Skipping ignored directory: ${fullPath}`);
-          continue;
-        }
-        const subDirPaths = await collectDownwardGeminiFiles(
-          fullPath,
-          debugMode,
-          ignoreDirs,
-          scannedDirCount,
-          maxScanDirs,
-        );
-        collectedPaths.push(...subDirPaths);
-      } else if (
-        entry.isFile() &&
-        entry.name === getCurrentGeminiMdFilename()
-      ) {
-        try {
-          await fs.access(fullPath, fsSync.constants.R_OK);
-          collectedPaths.push(fullPath);
-          if (debugMode)
-            logger.debug(
-              `Found readable downward ${getCurrentGeminiMdFilename()}: ${fullPath}`,
-            );
-        } catch {
-          if (debugMode)
-            logger.debug(
-              `Downward ${getCurrentGeminiMdFilename()} not readable, skipping: ${fullPath}`,
-            );
-        }
-      }
-    }
-  } catch (error) {
-    // Only log warnings in non-test environments
-    const isTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;
-    if (!isTestEnv) {
-      const message = error instanceof Error ? error.message : String(error);
-      logger.warn(`Error scanning directory ${directory}: ${message}`);
-    }
-    if (debugMode) logger.debug(`Failed to scan directory: ${directory}`);
-  }
-  return collectedPaths;
-}
-
 async function getGeminiMdFilePathsInternal(
   currentWorkingDirectory: string,
   userHomePath: string,
@@ -256,20 +174,19 @@ async function getGeminiMdFilePathsInternal(
   }
   paths.push(...upwardPaths);
 
-  if (debugMode)
-    logger.debug(`Starting downward scan from CWD: ${resolvedCwd}`);
-  const scannedDirCount = { count: 0 };
-  const downwardPaths = await collectDownwardGeminiFiles(
-    resolvedCwd,
-    debugMode,
-    DEFAULT_IGNORE_DIRECTORIES,
-    scannedDirCount,
-    MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
-  );
+  const downwardPaths = await bfsFileSearch(resolvedCwd, {
+    fileName: getCurrentGeminiMdFilename(),
+    maxDirs: MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
+    debug: debugMode,
+    respectGitIgnore: true,
+    projectRoot: projectRoot || resolvedCwd,
+  });
   downwardPaths.sort(); // Sort for consistent ordering, though hierarchy might be more complex
   if (debugMode && downwardPaths.length > 0)
     logger.debug(
-      `Found downward ${getCurrentGeminiMdFilename()} files (sorted): ${JSON.stringify(downwardPaths)}`,
+      `Found downward ${getCurrentGeminiMdFilename()} files (sorted): ${JSON.stringify(
+        downwardPaths,
+      )}`,
     );
   // Add downward paths only if they haven't been included already (e.g. from upward scan)
   for (const dPath of downwardPaths) {
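
Usage sketch (not part of the patch above): a minimal TypeScript example of calling the new bfsFileSearch utility from an ES module, mirroring the options exercised in the tests; the starting directory, file name, and option values are illustrative assumptions, not anything mandated by the commit.

    import { bfsFileSearch } from './bfsFileSearch.js';

    // Breadth-first scan below the current working directory for a context file,
    // honouring .gitignore and capping the walk at 200 directories (illustrative values).
    const matches = await bfsFileSearch(process.cwd(), {
      fileName: 'GEMINI.md',
      ignoreDirs: ['node_modules', 'dist'],
      maxDirs: 200,
      debug: false,
      respectGitIgnore: true,
    });
    console.log(matches); // absolute paths of every match, in breadth-first order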