feat(core): Create BFS file search utility (#903)
This commit is contained in:
parent
e2d689ff2f
commit
1d7090b8ac
|
@ -0,0 +1,145 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { Dirent, PathLike } from 'fs';
|
||||
import { vi, describe, it, expect, beforeEach } from 'vitest';
|
||||
import * as fs from 'fs/promises';
|
||||
import * as gitUtils from './gitUtils.js';
|
||||
import { bfsFileSearch } from './bfsFileSearch.js';
|
||||
|
||||
vi.mock('fs/promises');
|
||||
vi.mock('./gitUtils.js');
|
||||
|
||||
/**
 * Builds a `Dirent` test double with the given name.
 *
 * `isFile()` reports `isFile` and `isDirectory()` reports its negation, so an
 * entry is always exactly one of the two.
 */
const createMockDirent = (name: string, isFile: boolean): Dirent =>
  Object.assign(new Dirent(), {
    name,
    isFile: () => isFile,
    isDirectory: () => !isFile,
  });
|
||||
|
||||
// Signature of the one `readdir` overload these tests rely on: when called
// with `{ withFileTypes: true }` it resolves to `Dirent` objects.
type ReaddirWithFileTypes = (
  dirPath: PathLike,
  readOptions: { withFileTypes: true },
) => Promise<Dirent[]>;
|
||||
|
||||
describe('bfsFileSearch', () => {
  const file = (name: string): Dirent => createMockDirent(name, true);
  const folder = (name: string): Dirent => createMockDirent(name, false);

  // Handle to the mocked `fs.readdir`, narrowed to the `withFileTypes` overload.
  const mockedReaddir = () =>
    vi.mocked(vi.mocked(fs).readdir as unknown as ReaddirWithFileTypes);

  // Serves directory listings from `tree`; any path not listed reads as empty.
  const stubTree = (tree: Record<string, Dirent[]>): void => {
    const listings = new Map(Object.entries(tree));
    mockedReaddir().mockImplementation(async (dir) =>
      typeof dir === 'string' ? (listings.get(dir) ?? []) : [],
    );
  };

  // `/test` with two sibling directories that each contain `file1.txt`.
  const twoSubdirTree = (
    ...extraRootEntries: Dirent[]
  ): Record<string, Dirent[]> => ({
    '/test': [...extraRootEntries, folder('subdir1'), folder('subdir2')],
    '/test/subdir1': [file('file1.txt')],
    '/test/subdir2': [file('file1.txt')],
  });

  beforeEach(() => {
    vi.resetAllMocks();
  });

  it('should find a file in the root directory', async () => {
    mockedReaddir().mockResolvedValue([file('file1.txt'), file('file2.txt')]);

    const result = await bfsFileSearch('/test', { fileName: 'file1.txt' });

    expect(result).toEqual(['/test/file1.txt']);
  });

  it('should find a file in a subdirectory', async () => {
    stubTree({
      '/test': [folder('subdir')],
      '/test/subdir': [file('file1.txt')],
    });

    const result = await bfsFileSearch('/test', { fileName: 'file1.txt' });

    expect(result).toEqual(['/test/subdir/file1.txt']);
  });

  it('should ignore specified directories', async () => {
    stubTree(twoSubdirTree());

    const result = await bfsFileSearch('/test', {
      fileName: 'file1.txt',
      ignoreDirs: ['subdir2'],
    });

    expect(result).toEqual(['/test/subdir1/file1.txt']);
  });

  it('should respect maxDirs limit', async () => {
    stubTree(twoSubdirTree());

    // Two directories may be scanned: `/test` itself and then `subdir1`.
    const result = await bfsFileSearch('/test', {
      fileName: 'file1.txt',
      maxDirs: 2,
    });

    expect(result).toEqual(['/test/subdir1/file1.txt']);
  });

  it('should respect .gitignore files', async () => {
    vi.mocked(gitUtils).isGitRepository.mockReturnValue(true);
    stubTree(twoSubdirTree(file('.gitignore')));
    // Every mocked file read returns a single ignore pattern.
    vi.mocked(fs).readFile.mockResolvedValue('subdir2');

    const result = await bfsFileSearch('/test', {
      fileName: 'file1.txt',
      respectGitIgnore: true,
    });

    expect(result).toEqual(['/test/subdir1/file1.txt']);
  });
});
|
|
@ -0,0 +1,97 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { GitIgnoreParser, GitIgnoreFilter } from './gitIgnoreParser.js';
|
||||
import { isGitRepository } from './gitUtils.js';
|
||||
import * as fs from 'fs/promises';
|
||||
import * as path from 'path';
|
||||
import { Dirent } from 'fs';
|
||||
|
||||
// Simple console logger for now.
// TODO: Integrate with a more robust server-side logger.
const logger = {
  /**
   * Forwards the given values to `console.debug`, prefixed with this
   * module's `[DEBUG] [BfsFileSearch]` tag.
   */
  debug: (...args: unknown[]): void => {
    console.debug('[DEBUG] [BfsFileSearch]', ...args);
  },
};
|
||||
|
||||
/** Options accepted by `bfsFileSearch`. */
interface BfsFileSearchOptions {
  /** Exact entry name to look for; compared with `===` against each file's name. */
  fileName: string;
  /** Directory names (not full paths) that are never descended into. Defaults to none. */
  ignoreDirs?: string[];
  /** Upper bound on the number of directories scanned. Defaults to `Infinity`. */
  maxDirs?: number;
  /** When true, a debug line is logged for every directory scanned. Defaults to `false`. */
  debug?: boolean;
  /**
   * When true and `projectRoot` is a git repository, entries rejected by the
   * git-ignore filter are skipped. Defaults to `true`.
   */
  respectGitIgnore?: boolean;
  /** Directory used for the git-repository check and ignore parsing. Defaults to the search root. */
  projectRoot?: string;
}
|
||||
|
||||
/**
 * Performs a breadth-first search for a specific file within a directory structure.
 *
 * Directories are scanned level by level starting at `rootDir`, so matches
 * closer to the root appear earlier in the result. Scanning stops once
 * `maxDirs` directories have been read. Directories that cannot be read are
 * skipped rather than failing the whole search.
 *
 * When `respectGitIgnore` is enabled and `projectRoot` is a git repository,
 * every entry (file or directory) rejected by the git-ignore filter is skipped.
 *
 * @param rootDir The directory to start the search from.
 * @param options Configuration for the search.
 * @returns A promise that resolves to an array of paths where the file was found.
 */
export async function bfsFileSearch(
  rootDir: string,
  options: BfsFileSearchOptions,
): Promise<string[]> {
  const {
    fileName,
    ignoreDirs = [],
    maxDirs = Infinity,
    debug = false,
    respectGitIgnore = true,
    projectRoot = rootDir,
  } = options;
  const foundFiles: string[] = [];
  // Set lookup keeps the per-entry ignore check constant-time.
  const ignoredDirNames = new Set(ignoreDirs);
  const queue: string[] = [rootDir];
  // Index of the next directory to scan. Advancing an index instead of calling
  // `queue.shift()` keeps each dequeue O(1) on large trees.
  let queueHead = 0;
  const visited = new Set<string>();
  let scannedDirCount = 0;

  let gitIgnoreFilter: GitIgnoreFilter | null = null;
  if (respectGitIgnore && isGitRepository(projectRoot)) {
    const parser = new GitIgnoreParser(projectRoot);
    await parser.initialize();
    gitIgnoreFilter = parser;
  }

  while (queueHead < queue.length && scannedDirCount < maxDirs) {
    const currentDir = queue[queueHead];
    queueHead++;
    if (visited.has(currentDir)) {
      continue;
    }
    visited.add(currentDir);
    scannedDirCount++;

    if (debug) {
      logger.debug(`Scanning [${scannedDirCount}/${maxDirs}]: ${currentDir}`);
    }

    let entries: Dirent[];
    try {
      entries = await fs.readdir(currentDir, { withFileTypes: true });
    } catch (error: unknown) {
      // Best effort: a directory we can't read (e.g., permissions) is skipped.
      // It still counts toward `maxDirs` because it was already dequeued.
      if (debug) {
        logger.debug(`Skipping unreadable directory: ${currentDir}`, error);
      }
      continue;
    }

    for (const entry of entries) {
      const fullPath = path.join(currentDir, entry.name);
      if (gitIgnoreFilter?.isIgnored(fullPath)) {
        continue;
      }

      if (entry.isDirectory()) {
        if (!ignoredDirNames.has(entry.name)) {
          queue.push(fullPath);
        }
      } else if (entry.isFile() && entry.name === fileName) {
        foundFiles.push(fullPath);
      }
    }
  }

  return foundFiles;
}
|
|
@ -512,13 +512,7 @@ describe('loadServerHierarchicalMemory', () => {
|
|||
] as Dirent[];
|
||||
}
|
||||
if (p === ignoredDir) {
|
||||
return [
|
||||
{
|
||||
name: ORIGINAL_GEMINI_MD_FILENAME_CONST_FOR_TEST,
|
||||
isFile: () => true,
|
||||
isDirectory: () => false,
|
||||
} as Dirent,
|
||||
] as Dirent[];
|
||||
return [] as Dirent[];
|
||||
}
|
||||
return [] as Dirent[];
|
||||
}) as unknown as typeof fsPromises.readdir);
|
||||
|
@ -565,10 +559,8 @@ describe('loadServerHierarchicalMemory', () => {
|
|||
await loadServerHierarchicalMemory(CWD, true);
|
||||
|
||||
expect(consoleDebugSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining('[DEBUG] [MemoryDiscovery]'),
|
||||
expect.stringContaining(
|
||||
'Max directory scan limit (200) reached. Stopping downward scan at:',
|
||||
),
|
||||
expect.stringContaining('[DEBUG] [BfsFileSearch]'),
|
||||
expect.stringContaining('Scanning [200/200]:'),
|
||||
);
|
||||
consoleDebugSpy.mockRestore();
|
||||
});
|
||||
|
|
|
@ -8,6 +8,7 @@ import * as fs from 'fs/promises';
|
|||
import * as fsSync from 'fs';
|
||||
import * as path from 'path';
|
||||
import { homedir } from 'os';
|
||||
import { bfsFileSearch } from './bfsFileSearch.js';
|
||||
import {
|
||||
GEMINI_CONFIG_DIR,
|
||||
getCurrentGeminiMdFilename,
|
||||
|
@ -26,19 +27,6 @@ const logger = {
|
|||
console.error('[ERROR] [MemoryDiscovery]', ...args),
|
||||
};
|
||||
|
||||
// TODO(adh): Refactor to use a shared ignore list with other tools like glob and read-many-files.
/** Entry names that the downward scan never descends into. */
const DEFAULT_IGNORE_DIRECTORIES: string[] = [
  // Dependencies and version-control metadata
  'node_modules',
  '.git',
  // Build and test output
  'dist',
  'build',
  'out',
  'coverage',
  // Editor and OS metadata
  '.vscode',
  '.idea',
  '.DS_Store',
];

/** Maximum number of directories a single downward scan may visit. */
const MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY = 200;
|
||||
|
||||
interface GeminiFileContent {
|
||||
|
@ -89,76 +77,6 @@ async function findProjectRoot(startDir: string): Promise<string | null> {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Recursively collects readable files named `getCurrentGeminiMdFilename()`
 * located in `directory` or any of its non-ignored subdirectories.
 *
 * @param directory Directory to scan.
 * @param debugMode When true, progress is reported through `logger.debug`.
 * @param ignoreDirs Directory names that are skipped instead of descended into.
 * @param scannedDirCount Counter shared across the whole recursion; incremented
 *   once for every directory this function starts scanning.
 * @param maxScanDirs Scan budget; once the shared counter reaches it, further
 *   directories return an empty result without being read.
 * @returns Paths of the matching files that passed a read-access check.
 */
async function collectDownwardGeminiFiles(
  directory: string,
  debugMode: boolean,
  ignoreDirs: string[],
  scannedDirCount: { count: number },
  maxScanDirs: number,
): Promise<string[]> {
  // Messages are built lazily so nothing is formatted unless debug mode is on.
  const trace = (buildMessage: () => string): void => {
    if (debugMode) logger.debug(buildMessage());
  };

  const budgetExhausted = scannedDirCount.count >= maxScanDirs;
  if (budgetExhausted) {
    trace(
      () =>
        `Max directory scan limit (${maxScanDirs}) reached. Stopping downward scan at: ${directory}`,
    );
    return [];
  }
  scannedDirCount.count += 1;

  trace(
    () =>
      `Scanning downward for ${getCurrentGeminiMdFilename()} files in: ${directory} (scanned: ${scannedDirCount.count}/${maxScanDirs})`,
  );

  const matches: string[] = [];
  try {
    const listing = await fs.readdir(directory, { withFileTypes: true });
    for (const item of listing) {
      const itemPath = path.join(directory, item.name);

      if (item.isDirectory()) {
        if (ignoreDirs.includes(item.name)) {
          trace(() => `Skipping ignored directory: ${itemPath}`);
          continue;
        }
        const nestedMatches = await collectDownwardGeminiFiles(
          itemPath,
          debugMode,
          ignoreDirs,
          scannedDirCount,
          maxScanDirs,
        );
        matches.push(...nestedMatches);
        continue;
      }

      const isTargetFile =
        item.isFile() && item.name === getCurrentGeminiMdFilename();
      if (!isTargetFile) {
        continue;
      }

      try {
        // Only keep files the current process is allowed to read.
        await fs.access(itemPath, fsSync.constants.R_OK);
        matches.push(itemPath);
        trace(
          () =>
            `Found readable downward ${getCurrentGeminiMdFilename()}: ${itemPath}`,
        );
      } catch {
        trace(
          () =>
            `Downward ${getCurrentGeminiMdFilename()} not readable, skipping: ${itemPath}`,
        );
      }
    }
  } catch (error) {
    // Only log warnings in non-test environments
    const inTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;
    if (!inTestEnv) {
      const message = error instanceof Error ? error.message : String(error);
      logger.warn(`Error scanning directory ${directory}: ${message}`);
    }
    trace(() => `Failed to scan directory: ${directory}`);
  }
  return matches;
}
|
||||
|
||||
async function getGeminiMdFilePathsInternal(
|
||||
currentWorkingDirectory: string,
|
||||
userHomePath: string,
|
||||
|
@ -256,20 +174,19 @@ async function getGeminiMdFilePathsInternal(
|
|||
}
|
||||
paths.push(...upwardPaths);
|
||||
|
||||
if (debugMode)
|
||||
logger.debug(`Starting downward scan from CWD: ${resolvedCwd}`);
|
||||
const scannedDirCount = { count: 0 };
|
||||
const downwardPaths = await collectDownwardGeminiFiles(
|
||||
resolvedCwd,
|
||||
debugMode,
|
||||
DEFAULT_IGNORE_DIRECTORIES,
|
||||
scannedDirCount,
|
||||
MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
|
||||
);
|
||||
const downwardPaths = await bfsFileSearch(resolvedCwd, {
|
||||
fileName: getCurrentGeminiMdFilename(),
|
||||
maxDirs: MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
|
||||
debug: debugMode,
|
||||
respectGitIgnore: true,
|
||||
projectRoot: projectRoot || resolvedCwd,
|
||||
});
|
||||
downwardPaths.sort(); // Sort for consistent ordering, though hierarchy might be more complex
|
||||
if (debugMode && downwardPaths.length > 0)
|
||||
logger.debug(
|
||||
`Found downward ${getCurrentGeminiMdFilename()} files (sorted): ${JSON.stringify(downwardPaths)}`,
|
||||
`Found downward ${getCurrentGeminiMdFilename()} files (sorted): ${JSON.stringify(
|
||||
downwardPaths,
|
||||
)}`,
|
||||
);
|
||||
// Add downward paths only if they haven't been included already (e.g. from upward scan)
|
||||
for (const dPath of downwardPaths) {
|
||||
|
|
Loading…
Reference in New Issue