feat(core): Create BFS file search utility (#903)
parent e2d689ff2f
commit 1d7090b8ac

@@ -0,0 +1,145 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { Dirent, PathLike } from 'fs';
import { vi, describe, it, expect, beforeEach } from 'vitest';
import * as fs from 'fs/promises';
import * as gitUtils from './gitUtils.js';
import { bfsFileSearch } from './bfsFileSearch.js';

vi.mock('fs/promises');
vi.mock('./gitUtils.js');

const createMockDirent = (name: string, isFile: boolean): Dirent => {
  const dirent = new Dirent();
  dirent.name = name;
  dirent.isFile = () => isFile;
  dirent.isDirectory = () => !isFile;
  return dirent;
};

// Type for the specific overload we're using
type ReaddirWithFileTypes = (
  path: PathLike,
  options: { withFileTypes: true },
) => Promise<Dirent[]>;

describe('bfsFileSearch', () => {
  beforeEach(() => {
    vi.resetAllMocks();
  });

  it('should find a file in the root directory', async () => {
    const mockFs = vi.mocked(fs);
    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
    vi.mocked(mockReaddir).mockResolvedValue([
      createMockDirent('file1.txt', true),
      createMockDirent('file2.txt', true),
    ]);

    const result = await bfsFileSearch('/test', { fileName: 'file1.txt' });
    expect(result).toEqual(['/test/file1.txt']);
  });

  it('should find a file in a subdirectory', async () => {
    const mockFs = vi.mocked(fs);
    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
      if (dir === '/test') {
        return [createMockDirent('subdir', false)];
      }
      if (dir === '/test/subdir') {
        return [createMockDirent('file1.txt', true)];
      }
      return [];
    });

    const result = await bfsFileSearch('/test', { fileName: 'file1.txt' });
    expect(result).toEqual(['/test/subdir/file1.txt']);
  });

  it('should ignore specified directories', async () => {
    const mockFs = vi.mocked(fs);
    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
      if (dir === '/test') {
        return [
          createMockDirent('subdir1', false),
          createMockDirent('subdir2', false),
        ];
      }
      if (dir === '/test/subdir1') {
        return [createMockDirent('file1.txt', true)];
      }
      if (dir === '/test/subdir2') {
        return [createMockDirent('file1.txt', true)];
      }
      return [];
    });

    const result = await bfsFileSearch('/test', {
      fileName: 'file1.txt',
      ignoreDirs: ['subdir2'],
    });
    expect(result).toEqual(['/test/subdir1/file1.txt']);
  });

  it('should respect maxDirs limit', async () => {
    const mockFs = vi.mocked(fs);
    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
      if (dir === '/test') {
        return [
          createMockDirent('subdir1', false),
          createMockDirent('subdir2', false),
        ];
      }
      if (dir === '/test/subdir1') {
        return [createMockDirent('file1.txt', true)];
      }
      if (dir === '/test/subdir2') {
        return [createMockDirent('file1.txt', true)];
      }
      return [];
    });

    const result = await bfsFileSearch('/test', {
      fileName: 'file1.txt',
      maxDirs: 2,
    });
    expect(result).toEqual(['/test/subdir1/file1.txt']);
  });

  it('should respect .gitignore files', async () => {
    const mockFs = vi.mocked(fs);
    const mockGitUtils = vi.mocked(gitUtils);
    mockGitUtils.isGitRepository.mockReturnValue(true);
    const mockReaddir = mockFs.readdir as unknown as ReaddirWithFileTypes;
    vi.mocked(mockReaddir).mockImplementation(async (dir) => {
      if (dir === '/test') {
        return [
          createMockDirent('.gitignore', true),
          createMockDirent('subdir1', false),
          createMockDirent('subdir2', false),
        ];
      }
      if (dir === '/test/subdir1') {
        return [createMockDirent('file1.txt', true)];
      }
      if (dir === '/test/subdir2') {
        return [createMockDirent('file1.txt', true)];
      }
      return [];
    });
    mockFs.readFile.mockResolvedValue('subdir2');

    const result = await bfsFileSearch('/test', {
      fileName: 'file1.txt',
      respectGitIgnore: true,
    });
    expect(result).toEqual(['/test/subdir1/file1.txt']);
  });
});
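The suite above can be run on its own with vitest's file filter; a sketch, assuming it is invoked from the package directory that contains the test file:

  npx vitest run bfsFileSearch.test.ts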
@@ -0,0 +1,97 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { GitIgnoreParser, GitIgnoreFilter } from './gitIgnoreParser.js';
import { isGitRepository } from './gitUtils.js';
import * as fs from 'fs/promises';
import * as path from 'path';
import { Dirent } from 'fs';

// Simple console logger for now.
// TODO: Integrate with a more robust server-side logger.
const logger = {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  debug: (...args: any[]) => console.debug('[DEBUG] [BfsFileSearch]', ...args),
};

interface BfsFileSearchOptions {
  fileName: string;
  ignoreDirs?: string[];
  maxDirs?: number;
  debug?: boolean;
  respectGitIgnore?: boolean;
  projectRoot?: string;
}

/**
 * Performs a breadth-first search for a specific file within a directory structure.
 *
 * @param rootDir The directory to start the search from.
 * @param options Configuration for the search.
 * @returns A promise that resolves to an array of paths where the file was found.
 */
export async function bfsFileSearch(
  rootDir: string,
  options: BfsFileSearchOptions,
): Promise<string[]> {
  const {
    fileName,
    ignoreDirs = [],
    maxDirs = Infinity,
    debug = false,
    respectGitIgnore = true,
    projectRoot = rootDir,
  } = options;
  const foundFiles: string[] = [];
  const queue: string[] = [rootDir];
  const visited = new Set<string>();
  let scannedDirCount = 0;

  let gitIgnoreFilter: GitIgnoreFilter | null = null;
  if (respectGitIgnore && isGitRepository(projectRoot)) {
    const parser = new GitIgnoreParser(projectRoot);
    await parser.initialize();
    gitIgnoreFilter = parser;
  }

  while (queue.length > 0 && scannedDirCount < maxDirs) {
    const currentDir = queue.shift()!;
    if (visited.has(currentDir)) {
      continue;
    }
    visited.add(currentDir);
    scannedDirCount++;

    if (debug) {
      logger.debug(`Scanning [${scannedDirCount}/${maxDirs}]: ${currentDir}`);
    }

    let entries: Dirent[];
    try {
      entries = await fs.readdir(currentDir, { withFileTypes: true });
    } catch {
      // Ignore errors for directories we can't read (e.g., permissions)
      continue;
    }

    for (const entry of entries) {
      const fullPath = path.join(currentDir, entry.name);
      if (gitIgnoreFilter?.isIgnored(fullPath)) {
        continue;
      }

      if (entry.isDirectory()) {
        if (!ignoreDirs.includes(entry.name)) {
          queue.push(fullPath);
        }
      } else if (entry.isFile() && entry.name === fileName) {
        foundFiles.push(fullPath);
      }
    }
  }

  return foundFiles;
}
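For context, a minimal usage sketch of the new utility. The root path and option values below are hypothetical, chosen to mirror how memoryDiscovery calls it later in this commit:

  // Hypothetical call: breadth-first search for every GEMINI.md under /repo,
  // visiting at most 200 directories and honoring .gitignore rules.
  const hits = await bfsFileSearch('/repo', {
    fileName: 'GEMINI.md',
    ignoreDirs: ['node_modules'],
    maxDirs: 200,
    respectGitIgnore: true,
  });
  // e.g. ['/repo/GEMINI.md', '/repo/packages/core/GEMINI.md']

Because the queue is FIFO, shallower matches are returned before deeper ones, and maxDirs bounds the scan cost on large trees.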
@@ -512,13 +512,7 @@ describe('loadServerHierarchicalMemory', () => {
        ] as Dirent[];
      }
      if (p === ignoredDir) {
-        return [
-          {
-            name: ORIGINAL_GEMINI_MD_FILENAME_CONST_FOR_TEST,
-            isFile: () => true,
-            isDirectory: () => false,
-          } as Dirent,
-        ] as Dirent[];
+        return [] as Dirent[];
      }
      return [] as Dirent[];
    }) as unknown as typeof fsPromises.readdir);
@@ -565,10 +559,8 @@ describe('loadServerHierarchicalMemory', () => {
    await loadServerHierarchicalMemory(CWD, true);

    expect(consoleDebugSpy).toHaveBeenCalledWith(
-      expect.stringContaining('[DEBUG] [MemoryDiscovery]'),
-      expect.stringContaining(
-        'Max directory scan limit (200) reached. Stopping downward scan at:',
-      ),
+      expect.stringContaining('[DEBUG] [BfsFileSearch]'),
+      expect.stringContaining('Scanning [200/200]:'),
    );
    consoleDebugSpy.mockRestore();
  });
@@ -8,6 +8,7 @@ import * as fs from 'fs/promises';
import * as fsSync from 'fs';
import * as path from 'path';
import { homedir } from 'os';
+import { bfsFileSearch } from './bfsFileSearch.js';
import {
  GEMINI_CONFIG_DIR,
  getCurrentGeminiMdFilename,
@@ -26,19 +27,6 @@ const logger = {
    console.error('[ERROR] [MemoryDiscovery]', ...args),
};

-// TODO(adh): Refactor to use a shared ignore list with other tools like glob and read-many-files.
-const DEFAULT_IGNORE_DIRECTORIES = [
-  'node_modules',
-  '.git',
-  'dist',
-  'build',
-  'out',
-  'coverage',
-  '.vscode',
-  '.idea',
-  '.DS_Store',
-];
-
const MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY = 200;

interface GeminiFileContent {
@@ -89,76 +77,6 @@ async function findProjectRoot(startDir: string): Promise<string | null> {
  }
}

-async function collectDownwardGeminiFiles(
-  directory: string,
-  debugMode: boolean,
-  ignoreDirs: string[],
-  scannedDirCount: { count: number },
-  maxScanDirs: number,
-): Promise<string[]> {
-  if (scannedDirCount.count >= maxScanDirs) {
-    if (debugMode)
-      logger.debug(
-        `Max directory scan limit (${maxScanDirs}) reached. Stopping downward scan at: ${directory}`,
-      );
-    return [];
-  }
-  scannedDirCount.count++;
-
-  if (debugMode)
-    logger.debug(
-      `Scanning downward for ${getCurrentGeminiMdFilename()} files in: ${directory} (scanned: ${scannedDirCount.count}/${maxScanDirs})`,
-    );
-  const collectedPaths: string[] = [];
-  try {
-    const entries = await fs.readdir(directory, { withFileTypes: true });
-    for (const entry of entries) {
-      const fullPath = path.join(directory, entry.name);
-      if (entry.isDirectory()) {
-        if (ignoreDirs.includes(entry.name)) {
-          if (debugMode)
-            logger.debug(`Skipping ignored directory: ${fullPath}`);
-          continue;
-        }
-        const subDirPaths = await collectDownwardGeminiFiles(
-          fullPath,
-          debugMode,
-          ignoreDirs,
-          scannedDirCount,
-          maxScanDirs,
-        );
-        collectedPaths.push(...subDirPaths);
-      } else if (
-        entry.isFile() &&
-        entry.name === getCurrentGeminiMdFilename()
-      ) {
-        try {
-          await fs.access(fullPath, fsSync.constants.R_OK);
-          collectedPaths.push(fullPath);
-          if (debugMode)
-            logger.debug(
-              `Found readable downward ${getCurrentGeminiMdFilename()}: ${fullPath}`,
-            );
-        } catch {
-          if (debugMode)
-            logger.debug(
-              `Downward ${getCurrentGeminiMdFilename()} not readable, skipping: ${fullPath}`,
-            );
-        }
-      }
-    }
-  } catch (error) {
-    // Only log warnings in non-test environments
-    const isTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;
-    if (!isTestEnv) {
-      const message = error instanceof Error ? error.message : String(error);
-      logger.warn(`Error scanning directory ${directory}: ${message}`);
-    }
-    if (debugMode) logger.debug(`Failed to scan directory: ${directory}`);
-  }
-  return collectedPaths;
-}
-
async function getGeminiMdFilePathsInternal(
  currentWorkingDirectory: string,
  userHomePath: string,
@@ -256,20 +174,19 @@ async function getGeminiMdFilePathsInternal(
  }
  paths.push(...upwardPaths);

-  if (debugMode)
-    logger.debug(`Starting downward scan from CWD: ${resolvedCwd}`);
-  const scannedDirCount = { count: 0 };
-  const downwardPaths = await collectDownwardGeminiFiles(
-    resolvedCwd,
-    debugMode,
-    DEFAULT_IGNORE_DIRECTORIES,
-    scannedDirCount,
-    MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
-  );
+  const downwardPaths = await bfsFileSearch(resolvedCwd, {
+    fileName: getCurrentGeminiMdFilename(),
+    maxDirs: MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
+    debug: debugMode,
+    respectGitIgnore: true,
+    projectRoot: projectRoot || resolvedCwd,
+  });
  downwardPaths.sort(); // Sort for consistent ordering, though hierarchy might be more complex
  if (debugMode && downwardPaths.length > 0)
    logger.debug(
-      `Found downward ${getCurrentGeminiMdFilename()} files (sorted): ${JSON.stringify(downwardPaths)}`,
+      `Found downward ${getCurrentGeminiMdFilename()} files (sorted): ${JSON.stringify(
+        downwardPaths,
+      )}`,
    );
  // Add downward paths only if they haven't been included already (e.g. from upward scan)
  for (const dPath of downwardPaths) {