From a008d8178015a182656ea8f5a39b9dde554da5ae Mon Sep 17 00:00:00 2001 From: Allen Hutchison Date: Fri, 23 May 2025 08:53:22 -0700 Subject: [PATCH] Refactor(server): Centralize GEMINI.md discovery logic in server (#498) --- packages/cli/src/config/config.ts | 308 +-------------- .../cli/src/ui/hooks/useShowMemoryCommand.ts | 45 +-- packages/server/src/index.ts | 1 + .../server/src/utils/memoryDiscovery.test.ts | 369 ++++++++++++++++++ packages/server/src/utils/memoryDiscovery.ts | 351 +++++++++++++++++ 5 files changed, 746 insertions(+), 328 deletions(-) create mode 100644 packages/server/src/utils/memoryDiscovery.test.ts create mode 100644 packages/server/src/utils/memoryDiscovery.ts diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 53780bb3..783a1a88 100644 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -4,10 +4,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -import * as fs from 'fs/promises'; -import * as fsSync from 'fs'; // For synchronous checks like existsSync -import * as path from 'path'; -import { homedir } from 'os'; import yargs from 'yargs/yargs'; import { hideBin } from 'yargs/helpers'; import process from 'node:process'; @@ -15,8 +11,7 @@ import { Config, loadEnvironment, createServerConfig, - GEMINI_CONFIG_DIR, - GEMINI_MD_FILENAME, + loadServerHierarchicalMemory, } from '@gemini-code/server'; import { Settings } from './settings.js'; import { readPackageUp } from 'read-package-up'; @@ -32,18 +27,6 @@ const logger = { }; const DEFAULT_GEMINI_MODEL = 'gemini-2.5-pro-preview-05-06'; -// TODO(adh): Refactor to use a shared ignore list with other tools like glob and read-many-files. 
-const DEFAULT_IGNORE_DIRECTORIES = [ - 'node_modules', - '.git', - 'dist', - 'build', - 'out', - 'coverage', - '.vscode', - '.idea', - '.DS_Store', -]; interface CliArgs { model: string | undefined; @@ -95,293 +78,24 @@ async function parseArguments(): Promise { return finalArgv; } -async function findProjectRoot(startDir: string): Promise { - let currentDir = path.resolve(startDir); - while (true) { - const gitPath = path.join(currentDir, '.git'); - try { - const stats = await fs.stat(gitPath); - if (stats.isDirectory()) { - return currentDir; - } - } catch (error: unknown) { - if (typeof error === 'object' && error !== null && 'code' in error) { - const fsError = error as { code: string; message: string }; - if (fsError.code !== 'ENOENT') { - logger.warn( - `Error checking for .git directory at ${gitPath}: ${fsError.message}`, - ); - } - } else { - logger.warn( - `Non-standard error checking for .git directory at ${gitPath}: ${String(error)}`, - ); - } - } - const parentDir = path.dirname(currentDir); - if (parentDir === currentDir) { - return null; - } - currentDir = parentDir; - } -} - -async function collectDownwardGeminiFiles( - directory: string, - debugMode: boolean, - ignoreDirs: string[], - scannedDirCount: { count: number }, - maxScanDirs: number, -): Promise { - if (scannedDirCount.count >= maxScanDirs) { - if (debugMode) - logger.debug( - `Max directory scan limit (${maxScanDirs}) reached. 
Stopping downward scan at: ${directory}`, - ); - return []; - } - scannedDirCount.count++; - - if (debugMode) - logger.debug( - `Scanning downward for ${GEMINI_MD_FILENAME} files in: ${directory} (scanned: ${scannedDirCount.count}/${maxScanDirs})`, - ); - const collectedPaths: string[] = []; - try { - const entries = await fs.readdir(directory, { withFileTypes: true }); - for (const entry of entries) { - const fullPath = path.join(directory, entry.name); - if (entry.isDirectory()) { - if (ignoreDirs.includes(entry.name)) { - if (debugMode) - logger.debug(`Skipping ignored directory: ${fullPath}`); - continue; - } - const subDirPaths = await collectDownwardGeminiFiles( - fullPath, - debugMode, - ignoreDirs, - scannedDirCount, - maxScanDirs, - ); - collectedPaths.push(...subDirPaths); - } else if (entry.isFile() && entry.name === GEMINI_MD_FILENAME) { - try { - await fs.access(fullPath, fsSync.constants.R_OK); - collectedPaths.push(fullPath); - if (debugMode) - logger.debug( - `Found readable downward ${GEMINI_MD_FILENAME}: ${fullPath}`, - ); - } catch { - if (debugMode) - logger.debug( - `Downward ${GEMINI_MD_FILENAME} not readable, skipping: ${fullPath}`, - ); - } - } - } - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - logger.warn(`Error scanning directory ${directory}: ${message}`); - if (debugMode) logger.debug(`Failed to scan directory: ${directory}`); - } - return collectedPaths; -} - -export async function getGeminiMdFilePaths( - currentWorkingDirectory: string, - userHomePath: string, - debugMode: boolean, -): Promise { - const resolvedCwd = path.resolve(currentWorkingDirectory); - const resolvedHome = path.resolve(userHomePath); - const globalMemoryPath = path.join( - resolvedHome, - GEMINI_CONFIG_DIR, - GEMINI_MD_FILENAME, - ); - const paths: string[] = []; - - if (debugMode) - logger.debug( - `Searching for ${GEMINI_MD_FILENAME} starting from CWD: ${resolvedCwd}`, - ); - if (debugMode) logger.debug(`User home directory: ${resolvedHome}`); - - try { - await fs.access(globalMemoryPath, fsSync.constants.R_OK); - paths.push(globalMemoryPath); - if (debugMode) - logger.debug( - `Found readable global ${GEMINI_MD_FILENAME}: ${globalMemoryPath}`, - ); - } catch { - if (debugMode) - logger.debug( - `Global ${GEMINI_MD_FILENAME} not found or not readable: ${globalMemoryPath}`, - ); - } - - const projectRoot = await findProjectRoot(resolvedCwd); - if (debugMode) - logger.debug(`Determined project root: ${projectRoot ?? 'None'}`); - - const upwardPaths: string[] = []; - let currentDir = resolvedCwd; - const stopDir = projectRoot ? 
path.dirname(projectRoot) : resolvedHome; - - while ( - currentDir && - currentDir !== stopDir && - currentDir !== path.dirname(currentDir) - ) { - if (debugMode) - logger.debug( - `Checking for ${GEMINI_MD_FILENAME} in (upward scan): ${currentDir}`, - ); - if (currentDir === path.join(resolvedHome, GEMINI_CONFIG_DIR)) { - if (debugMode) - logger.debug(`Skipping check inside global config dir: ${currentDir}`); - break; - } - const potentialPath = path.join(currentDir, GEMINI_MD_FILENAME); - try { - await fs.access(potentialPath, fsSync.constants.R_OK); - upwardPaths.unshift(potentialPath); - if (debugMode) - logger.debug( - `Found readable upward ${GEMINI_MD_FILENAME}: ${potentialPath}`, - ); - } catch { - if (debugMode) - logger.debug( - `Upward ${GEMINI_MD_FILENAME} not found or not readable in: ${currentDir}`, - ); - } - const parentDir = path.dirname(currentDir); - if (parentDir === currentDir) { - if (debugMode) - logger.debug(`Reached filesystem root, stopping upward search.`); - break; - } - currentDir = parentDir; - } - paths.push(...upwardPaths); - - if (debugMode) - logger.debug(`Starting downward scan from CWD: ${resolvedCwd}`); - const MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY = 200; // Define the cap - const scannedDirCount = { count: 0 }; - const downwardPaths = await collectDownwardGeminiFiles( - resolvedCwd, - debugMode, - DEFAULT_IGNORE_DIRECTORIES, - scannedDirCount, - MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY, - ); - downwardPaths.sort(); - if (debugMode && downwardPaths.length > 0) - logger.debug( - `Found downward ${GEMINI_MD_FILENAME} files (sorted): ${JSON.stringify(downwardPaths)}`, - ); - for (const dPath of downwardPaths) { - if (!paths.includes(dPath)) { - paths.push(dPath); - } - } - - if (debugMode) - logger.debug( - `Final ordered ${GEMINI_MD_FILENAME} paths to read: ${JSON.stringify(paths)}`, - ); - return paths; -} - -interface GeminiFileContent { - filePath: string; - content: string | null; -} - -async function readGeminiMdFiles( - filePaths: 
string[], - debugMode: boolean, -): Promise { - const results: GeminiFileContent[] = []; - for (const filePath of filePaths) { - try { - const content = await fs.readFile(filePath, 'utf-8'); - results.push({ filePath, content }); - if (debugMode) - logger.debug( - `Successfully read: ${filePath} (Length: ${content.length})`, - ); - } catch (error: unknown) { - const message = error instanceof Error ? error.message : String(error); - logger.warn( - `Warning: Could not read ${GEMINI_MD_FILENAME} file at ${filePath}. Error: ${message}`, - ); - results.push({ filePath, content: null }); - if (debugMode) logger.debug(`Failed to read: ${filePath}`); - } - } - return results; -} - -function concatenateInstructions( - instructionContents: GeminiFileContent[], -): string { - return instructionContents - .filter((item) => typeof item.content === 'string') - .map((item) => { - const trimmedContent = (item.content as string).trim(); - if (trimmedContent.length === 0) { - return null; // Filter out empty content after trimming - } - // Use a relative path for the marker if possible, otherwise full path. - // This assumes process.cwd() is the project root or a relevant base. - const displayPath = path.isAbsolute(item.filePath) - ? path.relative(process.cwd(), item.filePath) - : item.filePath; - return `--- Context from: ${displayPath} ---\n${trimmedContent}\n--- End of Context from: ${displayPath} ---`; - }) - .filter((block): block is string => block !== null) - .join('\n\n'); -} - +// This function is now a thin wrapper around the server's implementation. +// It's kept in the CLI for now as App.tsx directly calls it for memory refresh. +// TODO: Consider if App.tsx should get memory via a server call or if Config should refresh itself. 
export async function loadHierarchicalGeminiMemory( currentWorkingDirectory: string, debugMode: boolean, ): Promise<{ memoryContent: string; fileCount: number }> { - if (debugMode) + if (debugMode) { logger.debug( - `Loading hierarchical memory for CWD: ${currentWorkingDirectory}`, + `CLI: Delegating hierarchical memory load to server for CWD: ${currentWorkingDirectory}`, ); - const userHomePath = homedir(); - const filePaths = await getGeminiMdFilePaths( - currentWorkingDirectory, - userHomePath, - debugMode, - ); - if (filePaths.length === 0) { - if (debugMode) logger.debug('No GEMINI.md files found in hierarchy.'); - return { memoryContent: '', fileCount: 0 }; } - const contentsWithPaths = await readGeminiMdFiles(filePaths, debugMode); - const combinedInstructions = concatenateInstructions(contentsWithPaths); - if (debugMode) - logger.debug( - `Combined instructions length: ${combinedInstructions.length}`, - ); - if (debugMode && combinedInstructions.length > 0) - logger.debug( - `Combined instructions (snippet): ${combinedInstructions.substring(0, 500)}...`, - ); - return { memoryContent: combinedInstructions, fileCount: filePaths.length }; + // Directly call the server function. + // The server function will use its own homedir() for the global path. 
+ return loadServerHierarchicalMemory(currentWorkingDirectory, debugMode); } export async function loadCliConfig(settings: Settings): Promise { - // Load .env file using logic from server package loadEnvironment(); const geminiApiKey = process.env.GEMINI_API_KEY; @@ -410,17 +124,15 @@ export async function loadCliConfig(settings: Settings): Promise { const argv = await parseArguments(); const debugMode = argv.debug || false; + // Call the (now wrapper) loadHierarchicalGeminiMemory which calls the server's version const { memoryContent, fileCount } = await loadHierarchicalGeminiMemory( process.cwd(), debugMode, ); const userAgent = await createUserAgent(); - - // Gemini Developer API or GCP Express or Vertex AI const apiKeyForServer = geminiApiKey || googleApiKey || ''; - // Create config using factory from server package return createServerConfig( apiKeyForServer, argv.model || DEFAULT_GEMINI_MODEL, diff --git a/packages/cli/src/ui/hooks/useShowMemoryCommand.ts b/packages/cli/src/ui/hooks/useShowMemoryCommand.ts index c15b27cd..c8719bbd 100644 --- a/packages/cli/src/ui/hooks/useShowMemoryCommand.ts +++ b/packages/cli/src/ui/hooks/useShowMemoryCommand.ts @@ -6,11 +6,6 @@ import { Message, MessageType } from '../types.js'; import { Config } from '@gemini-code/server'; -import { getGeminiMdFilePaths } from '../../config/config.js'; -import { homedir } from 'os'; -import process from 'node:process'; - -export const SHOW_MEMORY_COMMAND_NAME = '/showmemory'; export function createShowMemoryAction( config: Config | null, @@ -27,52 +22,42 @@ export function createShowMemoryAction( } const debugMode = config.getDebugMode(); - const cwd = process.cwd(); - const homeDir = homedir(); if (debugMode) { - console.log(`[DEBUG] Show Memory: CWD=${cwd}, Home=${homeDir}`); - } - - const filePaths = await getGeminiMdFilePaths(cwd, homeDir, debugMode); - - if (filePaths.length > 0) { - addMessage({ - type: MessageType.INFO, - content: `The following GEMINI.md files are being used (in 
order of precedence):\n- ${filePaths.join('\n- ')}`, - timestamp: new Date(), - }); - } else { - addMessage({ - type: MessageType.INFO, - content: 'No GEMINI.md files found in the hierarchy.', - timestamp: new Date(), - }); + console.log('[DEBUG] Show Memory command invoked.'); } const currentMemory = config.getUserMemory(); + const fileCount = config.getGeminiMdFileCount(); - if (config.getDebugMode()) { + if (debugMode) { console.log( `[DEBUG] Showing memory. Content from config.getUserMemory() (first 200 chars): ${currentMemory.substring(0, 200)}...`, ); + console.log(`[DEBUG] Number of GEMINI.md files loaded: ${fileCount}`); + } + + if (fileCount > 0) { + addMessage({ + type: MessageType.INFO, + content: `Loaded memory from ${fileCount} GEMINI.md file(s).`, + timestamp: new Date(), + }); } if (currentMemory && currentMemory.trim().length > 0) { addMessage({ type: MessageType.INFO, - // Display with a clear heading, and potentially format for readability if very long. - // For now, direct display. Consider using Markdown formatting for code blocks if memory contains them. content: `Current combined GEMINI.md memory content:\n\`\`\`markdown\n${currentMemory}\n\`\`\``, timestamp: new Date(), }); } else { - // This message might be redundant if filePaths.length === 0, but kept for explicitness - // if somehow memory is empty even if files were found (e.g., all files are empty). addMessage({ type: MessageType.INFO, content: - 'No hierarchical memory (GEMINI.md) is currently loaded or memory is empty.', + fileCount > 0 + ? 'Hierarchical memory (GEMINI.md) is loaded but content is empty.' 
+ : 'No hierarchical memory (GEMINI.md) is currently loaded.', timestamp: new Date(), }); } diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 788fe6e4..70426d57 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -21,6 +21,7 @@ export * from './utils/paths.js'; export * from './utils/schemaValidator.js'; export * from './utils/errors.js'; export * from './utils/getFolderStructure.js'; +export * from './utils/memoryDiscovery.js'; // Export base tool definitions export * from './tools/tools.js'; diff --git a/packages/server/src/utils/memoryDiscovery.test.ts b/packages/server/src/utils/memoryDiscovery.test.ts new file mode 100644 index 00000000..d104df7a --- /dev/null +++ b/packages/server/src/utils/memoryDiscovery.test.ts @@ -0,0 +1,369 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + vi, + describe, + it, + expect, + beforeEach, + // afterEach, // Removed unused import + Mocked, +} from 'vitest'; +import * as fsPromises from 'fs/promises'; +import * as fsSync from 'fs'; // For constants +import { Stats, Dirent } from 'fs'; // Import types directly from 'fs' +import * as os from 'os'; +import * as path from 'path'; +import { loadServerHierarchicalMemory } from './memoryDiscovery.js'; +import { GEMINI_CONFIG_DIR, GEMINI_MD_FILENAME } from '../tools/memoryTool.js'; + +// Mock the entire fs/promises module +vi.mock('fs/promises'); +// Mock the parts of fsSync we might use (like constants or existsSync if needed) +vi.mock('fs', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, // Spread actual to get all exports, including Stats and Dirent if they are classes/constructors + constants: { ...actual.constants }, // Preserve constants + // Mock other fsSync functions if directly used by memoryDiscovery, e.g., existsSync + // existsSync: vi.fn(), + }; +}); +vi.mock('os'); + +describe('loadServerHierarchicalMemory', () 
=> { + const mockFs = fsPromises as Mocked; + const mockOs = os as Mocked; + + const CWD = '/test/project/src'; + const PROJECT_ROOT = '/test/project'; + const USER_HOME = '/test/userhome'; + const GLOBAL_GEMINI_DIR = path.join(USER_HOME, GEMINI_CONFIG_DIR); + const GLOBAL_GEMINI_FILE = path.join(GLOBAL_GEMINI_DIR, GEMINI_MD_FILENAME); + + beforeEach(() => { + vi.resetAllMocks(); + + mockOs.homedir.mockReturnValue(USER_HOME); + mockFs.stat.mockRejectedValue(new Error('File not found')); + mockFs.readdir.mockResolvedValue([]); + mockFs.readFile.mockRejectedValue(new Error('File not found')); + mockFs.access.mockRejectedValue(new Error('File not found')); + }); + + it('should return empty memory and count if no GEMINI.md files are found', async () => { + const { memoryContent, fileCount } = await loadServerHierarchicalMemory( + CWD, + false, + ); + expect(memoryContent).toBe(''); + expect(fileCount).toBe(0); + }); + + it('should load only the global GEMINI.md if present and others are not', async () => { + mockFs.access.mockImplementation(async (p) => { + if (p === GLOBAL_GEMINI_FILE) { + return undefined; + } + throw new Error('File not found'); + }); + mockFs.readFile.mockImplementation(async (p) => { + if (p === GLOBAL_GEMINI_FILE) { + return 'Global memory content'; + } + throw new Error('File not found'); + }); + + const { memoryContent, fileCount } = await loadServerHierarchicalMemory( + CWD, + false, + ); + + expect(memoryContent).toBe( + `--- Context from: ${path.relative(CWD, GLOBAL_GEMINI_FILE)} ---\nGlobal memory content\n--- End of Context from: ${path.relative(CWD, GLOBAL_GEMINI_FILE)} ---`, + ); + expect(fileCount).toBe(1); + expect(mockFs.readFile).toHaveBeenCalledWith(GLOBAL_GEMINI_FILE, 'utf-8'); + }); + + it('should load GEMINI.md files by upward traversal from CWD to project root', async () => { + const projectRootGeminiFile = path.join(PROJECT_ROOT, GEMINI_MD_FILENAME); + const srcGeminiFile = path.join(CWD, GEMINI_MD_FILENAME); + + 
mockFs.stat.mockImplementation(async (p) => { + if (p === path.join(PROJECT_ROOT, '.git')) { + return { isDirectory: () => true } as Stats; + } + throw new Error('File not found'); + }); + + mockFs.access.mockImplementation(async (p) => { + if (p === projectRootGeminiFile || p === srcGeminiFile) { + return undefined; + } + throw new Error('File not found'); + }); + + mockFs.readFile.mockImplementation(async (p) => { + if (p === projectRootGeminiFile) { + return 'Project root memory'; + } + if (p === srcGeminiFile) { + return 'Src directory memory'; + } + throw new Error('File not found'); + }); + + const { memoryContent, fileCount } = await loadServerHierarchicalMemory( + CWD, + false, + ); + const expectedContent = + `--- Context from: ${path.relative(CWD, projectRootGeminiFile)} ---\nProject root memory\n--- End of Context from: ${path.relative(CWD, projectRootGeminiFile)} ---\n\n` + + `--- Context from: ${GEMINI_MD_FILENAME} ---\nSrc directory memory\n--- End of Context from: ${GEMINI_MD_FILENAME} ---`; + + expect(memoryContent).toBe(expectedContent); + expect(fileCount).toBe(2); + expect(mockFs.readFile).toHaveBeenCalledWith( + projectRootGeminiFile, + 'utf-8', + ); + expect(mockFs.readFile).toHaveBeenCalledWith(srcGeminiFile, 'utf-8'); + }); + + it('should load GEMINI.md files by downward traversal from CWD', async () => { + const subDir = path.join(CWD, 'subdir'); + const subDirGeminiFile = path.join(subDir, GEMINI_MD_FILENAME); + const cwdGeminiFile = path.join(CWD, GEMINI_MD_FILENAME); + + mockFs.access.mockImplementation(async (p) => { + if (p === cwdGeminiFile || p === subDirGeminiFile) return undefined; + throw new Error('File not found'); + }); + + mockFs.readFile.mockImplementation(async (p) => { + if (p === cwdGeminiFile) return 'CWD memory'; + if (p === subDirGeminiFile) return 'Subdir memory'; + throw new Error('File not found'); + }); + + mockFs.readdir.mockImplementation(async (p) => { + if (p === CWD) { + return [ + { + name: GEMINI_MD_FILENAME, 
+ isFile: () => true, + isDirectory: () => false, + }, + { name: 'subdir', isFile: () => false, isDirectory: () => true }, + ] as Dirent[]; + } + if (p === subDir) { + return [ + { + name: GEMINI_MD_FILENAME, + isFile: () => true, + isDirectory: () => false, + }, + ] as Dirent[]; + } + return []; + }); + + const { memoryContent, fileCount } = await loadServerHierarchicalMemory( + CWD, + false, + ); + const expectedContent = + `--- Context from: ${GEMINI_MD_FILENAME} ---\nCWD memory\n--- End of Context from: ${GEMINI_MD_FILENAME} ---\n\n` + + `--- Context from: ${path.join('subdir', GEMINI_MD_FILENAME)} ---\nSubdir memory\n--- End of Context from: ${path.join('subdir', GEMINI_MD_FILENAME)} ---`; + + expect(memoryContent).toBe(expectedContent); + expect(fileCount).toBe(2); + }); + + it('should load and correctly order global, upward, and downward GEMINI.md files', async () => { + const projectParentDir = path.dirname(PROJECT_ROOT); + const projectParentGeminiFile = path.join( + projectParentDir, + GEMINI_MD_FILENAME, + ); + const projectRootGeminiFile = path.join(PROJECT_ROOT, GEMINI_MD_FILENAME); + const cwdGeminiFile = path.join(CWD, GEMINI_MD_FILENAME); + const subDir = path.join(CWD, 'sub'); + const subDirGeminiFile = path.join(subDir, GEMINI_MD_FILENAME); + + mockFs.stat.mockImplementation(async (p) => { + if (p === path.join(PROJECT_ROOT, '.git')) { + return { isDirectory: () => true } as Stats; + } + throw new Error('File not found'); + }); + + mockFs.access.mockImplementation(async (p) => { + if ( + p === GLOBAL_GEMINI_FILE || + p === projectParentGeminiFile || + p === projectRootGeminiFile || + p === cwdGeminiFile || + p === subDirGeminiFile + ) { + return undefined; + } + throw new Error('File not found'); + }); + + mockFs.readFile.mockImplementation(async (p) => { + if (p === GLOBAL_GEMINI_FILE) return 'Global memory'; + if (p === projectParentGeminiFile) return 'Project parent memory'; + if (p === projectRootGeminiFile) return 'Project root memory'; + if 
(p === cwdGeminiFile) return 'CWD memory'; + if (p === subDirGeminiFile) return 'Subdir memory'; + throw new Error('File not found'); + }); + + mockFs.readdir.mockImplementation(async (p) => { + if (p === CWD) { + return [ + { name: 'sub', isFile: () => false, isDirectory: () => true }, + ] as Dirent[]; + } + if (p === subDir) { + return [ + { + name: GEMINI_MD_FILENAME, + isFile: () => true, + isDirectory: () => false, + }, + ] as Dirent[]; + } + return []; + }); + + const { memoryContent, fileCount } = await loadServerHierarchicalMemory( + CWD, + false, + ); + + const relPathGlobal = path.relative(CWD, GLOBAL_GEMINI_FILE); + const relPathProjectParent = path.relative(CWD, projectParentGeminiFile); + const relPathProjectRoot = path.relative(CWD, projectRootGeminiFile); + const relPathCwd = GEMINI_MD_FILENAME; + const relPathSubDir = path.join('sub', GEMINI_MD_FILENAME); + + const expectedContent = [ + `--- Context from: ${relPathGlobal} ---\nGlobal memory\n--- End of Context from: ${relPathGlobal} ---`, + `--- Context from: ${relPathProjectParent} ---\nProject parent memory\n--- End of Context from: ${relPathProjectParent} ---`, + `--- Context from: ${relPathProjectRoot} ---\nProject root memory\n--- End of Context from: ${relPathProjectRoot} ---`, + `--- Context from: ${relPathCwd} ---\nCWD memory\n--- End of Context from: ${relPathCwd} ---`, + `--- Context from: ${relPathSubDir} ---\nSubdir memory\n--- End of Context from: ${relPathSubDir} ---`, + ].join('\n\n'); + + expect(memoryContent).toBe(expectedContent); + expect(fileCount).toBe(5); + }); + + it('should ignore specified directories during downward scan', async () => { + const ignoredDir = path.join(CWD, 'node_modules'); + const ignoredDirGeminiFile = path.join(ignoredDir, GEMINI_MD_FILENAME); + const regularSubDir = path.join(CWD, 'my_code'); + const regularSubDirGeminiFile = path.join( + regularSubDir, + GEMINI_MD_FILENAME, + ); + + mockFs.access.mockImplementation(async (p) => { + if (p === 
regularSubDirGeminiFile) return undefined; + if (p === ignoredDirGeminiFile) + throw new Error('Should not access ignored file'); + throw new Error('File not found'); + }); + + mockFs.readFile.mockImplementation(async (p) => { + if (p === regularSubDirGeminiFile) return 'My code memory'; + throw new Error('File not found'); + }); + + mockFs.readdir.mockImplementation(async (p) => { + if (p === CWD) { + return [ + { + name: 'node_modules', + isFile: () => false, + isDirectory: () => true, + }, + { name: 'my_code', isFile: () => false, isDirectory: () => true }, + ] as Dirent[]; + } + if (p === regularSubDir) { + return [ + { + name: GEMINI_MD_FILENAME, + isFile: () => true, + isDirectory: () => false, + }, + ] as Dirent[]; + } + if (p === ignoredDir) { + return [ + { + name: GEMINI_MD_FILENAME, + isFile: () => true, + isDirectory: () => false, + }, + ] as Dirent[]; + } + return []; + }); + + const { memoryContent, fileCount } = await loadServerHierarchicalMemory( + CWD, + false, + ); + + const expectedContent = `--- Context from: ${path.join('my_code', GEMINI_MD_FILENAME)} ---\nMy code memory\n--- End of Context from: ${path.join('my_code', GEMINI_MD_FILENAME)} ---`; + + expect(memoryContent).toBe(expectedContent); + expect(fileCount).toBe(1); + expect(mockFs.readFile).not.toHaveBeenCalledWith( + ignoredDirGeminiFile, + 'utf-8', + ); + }); + + it('should respect MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY during downward scan', async () => { + const consoleDebugSpy = vi + .spyOn(console, 'debug') + .mockImplementation(() => {}); + + const dirNames: Dirent[] = []; + for (let i = 0; i < 250; i++) { + dirNames.push({ + name: `deep_dir_${i}`, + isFile: () => false, + isDirectory: () => true, + } as Dirent); + } + + mockFs.readdir.mockImplementation(async (p) => { + if (p === CWD) return dirNames; + if (p.toString().startsWith(path.join(CWD, 'deep_dir_'))) return []; + return []; + }); + mockFs.access.mockRejectedValue(new Error('not found')); + + await 
loadServerHierarchicalMemory(CWD, true); + + expect(consoleDebugSpy).toHaveBeenCalledWith( + expect.stringContaining('[DEBUG] [MemoryDiscovery]'), + expect.stringContaining( + 'Max directory scan limit (200) reached. Stopping downward scan at:', + ), + ); + consoleDebugSpy.mockRestore(); + }); +}); diff --git a/packages/server/src/utils/memoryDiscovery.ts b/packages/server/src/utils/memoryDiscovery.ts new file mode 100644 index 00000000..362134d8 --- /dev/null +++ b/packages/server/src/utils/memoryDiscovery.ts @@ -0,0 +1,351 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as fs from 'fs/promises'; +import * as fsSync from 'fs'; +import * as path from 'path'; +import { homedir } from 'os'; +import { GEMINI_CONFIG_DIR, GEMINI_MD_FILENAME } from '../tools/memoryTool.js'; + +// Simple console logger, similar to the one previously in CLI's config.ts +// TODO: Integrate with a more robust server-side logger if available/appropriate. +const logger = { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + debug: (...args: any[]) => + console.debug('[DEBUG] [MemoryDiscovery]', ...args), + // eslint-disable-next-line @typescript-eslint/no-explicit-any + warn: (...args: any[]) => console.warn('[WARN] [MemoryDiscovery]', ...args), + // eslint-disable-next-line @typescript-eslint/no-explicit-any + error: (...args: any[]) => + console.error('[ERROR] [MemoryDiscovery]', ...args), +}; + +// TODO(adh): Refactor to use a shared ignore list with other tools like glob and read-many-files. 
+const DEFAULT_IGNORE_DIRECTORIES = [ + 'node_modules', + '.git', + 'dist', + 'build', + 'out', + 'coverage', + '.vscode', + '.idea', + '.DS_Store', +]; + +const MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY = 200; + +interface GeminiFileContent { + filePath: string; + content: string | null; +} + +async function findProjectRoot(startDir: string): Promise { + let currentDir = path.resolve(startDir); + while (true) { + const gitPath = path.join(currentDir, '.git'); + try { + const stats = await fs.stat(gitPath); + if (stats.isDirectory()) { + return currentDir; + } + } catch (error: unknown) { + if (typeof error === 'object' && error !== null && 'code' in error) { + const fsError = error as { code: string; message: string }; + if (fsError.code !== 'ENOENT') { + logger.warn( + `Error checking for .git directory at ${gitPath}: ${fsError.message}`, + ); + } + } else { + logger.warn( + `Non-standard error checking for .git directory at ${gitPath}: ${String(error)}`, + ); + } + } + const parentDir = path.dirname(currentDir); + if (parentDir === currentDir) { + return null; + } + currentDir = parentDir; + } +} + +async function collectDownwardGeminiFiles( + directory: string, + debugMode: boolean, + ignoreDirs: string[], + scannedDirCount: { count: number }, + maxScanDirs: number, +): Promise { + if (scannedDirCount.count >= maxScanDirs) { + if (debugMode) + logger.debug( + `Max directory scan limit (${maxScanDirs}) reached. 
Stopping downward scan at: ${directory}`, + ); + return []; + } + scannedDirCount.count++; + + if (debugMode) + logger.debug( + `Scanning downward for ${GEMINI_MD_FILENAME} files in: ${directory} (scanned: ${scannedDirCount.count}/${maxScanDirs})`, + ); + const collectedPaths: string[] = []; + try { + const entries = await fs.readdir(directory, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = path.join(directory, entry.name); + if (entry.isDirectory()) { + if (ignoreDirs.includes(entry.name)) { + if (debugMode) + logger.debug(`Skipping ignored directory: ${fullPath}`); + continue; + } + const subDirPaths = await collectDownwardGeminiFiles( + fullPath, + debugMode, + ignoreDirs, + scannedDirCount, + maxScanDirs, + ); + collectedPaths.push(...subDirPaths); + } else if (entry.isFile() && entry.name === GEMINI_MD_FILENAME) { + try { + await fs.access(fullPath, fsSync.constants.R_OK); + collectedPaths.push(fullPath); + if (debugMode) + logger.debug( + `Found readable downward ${GEMINI_MD_FILENAME}: ${fullPath}`, + ); + } catch { + if (debugMode) + logger.debug( + `Downward ${GEMINI_MD_FILENAME} not readable, skipping: ${fullPath}`, + ); + } + } + } + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error);
+    logger.warn(`Error scanning directory ${directory}: ${message}`);
+    if (debugMode) logger.debug(`Failed to scan directory: ${directory}`);
+  }
+  return collectedPaths;
+}
+
+/**
+ * Collects the paths of every readable GEMINI.md file that applies to a
+ * working directory, in the order they should be read.
+ *
+ * Result order:
+ *  1. the global file under `<home>/<GEMINI_CONFIG_DIR>` (if readable),
+ *  2. files found walking upward from the CWD, outermost ancestor first,
+ *  3. files found by the downward scan of the CWD (sorted), skipping any path
+ *     that was already collected.
+ *
+ * @param currentWorkingDirectory Directory the search starts from.
+ * @param userHomePath Home directory used to locate the global file.
+ * @param debugMode When true, each step is reported through `logger.debug`.
+ * @returns Ordered, de-duplicated list of GEMINI.md paths.
+ */
+async function getGeminiMdFilePathsInternal(
+  currentWorkingDirectory: string,
+  userHomePath: string, // Keep userHomePath as a parameter for clarity
+  debugMode: boolean,
+): Promise<string[]> {
+  const resolvedCwd = path.resolve(currentWorkingDirectory);
+  const resolvedHome = path.resolve(userHomePath);
+  // Loop-invariant: computed once instead of on every upward iteration.
+  const globalConfigDir = path.join(resolvedHome, GEMINI_CONFIG_DIR);
+  const globalMemoryPath = path.join(
+    resolvedHome,
+    GEMINI_CONFIG_DIR,
+    GEMINI_MD_FILENAME,
+  );
+  const paths: string[] = [];
+
+  if (debugMode)
+    logger.debug(
+      `Searching for ${GEMINI_MD_FILENAME} starting from CWD: ${resolvedCwd}`,
+    );
+  if (debugMode) logger.debug(`User home directory: ${resolvedHome}`);
+
+  // 1. Global file: always first when it is readable.
+  try {
+    await fs.access(globalMemoryPath, fsSync.constants.R_OK);
+    paths.push(globalMemoryPath);
+    if (debugMode)
+      logger.debug(
+        `Found readable global ${GEMINI_MD_FILENAME}: ${globalMemoryPath}`,
+      );
+  } catch {
+    if (debugMode)
+      logger.debug(
+        `Global ${GEMINI_MD_FILENAME} not found or not readable: ${globalMemoryPath}`,
+      );
+  }
+
+  const projectRoot = await findProjectRoot(resolvedCwd);
+  if (debugMode)
+    logger.debug(`Determined project root: ${projectRoot ?? 'None'}`);
+
+  // 2. Upward scan from the CWD.
+  const upwardPaths: string[] = [];
+  let currentDir = resolvedCwd;
+  // Determine the directory that signifies the top of the project or user-specific space.
+  const ultimateStopDir = projectRoot
+    ? path.dirname(projectRoot)
+    : path.dirname(resolvedHome);
+
+  while (currentDir && currentDir !== path.dirname(currentDir)) {
+    // Loop until filesystem root or currentDir is empty
+    if (debugMode) {
+      logger.debug(
+        `Checking for ${GEMINI_MD_FILENAME} in (upward scan): ${currentDir}`,
+      );
+    }
+
+    // Skip the global .gemini directory itself during upward scan from CWD,
+    // as global is handled separately and explicitly first.
+    if (currentDir === globalConfigDir) {
+      if (debugMode) {
+        logger.debug(
+          `Upward scan reached global config dir path, stopping upward search here: ${currentDir}`,
+        );
+      }
+      break;
+    }
+
+    const potentialPath = path.join(currentDir, GEMINI_MD_FILENAME);
+    try {
+      await fs.access(potentialPath, fsSync.constants.R_OK);
+      // Add to upwardPaths only if it's not the already added globalMemoryPath
+      if (potentialPath !== globalMemoryPath) {
+        // unshift keeps the outermost ancestor at the front of the list.
+        upwardPaths.unshift(potentialPath);
+        if (debugMode) {
+          logger.debug(
+            `Found readable upward ${GEMINI_MD_FILENAME}: ${potentialPath}`,
+          );
+        }
+      }
+    } catch {
+      if (debugMode) {
+        logger.debug(
+          `Upward ${GEMINI_MD_FILENAME} not found or not readable in: ${currentDir}`,
+        );
+      }
+    }
+
+    // Stop condition: if currentDir is the ultimateStopDir, break after this iteration.
+    // The stop directory itself has already been checked above.
+    if (currentDir === ultimateStopDir) {
+      if (debugMode)
+        logger.debug(
+          `Reached ultimate stop directory for upward scan: ${currentDir}`,
+        );
+      break;
+    }
+
+    currentDir = path.dirname(currentDir);
+  }
+  paths.push(...upwardPaths);
+
+  // 3. Downward scan from the CWD.
+  if (debugMode)
+    logger.debug(`Starting downward scan from CWD: ${resolvedCwd}`);
+  const scannedDirCount = { count: 0 };
+  const downwardPaths = await collectDownwardGeminiFiles(
+    resolvedCwd,
+    debugMode,
+    DEFAULT_IGNORE_DIRECTORIES,
+    scannedDirCount,
+    MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
+  );
+  downwardPaths.sort(); // Sort for consistent ordering, though hierarchy might be more complex
+  if (debugMode && downwardPaths.length > 0)
+    logger.debug(
+      `Found downward ${GEMINI_MD_FILENAME} files (sorted): ${JSON.stringify(downwardPaths)}`,
+    );
+  // Add downward paths only if they haven't been included already (e.g. from upward scan)
+  const seenPaths = new Set(paths);
+  for (const dPath of downwardPaths) {
+    if (!seenPaths.has(dPath)) {
+      seenPaths.add(dPath);
+      paths.push(dPath);
+    }
+  }
+
+  if (debugMode)
+    logger.debug(
+      `Final ordered ${GEMINI_MD_FILENAME} paths to read: ${JSON.stringify(paths)}`,
+    );
+  return paths;
+}
+
+/**
+ * Reads each listed file as UTF-8, one after another, preserving input order.
+ *
+ * A file that cannot be read is reported with `logger.warn` and still appears
+ * in the result with `content: null`, so the output has one entry per input.
+ *
+ * @param filePaths Paths to read, in the order they should be returned.
+ * @param debugMode When true, each success or failure is also debug-logged.
+ * @returns One `{ filePath, content }` entry per input path.
+ */
+async function readGeminiMdFiles(
+  filePaths: string[],
+  debugMode: boolean,
+): Promise<GeminiFileContent[]> {
+  const results: GeminiFileContent[] = [];
+  for (const filePath of filePaths) {
+    try {
+      const content = await fs.readFile(filePath, 'utf-8');
+      results.push({ filePath, content });
+      if (debugMode)
+        logger.debug(
+          `Successfully read: ${filePath} (Length: ${content.length})`,
+        );
+    } catch (error: unknown) {
+      const message = error instanceof Error ? error.message : String(error);
+      logger.warn(
+        `Warning: Could not read ${GEMINI_MD_FILENAME} file at ${filePath}. Error: ${message}`,
+      );
+      results.push({ filePath, content: null }); // Still include it with null content
+      if (debugMode) logger.debug(`Failed to read: ${filePath}`);
+    }
+  }
+  return results;
+}
+
+/**
+ * Joins the non-empty file contents into one string, wrapping each in
+ * "Context from" / "End of Context from" marker lines.
+ *
+ * Entries whose content is not a string, or is empty after trimming, are
+ * skipped. Blocks are separated by a blank line.
+ *
+ * @param instructionContents File entries in the order they should appear.
+ * @param currentWorkingDirectoryForDisplay Base directory used to shorten
+ *   absolute paths in the markers; non-absolute paths are shown as given.
+ * @returns The concatenated blocks, or an empty string if nothing qualifies.
+ */
+function concatenateInstructions(
+  instructionContents: GeminiFileContent[],
+  // CWD is needed to resolve relative paths for display markers
+  currentWorkingDirectoryForDisplay: string,
+): string {
+  const blocks: string[] = [];
+  for (const { filePath, content } of instructionContents) {
+    // Narrowing here replaces the former `as string` assertion.
+    if (typeof content !== 'string') {
+      continue;
+    }
+    const trimmedContent = content.trim();
+    if (trimmedContent.length === 0) {
+      continue;
+    }
+    const displayPath = path.isAbsolute(filePath)
+      ? path.relative(currentWorkingDirectoryForDisplay, filePath)
+      : filePath;
+    blocks.push(
+      `--- Context from: ${displayPath} ---\n${trimmedContent}\n--- End of Context from: ${displayPath} ---`,
+    );
+  }
+  return blocks.join('\n\n');
+}
+
+/**
+ * Loads hierarchical GEMINI.md files and concatenates their content.
+ * This function is intended for use by the server.
+ */
+export async function loadServerHierarchicalMemory(
+  currentWorkingDirectory: string,
+  debugMode: boolean,
+): Promise<{ memoryContent: string; fileCount: number }> {
+  if (debugMode) {
+    logger.debug(
+      `Loading server hierarchical memory for CWD: ${currentWorkingDirectory}`,
+    );
+  }
+
+  // The home directory is that of the server process; it is only used to
+  // locate the global GEMINI.md file.
+  const discoveredPaths = await getGeminiMdFilePathsInternal(
+    currentWorkingDirectory,
+    homedir(),
+    debugMode,
+  );
+
+  // Nothing discovered: return the empty result without reading anything.
+  if (discoveredPaths.length === 0) {
+    if (debugMode) logger.debug('No GEMINI.md files found in hierarchy.');
+    return { memoryContent: '', fileCount: 0 };
+  }
+
+  const fileEntries = await readGeminiMdFiles(discoveredPaths, debugMode);
+  // The CWD is passed so marker paths are shown relative to it.
+  const memoryContent = concatenateInstructions(
+    fileEntries,
+    currentWorkingDirectory,
+  );
+
+  if (debugMode) {
+    logger.debug(`Combined instructions length: ${memoryContent.length}`);
+    if (memoryContent.length > 0) {
+      logger.debug(
+        `Combined instructions (snippet): ${memoryContent.substring(0, 500)}...`,
+      );
+    }
+  }
+
+  // fileCount is the number of discovered paths, not the number of files
+  // that contributed non-empty content.
+  return { memoryContent, fileCount: discoveredPaths.length };
+}