diff --git a/packages/core/src/utils/memoryDiscovery.test.ts b/packages/core/src/utils/memoryDiscovery.test.ts index d8f3ccc5..c67427e9 100644 --- a/packages/core/src/utils/memoryDiscovery.test.ts +++ b/packages/core/src/utils/memoryDiscovery.test.ts @@ -368,4 +368,75 @@ describe('loadServerHierarchicalMemory', () => { fileCount: 1, }); }); + + it('should handle multiple directories and files in parallel correctly', async () => { + // Create multiple test directories with GEMINI.md files + const numDirs = 5; + const createdFiles: string[] = []; + + for (let i = 0; i < numDirs; i++) { + const dirPath = await createEmptyDir( + path.join(testRootDir, `project-${i}`), + ); + const filePath = await createTestFile( + path.join(dirPath, DEFAULT_CONTEXT_FILENAME), + `Content from project ${i}`, + ); + createdFiles.push(filePath); + } + + // Load memory from all directories + const result = await loadServerHierarchicalMemory( + cwd, + createdFiles.map((f) => path.dirname(f)), + false, + new FileDiscoveryService(projectRoot), + ); + + // Should have loaded all files + expect(result.fileCount).toBe(numDirs); + + // Content should include all project contents + for (let i = 0; i < numDirs; i++) { + expect(result.memoryContent).toContain(`Content from project ${i}`); + } + }); + + it('should preserve order and prevent duplicates when processing multiple directories', async () => { + // Create overlapping directory structure + const parentDir = await createEmptyDir(path.join(testRootDir, 'parent')); + const childDir = await createEmptyDir(path.join(parentDir, 'child')); + + await createTestFile( + path.join(parentDir, DEFAULT_CONTEXT_FILENAME), + 'Parent content', + ); + await createTestFile( + path.join(childDir, DEFAULT_CONTEXT_FILENAME), + 'Child content', + ); + + // Include both parent and child directories + const result = await loadServerHierarchicalMemory( + parentDir, + [childDir, parentDir], // Deliberate overlap: parentDir is also the first argument and contains childDir + false, + new 
FileDiscoveryService(projectRoot), + ); + + // Should have both files without duplicates + expect(result.fileCount).toBe(2); + expect(result.memoryContent).toContain('Parent content'); + expect(result.memoryContent).toContain('Child content'); + + // Check that files are not duplicated + const parentOccurrences = ( + result.memoryContent.match(/Parent content/g) || [] + ).length; + const childOccurrences = ( + result.memoryContent.match(/Child content/g) || [] + ).length; + expect(parentOccurrences).toBe(1); + expect(childOccurrences).toBe(1); + }); }); diff --git a/packages/core/src/utils/memoryDiscovery.ts b/packages/core/src/utils/memoryDiscovery.ts index d3c24baf..d2eff39c 100644 --- a/packages/core/src/utils/memoryDiscovery.ts +++ b/packages/core/src/utils/memoryDiscovery.ts @@ -96,19 +96,41 @@ async function getGeminiMdFilePathsInternal( ...includeDirectoriesToReadGemini, currentWorkingDirectory, ]); - const paths = []; - for (const dir of dirs) { - const pathsByDir = await getGeminiMdFilePathsInternalForEachDir( - dir, - userHomePath, - debugMode, - fileService, - extensionContextFilePaths, - fileFilteringOptions, - maxDirs, + + // Process directories in parallel with concurrency limit to prevent EMFILE errors + const CONCURRENT_LIMIT = 10; + const dirsArray = Array.from(dirs); + const pathsArrays: string[][] = []; + + for (let i = 0; i < dirsArray.length; i += CONCURRENT_LIMIT) { + const batch = dirsArray.slice(i, i + CONCURRENT_LIMIT); + const batchPromises = batch.map((dir) => + getGeminiMdFilePathsInternalForEachDir( + dir, + userHomePath, + debugMode, + fileService, + extensionContextFilePaths, + fileFilteringOptions, + maxDirs, + ), ); - paths.push(...pathsByDir); + + const batchResults = await Promise.allSettled(batchPromises); + + for (const result of batchResults) { + if (result.status === 'fulfilled') { + pathsArrays.push(result.value); + } else { + const error = result.reason; + const message = error instanceof Error ? 
error.message : String(error); + logger.error(`Error discovering files in directory: ${message}`); + // Continue processing other directories + } + } } + + const paths = pathsArrays.flat(); return Array.from(new Set(paths)); } @@ -226,39 +248,63 @@ async function readGeminiMdFiles( debugMode: boolean, importFormat: 'flat' | 'tree' = 'tree', ): Promise<GeminiFileContent[]> { + // Process files in parallel with concurrency limit to prevent EMFILE errors + const CONCURRENT_LIMIT = 20; // Higher limit for file reads as they're typically faster const results: GeminiFileContent[] = []; - for (const filePath of filePaths) { - try { - const content = await fs.readFile(filePath, 'utf-8'); - // Process imports in the content - const processedResult = await processImports( - content, - path.dirname(filePath), - debugMode, - undefined, - undefined, - importFormat, - ); + for (let i = 0; i < filePaths.length; i += CONCURRENT_LIMIT) { + const batch = filePaths.slice(i, i + CONCURRENT_LIMIT); + const batchPromises = batch.map( + async (filePath): Promise<GeminiFileContent> => { + try { + const content = await fs.readFile(filePath, 'utf-8'); - results.push({ filePath, content: processedResult.content }); - if (debugMode) - logger.debug( - `Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`, - ); - } catch (error: unknown) { - const isTestEnv = - process.env['NODE_ENV'] === 'test' || process.env['VITEST']; - if (!isTestEnv) { + // Process imports in the content + const processedResult = await processImports( + content, + path.dirname(filePath), + debugMode, + undefined, + undefined, + importFormat, + ); + if (debugMode) + logger.debug( + `Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`, + ); + + return { filePath, content: processedResult.content }; + } catch (error: unknown) { + const isTestEnv = + process.env['NODE_ENV'] === 'test' || process.env['VITEST']; + if (!isTestEnv) { + const message = + error instanceof Error ? 
error.message : String(error); + logger.warn( + `Warning: Could not read ${getAllGeminiMdFilenames()} file at ${filePath}. Error: ${message}`, + ); + } + if (debugMode) logger.debug(`Failed to read: ${filePath}`); + return { filePath, content: null }; // Still include it with null content + } + }, + ); + + const batchResults = await Promise.allSettled(batchPromises); + + for (const result of batchResults) { + if (result.status === 'fulfilled') { + results.push(result.value); + } else { + // This case shouldn't happen since we catch all errors above, + // but handle it for completeness + const error = result.reason; const message = error instanceof Error ? error.message : String(error); - logger.warn( - `Warning: Could not read ${getAllGeminiMdFilenames()} file at ${filePath}. Error: ${message}`, - ); + logger.error(`Unexpected error processing file: ${message}`); } - results.push({ filePath, content: null }); // Still include it with null content - if (debugMode) logger.debug(`Failed to read: ${filePath}`); } } + return results; }