perf(core): parallelize memory discovery file operations performance gain (#5751)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
This commit is contained in:
JaeHo Jang 2025-08-22 03:21:04 +09:00 committed by GitHub
parent 714b3dab73
commit 1e5ead6960
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 154 additions and 37 deletions

View File

@ -368,4 +368,75 @@ describe('loadServerHierarchicalMemory', () => {
fileCount: 1, fileCount: 1,
}); });
}); });
it('should handle multiple directories and files in parallel correctly', async () => {
// Create multiple test directories with GEMINI.md files
const numDirs = 5;
const createdFiles: string[] = [];
for (let i = 0; i < numDirs; i++) {
const dirPath = await createEmptyDir(
path.join(testRootDir, `project-${i}`),
);
const filePath = await createTestFile(
path.join(dirPath, DEFAULT_CONTEXT_FILENAME),
`Content from project ${i}`,
);
createdFiles.push(filePath);
}
// Load memory from all directories
const result = await loadServerHierarchicalMemory(
cwd,
createdFiles.map((f) => path.dirname(f)),
false,
new FileDiscoveryService(projectRoot),
);
// Should have loaded all files
expect(result.fileCount).toBe(numDirs);
// Content should include all project contents
for (let i = 0; i < numDirs; i++) {
expect(result.memoryContent).toContain(`Content from project ${i}`);
}
});
it('should preserve order and prevent duplicates when processing multiple directories', async () => {
// Create overlapping directory structure
const parentDir = await createEmptyDir(path.join(testRootDir, 'parent'));
const childDir = await createEmptyDir(path.join(parentDir, 'child'));
await createTestFile(
path.join(parentDir, DEFAULT_CONTEXT_FILENAME),
'Parent content',
);
await createTestFile(
path.join(childDir, DEFAULT_CONTEXT_FILENAME),
'Child content',
);
// Include both parent and child directories
const result = await loadServerHierarchicalMemory(
parentDir,
[childDir, parentDir], // Deliberately include duplicates
false,
new FileDiscoveryService(projectRoot),
);
// Should have both files without duplicates
expect(result.fileCount).toBe(2);
expect(result.memoryContent).toContain('Parent content');
expect(result.memoryContent).toContain('Child content');
// Check that files are not duplicated
const parentOccurrences = (
result.memoryContent.match(/Parent content/g) || []
).length;
const childOccurrences = (
result.memoryContent.match(/Child content/g) || []
).length;
expect(parentOccurrences).toBe(1);
expect(childOccurrences).toBe(1);
});
}); });

View File

@ -96,9 +96,16 @@ async function getGeminiMdFilePathsInternal(
...includeDirectoriesToReadGemini, ...includeDirectoriesToReadGemini,
currentWorkingDirectory, currentWorkingDirectory,
]); ]);
const paths = [];
for (const dir of dirs) { // Process directories in parallel with concurrency limit to prevent EMFILE errors
const pathsByDir = await getGeminiMdFilePathsInternalForEachDir( const CONCURRENT_LIMIT = 10;
const dirsArray = Array.from(dirs);
const pathsArrays: string[][] = [];
for (let i = 0; i < dirsArray.length; i += CONCURRENT_LIMIT) {
const batch = dirsArray.slice(i, i + CONCURRENT_LIMIT);
const batchPromises = batch.map((dir) =>
getGeminiMdFilePathsInternalForEachDir(
dir, dir,
userHomePath, userHomePath,
debugMode, debugMode,
@ -106,9 +113,24 @@ async function getGeminiMdFilePathsInternal(
extensionContextFilePaths, extensionContextFilePaths,
fileFilteringOptions, fileFilteringOptions,
maxDirs, maxDirs,
),
); );
paths.push(...pathsByDir);
const batchResults = await Promise.allSettled(batchPromises);
for (const result of batchResults) {
if (result.status === 'fulfilled') {
pathsArrays.push(result.value);
} else {
const error = result.reason;
const message = error instanceof Error ? error.message : String(error);
logger.error(`Error discovering files in directory: ${message}`);
// Continue processing other directories
} }
}
}
const paths = pathsArrays.flat();
return Array.from(new Set<string>(paths)); return Array.from(new Set<string>(paths));
} }
@ -226,8 +248,14 @@ async function readGeminiMdFiles(
debugMode: boolean, debugMode: boolean,
importFormat: 'flat' | 'tree' = 'tree', importFormat: 'flat' | 'tree' = 'tree',
): Promise<GeminiFileContent[]> { ): Promise<GeminiFileContent[]> {
// Process files in parallel with concurrency limit to prevent EMFILE errors
const CONCURRENT_LIMIT = 20; // Higher limit for file reads as they're typically faster
const results: GeminiFileContent[] = []; const results: GeminiFileContent[] = [];
for (const filePath of filePaths) {
for (let i = 0; i < filePaths.length; i += CONCURRENT_LIMIT) {
const batch = filePaths.slice(i, i + CONCURRENT_LIMIT);
const batchPromises = batch.map(
async (filePath): Promise<GeminiFileContent> => {
try { try {
const content = await fs.readFile(filePath, 'utf-8'); const content = await fs.readFile(filePath, 'utf-8');
@ -240,25 +268,43 @@ async function readGeminiMdFiles(
undefined, undefined,
importFormat, importFormat,
); );
results.push({ filePath, content: processedResult.content });
if (debugMode) if (debugMode)
logger.debug( logger.debug(
`Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`, `Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`,
); );
return { filePath, content: processedResult.content };
} catch (error: unknown) { } catch (error: unknown) {
const isTestEnv = const isTestEnv =
process.env['NODE_ENV'] === 'test' || process.env['VITEST']; process.env['NODE_ENV'] === 'test' || process.env['VITEST'];
if (!isTestEnv) { if (!isTestEnv) {
const message = error instanceof Error ? error.message : String(error); const message =
error instanceof Error ? error.message : String(error);
logger.warn( logger.warn(
`Warning: Could not read ${getAllGeminiMdFilenames()} file at ${filePath}. Error: ${message}`, `Warning: Could not read ${getAllGeminiMdFilenames()} file at ${filePath}. Error: ${message}`,
); );
} }
results.push({ filePath, content: null }); // Still include it with null content
if (debugMode) logger.debug(`Failed to read: ${filePath}`); if (debugMode) logger.debug(`Failed to read: ${filePath}`);
return { filePath, content: null }; // Still include it with null content
}
},
);
const batchResults = await Promise.allSettled(batchPromises);
for (const result of batchResults) {
if (result.status === 'fulfilled') {
results.push(result.value);
} else {
// This case shouldn't happen since we catch all errors above,
// but handle it for completeness
const error = result.reason;
const message = error instanceof Error ? error.message : String(error);
logger.error(`Unexpected error processing file: ${message}`);
} }
} }
}
return results; return results;
} }