perf(core): parallelize memory discovery file operations performance gain (#5751)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
This commit is contained in:
JaeHo Jang 2025-08-22 03:21:04 +09:00 committed by GitHub
parent 714b3dab73
commit 1e5ead6960
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 154 additions and 37 deletions

View File

@ -368,4 +368,75 @@ describe('loadServerHierarchicalMemory', () => {
fileCount: 1,
});
});
it('should handle multiple directories and files in parallel correctly', async () => {
// Create multiple test directories with GEMINI.md files
const numDirs = 5;
const createdFiles: string[] = [];
for (let i = 0; i < numDirs; i++) {
const dirPath = await createEmptyDir(
path.join(testRootDir, `project-${i}`),
);
const filePath = await createTestFile(
path.join(dirPath, DEFAULT_CONTEXT_FILENAME),
`Content from project ${i}`,
);
createdFiles.push(filePath);
}
// Load memory from all directories
const result = await loadServerHierarchicalMemory(
cwd,
createdFiles.map((f) => path.dirname(f)),
false,
new FileDiscoveryService(projectRoot),
);
// Should have loaded all files
expect(result.fileCount).toBe(numDirs);
// Content should include all project contents
for (let i = 0; i < numDirs; i++) {
expect(result.memoryContent).toContain(`Content from project ${i}`);
}
});
it('should preserve order and prevent duplicates when processing multiple directories', async () => {
// Create overlapping directory structure
const parentDir = await createEmptyDir(path.join(testRootDir, 'parent'));
const childDir = await createEmptyDir(path.join(parentDir, 'child'));
await createTestFile(
path.join(parentDir, DEFAULT_CONTEXT_FILENAME),
'Parent content',
);
await createTestFile(
path.join(childDir, DEFAULT_CONTEXT_FILENAME),
'Child content',
);
// Include both parent and child directories
const result = await loadServerHierarchicalMemory(
parentDir,
[childDir, parentDir], // Deliberately include duplicates
false,
new FileDiscoveryService(projectRoot),
);
// Should have both files without duplicates
expect(result.fileCount).toBe(2);
expect(result.memoryContent).toContain('Parent content');
expect(result.memoryContent).toContain('Child content');
// Check that files are not duplicated
const parentOccurrences = (
result.memoryContent.match(/Parent content/g) || []
).length;
const childOccurrences = (
result.memoryContent.match(/Child content/g) || []
).length;
expect(parentOccurrences).toBe(1);
expect(childOccurrences).toBe(1);
});
});

View File

@ -96,19 +96,41 @@ async function getGeminiMdFilePathsInternal(
...includeDirectoriesToReadGemini,
currentWorkingDirectory,
]);
const paths = [];
for (const dir of dirs) {
const pathsByDir = await getGeminiMdFilePathsInternalForEachDir(
dir,
userHomePath,
debugMode,
fileService,
extensionContextFilePaths,
fileFilteringOptions,
maxDirs,
// Process directories in parallel with concurrency limit to prevent EMFILE errors
const CONCURRENT_LIMIT = 10;
const dirsArray = Array.from(dirs);
const pathsArrays: string[][] = [];
for (let i = 0; i < dirsArray.length; i += CONCURRENT_LIMIT) {
const batch = dirsArray.slice(i, i + CONCURRENT_LIMIT);
const batchPromises = batch.map((dir) =>
getGeminiMdFilePathsInternalForEachDir(
dir,
userHomePath,
debugMode,
fileService,
extensionContextFilePaths,
fileFilteringOptions,
maxDirs,
),
);
paths.push(...pathsByDir);
const batchResults = await Promise.allSettled(batchPromises);
for (const result of batchResults) {
if (result.status === 'fulfilled') {
pathsArrays.push(result.value);
} else {
const error = result.reason;
const message = error instanceof Error ? error.message : String(error);
logger.error(`Error discovering files in directory: ${message}`);
// Continue processing other directories
}
}
}
const paths = pathsArrays.flat();
return Array.from(new Set<string>(paths));
}
@ -226,39 +248,63 @@ async function readGeminiMdFiles(
debugMode: boolean,
importFormat: 'flat' | 'tree' = 'tree',
): Promise<GeminiFileContent[]> {
// Process files in parallel with concurrency limit to prevent EMFILE errors
const CONCURRENT_LIMIT = 20; // Higher limit for file reads as they're typically faster
const results: GeminiFileContent[] = [];
for (const filePath of filePaths) {
try {
const content = await fs.readFile(filePath, 'utf-8');
// Process imports in the content
const processedResult = await processImports(
content,
path.dirname(filePath),
debugMode,
undefined,
undefined,
importFormat,
);
for (let i = 0; i < filePaths.length; i += CONCURRENT_LIMIT) {
const batch = filePaths.slice(i, i + CONCURRENT_LIMIT);
const batchPromises = batch.map(
async (filePath): Promise<GeminiFileContent> => {
try {
const content = await fs.readFile(filePath, 'utf-8');
results.push({ filePath, content: processedResult.content });
if (debugMode)
logger.debug(
`Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`,
);
} catch (error: unknown) {
const isTestEnv =
process.env['NODE_ENV'] === 'test' || process.env['VITEST'];
if (!isTestEnv) {
// Process imports in the content
const processedResult = await processImports(
content,
path.dirname(filePath),
debugMode,
undefined,
undefined,
importFormat,
);
if (debugMode)
logger.debug(
`Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`,
);
return { filePath, content: processedResult.content };
} catch (error: unknown) {
const isTestEnv =
process.env['NODE_ENV'] === 'test' || process.env['VITEST'];
if (!isTestEnv) {
const message =
error instanceof Error ? error.message : String(error);
logger.warn(
`Warning: Could not read ${getAllGeminiMdFilenames()} file at ${filePath}. Error: ${message}`,
);
}
if (debugMode) logger.debug(`Failed to read: ${filePath}`);
return { filePath, content: null }; // Still include it with null content
}
},
);
const batchResults = await Promise.allSettled(batchPromises);
for (const result of batchResults) {
if (result.status === 'fulfilled') {
results.push(result.value);
} else {
// This case shouldn't happen since we catch all errors above,
// but handle it for completeness
const error = result.reason;
const message = error instanceof Error ? error.message : String(error);
logger.warn(
`Warning: Could not read ${getAllGeminiMdFilenames()} file at ${filePath}. Error: ${message}`,
);
logger.error(`Unexpected error processing file: ${message}`);
}
results.push({ filePath, content: null }); // Still include it with null content
if (debugMode) logger.debug(`Failed to read: ${filePath}`);
}
}
return results;
}