perf(core): parallelize memory discovery file operations performance gain (#5751)
Co-authored-by: Jacob Richman <jacob314@gmail.com>
This commit is contained in:
parent
714b3dab73
commit
1e5ead6960
|
@ -368,4 +368,75 @@ describe('loadServerHierarchicalMemory', () => {
|
||||||
fileCount: 1,
|
fileCount: 1,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should handle multiple directories and files in parallel correctly', async () => {
|
||||||
|
// Create multiple test directories with GEMINI.md files
|
||||||
|
const numDirs = 5;
|
||||||
|
const createdFiles: string[] = [];
|
||||||
|
|
||||||
|
for (let i = 0; i < numDirs; i++) {
|
||||||
|
const dirPath = await createEmptyDir(
|
||||||
|
path.join(testRootDir, `project-${i}`),
|
||||||
|
);
|
||||||
|
const filePath = await createTestFile(
|
||||||
|
path.join(dirPath, DEFAULT_CONTEXT_FILENAME),
|
||||||
|
`Content from project ${i}`,
|
||||||
|
);
|
||||||
|
createdFiles.push(filePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load memory from all directories
|
||||||
|
const result = await loadServerHierarchicalMemory(
|
||||||
|
cwd,
|
||||||
|
createdFiles.map((f) => path.dirname(f)),
|
||||||
|
false,
|
||||||
|
new FileDiscoveryService(projectRoot),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Should have loaded all files
|
||||||
|
expect(result.fileCount).toBe(numDirs);
|
||||||
|
|
||||||
|
// Content should include all project contents
|
||||||
|
for (let i = 0; i < numDirs; i++) {
|
||||||
|
expect(result.memoryContent).toContain(`Content from project ${i}`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should preserve order and prevent duplicates when processing multiple directories', async () => {
|
||||||
|
// Create overlapping directory structure
|
||||||
|
const parentDir = await createEmptyDir(path.join(testRootDir, 'parent'));
|
||||||
|
const childDir = await createEmptyDir(path.join(parentDir, 'child'));
|
||||||
|
|
||||||
|
await createTestFile(
|
||||||
|
path.join(parentDir, DEFAULT_CONTEXT_FILENAME),
|
||||||
|
'Parent content',
|
||||||
|
);
|
||||||
|
await createTestFile(
|
||||||
|
path.join(childDir, DEFAULT_CONTEXT_FILENAME),
|
||||||
|
'Child content',
|
||||||
|
);
|
||||||
|
|
||||||
|
// Include both parent and child directories
|
||||||
|
const result = await loadServerHierarchicalMemory(
|
||||||
|
parentDir,
|
||||||
|
[childDir, parentDir], // Deliberately include duplicates
|
||||||
|
false,
|
||||||
|
new FileDiscoveryService(projectRoot),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Should have both files without duplicates
|
||||||
|
expect(result.fileCount).toBe(2);
|
||||||
|
expect(result.memoryContent).toContain('Parent content');
|
||||||
|
expect(result.memoryContent).toContain('Child content');
|
||||||
|
|
||||||
|
// Check that files are not duplicated
|
||||||
|
const parentOccurrences = (
|
||||||
|
result.memoryContent.match(/Parent content/g) || []
|
||||||
|
).length;
|
||||||
|
const childOccurrences = (
|
||||||
|
result.memoryContent.match(/Child content/g) || []
|
||||||
|
).length;
|
||||||
|
expect(parentOccurrences).toBe(1);
|
||||||
|
expect(childOccurrences).toBe(1);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -96,9 +96,16 @@ async function getGeminiMdFilePathsInternal(
|
||||||
...includeDirectoriesToReadGemini,
|
...includeDirectoriesToReadGemini,
|
||||||
currentWorkingDirectory,
|
currentWorkingDirectory,
|
||||||
]);
|
]);
|
||||||
const paths = [];
|
|
||||||
for (const dir of dirs) {
|
// Process directories in parallel with concurrency limit to prevent EMFILE errors
|
||||||
const pathsByDir = await getGeminiMdFilePathsInternalForEachDir(
|
const CONCURRENT_LIMIT = 10;
|
||||||
|
const dirsArray = Array.from(dirs);
|
||||||
|
const pathsArrays: string[][] = [];
|
||||||
|
|
||||||
|
for (let i = 0; i < dirsArray.length; i += CONCURRENT_LIMIT) {
|
||||||
|
const batch = dirsArray.slice(i, i + CONCURRENT_LIMIT);
|
||||||
|
const batchPromises = batch.map((dir) =>
|
||||||
|
getGeminiMdFilePathsInternalForEachDir(
|
||||||
dir,
|
dir,
|
||||||
userHomePath,
|
userHomePath,
|
||||||
debugMode,
|
debugMode,
|
||||||
|
@ -106,9 +113,24 @@ async function getGeminiMdFilePathsInternal(
|
||||||
extensionContextFilePaths,
|
extensionContextFilePaths,
|
||||||
fileFilteringOptions,
|
fileFilteringOptions,
|
||||||
maxDirs,
|
maxDirs,
|
||||||
|
),
|
||||||
);
|
);
|
||||||
paths.push(...pathsByDir);
|
|
||||||
|
const batchResults = await Promise.allSettled(batchPromises);
|
||||||
|
|
||||||
|
for (const result of batchResults) {
|
||||||
|
if (result.status === 'fulfilled') {
|
||||||
|
pathsArrays.push(result.value);
|
||||||
|
} else {
|
||||||
|
const error = result.reason;
|
||||||
|
const message = error instanceof Error ? error.message : String(error);
|
||||||
|
logger.error(`Error discovering files in directory: ${message}`);
|
||||||
|
// Continue processing other directories
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const paths = pathsArrays.flat();
|
||||||
return Array.from(new Set<string>(paths));
|
return Array.from(new Set<string>(paths));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -226,8 +248,14 @@ async function readGeminiMdFiles(
|
||||||
debugMode: boolean,
|
debugMode: boolean,
|
||||||
importFormat: 'flat' | 'tree' = 'tree',
|
importFormat: 'flat' | 'tree' = 'tree',
|
||||||
): Promise<GeminiFileContent[]> {
|
): Promise<GeminiFileContent[]> {
|
||||||
|
// Process files in parallel with concurrency limit to prevent EMFILE errors
|
||||||
|
const CONCURRENT_LIMIT = 20; // Higher limit for file reads as they're typically faster
|
||||||
const results: GeminiFileContent[] = [];
|
const results: GeminiFileContent[] = [];
|
||||||
for (const filePath of filePaths) {
|
|
||||||
|
for (let i = 0; i < filePaths.length; i += CONCURRENT_LIMIT) {
|
||||||
|
const batch = filePaths.slice(i, i + CONCURRENT_LIMIT);
|
||||||
|
const batchPromises = batch.map(
|
||||||
|
async (filePath): Promise<GeminiFileContent> => {
|
||||||
try {
|
try {
|
||||||
const content = await fs.readFile(filePath, 'utf-8');
|
const content = await fs.readFile(filePath, 'utf-8');
|
||||||
|
|
||||||
|
@ -240,25 +268,43 @@ async function readGeminiMdFiles(
|
||||||
undefined,
|
undefined,
|
||||||
importFormat,
|
importFormat,
|
||||||
);
|
);
|
||||||
|
|
||||||
results.push({ filePath, content: processedResult.content });
|
|
||||||
if (debugMode)
|
if (debugMode)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
`Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`,
|
`Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
return { filePath, content: processedResult.content };
|
||||||
} catch (error: unknown) {
|
} catch (error: unknown) {
|
||||||
const isTestEnv =
|
const isTestEnv =
|
||||||
process.env['NODE_ENV'] === 'test' || process.env['VITEST'];
|
process.env['NODE_ENV'] === 'test' || process.env['VITEST'];
|
||||||
if (!isTestEnv) {
|
if (!isTestEnv) {
|
||||||
const message = error instanceof Error ? error.message : String(error);
|
const message =
|
||||||
|
error instanceof Error ? error.message : String(error);
|
||||||
logger.warn(
|
logger.warn(
|
||||||
`Warning: Could not read ${getAllGeminiMdFilenames()} file at ${filePath}. Error: ${message}`,
|
`Warning: Could not read ${getAllGeminiMdFilenames()} file at ${filePath}. Error: ${message}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
results.push({ filePath, content: null }); // Still include it with null content
|
|
||||||
if (debugMode) logger.debug(`Failed to read: ${filePath}`);
|
if (debugMode) logger.debug(`Failed to read: ${filePath}`);
|
||||||
|
return { filePath, content: null }; // Still include it with null content
|
||||||
|
}
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
const batchResults = await Promise.allSettled(batchPromises);
|
||||||
|
|
||||||
|
for (const result of batchResults) {
|
||||||
|
if (result.status === 'fulfilled') {
|
||||||
|
results.push(result.value);
|
||||||
|
} else {
|
||||||
|
// This case shouldn't happen since we catch all errors above,
|
||||||
|
// but handle it for completeness
|
||||||
|
const error = result.reason;
|
||||||
|
const message = error instanceof Error ? error.message : String(error);
|
||||||
|
logger.error(`Unexpected error processing file: ${message}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue