From bcce1e7b84f68383af54b58c04da84d859b60950 Mon Sep 17 00:00:00 2001
From: Hyunsu Shin <58941022+scato3@users.noreply.github.com>
Date: Thu, 31 Jul 2025 02:32:03 +0900
Subject: [PATCH] perf(core): parallelize bfsFileSearch for 40% faster CLI startup (#5185)

---
 packages/core/src/utils/bfsFileSearch.test.ts | 75 +++++++++++++++
 packages/core/src/utils/bfsFileSearch.ts      | 96 ++++++++++++-------
 2 files changed, 138 insertions(+), 33 deletions(-)

diff --git a/packages/core/src/utils/bfsFileSearch.test.ts b/packages/core/src/utils/bfsFileSearch.test.ts
index 63198a8d..3d5a0010 100644
--- a/packages/core/src/utils/bfsFileSearch.test.ts
+++ b/packages/core/src/utils/bfsFileSearch.test.ts
@@ -189,4 +189,79 @@ describe('bfsFileSearch', () => {
       expect(result.sort()).toEqual([target1, target2].sort());
     });
   });
+
+  it('should perform parallel directory scanning efficiently (performance test)', async () => {
+    // Create a more complex directory structure for performance testing
+    console.log('\nšŸš€ Testing Parallel BFS Performance...');
+
+    // Create 50 directories with multiple levels for faster test execution
+    for (let i = 0; i < 50; i++) {
+      await createEmptyDir(`dir${i}`);
+      await createEmptyDir(`dir${i}`, 'subdir1');
+      await createEmptyDir(`dir${i}`, 'subdir2');
+      await createEmptyDir(`dir${i}`, 'subdir1', 'deep');
+      if (i < 10) {
+        // Add target files in some directories
+        await createTestFile('content', `dir${i}`, 'GEMINI.md');
+        await createTestFile('content', `dir${i}`, 'subdir1', 'GEMINI.md');
+      }
+    }
+
+    // Run multiple iterations to ensure consistency
+    const iterations = 3;
+    const durations: number[] = [];
+    let foundFiles = 0;
+    let firstResultSorted: string[] | undefined;
+
+    for (let i = 0; i < iterations; i++) {
+      const searchStartTime = performance.now();
+      const result = await bfsFileSearch(testRootDir, {
+        fileName: 'GEMINI.md',
+        maxDirs: 200,
+        debug: false,
+      });
+      const duration = performance.now() - searchStartTime;
+      durations.push(duration);
+
+      // Verify consistency: all iterations should find the exact same files
+      if (firstResultSorted === undefined) {
+        foundFiles = result.length;
+        firstResultSorted = result.sort();
+      } else {
+        expect(result.sort()).toEqual(firstResultSorted);
+      }
+
+      console.log(`šŸ“Š Iteration ${i + 1}: ${duration.toFixed(2)}ms`);
+    }
+
+    const avgDuration = durations.reduce((a, b) => a + b, 0) / durations.length;
+    const maxDuration = Math.max(...durations);
+    const minDuration = Math.min(...durations);
+
+    console.log(`šŸ“Š Average Duration: ${avgDuration.toFixed(2)}ms`);
+    console.log(
+      `šŸ“Š Min/Max Duration: ${minDuration.toFixed(2)}ms / ${maxDuration.toFixed(2)}ms`,
+    );
+    console.log(`šŸ“ Found ${foundFiles} GEMINI.md files`);
+    console.log(
+      `šŸŽļø Processing ~${Math.round(200 / (avgDuration / 1000))} dirs/second`,
+    );
+
+    // Verify we found the expected files
+    expect(foundFiles).toBe(20); // 10 dirs * 2 files each
+
+    // Performance expectation: check consistency rather than absolute time
+    const variance = maxDuration - minDuration;
+    const consistencyRatio = variance / avgDuration;
+
+    // Ensure reasonable performance (generous limit for CI environments)
+    expect(avgDuration).toBeLessThan(2000); // Very generous limit
+
+    // Ensure consistency across runs (variance should not be too high)
+    expect(consistencyRatio).toBeLessThan(1.5); // Max variance should be less than 150% of average
+
+    console.log(
+      `āœ… Performance test passed: avg=${avgDuration.toFixed(2)}ms, consistency=${(consistencyRatio * 100).toFixed(1)}%`,
+    );
+  });
 });
diff --git a/packages/core/src/utils/bfsFileSearch.ts b/packages/core/src/utils/bfsFileSearch.ts
index 790521e0..c5b82f2f 100644
--- a/packages/core/src/utils/bfsFileSearch.ts
+++ b/packages/core/src/utils/bfsFileSearch.ts
@@ -6,7 +6,6 @@
 
 import * as fs from 'fs/promises';
 import * as path from 'path';
-import { Dirent } from 'fs';
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
 import { FileFilteringOptions } from '../config/config.js';
 // Simple console logger for now.
@@ -47,45 +46,76 @@ export async function bfsFileSearch(
   const queue: string[] = [rootDir];
   const visited = new Set<string>();
   let scannedDirCount = 0;
+  let queueHead = 0; // Pointer-based queue head to avoid expensive splice operations
 
-  while (queue.length > 0 && scannedDirCount < maxDirs) {
-    const currentDir = queue.shift()!;
-    if (visited.has(currentDir)) {
-      continue;
+  // Convert ignoreDirs array to Set for O(1) lookup performance
+  const ignoreDirsSet = new Set(ignoreDirs);
+
+  // Process directories in parallel batches for maximum performance
+  const PARALLEL_BATCH_SIZE = 15; // Parallel processing batch size for optimal performance
+
+  while (queueHead < queue.length && scannedDirCount < maxDirs) {
+    // Fill batch with unvisited directories up to the desired size
+    const batchSize = Math.min(PARALLEL_BATCH_SIZE, maxDirs - scannedDirCount);
+    const currentBatch = [];
+    while (currentBatch.length < batchSize && queueHead < queue.length) {
+      const currentDir = queue[queueHead];
+      queueHead++;
+      if (!visited.has(currentDir)) {
+        visited.add(currentDir);
+        currentBatch.push(currentDir);
+      }
     }
-    visited.add(currentDir);
-    scannedDirCount++;
+    scannedDirCount += currentBatch.length;
+
+    if (currentBatch.length === 0) continue;
 
     if (debug) {
-      logger.debug(`Scanning [${scannedDirCount}/${maxDirs}]: ${currentDir}`);
+      logger.debug(
+        `Scanning [${scannedDirCount}/${maxDirs}]: batch of ${currentBatch.length}`,
+      );
     }
 
-    let entries: Dirent[];
-    try {
-      entries = await fs.readdir(currentDir, { withFileTypes: true });
-    } catch {
-      // Ignore errors for directories we can't read (e.g., permissions)
-      continue;
-    }
-
-    for (const entry of entries) {
-      const fullPath = path.join(currentDir, entry.name);
-      if (
-        fileService?.shouldIgnoreFile(fullPath, {
-          respectGitIgnore: options.fileFilteringOptions?.respectGitIgnore,
-          respectGeminiIgnore:
-            options.fileFilteringOptions?.respectGeminiIgnore,
-        })
-      ) {
-        continue;
-      }
-
-      if (entry.isDirectory()) {
-        if (!ignoreDirs.includes(entry.name)) {
-          queue.push(fullPath);
+    // Read directories in parallel instead of one by one
+    const readPromises = currentBatch.map(async (currentDir) => {
+      try {
+        const entries = await fs.readdir(currentDir, { withFileTypes: true });
+        return { currentDir, entries };
+      } catch (error) {
+        // Warn user that a directory could not be read, as this affects search results.
+        const message = (error as Error)?.message ?? 'Unknown error';
+        console.warn(
+          `[WARN] Skipping unreadable directory: ${currentDir} (${message})`,
+        );
+        if (debug) {
+          logger.debug(`Full error for ${currentDir}:`, error);
+        }
+        return { currentDir, entries: [] };
+      }
+    });
+
+    const results = await Promise.all(readPromises);
+
+    for (const { currentDir, entries } of results) {
+      for (const entry of entries) {
+        const fullPath = path.join(currentDir, entry.name);
+        if (
+          fileService?.shouldIgnoreFile(fullPath, {
+            respectGitIgnore: options.fileFilteringOptions?.respectGitIgnore,
+            respectGeminiIgnore:
+              options.fileFilteringOptions?.respectGeminiIgnore,
+          })
+        ) {
+          continue;
+        }
+
+        if (entry.isDirectory()) {
+          if (!ignoreDirsSet.has(entry.name)) {
+            queue.push(fullPath);
+          }
+        } else if (entry.isFile() && entry.name === fileName) {
+          foundFiles.push(fullPath);
         }
-      } else if (entry.isFile() && entry.name === fileName) {
-        foundFiles.push(fullPath);
       }
     }
   }
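
Note on the approach, for readers who want the pattern without walking the full hunk: the sketch below is an illustrative, standalone TypeScript reduction of what the new loop in bfsFileSearch.ts does. The function name walkDirsInBatches, its signature, and the default batch size are assumptions made for the example, not part of the committed code. It shows the two ideas the patch combines: consuming the BFS queue through a head index instead of Array.prototype.shift(), and issuing each batch of fs.readdir calls concurrently with Promise.all.

import * as fs from 'fs/promises';
import * as path from 'path';

// Breadth-first directory walk with batched, parallel readdir calls (sketch).
async function walkDirsInBatches(
  rootDir: string,
  batchSize = 15,
): Promise<string[]> {
  const queue: string[] = [rootDir]; // flat array used as a FIFO queue
  const seen = new Set<string>([rootDir]);
  let head = 0; // head pointer avoids O(n) Array.prototype.shift()

  while (head < queue.length) {
    // Take up to batchSize directories off the front of the queue.
    const batch = queue.slice(head, head + batchSize);
    head += batch.length;

    // Issue all readdir calls for this batch concurrently.
    const results = await Promise.all(
      batch.map(async (dir) => {
        try {
          const entries = await fs.readdir(dir, { withFileTypes: true });
          return { dir, entries };
        } catch {
          return { dir, entries: [] }; // unreadable directory: skip it
        }
      }),
    );

    // Enqueue newly discovered subdirectories for later batches.
    for (const { dir, entries } of results) {
      for (const entry of entries) {
        if (entry.isDirectory()) {
          const full = path.join(dir, entry.name);
          if (!seen.has(full)) {
            seen.add(full);
            queue.push(full);
          }
        }
      }
    }
  }

  return queue; // every directory reached, in BFS order
}

Batching (the patch uses 15 directories per round) keeps the number of in-flight readdir calls bounded, which avoids exhausting file descriptors on very wide trees while still overlapping most of the I/O latency that the previous one-directory-at-a-time loop paid sequentially.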