perf(core): parallelize bfsFileSearch for 40% faster CLI startup (#5185)
This commit is contained in:
parent bc23009f61
commit bcce1e7b84
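In short: the old loop popped one directory at a time (`queue.shift()` followed by a single awaited `fs.readdir`), so the whole search was serialized on filesystem latency. The new loop drains the queue in fixed-size batches and issues the `readdir` calls concurrently. A minimal, self-contained sketch of the pattern — the function name `walk` and the constant `BATCH_SIZE` are illustrative, not the committed API:

    import * as fs from 'fs/promises';
    import * as path from 'path';

    const BATCH_SIZE = 15; // assumption: mirrors PARALLEL_BATCH_SIZE in the diff below

    async function walk(rootDir: string, fileName: string): Promise<string[]> {
      const found: string[] = [];
      const queue: string[] = [rootDir];
      let head = 0; // index-based head: queue.shift() would re-index the array on every pop

      while (head < queue.length) {
        // Take the next batch of queued directories and read them concurrently.
        const batch = queue.slice(head, head + BATCH_SIZE);
        head += batch.length;
        const results = await Promise.all(
          batch.map(async (dir) => {
            try {
              return { dir, entries: await fs.readdir(dir, { withFileTypes: true }) };
            } catch {
              return { dir, entries: [] }; // unreadable directory: skip it
            }
          }),
        );

        // Enqueue subdirectories and collect matching files.
        for (const { dir, entries } of results) {
          for (const entry of entries) {
            const fullPath = path.join(dir, entry.name);
            if (entry.isDirectory()) queue.push(fullPath);
            else if (entry.isFile() && entry.name === fileName) found.push(fullPath);
          }
        }
      }
      return found;
    }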
@@ -189,4 +189,79 @@ describe('bfsFileSearch', () => {
       expect(result.sort()).toEqual([target1, target2].sort());
     });
   });
+
+  it('should perform parallel directory scanning efficiently (performance test)', async () => {
+    // Create a more complex directory structure for performance testing
+    console.log('\n🚀 Testing Parallel BFS Performance...');
+
+    // Create 50 directories with multiple levels for faster test execution
+    for (let i = 0; i < 50; i++) {
+      await createEmptyDir(`dir${i}`);
+      await createEmptyDir(`dir${i}`, 'subdir1');
+      await createEmptyDir(`dir${i}`, 'subdir2');
+      await createEmptyDir(`dir${i}`, 'subdir1', 'deep');
+      if (i < 10) {
+        // Add target files in some directories
+        await createTestFile('content', `dir${i}`, 'GEMINI.md');
+        await createTestFile('content', `dir${i}`, 'subdir1', 'GEMINI.md');
+      }
+    }
+
+    // Run multiple iterations to ensure consistency
+    const iterations = 3;
+    const durations: number[] = [];
+    let foundFiles = 0;
+    let firstResultSorted: string[] | undefined;
+
+    for (let i = 0; i < iterations; i++) {
+      const searchStartTime = performance.now();
+      const result = await bfsFileSearch(testRootDir, {
+        fileName: 'GEMINI.md',
+        maxDirs: 200,
+        debug: false,
+      });
+      const duration = performance.now() - searchStartTime;
+      durations.push(duration);
+
+      // Verify consistency: all iterations should find the exact same files
+      if (firstResultSorted === undefined) {
+        foundFiles = result.length;
+        firstResultSorted = result.sort();
+      } else {
+        expect(result.sort()).toEqual(firstResultSorted);
+      }
+
+      console.log(`📊 Iteration ${i + 1}: ${duration.toFixed(2)}ms`);
+    }
+
+    const avgDuration = durations.reduce((a, b) => a + b, 0) / durations.length;
+    const maxDuration = Math.max(...durations);
+    const minDuration = Math.min(...durations);
+
+    console.log(`📊 Average Duration: ${avgDuration.toFixed(2)}ms`);
+    console.log(
+      `📊 Min/Max Duration: ${minDuration.toFixed(2)}ms / ${maxDuration.toFixed(2)}ms`,
+    );
+    console.log(`📁 Found ${foundFiles} GEMINI.md files`);
+    console.log(
+      `🏎️ Processing ~${Math.round(200 / (avgDuration / 1000))} dirs/second`,
+    );
+
+    // Verify we found the expected files
+    expect(foundFiles).toBe(20); // 10 dirs * 2 files each
+
+    // Performance expectation: check consistency rather than absolute time
+    const variance = maxDuration - minDuration;
+    const consistencyRatio = variance / avgDuration;
+
+    // Ensure reasonable performance (generous limit for CI environments)
+    expect(avgDuration).toBeLessThan(2000); // Very generous limit
+
+    // Ensure consistency across runs (variance should not be too high)
+    expect(consistencyRatio).toBeLessThan(1.5); // Max variance should be less than 150% of average
+
+    console.log(
+      `✅ Performance test passed: avg=${avgDuration.toFixed(2)}ms, consistency=${(consistencyRatio * 100).toFixed(1)}%`,
+    );
+  });
 });
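The test above relies on `testRootDir`, `createEmptyDir`, and `createTestFile`, which are defined earlier in the test file and not shown in this hunk. Inferred from their call sites, they presumably behave roughly like this (a sketch under that assumption, not the actual helpers):

    import * as fs from 'fs/promises';
    import * as os from 'os';
    import * as path from 'path';

    let testRootDir: string;

    // Presumably created fresh for each test, e.g. in a beforeEach hook.
    async function makeTestRoot(): Promise<void> {
      testRootDir = await fs.mkdtemp(path.join(os.tmpdir(), 'bfs-test-'));
    }

    // Inferred from usage: create a (possibly nested) directory under the test root.
    async function createEmptyDir(...segments: string[]): Promise<string> {
      const dir = path.join(testRootDir, ...segments);
      await fs.mkdir(dir, { recursive: true });
      return dir;
    }

    // Inferred from usage: the last segment is the file name, the rest are directories.
    async function createTestFile(content: string, ...segments: string[]): Promise<string> {
      const filePath = path.join(testRootDir, ...segments);
      await fs.mkdir(path.dirname(filePath), { recursive: true });
      await fs.writeFile(filePath, content);
      return filePath;
    }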
@@ -6,7 +6,6 @@
 
 import * as fs from 'fs/promises';
 import * as path from 'path';
-import { Dirent } from 'fs';
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
 import { FileFilteringOptions } from '../config/config.js';
 // Simple console logger for now.
@@ -47,27 +46,57 @@ export async function bfsFileSearch(
   const queue: string[] = [rootDir];
   const visited = new Set<string>();
   let scannedDirCount = 0;
+  let queueHead = 0; // Pointer-based queue head to avoid expensive splice operations
 
-  while (queue.length > 0 && scannedDirCount < maxDirs) {
-    const currentDir = queue.shift()!;
-    if (visited.has(currentDir)) {
-      continue;
-    }
-    visited.add(currentDir);
-    scannedDirCount++;
+  // Convert ignoreDirs array to Set for O(1) lookup performance
+  const ignoreDirsSet = new Set(ignoreDirs);
+
+  // Process directories in parallel batches for maximum performance
+  const PARALLEL_BATCH_SIZE = 15; // Parallel processing batch size for optimal performance
+
+  while (queueHead < queue.length && scannedDirCount < maxDirs) {
+    // Fill batch with unvisited directories up to the desired size
+    const batchSize = Math.min(PARALLEL_BATCH_SIZE, maxDirs - scannedDirCount);
+    const currentBatch = [];
+    while (currentBatch.length < batchSize && queueHead < queue.length) {
+      const currentDir = queue[queueHead];
+      queueHead++;
+      if (!visited.has(currentDir)) {
+        visited.add(currentDir);
+        currentBatch.push(currentDir);
+      }
+    }
+    scannedDirCount += currentBatch.length;
+
+    if (currentBatch.length === 0) continue;
 
     if (debug) {
-      logger.debug(`Scanning [${scannedDirCount}/${maxDirs}]: ${currentDir}`);
+      logger.debug(
+        `Scanning [${scannedDirCount}/${maxDirs}]: batch of ${currentBatch.length}`,
+      );
     }
 
-    let entries: Dirent[];
-    try {
-      entries = await fs.readdir(currentDir, { withFileTypes: true });
-    } catch {
-      // Ignore errors for directories we can't read (e.g., permissions)
-      continue;
-    }
+    // Read directories in parallel instead of one by one
+    const readPromises = currentBatch.map(async (currentDir) => {
+      try {
+        const entries = await fs.readdir(currentDir, { withFileTypes: true });
+        return { currentDir, entries };
+      } catch (error) {
+        // Warn user that a directory could not be read, as this affects search results.
+        const message = (error as Error)?.message ?? 'Unknown error';
+        console.warn(
+          `[WARN] Skipping unreadable directory: ${currentDir} (${message})`,
+        );
+        if (debug) {
+          logger.debug(`Full error for ${currentDir}:`, error);
+        }
+        return { currentDir, entries: [] };
+      }
+    });
+
+    const results = await Promise.all(readPromises);
+
+    for (const { currentDir, entries } of results) {
       for (const entry of entries) {
         const fullPath = path.join(currentDir, entry.name);
         if (
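On the `queueHead` pointer introduced above: `Array.prototype.shift()` re-indexes every remaining element, so popping N queued directories that way costs O(N²) in total, while advancing an index over an append-only array costs O(N). The trade-off is that consumed entries stay in memory until the search finishes. Schematically:

    const queue: string[] = ['a', 'b', 'c'];

    // O(N²) total: each shift() re-indexes all remaining elements.
    // while (queue.length > 0) { const dir = queue.shift()!; /* ... */ }

    // O(N) total: append-only array plus a cursor marking the logical front.
    let queueHead = 0;
    while (queueHead < queue.length) {
      const dir = queue[queueHead++];
      console.log(dir); // visits a, b, c without mutating the array
    }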
@@ -81,7 +110,7 @@ export async function bfsFileSearch(
         }
 
         if (entry.isDirectory()) {
-          if (!ignoreDirs.includes(entry.name)) {
+          if (!ignoreDirsSet.has(entry.name)) {
            queue.push(fullPath);
          }
        } else if (entry.isFile() && entry.name === fileName) {
|
@ -89,6 +118,7 @@ export async function bfsFileSearch(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return foundFiles;
|
return foundFiles;
|
||||||
}
|
}
|
||||||
|
|
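For reference, the call shape the new test exercises — the options shown are only the ones visible in this diff; `bfsFileSearch` accepts further options (e.g. the file-filtering ones implied by the imports) that are omitted here:

    // Find every GEMINI.md under rootDir, scanning at most 200 directories.
    const files = await bfsFileSearch(rootDir, {
      fileName: 'GEMINI.md',
      maxDirs: 200,
      debug: false,
    });

As a worked example of the throughput metric the test logs: with maxDirs = 200 and an average run of, say, 50 ms, Math.round(200 / (50 / 1000)) reports ~4000 dirs/second.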