fix(core): Replace flaky performance tests with robust correctness tests (#5795)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
This commit is contained in:
Allen Hutchison 2025-08-07 15:38:21 -07:00 committed by GitHub
parent 9fc7115b86
commit 0c32a4061d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 38 additions and 85 deletions

View File

@@ -523,7 +523,7 @@ describe('ReadManyFilesTool', () => {
fs.writeFileSync(fullPath, content); fs.writeFileSync(fullPath, content);
}; };
it('should process files in parallel for performance', async () => { it('should process files in parallel', async () => {
// Mock detectFileType to add artificial delay to simulate I/O // Mock detectFileType to add artificial delay to simulate I/O
const detectFileTypeSpy = vi.spyOn( const detectFileTypeSpy = vi.spyOn(
await import('../utils/fileUtils.js'), await import('../utils/fileUtils.js'),
@@ -534,31 +534,21 @@ describe('ReadManyFilesTool', () => {
const fileCount = 4; const fileCount = 4;
const files = createMultipleFiles(fileCount, 'Batch test'); const files = createMultipleFiles(fileCount, 'Batch test');
// Mock with 100ms delay per file to simulate I/O operations // Mock with 10ms delay per file to simulate I/O operations
detectFileTypeSpy.mockImplementation(async (_filePath: string) => { detectFileTypeSpy.mockImplementation(async (_filePath: string) => {
await new Promise((resolve) => setTimeout(resolve, 100)); await new Promise((resolve) => setTimeout(resolve, 10));
return 'text'; return 'text';
}); });
const startTime = Date.now();
const params = { paths: files }; const params = { paths: files };
const result = await tool.execute(params, new AbortController().signal); const result = await tool.execute(params, new AbortController().signal);
const endTime = Date.now();
const processingTime = endTime - startTime;
console.log(
`Processing time: ${processingTime}ms for ${fileCount} files`,
);
// Verify parallel processing performance improvement
// Parallel processing should complete in ~100ms (single file time)
// Sequential would take ~400ms (4 files × 100ms each)
expect(processingTime).toBeLessThan(200); // Should PASS with parallel implementation
// Verify all files were processed // Verify all files were processed
const content = result.llmContent as string[]; const content = result.llmContent as string[];
expect(content).toHaveLength(fileCount); expect(content).toHaveLength(fileCount);
for (let i = 0; i < fileCount; i++) {
expect(content.join('')).toContain(`Batch test ${i}`);
}
// Cleanup mock // Cleanup mock
detectFileTypeSpy.mockRestore(); detectFileTypeSpy.mockRestore();

View File

@@ -190,80 +190,43 @@ describe('bfsFileSearch', () => {
}); });
}); });
it('should perform parallel directory scanning efficiently (performance test)', async () => { it('should find all files in a complex directory structure', async () => {
// Create a more complex directory structure for performance testing // Create a complex directory structure to test correctness at scale
console.log('\n🚀 Testing Parallel BFS Performance...'); // without flaky performance checks.
const numDirs = 50;
const numFilesPerDir = 2;
const numTargetDirs = 10;
// Create 50 directories with multiple levels for faster test execution const dirCreationPromises: Array<Promise<unknown>> = [];
for (let i = 0; i < 50; i++) { for (let i = 0; i < numDirs; i++) {
await createEmptyDir(`dir${i}`); dirCreationPromises.push(createEmptyDir(`dir${i}`));
await createEmptyDir(`dir${i}`, 'subdir1'); dirCreationPromises.push(createEmptyDir(`dir${i}`, 'subdir1'));
await createEmptyDir(`dir${i}`, 'subdir2'); dirCreationPromises.push(createEmptyDir(`dir${i}`, 'subdir2'));
await createEmptyDir(`dir${i}`, 'subdir1', 'deep'); dirCreationPromises.push(createEmptyDir(`dir${i}`, 'subdir1', 'deep'));
if (i < 10) {
// Add target files in some directories
await createTestFile('content', `dir${i}`, 'GEMINI.md');
await createTestFile('content', `dir${i}`, 'subdir1', 'GEMINI.md');
}
} }
await Promise.all(dirCreationPromises);
// Run multiple iterations to ensure consistency const fileCreationPromises: Array<Promise<string>> = [];
const iterations = 3; for (let i = 0; i < numTargetDirs; i++) {
const durations: number[] = []; // Add target files in some directories
let foundFiles = 0; fileCreationPromises.push(
let firstResultSorted: string[] | undefined; createTestFile('content', `dir${i}`, 'GEMINI.md'),
);
for (let i = 0; i < iterations; i++) { fileCreationPromises.push(
const searchStartTime = performance.now(); createTestFile('content', `dir${i}`, 'subdir1', 'GEMINI.md'),
const result = await bfsFileSearch(testRootDir, { );
fileName: 'GEMINI.md',
maxDirs: 200,
debug: false,
});
const duration = performance.now() - searchStartTime;
durations.push(duration);
// Verify consistency: all iterations should find the exact same files
if (firstResultSorted === undefined) {
foundFiles = result.length;
firstResultSorted = result.sort();
} else {
expect(result.sort()).toEqual(firstResultSorted);
}
console.log(`📊 Iteration ${i + 1}: ${duration.toFixed(2)}ms`);
} }
const expectedFiles = await Promise.all(fileCreationPromises);
const avgDuration = durations.reduce((a, b) => a + b, 0) / durations.length; const result = await bfsFileSearch(testRootDir, {
const maxDuration = Math.max(...durations); fileName: 'GEMINI.md',
const minDuration = Math.min(...durations); // Provide a generous maxDirs limit to ensure it doesn't prematurely stop
// in this large test case. Total dirs created is 200.
maxDirs: 250,
});
console.log(`📊 Average Duration: ${avgDuration.toFixed(2)}ms`); // Verify we found the exact files we created
console.log( expect(result.length).toBe(numTargetDirs * numFilesPerDir);
`📊 Min/Max Duration: ${minDuration.toFixed(2)}ms / ${maxDuration.toFixed(2)}ms`, expect(result.sort()).toEqual(expectedFiles.sort());
);
console.log(`📁 Found ${foundFiles} GEMINI.md files`);
console.log(
`🏎️ Processing ~${Math.round(200 / (avgDuration / 1000))} dirs/second`,
);
// Verify we found the expected files
expect(foundFiles).toBe(20); // 10 dirs * 2 files each
// Performance expectation: check consistency rather than absolute time
const variance = maxDuration - minDuration;
const consistencyRatio = variance / avgDuration;
// Ensure reasonable performance (generous limit for CI environments)
expect(avgDuration).toBeLessThan(2000); // Very generous limit
// Ensure consistency across runs (variance should not be too high)
// More tolerant in CI environments where performance can be variable
const maxConsistencyRatio = process.env.CI ? 3.0 : 1.5;
expect(consistencyRatio).toBeLessThan(maxConsistencyRatio); // Max variance should be reasonable
console.log(
`✅ Performance test passed: avg=${avgDuration.toFixed(2)}ms, consistency=${(consistencyRatio * 100).toFixed(1)}% (threshold: ${(maxConsistencyRatio * 100).toFixed(0)}%)`,
);
}); });
}); });