fix(core): Replace flaky performance tests with robust correctness tests (#5795)
Co-authored-by: Jacob Richman <jacob314@gmail.com>
parent 9fc7115b86
commit 0c32a4061d
@@ -523,7 +523,7 @@ describe('ReadManyFilesTool', () => {
     fs.writeFileSync(fullPath, content);
   };

-  it('should process files in parallel for performance', async () => {
+  it('should process files in parallel', async () => {
     // Mock detectFileType to add artificial delay to simulate I/O
     const detectFileTypeSpy = vi.spyOn(
       await import('../utils/fileUtils.js'),
@@ -534,31 +534,21 @@ describe('ReadManyFilesTool', () => {
     const fileCount = 4;
     const files = createMultipleFiles(fileCount, 'Batch test');

-    // Mock with 100ms delay per file to simulate I/O operations
+    // Mock with 10ms delay per file to simulate I/O operations
     detectFileTypeSpy.mockImplementation(async (_filePath: string) => {
-      await new Promise((resolve) => setTimeout(resolve, 100));
+      await new Promise((resolve) => setTimeout(resolve, 10));
       return 'text';
     });

-    const startTime = Date.now();
     const params = { paths: files };
     const result = await tool.execute(params, new AbortController().signal);
-    const endTime = Date.now();
-
-    const processingTime = endTime - startTime;
-
-    console.log(
-      `Processing time: ${processingTime}ms for ${fileCount} files`,
-    );
-
-    // Verify parallel processing performance improvement
-    // Parallel processing should complete in ~100ms (single file time)
-    // Sequential would take ~400ms (4 files × 100ms each)
-    expect(processingTime).toBeLessThan(200); // Should PASS with parallel implementation

     // Verify all files were processed
     const content = result.llmContent as string[];
     expect(content).toHaveLength(fileCount);
+    for (let i = 0; i < fileCount; i++) {
+      expect(content.join('')).toContain(`Batch test ${i}`);
+    }

     // Cleanup mock
     detectFileTypeSpy.mockRestore();
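Note: the deleted assertion `expect(processingTime).toBeLessThan(200)` tied the test to wall-clock time, which is exactly what made it flaky on loaded CI machines. If parallelism itself ever needs to be asserted again, one deterministic option is to track peak concurrency inside the mock rather than measuring elapsed time. A minimal Vitest sketch, assuming the `tool`, `files`, and `detectFileTypeSpy` fixtures from the surrounding test file:

```ts
import { expect, it } from 'vitest';

// Sketch: assert parallelism by counting overlapping calls, not by timing.
// `detectFileTypeSpy`, `tool`, and `files` are assumed from the test above.
it('overlaps file-type detection calls', async () => {
  let active = 0;
  let peak = 0;
  detectFileTypeSpy.mockImplementation(async (_filePath: string) => {
    active += 1;
    peak = Math.max(peak, active);
    // Yield to the event loop so concurrent calls can actually overlap.
    await new Promise((resolve) => setTimeout(resolve, 10));
    active -= 1;
    return 'text';
  });

  await tool.execute({ paths: files }, new AbortController().signal);

  // Sequential processing would never exceed one concurrent call.
  expect(peak).toBeGreaterThan(1);
});
```

Unlike a timing threshold, this passes or fails identically on a fast laptop and a saturated CI runner.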
@@ -190,80 +190,43 @@ describe('bfsFileSearch', () => {
     });
   });

-  it('should perform parallel directory scanning efficiently (performance test)', async () => {
-    // Create a more complex directory structure for performance testing
-    console.log('\n🚀 Testing Parallel BFS Performance...');
-
-    // Create 50 directories with multiple levels for faster test execution
-    for (let i = 0; i < 50; i++) {
-      await createEmptyDir(`dir${i}`);
-      await createEmptyDir(`dir${i}`, 'subdir1');
-      await createEmptyDir(`dir${i}`, 'subdir2');
-      await createEmptyDir(`dir${i}`, 'subdir1', 'deep');
-      if (i < 10) {
-        // Add target files in some directories
-        await createTestFile('content', `dir${i}`, 'GEMINI.md');
-        await createTestFile('content', `dir${i}`, 'subdir1', 'GEMINI.md');
-      }
-    }
-
-    // Run multiple iterations to ensure consistency
-    const iterations = 3;
-    const durations: number[] = [];
-    let foundFiles = 0;
-    let firstResultSorted: string[] | undefined;
-
-    for (let i = 0; i < iterations; i++) {
-      const searchStartTime = performance.now();
-      const result = await bfsFileSearch(testRootDir, {
-        fileName: 'GEMINI.md',
-        maxDirs: 200,
-        debug: false,
-      });
-      const duration = performance.now() - searchStartTime;
-      durations.push(duration);
-
-      // Verify consistency: all iterations should find the exact same files
-      if (firstResultSorted === undefined) {
-        foundFiles = result.length;
-        firstResultSorted = result.sort();
-      } else {
-        expect(result.sort()).toEqual(firstResultSorted);
-      }
-
-      console.log(`📊 Iteration ${i + 1}: ${duration.toFixed(2)}ms`);
-    }
-
-    const avgDuration = durations.reduce((a, b) => a + b, 0) / durations.length;
-    const maxDuration = Math.max(...durations);
-    const minDuration = Math.min(...durations);
-
-    console.log(`📊 Average Duration: ${avgDuration.toFixed(2)}ms`);
-    console.log(
-      `📊 Min/Max Duration: ${minDuration.toFixed(2)}ms / ${maxDuration.toFixed(2)}ms`,
-    );
-    console.log(`📁 Found ${foundFiles} GEMINI.md files`);
-    console.log(
-      `🏎️ Processing ~${Math.round(200 / (avgDuration / 1000))} dirs/second`,
-    );
-
-    // Verify we found the expected files
-    expect(foundFiles).toBe(20); // 10 dirs * 2 files each
-
-    // Performance expectation: check consistency rather than absolute time
-    const variance = maxDuration - minDuration;
-    const consistencyRatio = variance / avgDuration;
-
-    // Ensure reasonable performance (generous limit for CI environments)
-    expect(avgDuration).toBeLessThan(2000); // Very generous limit
-
-    // Ensure consistency across runs (variance should not be too high)
-    // More tolerant in CI environments where performance can be variable
-    const maxConsistencyRatio = process.env.CI ? 3.0 : 1.5;
-    expect(consistencyRatio).toBeLessThan(maxConsistencyRatio); // Max variance should be reasonable
-
-    console.log(
-      `✅ Performance test passed: avg=${avgDuration.toFixed(2)}ms, consistency=${(consistencyRatio * 100).toFixed(1)}% (threshold: ${(maxConsistencyRatio * 100).toFixed(0)}%)`,
-    );
+  it('should find all files in a complex directory structure', async () => {
+    // Create a complex directory structure to test correctness at scale
+    // without flaky performance checks.
+    const numDirs = 50;
+    const numFilesPerDir = 2;
+    const numTargetDirs = 10;
+
+    const dirCreationPromises: Array<Promise<unknown>> = [];
+    for (let i = 0; i < numDirs; i++) {
+      dirCreationPromises.push(createEmptyDir(`dir${i}`));
+      dirCreationPromises.push(createEmptyDir(`dir${i}`, 'subdir1'));
+      dirCreationPromises.push(createEmptyDir(`dir${i}`, 'subdir2'));
+      dirCreationPromises.push(createEmptyDir(`dir${i}`, 'subdir1', 'deep'));
+    }
+    await Promise.all(dirCreationPromises);
+
+    const fileCreationPromises: Array<Promise<string>> = [];
+    for (let i = 0; i < numTargetDirs; i++) {
+      // Add target files in some directories
+      fileCreationPromises.push(
+        createTestFile('content', `dir${i}`, 'GEMINI.md'),
+      );
+      fileCreationPromises.push(
+        createTestFile('content', `dir${i}`, 'subdir1', 'GEMINI.md'),
+      );
+    }
+    const expectedFiles = await Promise.all(fileCreationPromises);
+
+    const result = await bfsFileSearch(testRootDir, {
+      fileName: 'GEMINI.md',
+      // Provide a generous maxDirs limit to ensure it doesn't prematurely stop
+      // in this large test case. Total dirs created is 200.
+      maxDirs: 250,
+    });
+
+    // Verify we found the exact files we created
+    expect(result.length).toBe(numTargetDirs * numFilesPerDir);
+    expect(result.sort()).toEqual(expectedFiles.sort());
   });
 });
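Note: the rewritten setup above fans out fixture creation with `Promise.all` instead of awaiting each helper call in turn, and it keeps the resolved paths so the final assertion can compare exact sorted file lists rather than a bare count. A standalone sketch of the same pattern using `node:fs/promises` directly (the `createFixture` helper and its layout are illustrative, not part of this change):

```ts
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';

// Batched fixture setup: queue all async work first, await it together.
async function createFixture(root: string, numDirs: number): Promise<string[]> {
  const dirWork: Array<Promise<unknown>> = [];
  for (let i = 0; i < numDirs; i++) {
    dirWork.push(mkdir(join(root, `dir${i}`, 'subdir1'), { recursive: true }));
  }
  // Directories must exist before any files are written into them.
  await Promise.all(dirWork);

  const fileWork: Array<Promise<string>> = [];
  for (let i = 0; i < numDirs; i++) {
    const target = join(root, `dir${i}`, 'GEMINI.md');
    fileWork.push(writeFile(target, 'content').then(() => target));
  }
  // Resolves to the exact paths created, ready for a sorted equality
  // check against the search result.
  return Promise.all(fileWork);
}
```

Returning the created paths is what lets the test assert `result.sort()` equals `expectedFiles.sort()` deterministically, with no timing involved.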