perf(core): implement parallel file processing for 74% performance improvement (#4763)
Co-authored-by: Jacob Richman <jacob314@gmail.com> Co-authored-by: Sandy Tao <sandytao520@icloud.com>
This commit is contained in:
parent
c402784d97
commit
aebe3ace3c
|
@ -477,4 +477,139 @@ describe('ReadManyFilesTool', () => {
|
||||||
fs.rmSync(tempDir2, { recursive: true, force: true });
|
fs.rmSync(tempDir2, { recursive: true, force: true });
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('Batch Processing', () => {
|
||||||
|
const createMultipleFiles = (count: number, contentPrefix = 'Content') => {
|
||||||
|
const files: string[] = [];
|
||||||
|
for (let i = 0; i < count; i++) {
|
||||||
|
const fileName = `file${i}.txt`;
|
||||||
|
createFile(fileName, `${contentPrefix} ${i}`);
|
||||||
|
files.push(fileName);
|
||||||
|
}
|
||||||
|
return files;
|
||||||
|
};
|
||||||
|
|
||||||
|
const createFile = (filePath: string, content = '') => {
|
||||||
|
const fullPath = path.join(tempRootDir, filePath);
|
||||||
|
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
|
||||||
|
fs.writeFileSync(fullPath, content);
|
||||||
|
};
|
||||||
|
|
||||||
|
it('should process files in parallel for performance', async () => {
|
||||||
|
// Mock detectFileType to add artificial delay to simulate I/O
|
||||||
|
const detectFileTypeSpy = vi.spyOn(
|
||||||
|
await import('../utils/fileUtils.js'),
|
||||||
|
'detectFileType',
|
||||||
|
);
|
||||||
|
|
||||||
|
// Create files
|
||||||
|
const fileCount = 4;
|
||||||
|
const files = createMultipleFiles(fileCount, 'Batch test');
|
||||||
|
|
||||||
|
// Mock with 100ms delay per file to simulate I/O operations
|
||||||
|
detectFileTypeSpy.mockImplementation(async (_filePath: string) => {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 100));
|
||||||
|
return 'text';
|
||||||
|
});
|
||||||
|
|
||||||
|
const startTime = Date.now();
|
||||||
|
const params = { paths: files };
|
||||||
|
const result = await tool.execute(params, new AbortController().signal);
|
||||||
|
const endTime = Date.now();
|
||||||
|
|
||||||
|
const processingTime = endTime - startTime;
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`Processing time: ${processingTime}ms for ${fileCount} files`,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Verify parallel processing performance improvement
|
||||||
|
// Parallel processing should complete in ~100ms (single file time)
|
||||||
|
// Sequential would take ~400ms (4 files × 100ms each)
|
||||||
|
expect(processingTime).toBeLessThan(200); // Should PASS with parallel implementation
|
||||||
|
|
||||||
|
// Verify all files were processed
|
||||||
|
const content = result.llmContent as string[];
|
||||||
|
expect(content).toHaveLength(fileCount);
|
||||||
|
|
||||||
|
// Cleanup mock
|
||||||
|
detectFileTypeSpy.mockRestore();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle batch processing errors gracefully', async () => {
|
||||||
|
// Create mix of valid and problematic files
|
||||||
|
createFile('valid1.txt', 'Valid content 1');
|
||||||
|
createFile('valid2.txt', 'Valid content 2');
|
||||||
|
createFile('valid3.txt', 'Valid content 3');
|
||||||
|
|
||||||
|
const params = {
|
||||||
|
paths: [
|
||||||
|
'valid1.txt',
|
||||||
|
'valid2.txt',
|
||||||
|
'nonexistent-file.txt', // This will fail
|
||||||
|
'valid3.txt',
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = await tool.execute(params, new AbortController().signal);
|
||||||
|
const content = result.llmContent as string[];
|
||||||
|
|
||||||
|
// Should successfully process valid files despite one failure
|
||||||
|
expect(content.length).toBeGreaterThanOrEqual(3);
|
||||||
|
expect(result.returnDisplay).toContain('Successfully read');
|
||||||
|
|
||||||
|
// Verify valid files were processed
|
||||||
|
const expectedPath1 = path.join(tempRootDir, 'valid1.txt');
|
||||||
|
const expectedPath3 = path.join(tempRootDir, 'valid3.txt');
|
||||||
|
expect(content.some((c) => c.includes(expectedPath1))).toBe(true);
|
||||||
|
expect(content.some((c) => c.includes(expectedPath3))).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should execute file operations concurrently', async () => {
|
||||||
|
// Track execution order to verify concurrency
|
||||||
|
const executionOrder: string[] = [];
|
||||||
|
const detectFileTypeSpy = vi.spyOn(
|
||||||
|
await import('../utils/fileUtils.js'),
|
||||||
|
'detectFileType',
|
||||||
|
);
|
||||||
|
|
||||||
|
const files = ['file1.txt', 'file2.txt', 'file3.txt'];
|
||||||
|
files.forEach((file) => createFile(file, 'test content'));
|
||||||
|
|
||||||
|
// Mock to track concurrent vs sequential execution
|
||||||
|
detectFileTypeSpy.mockImplementation(async (filePath: string) => {
|
||||||
|
const fileName = filePath.split('/').pop() || '';
|
||||||
|
executionOrder.push(`start:${fileName}`);
|
||||||
|
|
||||||
|
// Add delay to make timing differences visible
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 50));
|
||||||
|
|
||||||
|
executionOrder.push(`end:${fileName}`);
|
||||||
|
return 'text';
|
||||||
|
});
|
||||||
|
|
||||||
|
await tool.execute({ paths: files }, new AbortController().signal);
|
||||||
|
|
||||||
|
console.log('Execution order:', executionOrder);
|
||||||
|
|
||||||
|
// Verify concurrent execution pattern
|
||||||
|
// In parallel execution: all "start:" events should come before all "end:" events
|
||||||
|
// In sequential execution: "start:file1", "end:file1", "start:file2", "end:file2", etc.
|
||||||
|
|
||||||
|
const startEvents = executionOrder.filter((e) =>
|
||||||
|
e.startsWith('start:'),
|
||||||
|
).length;
|
||||||
|
const firstEndIndex = executionOrder.findIndex((e) =>
|
||||||
|
e.startsWith('end:'),
|
||||||
|
);
|
||||||
|
const startsBeforeFirstEnd = executionOrder
|
||||||
|
.slice(0, firstEndIndex)
|
||||||
|
.filter((e) => e.startsWith('start:')).length;
|
||||||
|
|
||||||
|
// For parallel processing, ALL start events should happen before the first end event
|
||||||
|
expect(startsBeforeFirstEnd).toBe(startEvents); // Should PASS with parallel implementation
|
||||||
|
|
||||||
|
detectFileTypeSpy.mockRestore();
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -70,6 +70,27 @@ export interface ReadManyFilesParams {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result type for file processing operations
|
||||||
|
*/
|
||||||
|
type FileProcessingResult =
|
||||||
|
| {
|
||||||
|
success: true;
|
||||||
|
filePath: string;
|
||||||
|
relativePathForDisplay: string;
|
||||||
|
fileReadResult: NonNullable<
|
||||||
|
Awaited<ReturnType<typeof processSingleFileContent>>
|
||||||
|
>;
|
||||||
|
reason?: undefined;
|
||||||
|
}
|
||||||
|
| {
|
||||||
|
success: false;
|
||||||
|
filePath: string;
|
||||||
|
relativePathForDisplay: string;
|
||||||
|
fileReadResult?: undefined;
|
||||||
|
reason: string;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default exclusion patterns for commonly ignored directories and binary file types.
|
* Default exclusion patterns for commonly ignored directories and binary file types.
|
||||||
* These are compatible with glob ignore patterns.
|
* These are compatible with glob ignore patterns.
|
||||||
|
@ -413,66 +434,124 @@ Use this tool when the user's query implies needing the content of several files
|
||||||
|
|
||||||
const sortedFiles = Array.from(filesToConsider).sort();
|
const sortedFiles = Array.from(filesToConsider).sort();
|
||||||
|
|
||||||
for (const filePath of sortedFiles) {
|
const fileProcessingPromises = sortedFiles.map(
|
||||||
const relativePathForDisplay = path
|
async (filePath): Promise<FileProcessingResult> => {
|
||||||
.relative(this.config.getTargetDir(), filePath)
|
try {
|
||||||
.replace(/\\/g, '/');
|
const relativePathForDisplay = path
|
||||||
|
.relative(this.config.getTargetDir(), filePath)
|
||||||
|
.replace(/\\/g, '/');
|
||||||
|
|
||||||
const fileType = await detectFileType(filePath);
|
const fileType = await detectFileType(filePath);
|
||||||
|
|
||||||
if (fileType === 'image' || fileType === 'pdf') {
|
if (fileType === 'image' || fileType === 'pdf') {
|
||||||
const fileExtension = path.extname(filePath).toLowerCase();
|
const fileExtension = path.extname(filePath).toLowerCase();
|
||||||
const fileNameWithoutExtension = path.basename(filePath, fileExtension);
|
const fileNameWithoutExtension = path.basename(
|
||||||
const requestedExplicitly = inputPatterns.some(
|
filePath,
|
||||||
(pattern: string) =>
|
fileExtension,
|
||||||
pattern.toLowerCase().includes(fileExtension) ||
|
);
|
||||||
pattern.includes(fileNameWithoutExtension),
|
const requestedExplicitly = inputPatterns.some(
|
||||||
);
|
(pattern: string) =>
|
||||||
|
pattern.toLowerCase().includes(fileExtension) ||
|
||||||
|
pattern.includes(fileNameWithoutExtension),
|
||||||
|
);
|
||||||
|
|
||||||
if (!requestedExplicitly) {
|
if (!requestedExplicitly) {
|
||||||
skippedFiles.push({
|
return {
|
||||||
path: relativePathForDisplay,
|
success: false,
|
||||||
reason:
|
filePath,
|
||||||
'asset file (image/pdf) was not explicitly requested by name or extension',
|
relativePathForDisplay,
|
||||||
});
|
reason:
|
||||||
continue;
|
'asset file (image/pdf) was not explicitly requested by name or extension',
|
||||||
}
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Use processSingleFileContent for all file types now
|
// Use processSingleFileContent for all file types now
|
||||||
const fileReadResult = await processSingleFileContent(
|
const fileReadResult = await processSingleFileContent(
|
||||||
filePath,
|
|
||||||
this.config.getTargetDir(),
|
|
||||||
);
|
|
||||||
|
|
||||||
if (fileReadResult.error) {
|
|
||||||
skippedFiles.push({
|
|
||||||
path: relativePathForDisplay,
|
|
||||||
reason: `Read error: ${fileReadResult.error}`,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
if (typeof fileReadResult.llmContent === 'string') {
|
|
||||||
const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
|
|
||||||
'{filePath}',
|
|
||||||
filePath,
|
filePath,
|
||||||
|
this.config.getTargetDir(),
|
||||||
);
|
);
|
||||||
contentParts.push(`${separator}\n\n${fileReadResult.llmContent}\n\n`);
|
|
||||||
} else {
|
if (fileReadResult.error) {
|
||||||
contentParts.push(fileReadResult.llmContent); // This is a Part for image/pdf
|
return {
|
||||||
|
success: false,
|
||||||
|
filePath,
|
||||||
|
relativePathForDisplay,
|
||||||
|
reason: `Read error: ${fileReadResult.error}`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
filePath,
|
||||||
|
relativePathForDisplay,
|
||||||
|
fileReadResult,
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
const relativePathForDisplay = path
|
||||||
|
.relative(this.config.getTargetDir(), filePath)
|
||||||
|
.replace(/\\/g, '/');
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
filePath,
|
||||||
|
relativePathForDisplay,
|
||||||
|
reason: `Unexpected error: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
processedFilesRelativePaths.push(relativePathForDisplay);
|
},
|
||||||
const lines =
|
);
|
||||||
typeof fileReadResult.llmContent === 'string'
|
|
||||||
? fileReadResult.llmContent.split('\n').length
|
const results = await Promise.allSettled(fileProcessingPromises);
|
||||||
: undefined;
|
|
||||||
const mimetype = getSpecificMimeType(filePath);
|
for (const result of results) {
|
||||||
recordFileOperationMetric(
|
if (result.status === 'fulfilled') {
|
||||||
this.config,
|
const fileResult = result.value;
|
||||||
FileOperation.READ,
|
|
||||||
lines,
|
if (!fileResult.success) {
|
||||||
mimetype,
|
// Handle skipped files (images/PDFs not requested or read errors)
|
||||||
path.extname(filePath),
|
skippedFiles.push({
|
||||||
);
|
path: fileResult.relativePathForDisplay,
|
||||||
|
reason: fileResult.reason,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// Handle successfully processed files
|
||||||
|
const { filePath, relativePathForDisplay, fileReadResult } =
|
||||||
|
fileResult;
|
||||||
|
|
||||||
|
if (typeof fileReadResult.llmContent === 'string') {
|
||||||
|
const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
|
||||||
|
'{filePath}',
|
||||||
|
filePath,
|
||||||
|
);
|
||||||
|
contentParts.push(
|
||||||
|
`${separator}\n\n${fileReadResult.llmContent}\n\n`,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
contentParts.push(fileReadResult.llmContent); // This is a Part for image/pdf
|
||||||
|
}
|
||||||
|
|
||||||
|
processedFilesRelativePaths.push(relativePathForDisplay);
|
||||||
|
|
||||||
|
const lines =
|
||||||
|
typeof fileReadResult.llmContent === 'string'
|
||||||
|
? fileReadResult.llmContent.split('\n').length
|
||||||
|
: undefined;
|
||||||
|
const mimetype = getSpecificMimeType(filePath);
|
||||||
|
recordFileOperationMetric(
|
||||||
|
this.config,
|
||||||
|
FileOperation.READ,
|
||||||
|
lines,
|
||||||
|
mimetype,
|
||||||
|
path.extname(filePath),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Handle Promise rejection (unexpected errors)
|
||||||
|
skippedFiles.push({
|
||||||
|
path: 'unknown',
|
||||||
|
reason: `Unexpected error: ${result.reason}`,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue