From 0275ab0108e9a411d90d8ef8c8d70e21e498d81a Mon Sep 17 00:00:00 2001
From: Santhosh Kumar
Date: Wed, 2 Jul 2025 00:52:32 +0530
Subject: [PATCH] feat: add audio and video support to read_file (#2556)

---
Review notes (free-form text in this area is not part of the diff):

  What the hunks below do:
  * read-many-files.test.ts: adds a vi.mock of 'mime-types' whose lookup()
    maps .ts/.js to text/plain, .png to image/png, .pdf to application/pdf,
    .mp3/.wav to audio/mpeg, .mp4/.mov to video/mp4, and returns false for
    anything else. The same function is exposed as both the default.lookup
    and the named lookup export.
  * fileUtils.test.ts: adds detectFileType cases for 'audio' and 'video',
    plus a processSingleFileContent case that writes a 21 MiB file and
    expects the "File size exceeds the 20MB limit" text in error,
    returnDisplay and llmContent.
  * fileUtils.ts, detectFileType: the MIME checks are grouped under a single
    truthiness guard; MIME types starting with audio/ or video/ now yield
    'audio' or 'video'. The return type union and the @returns text are
    widened to match.
  * fileUtils.ts, processSingleFileContent: fs.statSync is replaced by an
    awaited fs.promises.stat; a file whose stats.size is greater than
    20 * 1024 * 1024 bytes causes an Error to be thrown; 'audio' and 'video'
    share the existing 'image'/'pdf' case, which reads the whole file and
    base64-encodes it.

  NOTE(review): the size check throws, while the new test asserts on a
  returned result object. That only holds if an enclosing handler outside
  these hunks converts the exception into an error result. TODO confirm.

  NOTE(review): the size check runs before detectFileType, so it applies to
  every file type, not only to media files. Presumably intentional; verify.

  NOTE(review): the mock reports audio/mpeg for .wav and video/mp4 for .mov.
  This is enough for prefix-based detection but these are not the
  registered types for those extensions.

  NOTE(review): line breaks, blank context lines and leading indentation
  inside the hunks were reconstructed from the hunk line counts and a
  2-space indent convention. Verify against the original commit before
  applying.

 .../core/src/tools/read-many-files.test.ts    | 27 ++++++++++++
 packages/core/src/utils/fileUtils.test.ts     | 28 +++++++++++++
 packages/core/src/utils/fileUtils.ts          | 41 +++++++++++++++----
 3 files changed, 87 insertions(+), 9 deletions(-)

diff --git a/packages/core/src/tools/read-many-files.test.ts b/packages/core/src/tools/read-many-files.test.ts
index 666c484c..697b7d1b 100644
--- a/packages/core/src/tools/read-many-files.test.ts
+++ b/packages/core/src/tools/read-many-files.test.ts
@@ -14,6 +14,33 @@ import fs from 'fs'; // Actual fs for setup
 import os from 'os';
 import { Config } from '../config/config.js';
 
+vi.mock('mime-types', () => {
+  const lookup = (filename: string) => {
+    if (filename.endsWith('.ts') || filename.endsWith('.js')) {
+      return 'text/plain';
+    }
+    if (filename.endsWith('.png')) {
+      return 'image/png';
+    }
+    if (filename.endsWith('.pdf')) {
+      return 'application/pdf';
+    }
+    if (filename.endsWith('.mp3') || filename.endsWith('.wav')) {
+      return 'audio/mpeg';
+    }
+    if (filename.endsWith('.mp4') || filename.endsWith('.mov')) {
+      return 'video/mp4';
+    }
+    return false;
+  };
+  return {
+    default: {
+      lookup,
+    },
+    lookup,
+  };
+});
+
 describe('ReadManyFilesTool', () => {
   let tool: ReadManyFilesTool;
   let tempRootDir: string;
diff --git a/packages/core/src/utils/fileUtils.test.ts b/packages/core/src/utils/fileUtils.test.ts
index 4f4c7c1e..0455b6e1 100644
--- a/packages/core/src/utils/fileUtils.test.ts
+++ b/packages/core/src/utils/fileUtils.test.ts
@@ -211,6 +211,16 @@ describe('fileUtils', () => {
       expect(detectFileType('file.pdf')).toBe('pdf');
     });
 
+    it('should detect audio type by extension', () => {
+      mockMimeLookup.mockReturnValueOnce('audio/mpeg');
+      expect(detectFileType('song.mp3')).toBe('audio');
+    });
+
+    it('should detect video type by extension', () => {
+      mockMimeLookup.mockReturnValueOnce('video/mp4');
+      expect(detectFileType('movie.mp4')).toBe('video');
+    });
+
     it('should detect known binary extensions as binary (e.g. .zip)', () => {
       mockMimeLookup.mockReturnValueOnce('application/zip');
       expect(detectFileType('archive.zip')).toBe('binary');
@@ -427,5 +437,23 @@ describe('fileUtils', () => {
       );
       expect(result.isTruncated).toBe(true);
     });
+
+    it('should return an error if the file size exceeds 20MB', async () => {
+      // Create a file just over 20MB
+      const twentyOneMB = 21 * 1024 * 1024;
+      const buffer = Buffer.alloc(twentyOneMB, 0x61); // Fill with 'a'
+      actualNodeFs.writeFileSync(testTextFilePath, buffer);
+
+      const result = await processSingleFileContent(
+        testTextFilePath,
+        tempRootDir,
+      );
+
+      expect(result.error).toContain('File size exceeds the 20MB limit');
+      expect(result.returnDisplay).toContain(
+        'File size exceeds the 20MB limit',
+      );
+      expect(result.llmContent).toContain('File size exceeds the 20MB limit');
+    });
   });
 });
diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts
index cb4d333a..5a05d513 100644
--- a/packages/core/src/utils/fileUtils.ts
+++ b/packages/core/src/utils/fileUtils.ts
@@ -94,19 +94,27 @@ export function isBinaryFile(filePath: string): boolean {
 /**
  * Detects the type of file based on extension and content.
  * @param filePath Path to the file.
- * @returns 'text', 'image', 'pdf', or 'binary'.
+ * @returns 'text', 'image', 'pdf', 'audio', 'video', or 'binary'.
  */
 export function detectFileType(
   filePath: string,
-): 'text' | 'image' | 'pdf' | 'binary' {
+): 'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' {
   const ext = path.extname(filePath).toLowerCase();
   const lookedUpMimeType = mime.lookup(filePath); // Returns false if not found, or the mime type string
 
-  if (lookedUpMimeType && lookedUpMimeType.startsWith('image/')) {
-    return 'image';
-  }
-  if (lookedUpMimeType && lookedUpMimeType === 'application/pdf') {
-    return 'pdf';
+  if (lookedUpMimeType) {
+    if (lookedUpMimeType.startsWith('image/')) {
+      return 'image';
+    }
+    if (lookedUpMimeType.startsWith('audio/')) {
+      return 'audio';
+    }
+    if (lookedUpMimeType.startsWith('video/')) {
+      return 'video';
+    }
+    if (lookedUpMimeType === 'application/pdf') {
+      return 'pdf';
+    }
   }
 
   // Stricter binary check for common non-text extensions before content check
@@ -187,7 +195,7 @@ export async function processSingleFileContent(
         error: `File not found: ${filePath}`,
       };
     }
-    const stats = fs.statSync(filePath); // Sync check
+    const stats = await fs.promises.stat(filePath);
     if (stats.isDirectory()) {
       return {
         llmContent: '',
@@ -196,6 +204,19 @@ export async function processSingleFileContent(
       };
     }
 
+    const fileSizeInBytes = stats.size;
+    // 20MB limit
+    const maxFileSize = 20 * 1024 * 1024;
+
+    if (fileSizeInBytes > maxFileSize) {
+      throw new Error(
+        `File size exceeds the 20MB limit: ${filePath} (${(
+          fileSizeInBytes /
+          (1024 * 1024)
+        ).toFixed(2)}MB)`,
+      );
+    }
+
     const fileType = detectFileType(filePath);
     const relativePathForDisplay = path
       .relative(rootDirectory, filePath)
@@ -253,7 +274,9 @@ export async function processSingleFileContent(
         };
       }
       case 'image':
-      case 'pdf': {
+      case 'pdf':
+      case 'audio':
+      case 'video': {
         const contentBuffer = await fs.promises.readFile(filePath);
         const base64Data = contentBuffer.toString('base64');
         return {