diff --git a/packages/core/src/tools/read-many-files.ts b/packages/core/src/tools/read-many-files.ts index 7c3be6e3..7ea6c722 100644 --- a/packages/core/src/tools/read-many-files.ts +++ b/packages/core/src/tools/read-many-files.ts @@ -405,7 +405,7 @@ Use this tool when the user's query implies needing the content of several files .relative(this.config.getTargetDir(), filePath) .replace(/\\/g, '/'); - const fileType = detectFileType(filePath); + const fileType = await detectFileType(filePath); if (fileType === 'image' || fileType === 'pdf') { const fileExtension = path.extname(filePath).toLowerCase(); diff --git a/packages/core/src/utils/fileUtils.test.ts b/packages/core/src/utils/fileUtils.test.ts index 78a5ab4c..34aff79b 100644 --- a/packages/core/src/utils/fileUtils.test.ts +++ b/packages/core/src/utils/fileUtils.test.ts @@ -142,41 +142,41 @@ describe('fileUtils', () => { } }); - it('should return false for an empty file', () => { + it('should return false for an empty file', async () => { actualNodeFs.writeFileSync(filePathForBinaryTest, ''); - expect(isBinaryFile(filePathForBinaryTest)).toBe(false); + expect(await isBinaryFile(filePathForBinaryTest)).toBe(false); }); - it('should return false for a typical text file', () => { + it('should return false for a typical text file', async () => { actualNodeFs.writeFileSync( filePathForBinaryTest, 'Hello, world!\nThis is a test file with normal text content.', ); - expect(isBinaryFile(filePathForBinaryTest)).toBe(false); + expect(await isBinaryFile(filePathForBinaryTest)).toBe(false); }); - it('should return true for a file with many null bytes', () => { + it('should return true for a file with many null bytes', async () => { const binaryContent = Buffer.from([ 0x48, 0x65, 0x00, 0x6c, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, ]); // "He\0llo\0\0\0\0\0" actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent); - expect(isBinaryFile(filePathForBinaryTest)).toBe(true); + expect(await isBinaryFile(filePathForBinaryTest)).toBe(true); }); - it('should return true for a file with high percentage of non-printable ASCII', () => { + it('should return true for a file with high percentage of non-printable ASCII', async () => { const binaryContent = Buffer.from([ 0x41, 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x43, 0x44, 0x06, ]); // AB\x01\x02\x03\x04\x05CD\x06 actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent); - expect(isBinaryFile(filePathForBinaryTest)).toBe(true); + expect(await isBinaryFile(filePathForBinaryTest)).toBe(true); }); - it('should return false if file access fails (e.g., ENOENT)', () => { + it('should return false if file access fails (e.g., ENOENT)', async () => { // Ensure the file does not exist if (actualNodeFs.existsSync(filePathForBinaryTest)) { actualNodeFs.unlinkSync(filePathForBinaryTest); } - expect(isBinaryFile(filePathForBinaryTest)).toBe(false); + expect(await isBinaryFile(filePathForBinaryTest)).toBe(false); }); }); @@ -196,64 +196,64 @@ describe('fileUtils', () => { vi.restoreAllMocks(); // Restore spies on actualNodeFs }); - it('should detect typescript type by extension (ts)', () => { - expect(detectFileType('file.ts')).toBe('text'); - expect(detectFileType('file.test.ts')).toBe('text'); + it('should detect typescript type by extension (ts)', async () => { + expect(await detectFileType('file.ts')).toBe('text'); + expect(await detectFileType('file.test.ts')).toBe('text'); }); - it('should detect image type by extension (png)', () => { + it('should detect image type by extension (png)', async () => { mockMimeLookup.mockReturnValueOnce('image/png'); - expect(detectFileType('file.png')).toBe('image'); + expect(await detectFileType('file.png')).toBe('image'); }); - it('should detect image type by extension (jpeg)', () => { + it('should detect image type by extension (jpeg)', async () => { mockMimeLookup.mockReturnValueOnce('image/jpeg'); - expect(detectFileType('file.jpg')).toBe('image'); + expect(await detectFileType('file.jpg')).toBe('image'); }); - it('should detect svg type by extension', () => { - expect(detectFileType('image.svg')).toBe('svg'); - expect(detectFileType('image.icon.svg')).toBe('svg'); + it('should detect svg type by extension', async () => { + expect(await detectFileType('image.svg')).toBe('svg'); + expect(await detectFileType('image.icon.svg')).toBe('svg'); }); - it('should detect pdf type by extension', () => { + it('should detect pdf type by extension', async () => { mockMimeLookup.mockReturnValueOnce('application/pdf'); - expect(detectFileType('file.pdf')).toBe('pdf'); + expect(await detectFileType('file.pdf')).toBe('pdf'); }); - it('should detect audio type by extension', () => { + it('should detect audio type by extension', async () => { mockMimeLookup.mockReturnValueOnce('audio/mpeg'); - expect(detectFileType('song.mp3')).toBe('audio'); + expect(await detectFileType('song.mp3')).toBe('audio'); }); - it('should detect video type by extension', () => { + it('should detect video type by extension', async () => { mockMimeLookup.mockReturnValueOnce('video/mp4'); - expect(detectFileType('movie.mp4')).toBe('video'); + expect(await detectFileType('movie.mp4')).toBe('video'); }); - it('should detect known binary extensions as binary (e.g. .zip)', () => { + it('should detect known binary extensions as binary (e.g. .zip)', async () => { mockMimeLookup.mockReturnValueOnce('application/zip'); - expect(detectFileType('archive.zip')).toBe('binary'); + expect(await detectFileType('archive.zip')).toBe('binary'); }); - it('should detect known binary extensions as binary (e.g. .exe)', () => { + it('should detect known binary extensions as binary (e.g. .exe)', async () => { mockMimeLookup.mockReturnValueOnce('application/octet-stream'); // Common for .exe - expect(detectFileType('app.exe')).toBe('binary'); + expect(await detectFileType('app.exe')).toBe('binary'); }); - it('should use isBinaryFile for unknown extensions and detect as binary', () => { + it('should use isBinaryFile for unknown extensions and detect as binary', async () => { mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type // Create a file that isBinaryFile will identify as binary const binaryContent = Buffer.from([ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, ]); actualNodeFs.writeFileSync(filePathForDetectTest, binaryContent); - expect(detectFileType(filePathForDetectTest)).toBe('binary'); + expect(await detectFileType(filePathForDetectTest)).toBe('binary'); }); - it('should default to text if mime type is unknown and content is not binary', () => { + it('should default to text if mime type is unknown and content is not binary', async () => { mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type // filePathForDetectTest is already a text file by default from beforeEach - expect(detectFileType(filePathForDetectTest)).toBe('text'); + expect(await detectFileType(filePathForDetectTest)).toBe('text'); }); }); diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index 33eda6ab..1489c4ee 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -4,8 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ -import fs from 'fs'; -import path from 'path'; +import fs from 'node:fs'; +import path from 'node:path'; import { PartUnion } from '@google/genai'; import mime from 'mime-types'; @@ -56,22 +56,24 @@ export function isWithinRoot( /** * Determines if a file is likely binary based on content sampling. * @param filePath Path to the file. - * @returns True if the file appears to be binary. + * @returns Promise that resolves to true if the file appears to be binary. */ -export function isBinaryFile(filePath: string): boolean { +export async function isBinaryFile(filePath: string): Promise { + let fileHandle: fs.promises.FileHandle | undefined; try { - const fd = fs.openSync(filePath, 'r'); + fileHandle = await fs.promises.open(filePath, 'r'); + // Read up to 4KB or file size, whichever is smaller - const fileSize = fs.fstatSync(fd).size; + const stats = await fileHandle.stat(); + const fileSize = stats.size; if (fileSize === 0) { // Empty file is not considered binary for content checking - fs.closeSync(fd); return false; } const bufferSize = Math.min(4096, fileSize); const buffer = Buffer.alloc(bufferSize); - const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0); - fs.closeSync(fd); + const result = await fileHandle.read(buffer, 0, buffer.length, 0); + const bytesRead = result.bytesRead; if (bytesRead === 0) return false; @@ -84,21 +86,40 @@ export function isBinaryFile(filePath: string): boolean { } // If >30% non-printable characters, consider it binary return nonPrintableCount / bytesRead > 0.3; - } catch { + } catch (error) { + // Log error for debugging while maintaining existing behavior + console.warn( + `Failed to check if file is binary: ${filePath}`, + error instanceof Error ? error.message : String(error), + ); // If any error occurs (e.g. file not found, permissions), // treat as not binary here; let higher-level functions handle existence/access errors. return false; + } finally { + // Safely close the file handle if it was successfully opened + if (fileHandle) { + try { + await fileHandle.close(); + } catch (closeError) { + // Log close errors for debugging while continuing with cleanup + console.warn( + `Failed to close file handle for: ${filePath}`, + closeError instanceof Error ? closeError.message : String(closeError), + ); + // The important thing is that we attempted to clean up + } + } } } /** * Detects the type of file based on extension and content. * @param filePath Path to the file. - * @returns 'text', 'image', 'pdf', 'audio', 'video', or 'binary'. + * @returns Promise that resolves to 'text', 'image', 'pdf', 'audio', 'video', 'binary' or 'svg'. */ -export function detectFileType( +export async function detectFileType( filePath: string, -): 'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg' { +): Promise<'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg'> { const ext = path.extname(filePath).toLowerCase(); // The mimetype for "ts" is MPEG transport stream (a video format) but we want @@ -166,7 +187,7 @@ export function detectFileType( // Fallback to content-based check if mime type wasn't conclusive for image/pdf // and it's not a known binary extension. - if (isBinaryFile(filePath)) { + if (await isBinaryFile(filePath)) { return 'binary'; } @@ -227,7 +248,7 @@ export async function processSingleFileContent( ); } - const fileType = detectFileType(filePath); + const fileType = await detectFileType(filePath); const relativePathForDisplay = path .relative(rootDirectory, filePath) .replace(/\\/g, '/');