This commit is contained in:
parent
f4d077cc1f
commit
45b764943a
|
@ -405,7 +405,7 @@ Use this tool when the user's query implies needing the content of several files
|
||||||
.relative(this.config.getTargetDir(), filePath)
|
.relative(this.config.getTargetDir(), filePath)
|
||||||
.replace(/\\/g, '/');
|
.replace(/\\/g, '/');
|
||||||
|
|
||||||
const fileType = detectFileType(filePath);
|
const fileType = await detectFileType(filePath);
|
||||||
|
|
||||||
if (fileType === 'image' || fileType === 'pdf') {
|
if (fileType === 'image' || fileType === 'pdf') {
|
||||||
const fileExtension = path.extname(filePath).toLowerCase();
|
const fileExtension = path.extname(filePath).toLowerCase();
|
||||||
|
|
|
@ -142,41 +142,41 @@ describe('fileUtils', () => {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return false for an empty file', () => {
|
it('should return false for an empty file', async () => {
|
||||||
actualNodeFs.writeFileSync(filePathForBinaryTest, '');
|
actualNodeFs.writeFileSync(filePathForBinaryTest, '');
|
||||||
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
|
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return false for a typical text file', () => {
|
it('should return false for a typical text file', async () => {
|
||||||
actualNodeFs.writeFileSync(
|
actualNodeFs.writeFileSync(
|
||||||
filePathForBinaryTest,
|
filePathForBinaryTest,
|
||||||
'Hello, world!\nThis is a test file with normal text content.',
|
'Hello, world!\nThis is a test file with normal text content.',
|
||||||
);
|
);
|
||||||
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
|
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return true for a file with many null bytes', () => {
|
it('should return true for a file with many null bytes', async () => {
|
||||||
const binaryContent = Buffer.from([
|
const binaryContent = Buffer.from([
|
||||||
0x48, 0x65, 0x00, 0x6c, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00,
|
0x48, 0x65, 0x00, 0x6c, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
]); // "He\0llo\0\0\0\0\0"
|
]); // "He\0llo\0\0\0\0\0"
|
||||||
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
|
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
|
||||||
expect(isBinaryFile(filePathForBinaryTest)).toBe(true);
|
expect(await isBinaryFile(filePathForBinaryTest)).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return true for a file with high percentage of non-printable ASCII', () => {
|
it('should return true for a file with high percentage of non-printable ASCII', async () => {
|
||||||
const binaryContent = Buffer.from([
|
const binaryContent = Buffer.from([
|
||||||
0x41, 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x43, 0x44, 0x06,
|
0x41, 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x43, 0x44, 0x06,
|
||||||
]); // AB\x01\x02\x03\x04\x05CD\x06
|
]); // AB\x01\x02\x03\x04\x05CD\x06
|
||||||
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
|
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
|
||||||
expect(isBinaryFile(filePathForBinaryTest)).toBe(true);
|
expect(await isBinaryFile(filePathForBinaryTest)).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return false if file access fails (e.g., ENOENT)', () => {
|
it('should return false if file access fails (e.g., ENOENT)', async () => {
|
||||||
// Ensure the file does not exist
|
// Ensure the file does not exist
|
||||||
if (actualNodeFs.existsSync(filePathForBinaryTest)) {
|
if (actualNodeFs.existsSync(filePathForBinaryTest)) {
|
||||||
actualNodeFs.unlinkSync(filePathForBinaryTest);
|
actualNodeFs.unlinkSync(filePathForBinaryTest);
|
||||||
}
|
}
|
||||||
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
|
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -196,64 +196,64 @@ describe('fileUtils', () => {
|
||||||
vi.restoreAllMocks(); // Restore spies on actualNodeFs
|
vi.restoreAllMocks(); // Restore spies on actualNodeFs
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should detect typescript type by extension (ts)', () => {
|
it('should detect typescript type by extension (ts)', async () => {
|
||||||
expect(detectFileType('file.ts')).toBe('text');
|
expect(await detectFileType('file.ts')).toBe('text');
|
||||||
expect(detectFileType('file.test.ts')).toBe('text');
|
expect(await detectFileType('file.test.ts')).toBe('text');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should detect image type by extension (png)', () => {
|
it('should detect image type by extension (png)', async () => {
|
||||||
mockMimeLookup.mockReturnValueOnce('image/png');
|
mockMimeLookup.mockReturnValueOnce('image/png');
|
||||||
expect(detectFileType('file.png')).toBe('image');
|
expect(await detectFileType('file.png')).toBe('image');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should detect image type by extension (jpeg)', () => {
|
it('should detect image type by extension (jpeg)', async () => {
|
||||||
mockMimeLookup.mockReturnValueOnce('image/jpeg');
|
mockMimeLookup.mockReturnValueOnce('image/jpeg');
|
||||||
expect(detectFileType('file.jpg')).toBe('image');
|
expect(await detectFileType('file.jpg')).toBe('image');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should detect svg type by extension', () => {
|
it('should detect svg type by extension', async () => {
|
||||||
expect(detectFileType('image.svg')).toBe('svg');
|
expect(await detectFileType('image.svg')).toBe('svg');
|
||||||
expect(detectFileType('image.icon.svg')).toBe('svg');
|
expect(await detectFileType('image.icon.svg')).toBe('svg');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should detect pdf type by extension', () => {
|
it('should detect pdf type by extension', async () => {
|
||||||
mockMimeLookup.mockReturnValueOnce('application/pdf');
|
mockMimeLookup.mockReturnValueOnce('application/pdf');
|
||||||
expect(detectFileType('file.pdf')).toBe('pdf');
|
expect(await detectFileType('file.pdf')).toBe('pdf');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should detect audio type by extension', () => {
|
it('should detect audio type by extension', async () => {
|
||||||
mockMimeLookup.mockReturnValueOnce('audio/mpeg');
|
mockMimeLookup.mockReturnValueOnce('audio/mpeg');
|
||||||
expect(detectFileType('song.mp3')).toBe('audio');
|
expect(await detectFileType('song.mp3')).toBe('audio');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should detect video type by extension', () => {
|
it('should detect video type by extension', async () => {
|
||||||
mockMimeLookup.mockReturnValueOnce('video/mp4');
|
mockMimeLookup.mockReturnValueOnce('video/mp4');
|
||||||
expect(detectFileType('movie.mp4')).toBe('video');
|
expect(await detectFileType('movie.mp4')).toBe('video');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should detect known binary extensions as binary (e.g. .zip)', () => {
|
it('should detect known binary extensions as binary (e.g. .zip)', async () => {
|
||||||
mockMimeLookup.mockReturnValueOnce('application/zip');
|
mockMimeLookup.mockReturnValueOnce('application/zip');
|
||||||
expect(detectFileType('archive.zip')).toBe('binary');
|
expect(await detectFileType('archive.zip')).toBe('binary');
|
||||||
});
|
});
|
||||||
it('should detect known binary extensions as binary (e.g. .exe)', () => {
|
it('should detect known binary extensions as binary (e.g. .exe)', async () => {
|
||||||
mockMimeLookup.mockReturnValueOnce('application/octet-stream'); // Common for .exe
|
mockMimeLookup.mockReturnValueOnce('application/octet-stream'); // Common for .exe
|
||||||
expect(detectFileType('app.exe')).toBe('binary');
|
expect(await detectFileType('app.exe')).toBe('binary');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should use isBinaryFile for unknown extensions and detect as binary', () => {
|
it('should use isBinaryFile for unknown extensions and detect as binary', async () => {
|
||||||
mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
|
mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
|
||||||
// Create a file that isBinaryFile will identify as binary
|
// Create a file that isBinaryFile will identify as binary
|
||||||
const binaryContent = Buffer.from([
|
const binaryContent = Buffer.from([
|
||||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
|
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
|
||||||
]);
|
]);
|
||||||
actualNodeFs.writeFileSync(filePathForDetectTest, binaryContent);
|
actualNodeFs.writeFileSync(filePathForDetectTest, binaryContent);
|
||||||
expect(detectFileType(filePathForDetectTest)).toBe('binary');
|
expect(await detectFileType(filePathForDetectTest)).toBe('binary');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should default to text if mime type is unknown and content is not binary', () => {
|
it('should default to text if mime type is unknown and content is not binary', async () => {
|
||||||
mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
|
mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
|
||||||
// filePathForDetectTest is already a text file by default from beforeEach
|
// filePathForDetectTest is already a text file by default from beforeEach
|
||||||
expect(detectFileType(filePathForDetectTest)).toBe('text');
|
expect(await detectFileType(filePathForDetectTest)).toBe('text');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -4,8 +4,8 @@
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import fs from 'fs';
|
import fs from 'node:fs';
|
||||||
import path from 'path';
|
import path from 'node:path';
|
||||||
import { PartUnion } from '@google/genai';
|
import { PartUnion } from '@google/genai';
|
||||||
import mime from 'mime-types';
|
import mime from 'mime-types';
|
||||||
|
|
||||||
|
@ -56,22 +56,24 @@ export function isWithinRoot(
|
||||||
/**
|
/**
|
||||||
* Determines if a file is likely binary based on content sampling.
|
* Determines if a file is likely binary based on content sampling.
|
||||||
* @param filePath Path to the file.
|
* @param filePath Path to the file.
|
||||||
* @returns True if the file appears to be binary.
|
* @returns Promise that resolves to true if the file appears to be binary.
|
||||||
*/
|
*/
|
||||||
export function isBinaryFile(filePath: string): boolean {
|
export async function isBinaryFile(filePath: string): Promise<boolean> {
|
||||||
|
let fileHandle: fs.promises.FileHandle | undefined;
|
||||||
try {
|
try {
|
||||||
const fd = fs.openSync(filePath, 'r');
|
fileHandle = await fs.promises.open(filePath, 'r');
|
||||||
|
|
||||||
// Read up to 4KB or file size, whichever is smaller
|
// Read up to 4KB or file size, whichever is smaller
|
||||||
const fileSize = fs.fstatSync(fd).size;
|
const stats = await fileHandle.stat();
|
||||||
|
const fileSize = stats.size;
|
||||||
if (fileSize === 0) {
|
if (fileSize === 0) {
|
||||||
// Empty file is not considered binary for content checking
|
// Empty file is not considered binary for content checking
|
||||||
fs.closeSync(fd);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const bufferSize = Math.min(4096, fileSize);
|
const bufferSize = Math.min(4096, fileSize);
|
||||||
const buffer = Buffer.alloc(bufferSize);
|
const buffer = Buffer.alloc(bufferSize);
|
||||||
const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
|
const result = await fileHandle.read(buffer, 0, buffer.length, 0);
|
||||||
fs.closeSync(fd);
|
const bytesRead = result.bytesRead;
|
||||||
|
|
||||||
if (bytesRead === 0) return false;
|
if (bytesRead === 0) return false;
|
||||||
|
|
||||||
|
@ -84,21 +86,40 @@ export function isBinaryFile(filePath: string): boolean {
|
||||||
}
|
}
|
||||||
// If >30% non-printable characters, consider it binary
|
// If >30% non-printable characters, consider it binary
|
||||||
return nonPrintableCount / bytesRead > 0.3;
|
return nonPrintableCount / bytesRead > 0.3;
|
||||||
} catch {
|
} catch (error) {
|
||||||
|
// Log error for debugging while maintaining existing behavior
|
||||||
|
console.warn(
|
||||||
|
`Failed to check if file is binary: ${filePath}`,
|
||||||
|
error instanceof Error ? error.message : String(error),
|
||||||
|
);
|
||||||
// If any error occurs (e.g. file not found, permissions),
|
// If any error occurs (e.g. file not found, permissions),
|
||||||
// treat as not binary here; let higher-level functions handle existence/access errors.
|
// treat as not binary here; let higher-level functions handle existence/access errors.
|
||||||
return false;
|
return false;
|
||||||
|
} finally {
|
||||||
|
// Safely close the file handle if it was successfully opened
|
||||||
|
if (fileHandle) {
|
||||||
|
try {
|
||||||
|
await fileHandle.close();
|
||||||
|
} catch (closeError) {
|
||||||
|
// Log close errors for debugging while continuing with cleanup
|
||||||
|
console.warn(
|
||||||
|
`Failed to close file handle for: ${filePath}`,
|
||||||
|
closeError instanceof Error ? closeError.message : String(closeError),
|
||||||
|
);
|
||||||
|
// The important thing is that we attempted to clean up
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Detects the type of file based on extension and content.
|
* Detects the type of file based on extension and content.
|
||||||
* @param filePath Path to the file.
|
* @param filePath Path to the file.
|
||||||
* @returns 'text', 'image', 'pdf', 'audio', 'video', or 'binary'.
|
* @returns Promise that resolves to 'text', 'image', 'pdf', 'audio', 'video', 'binary' or 'svg'.
|
||||||
*/
|
*/
|
||||||
export function detectFileType(
|
export async function detectFileType(
|
||||||
filePath: string,
|
filePath: string,
|
||||||
): 'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg' {
|
): Promise<'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg'> {
|
||||||
const ext = path.extname(filePath).toLowerCase();
|
const ext = path.extname(filePath).toLowerCase();
|
||||||
|
|
||||||
// The mimetype for "ts" is MPEG transport stream (a video format) but we want
|
// The mimetype for "ts" is MPEG transport stream (a video format) but we want
|
||||||
|
@ -166,7 +187,7 @@ export function detectFileType(
|
||||||
|
|
||||||
// Fallback to content-based check if mime type wasn't conclusive for image/pdf
|
// Fallback to content-based check if mime type wasn't conclusive for image/pdf
|
||||||
// and it's not a known binary extension.
|
// and it's not a known binary extension.
|
||||||
if (isBinaryFile(filePath)) {
|
if (await isBinaryFile(filePath)) {
|
||||||
return 'binary';
|
return 'binary';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -227,7 +248,7 @@ export async function processSingleFileContent(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const fileType = detectFileType(filePath);
|
const fileType = await detectFileType(filePath);
|
||||||
const relativePathForDisplay = path
|
const relativePathForDisplay = path
|
||||||
.relative(rootDirectory, filePath)
|
.relative(rootDirectory, filePath)
|
||||||
.replace(/\\/g, '/');
|
.replace(/\\/g, '/');
|
||||||
|
|
Loading…
Reference in New Issue