/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import fs from 'node:fs';
import path from 'node:path';
import { PartUnion } from '@google/genai';
import mime from 'mime-types';

// Limits used when reading text files (line count and per-line length).
const DEFAULT_MAX_LINES_TEXT_FILE = 2000;
const MAX_LINE_LENGTH_TEXT_FILE = 2000;

// Default character encoding.
export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';

/**
 * Resolves the MIME type associated with a file path via `mime.lookup`.
 * @param filePath Path to the file.
 * @returns The MIME type string, or undefined when the lookup does not
 *   produce a string.
 */
export function getSpecificMimeType(filePath: string): string | undefined {
  const mimeLookup = mime.lookup(filePath);
  if (typeof mimeLookup === 'string') {
    return mimeLookup;
  }
  return undefined;
}

/**
 * Tells whether a path is equal to, or located underneath, a root directory.
 * Both arguments are passed through `path.resolve` before being compared.
 * @param pathToCheck The path to test.
 * @param rootDirectory The directory that should contain `pathToCheck`.
 * @returns True if the resolved path is the resolved root or starts with the
 *   root followed by a path separator; false otherwise.
 */
export function isWithinRoot(
  pathToCheck: string,
  rootDirectory: string,
): boolean {
  const candidate = path.resolve(pathToCheck);
  const root = path.resolve(rootDirectory);

  if (candidate === root) {
    return true;
  }

  // Compare against the root terminated by a separator so that a sibling such
  // as "<root>foo" is not mistaken for a child of "<root>". A root that
  // already ends with the separator is used unchanged.
  const alreadyTerminated = root === path.sep || root.endsWith(path.sep);
  const rootPrefix = alreadyTerminated ? root : root + path.sep;
  return candidate.startsWith(rootPrefix);
}

/**
 * Determines if a file is likely binary based on content sampling.
 * @param filePath Path to the file.
 * @returns Promise that resolves to true if the file appears to be binary.
*/
export async function isBinaryFile(filePath: string): Promise<boolean> {
  let fileHandle: fs.promises.FileHandle | undefined;
  try {
    fileHandle = await fs.promises.open(filePath, 'r');

    // Sample at most the first 4KB of the file (or the whole file if smaller).
    const { size: fileSize } = await fileHandle.stat();
    if (fileSize === 0) {
      // An empty file has nothing to inspect; it is not considered binary.
      return false;
    }

    const sample = Buffer.alloc(Math.min(4096, fileSize));
    const { bytesRead } = await fileHandle.read(sample, 0, sample.length, 0);
    if (bytesRead === 0) {
      return false;
    }

    let nonPrintableCount = 0;
    // Only look at the bytes that were actually read.
    for (const byte of sample.subarray(0, bytesRead)) {
      if (byte === 0) {
        // A null byte is treated as a strong indicator of binary content.
        return true;
      }
      // Count bytes below 32, except 9..13 (tab, LF, VT, FF, CR).
      if (byte < 9 || (byte > 13 && byte < 32)) {
        nonPrintableCount++;
      }
    }

    // More than 30% counted bytes in the sample => binary.
    return nonPrintableCount / bytesRead > 0.3;
  } catch (error) {
    // Log the failure for debugging while keeping the "not binary" fallback.
    console.warn(
      `Failed to check if file is binary: ${filePath}`,
      error instanceof Error ? error.message : String(error),
    );
    // Any error (e.g. file not found, permissions) is reported as "not binary";
    // existence/access problems are left for the caller to surface.
    return false;
  } finally {
    // Close the handle only if the open succeeded.
    if (fileHandle) {
      try {
        await fileHandle.close();
      } catch (closeError) {
        // A failed close is logged but never propagated.
        console.warn(
          `Failed to close file handle for: ${filePath}`,
          closeError instanceof Error ? closeError.message : String(closeError),
        );
      }
    }
  }
}

/**
 * Detects the type of file based on extension and content.
 * @param filePath Path to the file.
 * @returns Promise that resolves to 'text', 'image', 'pdf', 'audio', 'video', 'binary' or 'svg'.
*/
export async function detectFileType(
  filePath: string,
): Promise<'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg'> {
  const extension = path.extname(filePath).toLowerCase();

  // Extension overrides that are decided before any MIME lookup.
  switch (extension) {
    case '.ts':
      // The MIME lookup maps "ts" to MPEG transport stream (a video format);
      // assume TypeScript source instead.
      return 'text';
    case '.svg':
      return 'svg';
    default:
      break;
  }

  // `mime.lookup` yields false when nothing is found, otherwise the MIME string.
  const mimeType = mime.lookup(filePath);
  if (mimeType) {
    const mediaFamilies = ['image', 'audio', 'video'] as const;
    for (const family of mediaFamilies) {
      if (mimeType.startsWith(`${family}/`)) {
        return family;
      }
    }
    if (mimeType === 'application/pdf') {
      return 'pdf';
    }
  }

  // Extensions treated as binary outright, without sampling the content.
  const knownBinaryExtensions = new Set([
    '.zip',
    '.tar',
    '.gz',
    '.exe',
    '.dll',
    '.so',
    '.class',
    '.jar',
    '.war',
    '.7z',
    '.doc',
    '.docx',
    '.xls',
    '.xlsx',
    '.ppt',
    '.pptx',
    '.odt',
    '.ods',
    '.odp',
    '.bin',
    '.dat',
    '.obj',
    '.o',
    '.a',
    '.lib',
    '.wasm',
    '.pyc',
    '.pyo',
  ]);
  if (knownBinaryExtensions.has(extension)) {
    return 'binary';
  }

  // Nothing conclusive so far: fall back to sampling the file content.
  const looksBinary = await isBinaryFile(filePath);
  return looksBinary ? 'binary' : 'text';
}

/** Outcome of reading and processing a single file. */
export interface ProcessedFileReadResult {
  /** Content for the LLM: a string, or an inline-data part for media/PDF files. */
  llmContent: PartUnion;
  /** Short message describing the outcome for display. */
  returnDisplay: string;
  /** Error message, set when file processing failed. */
  error?: string;
  /** Text files only: whether the returned content was truncated. */
  isTruncated?: boolean;
  /** Text files only: total number of lines in the file. */
  originalLineCount?: number;
  /** Text files only: [startLine, endLine], 1-based for display. */
  linesShown?: [number, number];
}

/**
 * Reads and processes a single file, handling text, images, and PDFs.
 * @param filePath Absolute path to the file.
* @param rootDirectory Absolute path to the project root for relative path display.
 * @param offset Optional offset for text files (0-based line number). Fractional
 *   values are truncated and negative values are treated as 0.
 * @param limit Optional limit for text files (number of lines to read). Fractional
 *   values are truncated, negative values are treated as 0, and NaN falls back to
 *   the default line limit.
 * @returns ProcessedFileReadResult object. Errors raised while reading are caught
 *   and reported through the `error` field.
 */
export async function processSingleFileContent(
  filePath: string,
  rootDirectory: string,
  offset?: number,
  limit?: number,
): Promise<ProcessedFileReadResult> {
  try {
    if (!fs.existsSync(filePath)) {
      // Sync check is acceptable before async read
      return {
        llmContent: '',
        returnDisplay: 'File not found.',
        error: `File not found: ${filePath}`,
      };
    }

    const stats = await fs.promises.stat(filePath);
    if (stats.isDirectory()) {
      return {
        llmContent: '',
        returnDisplay: 'Path is a directory.',
        error: `Path is a directory, not a file: ${filePath}`,
      };
    }

    const fileSizeInBytes = stats.size;
    // 20MB limit; the thrown error is converted to an error result by the
    // catch block at the bottom of this function.
    const maxFileSize = 20 * 1024 * 1024;
    if (fileSizeInBytes > maxFileSize) {
      throw new Error(
        `File size exceeds the 20MB limit: ${filePath} (${(
          fileSizeInBytes /
          (1024 * 1024)
        ).toFixed(2)}MB)`,
      );
    }

    const fileType = await detectFileType(filePath);
    // Always display the path with forward slashes.
    const relativePathForDisplay = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');

    switch (fileType) {
      case 'binary': {
        return {
          llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`,
          returnDisplay: `Skipped binary file: ${relativePathForDisplay}`,
        };
      }
      case 'svg': {
        const SVG_MAX_SIZE_BYTES = 1 * 1024 * 1024;
        if (stats.size > SVG_MAX_SIZE_BYTES) {
          return {
            llmContent: `Cannot display content of SVG file larger than 1MB: ${relativePathForDisplay}`,
            returnDisplay: `Skipped large SVG file (>1MB): ${relativePathForDisplay}`,
          };
        }
        const content = await fs.promises.readFile(filePath, 'utf8');
        return {
          llmContent: content,
          returnDisplay: `Read SVG as text: ${relativePathForDisplay}`,
        };
      }
      case 'text': {
        const content = await fs.promises.readFile(filePath, 'utf8');
        const lines = content.split('\n');
        const originalLineCount = lines.length;

        // Normalise the requested window. Without this, a negative offset or
        // limit would reach Array.prototype.slice as a negative index (which
        // counts from the end of the array) and produce nonsensical headers.
        const startLine = Math.max(0, Math.trunc(offset || 0));
        const effectiveLimit =
          limit === undefined || Number.isNaN(limit)
            ? DEFAULT_MAX_LINES_TEXT_FILE
            : Math.max(0, Math.trunc(limit));

        // Ensure endLine does not exceed originalLineCount
        const endLine = Math.min(startLine + effectiveLimit, originalLineCount);
        // Ensure the slice does not start beyond the end of the file
        const actualStartLine = Math.min(startLine, originalLineCount);
        const selectedLines = lines.slice(actualStartLine, endLine);

        let linesWereTruncatedInLength = false;
        const formattedLines = selectedLines.map((line) => {
          if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
            linesWereTruncatedInLength = true;
            return (
              line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
            );
          }
          return line;
        });

        const contentRangeTruncated =
          startLine > 0 || endLine < originalLineCount;
        const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;

        // Prefix the content with a notice when anything was left out.
        let llmTextContent = '';
        if (contentRangeTruncated) {
          llmTextContent += `[File content truncated: showing lines ${actualStartLine + 1}-${endLine} of ${originalLineCount} total lines. Use offset/limit parameters to view more.]\n`;
        } else if (linesWereTruncatedInLength) {
          llmTextContent += `[File content partially truncated: some lines exceeded maximum length of ${MAX_LINE_LENGTH_TEXT_FILE} characters.]\n`;
        }
        llmTextContent += formattedLines.join('\n');

        // By default, return nothing to streamline the common case of a
        // successful, untruncated read.
        let returnDisplay = '';
        if (contentRangeTruncated) {
          returnDisplay = `Read lines ${actualStartLine + 1}-${endLine} of ${originalLineCount} from ${relativePathForDisplay}`;
          if (linesWereTruncatedInLength) {
            returnDisplay += ' (some lines were shortened)';
          }
        } else if (linesWereTruncatedInLength) {
          returnDisplay = `Read all ${originalLineCount} lines from ${relativePathForDisplay} (some lines were shortened)`;
        }

        return {
          llmContent: llmTextContent,
          returnDisplay,
          isTruncated,
          originalLineCount,
          linesShown: [actualStartLine + 1, endLine],
        };
      }
      case 'image':
      case 'pdf':
      case 'audio':
      case 'video': {
        // Media and PDF files are returned as base64-encoded inline data.
        const contentBuffer = await fs.promises.readFile(filePath);
        const base64Data = contentBuffer.toString('base64');
        return {
          llmContent: {
            inlineData: {
              data: base64Data,
              mimeType: mime.lookup(filePath) || 'application/octet-stream',
            },
          },
          returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
        };
      }
      default: {
        // Should not happen with current detectFileType logic; the `never`
        // assignment makes the compiler flag any unhandled file type.
        const exhaustiveCheck: never = fileType;
        return {
          llmContent: `Unhandled file type: ${exhaustiveCheck}`,
          returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`,
          error: `Unhandled file type for ${filePath}`,
        };
      }
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    const displayPath = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');
    return {
      llmContent: `Error reading file ${displayPath}: ${errorMessage}`,
      returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`,
      error: `Error reading file ${filePath}: ${errorMessage}`,
    };
  }
}