diff --git a/.vscode/launch.json b/.vscode/launch.json index b7976fd6..ccdb9d02 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -83,6 +83,66 @@ "internalConsoleOptions": "neverOpen", "skipFiles": ["/**"] }, + { + "type": "node", + "request": "launch", + "name": "Debug Server Test: read-file", + "runtimeExecutable": "npm", + "runtimeArgs": [ + "run", + "test", + "-w", + "packages/server", + "--", + "--inspect-brk=9229", + "--no-file-parallelism", + "${workspaceFolder}/packages/server/src/tools/read-file.test.ts" + ], + "cwd": "${workspaceFolder}", + "console": "integratedTerminal", + "internalConsoleOptions": "neverOpen", + "skipFiles": ["/**"] + }, + { + "type": "node", + "request": "launch", + "name": "Debug Server Test: turn", + "runtimeExecutable": "npm", + "runtimeArgs": [ + "run", + "test", + "-w", + "packages/server", + "--", + "--inspect-brk=9229", + "--no-file-parallelism", + "${workspaceFolder}/packages/server/src/core/turn.test.ts" + ], + "cwd": "${workspaceFolder}", + "console": "integratedTerminal", + "internalConsoleOptions": "neverOpen", + "skipFiles": ["/**"] + }, + { + "type": "node", + "request": "launch", + "name": "Debug Server Test: fileUtils", + "runtimeExecutable": "npm", + "runtimeArgs": [ + "run", + "test", + "-w", + "packages/server", + "--", + "--inspect-brk=9229", + "--no-file-parallelism", + "${workspaceFolder}/packages/server/src/utils/fileUtils.test.ts" + ], + "cwd": "${workspaceFolder}", + "console": "integratedTerminal", + "internalConsoleOptions": "neverOpen", + "skipFiles": ["/**"] + }, { "type": "node", "request": "launch", @@ -122,6 +182,26 @@ "console": "integratedTerminal", "internalConsoleOptions": "neverOpen", "skipFiles": ["/**"] + }, + { + "type": "node", + "request": "launch", + "name": "Debug CLI Test: useGeminiStream", + "runtimeExecutable": "npm", + "runtimeArgs": [ + "run", + "test", + "-w", + "packages/cli", + "--", + "--inspect-brk=9229", + "--no-file-parallelism", + 
"${workspaceFolder}/packages/cli/src/ui/hooks/useGeminiStream.test.tsx" + ], + "cwd": "${workspaceFolder}", + "console": "integratedTerminal", + "internalConsoleOptions": "neverOpen", + "skipFiles": ["/**"] } ] } diff --git a/docs/tools/file-system.md b/docs/tools/file-system.md index ae006e53..99289641 100644 --- a/docs/tools/file-system.md +++ b/docs/tools/file-system.md @@ -25,17 +25,20 @@ All file system tools operate within a `rootDirectory` (usually the current work - **Tool Name:** `read_file` - **Display Name:** ReadFile - **File:** `read-file.ts` -- **Description:** Reads and returns the content of a specified file. It can handle large files by allowing reading of specific line ranges and will attempt to detect and skip binary files. +- **Description:** Reads and returns the content of a specified file. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges. Other binary file types are generally skipped. - **Parameters:** - `path` (string, required): The absolute path to the file to read. - - `offset` (number, optional): The 0-based line number to start reading from. Requires `limit` to be set. - - `limit` (number, optional): The maximum number of lines to read. If omitted, reads a default maximum (e.g., 2000 lines). + - `offset` (number, optional): For text files, the 0-based line number to start reading from. Requires `limit` to be set. + - `limit` (number, optional): For text files, the maximum number of lines to read. If omitted, reads a default maximum (e.g., 2000 lines) or the entire file if feasible. - **Behavior:** - - Returns the content of the specified text file. - - If `offset` and `limit` are used, returns only that slice of lines. - - Indicates if the content was truncated due to line limits or line length limits. - - Attempts to identify binary files (images, executables) and returns a message indicating it's a binary file instead of its content. 
-- **Output (`llmContent`):** The file content, potentially prefixed with a truncation message (e.g., `[File truncated: showing lines 1-100 of 500 total lines...]\nActual file content...`). For binary files: `Binary file: /path/to/image.png (image)`. + - For text files: Returns the content. If `offset` and `limit` are used, returns only that slice of lines. Indicates if content was truncated due to line limits or line length limits. + - For image and PDF files: Returns the file content as a base64 encoded data structure suitable for model consumption. + - For other binary files: Attempts to identify and skip them, returning a message indicating it's a generic binary file. +- **Output (`llmContent`):** + - For text files: The file content, potentially prefixed with a truncation message (e.g., `[File content truncated: showing lines 1-100 of 500 total lines...]\nActual file content...`). + - For image/PDF files: An object containing `inlineData` with `mimeType` and base64 `data` (e.g., `{ inlineData: { mimeType: 'image/png', data: 'base64encodedstring' } }`). + - For other binary files: A message like `Cannot display content of binary file: /path/to/data.bin`. +- **Confirmation:** No. - **Confirmation:** No. ## 3.
`write_file` (WriteFile) diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx new file mode 100644 index 00000000..6959d9a7 --- /dev/null +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -0,0 +1,137 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi } from 'vitest'; +import { mergePartListUnions } from './useGeminiStream.js'; +import { Part, PartListUnion } from '@google/genai'; + +// Mock useToolScheduler +vi.mock('./useToolScheduler', async () => { + const actual = await vi.importActual('./useToolScheduler'); + return { + ...actual, // We need mapToDisplay from actual + useToolScheduler: vi.fn(), + }; +}); + +describe('mergePartListUnions', () => { + it('should merge multiple PartListUnion arrays', () => { + const list1: PartListUnion = [{ text: 'Hello' }]; + const list2: PartListUnion = [ + { inlineData: { mimeType: 'image/png', data: 'abc' } }, + ]; + const list3: PartListUnion = [{ text: 'World' }, { text: '!' }]; + const result = mergePartListUnions([list1, list2, list3]); + expect(result).toEqual([ + { text: 'Hello' }, + { inlineData: { mimeType: 'image/png', data: 'abc' } }, + { text: 'World' }, + { text: '!' 
}, + ]); + }); + + it('should handle empty arrays in the input list', () => { + const list1: PartListUnion = [{ text: 'First' }]; + const list2: PartListUnion = []; + const list3: PartListUnion = [{ text: 'Last' }]; + const result = mergePartListUnions([list1, list2, list3]); + expect(result).toEqual([{ text: 'First' }, { text: 'Last' }]); + }); + + it('should handle a single PartListUnion array', () => { + const list1: PartListUnion = [ + { text: 'One' }, + { inlineData: { mimeType: 'image/jpeg', data: 'xyz' } }, + ]; + const result = mergePartListUnions([list1]); + expect(result).toEqual(list1); + }); + + it('should return an empty array if all input arrays are empty', () => { + const list1: PartListUnion = []; + const list2: PartListUnion = []; + const result = mergePartListUnions([list1, list2]); + expect(result).toEqual([]); + }); + + it('should handle input list being empty', () => { + const result = mergePartListUnions([]); + expect(result).toEqual([]); + }); + + it('should correctly merge when PartListUnion items are single Parts not in arrays', () => { + const part1: Part = { text: 'Single part 1' }; + const part2: Part = { inlineData: { mimeType: 'image/gif', data: 'gif' } }; + const listContainingSingleParts: PartListUnion[] = [ + part1, + [part2], + { text: 'Another single part' }, + ]; + const result = mergePartListUnions(listContainingSingleParts); + expect(result).toEqual([ + { text: 'Single part 1' }, + { inlineData: { mimeType: 'image/gif', data: 'gif' } }, + { text: 'Another single part' }, + ]); + }); + + it('should handle a mix of arrays and single parts, including empty arrays and undefined/null parts if they were possible (though PartListUnion typing restricts this)', () => { + const list1: PartListUnion = [{ text: 'A' }]; + const list2: PartListUnion = []; + const part3: Part = { text: 'B' }; + const list4: PartListUnion = [ + { text: 'C' }, + { inlineData: { mimeType: 'text/plain', data: 'D' } }, + ]; + const result = 
mergePartListUnions([list1, list2, part3, list4]); + expect(result).toEqual([ + { text: 'A' }, + { text: 'B' }, + { text: 'C' }, + { inlineData: { mimeType: 'text/plain', data: 'D' } }, + ]); + }); + + it('should preserve the order of parts from the input arrays', () => { + const listA: PartListUnion = [{ text: '1' }, { text: '2' }]; + const listB: PartListUnion = [{ text: '3' }]; + const listC: PartListUnion = [{ text: '4' }, { text: '5' }]; + const result = mergePartListUnions([listA, listB, listC]); + expect(result).toEqual([ + { text: '1' }, + { text: '2' }, + { text: '3' }, + { text: '4' }, + { text: '5' }, + ]); + }); + + it('should handle cases where some PartListUnion items are single Parts and others are arrays of Parts', () => { + const singlePart1: Part = { text: 'First single' }; + const arrayPart1: Part[] = [ + { text: 'Array item 1' }, + { text: 'Array item 2' }, + ]; + const singlePart2: Part = { + inlineData: { mimeType: 'application/json', data: 'e30=' }, + }; // {} + const arrayPart2: Part[] = [{ text: 'Last array item' }]; + + const result = mergePartListUnions([ + singlePart1, + arrayPart1, + singlePart2, + arrayPart2, + ]); + expect(result).toEqual([ + { text: 'First single' }, + { text: 'Array item 1' }, + { text: 'Array item 2' }, + { inlineData: { mimeType: 'application/json', data: 'e30=' } }, + { text: 'Last array item' }, + ]); + }); +}); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index d91eea3d..afaf0ccd 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -41,6 +41,18 @@ import { useLogger } from './useLogger.js'; import { useToolScheduler, mapToDisplay } from './useToolScheduler.js'; import { GeminiChat } from '@gemini-code/server/src/core/geminiChat.js'; +export function mergePartListUnions(list: PartListUnion[]): PartListUnion { + const resultParts: PartListUnion = []; + for (const item of list) { + if 
(Array.isArray(item)) { + resultParts.push(...item); + } else { + resultParts.push(item); + } + } + return resultParts; +} + enum StreamProcessingStatus { Completed, UserCancelled, @@ -74,16 +86,16 @@ export const useGeminiStream = ( (tools) => { if (tools.length) { addItem(mapToDisplay(tools), Date.now()); - submitQuery( - tools - .filter( - (t) => - t.status === 'error' || - t.status === 'cancelled' || - t.status === 'success', - ) - .map((t) => t.response.responsePart), - ); + const toolResponses = tools + .filter( + (t) => + t.status === 'error' || + t.status === 'cancelled' || + t.status === 'success', + ) + .map((t) => t.response.responseParts); + + submitQuery(mergePartListUnions(toolResponses)); } }, config, @@ -313,7 +325,7 @@ export const useGeminiStream = ( }; const responseInfo: ToolCallResponseInfo = { callId: request.callId, - responsePart: functionResponse, + responseParts: functionResponse, resultDisplay, error: new Error(declineMessage), }; diff --git a/packages/cli/src/ui/hooks/useToolScheduler.test.ts b/packages/cli/src/ui/hooks/useToolScheduler.test.ts new file mode 100644 index 00000000..10ba4f28 --- /dev/null +++ b/packages/cli/src/ui/hooks/useToolScheduler.test.ts @@ -0,0 +1,126 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { formatLlmContentForFunctionResponse } from './useToolScheduler.js'; +import { Part, PartListUnion } from '@google/genai'; + +describe('formatLlmContentForFunctionResponse', () => { + it('should handle simple string llmContent', () => { + const llmContent = 'Simple text output'; + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + expect(functionResponseJson).toEqual({ output: 'Simple text output' }); + expect(additionalParts).toEqual([]); + }); + + it('should handle llmContent as a single Part with text', () => { + const llmContent: Part = { text: 'Text from Part 
object' }; + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + expect(functionResponseJson).toEqual({ output: 'Text from Part object' }); + expect(additionalParts).toEqual([]); + }); + + it('should handle llmContent as a PartListUnion array with a single text Part', () => { + const llmContent: PartListUnion = [{ text: 'Text from array' }]; + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + expect(functionResponseJson).toEqual({ output: 'Text from array' }); + expect(additionalParts).toEqual([]); + }); + + it('should handle llmContent with inlineData', () => { + const llmContent: Part = { + inlineData: { mimeType: 'image/png', data: 'base64...' }, + }; + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + expect(functionResponseJson).toEqual({ + status: 'Binary content of type image/png was processed.', + }); + expect(additionalParts).toEqual([llmContent]); + }); + + it('should handle llmContent with fileData', () => { + const llmContent: Part = { + fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' }, + }; + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + expect(functionResponseJson).toEqual({ + status: 'Binary content of type application/pdf was processed.', + }); + expect(additionalParts).toEqual([llmContent]); + }); + + it('should handle llmContent as an array of multiple Parts (text and inlineData)', () => { + const llmContent: PartListUnion = [ + { text: 'Some textual description' }, + { inlineData: { mimeType: 'image/jpeg', data: 'base64data...' 
} }, + { text: 'Another text part' }, + ]; + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + expect(functionResponseJson).toEqual({ + status: 'Tool execution succeeded.', + }); + expect(additionalParts).toEqual(llmContent); + }); + + it('should handle llmContent as an array with a single inlineData Part', () => { + const llmContent: PartListUnion = [ + { inlineData: { mimeType: 'image/gif', data: 'gifdata...' } }, + ]; + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + // When the array is a single Part and that part is inlineData + expect(functionResponseJson).toEqual({ + status: 'Binary content of type image/gif was processed.', + }); + expect(additionalParts).toEqual(llmContent); + }); + + it('should handle llmContent as a generic Part (not text, inlineData, or fileData)', () => { + // This case might represent a malformed or unexpected Part type. + // For example, a Part that is just an empty object or has other properties. 
+ const llmContent: Part = { functionCall: { name: 'test', args: {} } }; // Example of a non-standard part for this context + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + expect(functionResponseJson).toEqual({ + status: 'Tool execution succeeded.', + }); + expect(additionalParts).toEqual([llmContent]); + }); + + it('should handle empty string llmContent', () => { + const llmContent = ''; + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + expect(functionResponseJson).toEqual({ output: '' }); + expect(additionalParts).toEqual([]); + }); + + it('should handle llmContent as an empty array', () => { + const llmContent: PartListUnion = []; + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + expect(functionResponseJson).toEqual({ + status: 'Tool execution succeeded.', + }); + expect(additionalParts).toEqual([]); + }); + + it('should handle llmContent as a Part with undefined inlineData/fileData/text', () => { + const llmContent: Part = {}; // An empty part object + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(llmContent); + expect(functionResponseJson).toEqual({ + status: 'Tool execution succeeded.', + }); + expect(additionalParts).toEqual([llmContent]); + }); +}); diff --git a/packages/cli/src/ui/hooks/useToolScheduler.ts b/packages/cli/src/ui/hooks/useToolScheduler.ts index 7d8cfbe4..e6e80785 100644 --- a/packages/cli/src/ui/hooks/useToolScheduler.ts +++ b/packages/cli/src/ui/hooks/useToolScheduler.ts @@ -13,7 +13,7 @@ import { ToolCallConfirmationDetails, ToolResult, } from '@gemini-code/server'; -import { Part } from '@google/genai'; +import { Part, PartUnion, PartListUnion } from '@google/genai'; import { useCallback, useEffect, useState } from 'react'; import { HistoryItemToolGroup, @@ -88,6 +88,60 @@ export type CompletedToolCall = | CancelledToolCall | 
ErroredToolCall; +/** + * Formats a PartListUnion response from a tool into JSON suitable for a Gemini + * FunctionResponse and additional Parts to include after that response. + * + * This is required because FunctionResponse appears to only support JSON + * and not arbitrary parts. Including parts like inlineData or fileData + * directly in a FunctionResponse confuses the model resulting in a failure + * to interpret the multimodal content and context window exceeded errors. + */ + +export function formatLlmContentForFunctionResponse( + llmContent: PartListUnion, +): { + functionResponseJson: Record<string, unknown>; + additionalParts: PartUnion[]; +} { + const additionalParts: PartUnion[] = []; + let functionResponseJson: Record<string, unknown>; + + if (Array.isArray(llmContent) && llmContent.length === 1) { + // Ensure that length 1 arrays are treated as a single Part. + llmContent = llmContent[0]; + } + + if (typeof llmContent === 'string') { + functionResponseJson = { output: llmContent }; + } else if (Array.isArray(llmContent)) { + functionResponseJson = { status: 'Tool execution succeeded.' }; + additionalParts.push(...llmContent); + } else { + if ( + llmContent.inlineData !== undefined || + llmContent.fileData !== undefined + ) { + // For Parts like inlineData or fileData, use the returnDisplay as the textual output for the functionResponse. + // The actual Part will be added to additionalParts. + functionResponseJson = { + status: `Binary content of type ${llmContent.inlineData?.mimeType || llmContent.fileData?.mimeType || 'unknown'} was processed.`, + }; + additionalParts.push(llmContent); + } else if (llmContent.text !== undefined) { + functionResponseJson = { output: llmContent.text }; + } else { + functionResponseJson = { status: 'Tool execution succeeded.'
}; + additionalParts.push(llmContent); + } + } + + return { + functionResponseJson, + additionalParts, + }; +} + export function useToolScheduler( onComplete: (tools: CompletedToolCall[]) => void, config: Config, @@ -201,7 +255,7 @@ export function useToolScheduler( status: 'cancelled', response: { callId: c.request.callId, - responsePart: { + responseParts: { functionResponse: { id: c.request.callId, name: c.request.name, @@ -276,21 +330,24 @@ export function useToolScheduler( .execute(t.request.args, signal, onOutputChunk) .then((result: ToolResult) => { if (signal.aborted) { + // TODO(jacobr): avoid stringifying the LLM content. setToolCalls( setStatus(callId, 'cancelled', String(result.llmContent)), ); return; } + const { functionResponseJson, additionalParts } = + formatLlmContentForFunctionResponse(result.llmContent); const functionResponse: Part = { functionResponse: { name: t.request.name, id: callId, - response: { output: result.llmContent }, + response: functionResponseJson, }, }; const response: ToolCallResponseInfo = { callId, - responsePart: functionResponse, + responseParts: [functionResponse, ...additionalParts], resultDisplay: result.returnDisplay, error: undefined, }; @@ -401,7 +458,7 @@ function setStatus( status: 'cancelled', response: { callId: t.request.callId, - responsePart: { + responseParts: { functionResponse: { id: t.request.callId, name: t.request.name, @@ -446,7 +503,7 @@ const toolErrorResponse = ( ): ToolCallResponseInfo => ({ callId: request.callId, error, - responsePart: { + responseParts: { functionResponse: { id: request.callId, name: request.name, diff --git a/packages/server/src/core/turn.ts b/packages/server/src/core/turn.ts index 97e93f59..22b01cce 100644 --- a/packages/server/src/core/turn.ts +++ b/packages/server/src/core/turn.ts @@ -5,7 +5,6 @@ */ import { - Part, PartListUnion, GenerateContentResponse, FunctionCall, @@ -57,7 +56,7 @@ export interface ToolCallRequestInfo { export interface ToolCallResponseInfo { callId: 
string; - responsePart: Part; + responseParts: PartListUnion; resultDisplay: ToolResultDisplay | undefined; error: Error | undefined; } diff --git a/packages/server/src/tools/read-file.test.ts b/packages/server/src/tools/read-file.test.ts new file mode 100644 index 00000000..8ea42134 --- /dev/null +++ b/packages/server/src/tools/read-file.test.ts @@ -0,0 +1,228 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { vi, describe, it, expect, beforeEach, afterEach, Mock } from 'vitest'; +import { ReadFileTool, ReadFileToolParams } from './read-file.js'; +import * as fileUtils from '../utils/fileUtils.js'; +import path from 'path'; +import os from 'os'; +import fs from 'fs'; // For actual fs operations in setup + +// Mock fileUtils.processSingleFileContent +vi.mock('../utils/fileUtils', async () => { + const actualFileUtils = + await vi.importActual('../utils/fileUtils'); + return { + ...actualFileUtils, // Spread actual implementations + processSingleFileContent: vi.fn(), // Mock specific function + }; +}); + +const mockProcessSingleFileContent = fileUtils.processSingleFileContent as Mock; + +describe('ReadFileTool', () => { + let tempRootDir: string; + let tool: ReadFileTool; + const abortSignal = new AbortController().signal; + + beforeEach(() => { + // Create a unique temporary root directory for each test run + tempRootDir = fs.mkdtempSync( + path.join(os.tmpdir(), 'read-file-tool-root-'), + ); + tool = new ReadFileTool(tempRootDir); + mockProcessSingleFileContent.mockReset(); + }); + + afterEach(() => { + // Clean up the temporary root directory + if (fs.existsSync(tempRootDir)) { + fs.rmSync(tempRootDir, { recursive: true, force: true }); + } + }); + + describe('validateToolParams', () => { + it('should return null for valid params (absolute path within root)', () => { + const params: ReadFileToolParams = { + path: path.join(tempRootDir, 'test.txt'), + }; + expect(tool.validateToolParams(params)).toBeNull(); 
+ }); + + it('should return null for valid params with offset and limit', () => { + const params: ReadFileToolParams = { + path: path.join(tempRootDir, 'test.txt'), + offset: 0, + limit: 10, + }; + expect(tool.validateToolParams(params)).toBeNull(); + }); + + it('should return error for relative path', () => { + const params: ReadFileToolParams = { path: 'test.txt' }; + expect(tool.validateToolParams(params)).toMatch( + /File path must be absolute/, + ); + }); + + it('should return error for path outside root', () => { + const outsidePath = path.resolve(os.tmpdir(), 'outside-root.txt'); + const params: ReadFileToolParams = { path: outsidePath }; + expect(tool.validateToolParams(params)).toMatch( + /File path must be within the root directory/, + ); + }); + + it('should return error for negative offset', () => { + const params: ReadFileToolParams = { + path: path.join(tempRootDir, 'test.txt'), + offset: -1, + limit: 10, + }; + expect(tool.validateToolParams(params)).toBe( + 'Offset must be a non-negative number', + ); + }); + + it('should return error for non-positive limit', () => { + const paramsZero: ReadFileToolParams = { + path: path.join(tempRootDir, 'test.txt'), + offset: 0, + limit: 0, + }; + expect(tool.validateToolParams(paramsZero)).toBe( + 'Limit must be a positive number', + ); + const paramsNegative: ReadFileToolParams = { + path: path.join(tempRootDir, 'test.txt'), + offset: 0, + limit: -5, + }; + expect(tool.validateToolParams(paramsNegative)).toBe( + 'Limit must be a positive number', + ); + }); + + it('should return error for schema validation failure (e.g. 
missing path)', () => { + const params = { offset: 0 } as unknown as ReadFileToolParams; + expect(tool.validateToolParams(params)).toBe( + 'Parameters failed schema validation.', + ); + }); + }); + + describe('getDescription', () => { + it('should return a shortened, relative path', () => { + const filePath = path.join(tempRootDir, 'sub', 'dir', 'file.txt'); + const params: ReadFileToolParams = { path: filePath }; + // Assuming tempRootDir is something like /tmp/read-file-tool-root-XXXXXX + // The relative path would be sub/dir/file.txt + expect(tool.getDescription(params)).toBe('sub/dir/file.txt'); + }); + + it('should return . if path is the root directory', () => { + const params: ReadFileToolParams = { path: tempRootDir }; + expect(tool.getDescription(params)).toBe('.'); + }); + }); + + describe('execute', () => { + it('should return validation error if params are invalid', async () => { + const params: ReadFileToolParams = { path: 'relative/path.txt' }; + const result = await tool.execute(params, abortSignal); + expect(result.llmContent).toMatch(/Error: Invalid parameters provided/); + expect(result.returnDisplay).toMatch(/File path must be absolute/); + }); + + it('should return error from processSingleFileContent if it fails', async () => { + const filePath = path.join(tempRootDir, 'error.txt'); + const params: ReadFileToolParams = { path: filePath }; + const errorMessage = 'Simulated read error'; + mockProcessSingleFileContent.mockResolvedValue({ + llmContent: `Error reading file ${filePath}: ${errorMessage}`, + returnDisplay: `Error reading file ${filePath}: ${errorMessage}`, + error: errorMessage, + }); + + const result = await tool.execute(params, abortSignal); + expect(mockProcessSingleFileContent).toHaveBeenCalledWith( + filePath, + tempRootDir, + undefined, + undefined, + ); + expect(result.llmContent).toContain(errorMessage); + expect(result.returnDisplay).toContain(errorMessage); + }); + + it('should return success result for a text file', async () 
=> { + const filePath = path.join(tempRootDir, 'textfile.txt'); + const fileContent = 'This is a test file.'; + const params: ReadFileToolParams = { path: filePath }; + mockProcessSingleFileContent.mockResolvedValue({ + llmContent: fileContent, + returnDisplay: `Read text file: ${path.basename(filePath)}`, + }); + + const result = await tool.execute(params, abortSignal); + expect(mockProcessSingleFileContent).toHaveBeenCalledWith( + filePath, + tempRootDir, + undefined, + undefined, + ); + expect(result.llmContent).toBe(fileContent); + expect(result.returnDisplay).toBe( + `Read text file: ${path.basename(filePath)}`, + ); + }); + + it('should return success result for an image file', async () => { + const filePath = path.join(tempRootDir, 'image.png'); + const imageData = { + inlineData: { mimeType: 'image/png', data: 'base64...' }, + }; + const params: ReadFileToolParams = { path: filePath }; + mockProcessSingleFileContent.mockResolvedValue({ + llmContent: imageData, + returnDisplay: `Read image file: ${path.basename(filePath)}`, + }); + + const result = await tool.execute(params, abortSignal); + expect(mockProcessSingleFileContent).toHaveBeenCalledWith( + filePath, + tempRootDir, + undefined, + undefined, + ); + expect(result.llmContent).toEqual(imageData); + expect(result.returnDisplay).toBe( + `Read image file: ${path.basename(filePath)}`, + ); + }); + + it('should pass offset and limit to processSingleFileContent', async () => { + const filePath = path.join(tempRootDir, 'paginated.txt'); + const params: ReadFileToolParams = { + path: filePath, + offset: 10, + limit: 5, + }; + mockProcessSingleFileContent.mockResolvedValue({ + llmContent: 'some lines', + returnDisplay: 'Read text file (paginated)', + }); + + await tool.execute(params, abortSignal); + expect(mockProcessSingleFileContent).toHaveBeenCalledWith( + filePath, + tempRootDir, + 10, + 5, + ); + }); + }); +}); diff --git a/packages/server/src/tools/read-file.ts b/packages/server/src/tools/read-file.ts 
index de09161d..4bb3bd56 100644 --- a/packages/server/src/tools/read-file.ts +++ b/packages/server/src/tools/read-file.ts @@ -4,11 +4,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -import fs from 'fs'; import path from 'path'; import { SchemaValidator } from '../utils/schemaValidator.js'; import { makeRelative, shortenPath } from '../utils/paths.js'; import { BaseTool, ToolResult } from './tools.js'; +import { isWithinRoot, processSingleFileContent } from '../utils/fileUtils.js'; /** * Parameters for the ReadFile tool @@ -35,14 +35,12 @@ export interface ReadFileToolParams { */ export class ReadFileTool extends BaseTool { static readonly Name: string = 'read_file'; - private static readonly DEFAULT_MAX_LINES = 2000; - private static readonly MAX_LINE_LENGTH = 2000; constructor(private rootDirectory: string) { super( ReadFileTool.Name, 'ReadFile', - 'Reads and returns the content of a specified file from the local filesystem. Handles large files by allowing reading specific line ranges.', + 'Reads and returns the content of a specified file from the local filesystem. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges.', { properties: { path: { @@ -52,12 +50,12 @@ export class ReadFileTool extends BaseTool { }, offset: { description: - "Optional: The 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.", + "Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.", type: 'number', }, limit: { description: - "Optional: Maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible).", + "Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. 
If omitted, reads the entire file (if feasible, up to a default limit).", type: 'number', }, }, @@ -68,28 +66,6 @@ export class ReadFileTool extends BaseTool { this.rootDirectory = path.resolve(rootDirectory); } - /** - * Checks if a path is within the root directory - * @param pathToCheck The path to check - * @returns True if the path is within the root directory, false otherwise - */ - private isWithinRoot(pathToCheck: string): boolean { - const normalizedPath = path.normalize(pathToCheck); - const normalizedRoot = path.normalize(this.rootDirectory); - const rootWithSep = normalizedRoot.endsWith(path.sep) - ? normalizedRoot - : normalizedRoot + path.sep; - return ( - normalizedPath === normalizedRoot || - normalizedPath.startsWith(rootWithSep) - ); - } - - /** - * Validates the parameters for the ReadFile tool - * @param params Parameters to validate - * @returns True if parameters are valid, false otherwise - */ validateToolParams(params: ReadFileToolParams): string | null { if ( this.schema.parameters && @@ -104,7 +80,7 @@ export class ReadFileTool extends BaseTool { if (!path.isAbsolute(filePath)) { return `File path must be absolute: ${filePath}`; } - if (!this.isWithinRoot(filePath)) { + if (!isWithinRoot(filePath, this.rootDirectory)) { return `File path must be within the root directory (${this.rootDirectory}): ${filePath}`; } if (params.offset !== undefined && params.offset < 0) { @@ -116,83 +92,11 @@ export class ReadFileTool extends BaseTool { return null; } - /** - * Determines if a file is likely binary based on content sampling - * @param filePath Path to the file - * @returns True if the file appears to be binary - */ - private isBinaryFile(filePath: string): boolean { - try { - // Read the first 4KB of the file - const fd = fs.openSync(filePath, 'r'); - const buffer = Buffer.alloc(4096); - const bytesRead = fs.readSync(fd, buffer, 0, 4096, 0); - fs.closeSync(fd); - - // Check for null bytes or high concentration of non-printable characters - let 
nonPrintableCount = 0; - for (let i = 0; i < bytesRead; i++) { - // Null byte is a strong indicator of binary data - if (buffer[i] === 0) { - return true; - } - - // Count non-printable characters - if (buffer[i] < 9 || (buffer[i] > 13 && buffer[i] < 32)) { - nonPrintableCount++; - } - } - - // If more than 30% are non-printable, likely binary - return nonPrintableCount / bytesRead > 0.3; - } catch { - return false; - } - } - - /** - * Detects the type of file based on extension and content - * @param filePath Path to the file - * @returns File type description - */ - private detectFileType(filePath: string): string { - const ext = path.extname(filePath).toLowerCase(); - - // Common image formats - if ( - ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg'].includes(ext) - ) { - return 'image'; - } - - // Other known binary formats - if (['.pdf', '.zip', '.tar', '.gz', '.exe', '.dll', '.so'].includes(ext)) { - return 'binary'; - } - - // Check content for binary indicators - if (this.isBinaryFile(filePath)) { - return 'binary'; - } - - return 'text'; - } - - /** - * Gets a description of the file reading operation - * @param params Parameters for the file reading - * @returns A string describing the file being read - */ getDescription(params: ReadFileToolParams): string { const relativePath = makeRelative(params.path, this.rootDirectory); return shortenPath(relativePath); } - /** - * Reads a file and returns its contents with line numbers - * @param params Parameters for the file reading - * @returns Result with file contents - */ async execute( params: ReadFileToolParams, _signal: AbortSignal, @@ -205,75 +109,23 @@ export class ReadFileTool extends BaseTool { }; } - const filePath = params.path; - try { - if (!fs.existsSync(filePath)) { - return { - llmContent: `File not found: ${filePath}`, - returnDisplay: `File not found.`, - }; - } + const result = await processSingleFileContent( + params.path, + this.rootDirectory, + params.offset, + params.limit, + ); 
- const stats = fs.statSync(filePath); - if (stats.isDirectory()) { - return { - llmContent: `Path is a directory, not a file: ${filePath}`, - returnDisplay: `File is directory.`, - }; - } - - const fileType = this.detectFileType(filePath); - if (fileType !== 'text') { - return { - llmContent: `Binary file: ${filePath} (${fileType})`, - // For binary files, maybe returnDisplay should be empty or indicate binary? - // Keeping it empty for now. - returnDisplay: ``, - }; - } - - const content = fs.readFileSync(filePath, 'utf8'); - const lines = content.split('\n'); - - const startLine = params.offset || 0; - const endLine = params.limit - ? startLine + params.limit - : Math.min(startLine + ReadFileTool.DEFAULT_MAX_LINES, lines.length); - const selectedLines = lines.slice(startLine, endLine); - - let truncated = false; - const formattedLines = selectedLines.map((line) => { - let processedLine = line; - if (line.length > ReadFileTool.MAX_LINE_LENGTH) { - processedLine = - line.substring(0, ReadFileTool.MAX_LINE_LENGTH) + '... [truncated]'; - truncated = true; - } - - return processedLine; - }); - - const contentTruncated = endLine < lines.length || truncated; - - let llmContent = ''; - if (contentTruncated) { - llmContent += `[File truncated: showing lines ${startLine + 1}-${endLine} of ${lines.length} total lines. Use offset parameter to view more.]\n`; - } - llmContent += formattedLines.join('\n'); - - // Here, returnDisplay could potentially be enhanced, but for now, - // it's kept empty as the LLM content itself is descriptive. + if (result.error) { return { - llmContent, - returnDisplay: '', - }; - } catch (error) { - const errorMsg = `Error reading file: ${error instanceof Error ? 
error.message : String(error)}`; - - return { - llmContent: `Error reading file ${filePath}: ${errorMsg}`, - returnDisplay: `Failed to read file: ${errorMsg}`, + llmContent: result.error, // The detailed error for LLM + returnDisplay: result.returnDisplay, // User-friendly error }; } + + return { + llmContent: result.llmContent, + returnDisplay: result.returnDisplay, + }; } } diff --git a/packages/server/src/tools/read-many-files.test.ts b/packages/server/src/tools/read-many-files.test.ts index ea801ddb..5c6d94fa 100644 --- a/packages/server/src/tools/read-many-files.test.ts +++ b/packages/server/src/tools/read-many-files.test.ts @@ -115,6 +115,33 @@ describe('ReadManyFilesTool', () => { }; expect(tool.validateParams(params)).toBeNull(); }); + + it('should return error if paths array contains an empty string', () => { + const params = { paths: ['file1.txt', ''] }; + expect(tool.validateParams(params)).toBe( + 'Each item in "paths" must be a non-empty string/glob pattern.', + ); + }); + + it('should return error if include array contains non-string elements', () => { + const params = { + paths: ['file1.txt'], + include: ['*.ts', 123] as string[], + }; + expect(tool.validateParams(params)).toBe( + 'If provided, "include" must be an array of strings/glob patterns.', + ); + }); + + it('should return error if exclude array contains non-string elements', () => { + const params = { + paths: ['file1.txt'], + exclude: ['*.log', {}] as string[], + }; + expect(tool.validateParams(params)).toBe( + 'If provided, "exclude" must be an array of strings/glob patterns.', + ); + }); }); describe('execute', () => { diff --git a/packages/server/src/tools/read-many-files.ts b/packages/server/src/tools/read-many-files.ts index b825de04..d826c9ba 100644 --- a/packages/server/src/tools/read-many-files.ts +++ b/packages/server/src/tools/read-many-files.ts @@ -7,13 +7,16 @@ import { BaseTool, ToolResult } from './tools.js'; import { SchemaValidator } from '../utils/schemaValidator.js'; 
import { getErrorMessage } from '../utils/errors.js'; -import * as fs from 'fs/promises'; import * as path from 'path'; import fg from 'fast-glob'; import { GEMINI_MD_FILENAME } from './memoryTool.js'; - +import { + detectFileType, + processSingleFileContent, + DEFAULT_ENCODING, +} from '../utils/fileUtils.js'; import { PartListUnion } from '@google/genai'; -import mime from 'mime-types'; + /** * Parameters for the ReadManyFilesTool. */ @@ -98,8 +101,6 @@ const DEFAULT_EXCLUDES: string[] = [ `**/${GEMINI_MD_FILENAME}`, ]; -// Default values for encoding and separator format -const DEFAULT_ENCODING: BufferEncoding = 'utf-8'; const DEFAULT_OUTPUT_SEPARATOR_FORMAT = '--- {filePath} ---'; /** @@ -256,11 +257,10 @@ Use this tool when the user's query implies needing the content of several files } = params; const toolBaseDir = this.targetDir; - const filesToConsider = new Set(); const skippedFiles: Array<{ path: string; reason: string }> = []; const processedFilesRelativePaths: string[] = []; - const content: PartListUnion = []; + const contentParts: PartListUnion = []; const effectiveExcludes = useDefaultExcludes ? 
[...DEFAULT_EXCLUDES, ...exclude] @@ -315,69 +315,50 @@ Use this tool when the user's query implies needing the content of several files const relativePathForDisplay = path .relative(toolBaseDir, filePath) .replace(/\\/g, '/'); - try { - const mimeType = mime.lookup(filePath); - if ( - mimeType && - (mimeType.startsWith('image/') || mimeType === 'application/pdf') - ) { - const fileExtension = path.extname(filePath); - const fileNameWithoutExtension = path.basename( - filePath, - fileExtension, - ); - const requestedExplicitly = inputPatterns.some( - (pattern: string) => - pattern.toLowerCase().includes(fileExtension) || - pattern.includes(fileNameWithoutExtension), - ); - if (!requestedExplicitly) { - skippedFiles.push({ - path: relativePathForDisplay, - reason: - 'asset file (image/pdf) was not explicitly requested by name or extension', - }); - continue; - } - const contentBuffer = await fs.readFile(filePath); - const base64Data = contentBuffer.toString('base64'); - content.push({ - inlineData: { - data: base64Data, - mimeType, - }, + const fileType = detectFileType(filePath); + + if (fileType === 'image' || fileType === 'pdf') { + const fileExtension = path.extname(filePath).toLowerCase(); + const fileNameWithoutExtension = path.basename(filePath, fileExtension); + const requestedExplicitly = inputPatterns.some( + (pattern: string) => + pattern.toLowerCase().includes(fileExtension) || + pattern.includes(fileNameWithoutExtension), + ); + + if (!requestedExplicitly) { + skippedFiles.push({ + path: relativePathForDisplay, + reason: + 'asset file (image/pdf) was not explicitly requested by name or extension', }); - processedFilesRelativePaths.push(relativePathForDisplay); - } else { - const contentBuffer = await fs.readFile(filePath); - // Basic binary detection: check for null bytes in the first 1KB - const sample = contentBuffer.subarray( - 0, - Math.min(contentBuffer.length, 1024), - ); - if (sample.includes(0)) { - skippedFiles.push({ - path: 
relativePathForDisplay, - reason: 'appears to be binary', - }); - continue; - } - // Using default encoding - const fileContent = contentBuffer.toString(DEFAULT_ENCODING); - // Using default separator format + continue; + } + } + + // Use processSingleFileContent for all file types now + const fileReadResult = await processSingleFileContent( + filePath, + toolBaseDir, + ); + + if (fileReadResult.error) { + skippedFiles.push({ + path: relativePathForDisplay, + reason: `Read error: ${fileReadResult.error}`, + }); + } else { + if (typeof fileReadResult.llmContent === 'string') { const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace( '{filePath}', relativePathForDisplay, ); - content.push(`${separator}\n\n${fileContent}\n\n`); - processedFilesRelativePaths.push(relativePathForDisplay); + contentParts.push(`${separator}\n\n${fileReadResult.llmContent}\n\n`); + } else { + contentParts.push(fileReadResult.llmContent); // This is a Part for image/pdf } - } catch (error) { - skippedFiles.push({ - path: relativePathForDisplay, - reason: `Read error: ${getErrorMessage(error)}`, - }); + processedFilesRelativePaths.push(relativePathForDisplay); } } @@ -422,13 +403,13 @@ Use this tool when the user's query implies needing the content of several files displayMessage += `No files were read and concatenated based on the criteria.\n`; } - if (content.length === 0) { - content.push( + if (contentParts.length === 0) { + contentParts.push( 'No files matching the criteria were found or all were skipped.', ); } return { - llmContent: content, + llmContent: contentParts, returnDisplay: displayMessage.trim(), }; } diff --git a/packages/server/src/utils/fileUtils.test.ts b/packages/server/src/utils/fileUtils.test.ts new file mode 100644 index 00000000..df912b0c --- /dev/null +++ b/packages/server/src/utils/fileUtils.test.ts @@ -0,0 +1,433 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + describe, + it, + expect, + vi, + 
beforeEach, + afterEach, + type Mock, +} from 'vitest'; + +import * as actualNodeFs from 'node:fs'; // For setup/teardown +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import os from 'node:os'; +import mime from 'mime-types'; + +import { + isWithinRoot, + isBinaryFile, + detectFileType, + processSingleFileContent, +} from './fileUtils.js'; + +vi.mock('mime-types', () => ({ + default: { lookup: vi.fn() }, + lookup: vi.fn(), +})); + +const mockMimeLookup = mime.lookup as Mock; + +describe('fileUtils', () => { + let tempRootDir: string; + const originalProcessCwd = process.cwd; + + let testTextFilePath: string; + let testImageFilePath: string; + let testPdfFilePath: string; + let testBinaryFilePath: string; + let nonExistentFilePath: string; + let directoryPath: string; + + beforeEach(() => { + vi.resetAllMocks(); // Reset all mocks, including mime.lookup + + tempRootDir = actualNodeFs.mkdtempSync( + path.join(os.tmpdir(), 'fileUtils-test-'), + ); + process.cwd = vi.fn(() => tempRootDir); // Mock cwd if necessary for relative path logic within tests + + testTextFilePath = path.join(tempRootDir, 'test.txt'); + testImageFilePath = path.join(tempRootDir, 'image.png'); + testPdfFilePath = path.join(tempRootDir, 'document.pdf'); + testBinaryFilePath = path.join(tempRootDir, 'app.exe'); + nonExistentFilePath = path.join(tempRootDir, 'notfound.txt'); + directoryPath = path.join(tempRootDir, 'subdir'); + + actualNodeFs.mkdirSync(directoryPath, { recursive: true }); // Ensure subdir exists + }); + + afterEach(() => { + if (actualNodeFs.existsSync(tempRootDir)) { + actualNodeFs.rmSync(tempRootDir, { recursive: true, force: true }); + } + process.cwd = originalProcessCwd; + vi.restoreAllMocks(); // Restore any spies + }); + + describe('isWithinRoot', () => { + const root = path.resolve('/project/root'); + + it('should return true for paths directly within the root', () => { + expect(isWithinRoot(path.join(root, 'file.txt'), root)).toBe(true); + 
expect(isWithinRoot(path.join(root, 'subdir', 'file.txt'), root)).toBe( + true, + ); + }); + + it('should return true for the root path itself', () => { + expect(isWithinRoot(root, root)).toBe(true); + }); + + it('should return false for paths outside the root', () => { + expect( + isWithinRoot(path.resolve('/project/other', 'file.txt'), root), + ).toBe(false); + expect(isWithinRoot(path.resolve('/unrelated', 'file.txt'), root)).toBe( + false, + ); + }); + + it('should return false for paths that only partially match the root prefix', () => { + expect( + isWithinRoot( + path.resolve('/project/root-but-actually-different'), + root, + ), + ).toBe(false); + }); + + it('should handle paths with trailing slashes correctly', () => { + expect(isWithinRoot(path.join(root, 'file.txt') + path.sep, root)).toBe( + true, + ); + expect(isWithinRoot(root + path.sep, root)).toBe(true); + }); + + it('should handle different path separators (POSIX vs Windows)', () => { + const posixRoot = '/project/root'; + const posixPathInside = '/project/root/file.txt'; + const posixPathOutside = '/project/other/file.txt'; + expect(isWithinRoot(posixPathInside, posixRoot)).toBe(true); + expect(isWithinRoot(posixPathOutside, posixRoot)).toBe(false); + }); + + it('should return false for a root path that is a sub-path of the path to check', () => { + const pathToCheck = path.resolve('/project/root/sub'); + const rootSub = path.resolve('/project/root'); + expect(isWithinRoot(pathToCheck, rootSub)).toBe(true); + + const pathToCheckSuper = path.resolve('/project/root'); + const rootSuper = path.resolve('/project/root/sub'); + expect(isWithinRoot(pathToCheckSuper, rootSuper)).toBe(false); + }); + }); + + describe('isBinaryFile', () => { + let filePathForBinaryTest: string; + + beforeEach(() => { + filePathForBinaryTest = path.join(tempRootDir, 'binaryCheck.tmp'); + }); + + afterEach(() => { + if (actualNodeFs.existsSync(filePathForBinaryTest)) { + actualNodeFs.unlinkSync(filePathForBinaryTest); + } + 
}); + + it('should return false for an empty file', () => { + actualNodeFs.writeFileSync(filePathForBinaryTest, ''); + expect(isBinaryFile(filePathForBinaryTest)).toBe(false); + }); + + it('should return false for a typical text file', () => { + actualNodeFs.writeFileSync( + filePathForBinaryTest, + 'Hello, world!\nThis is a test file with normal text content.', + ); + expect(isBinaryFile(filePathForBinaryTest)).toBe(false); + }); + + it('should return true for a file with many null bytes', () => { + const binaryContent = Buffer.from([ + 0x48, 0x65, 0x00, 0x6c, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, + ]); // "He\0llo\0\0\0\0\0" + actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent); + expect(isBinaryFile(filePathForBinaryTest)).toBe(true); + }); + + it('should return true for a file with high percentage of non-printable ASCII', () => { + const binaryContent = Buffer.from([ + 0x41, 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x43, 0x44, 0x06, + ]); // AB\x01\x02\x03\x04\x05CD\x06 + actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent); + expect(isBinaryFile(filePathForBinaryTest)).toBe(true); + }); + + it('should return false if file access fails (e.g., ENOENT)', () => { + // Ensure the file does not exist + if (actualNodeFs.existsSync(filePathForBinaryTest)) { + actualNodeFs.unlinkSync(filePathForBinaryTest); + } + expect(isBinaryFile(filePathForBinaryTest)).toBe(false); + }); + }); + + describe('detectFileType', () => { + let filePathForDetectTest: string; + + beforeEach(() => { + filePathForDetectTest = path.join(tempRootDir, 'detectType.tmp'); + // Default: create as a text file for isBinaryFile fallback + actualNodeFs.writeFileSync(filePathForDetectTest, 'Plain text content'); + }); + + afterEach(() => { + if (actualNodeFs.existsSync(filePathForDetectTest)) { + actualNodeFs.unlinkSync(filePathForDetectTest); + } + vi.restoreAllMocks(); // Restore spies on actualNodeFs + }); + + it('should detect image type by extension (png)', () => { + 
mockMimeLookup.mockReturnValueOnce('image/png'); + expect(detectFileType('file.png')).toBe('image'); + }); + + it('should detect image type by extension (jpeg)', () => { + mockMimeLookup.mockReturnValueOnce('image/jpeg'); + expect(detectFileType('file.jpg')).toBe('image'); + }); + + it('should detect pdf type by extension', () => { + mockMimeLookup.mockReturnValueOnce('application/pdf'); + expect(detectFileType('file.pdf')).toBe('pdf'); + }); + + it('should detect known binary extensions as binary (e.g. .zip)', () => { + mockMimeLookup.mockReturnValueOnce('application/zip'); + expect(detectFileType('archive.zip')).toBe('binary'); + }); + it('should detect known binary extensions as binary (e.g. .exe)', () => { + mockMimeLookup.mockReturnValueOnce('application/octet-stream'); // Common for .exe + expect(detectFileType('app.exe')).toBe('binary'); + }); + + it('should use isBinaryFile for unknown extensions and detect as binary', () => { + mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type + // Create a file that isBinaryFile will identify as binary + const binaryContent = Buffer.from([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + ]); + actualNodeFs.writeFileSync(filePathForDetectTest, binaryContent); + expect(detectFileType(filePathForDetectTest)).toBe('binary'); + }); + + it('should default to text if mime type is unknown and content is not binary', () => { + mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type + // filePathForDetectTest is already a text file by default from beforeEach + expect(detectFileType(filePathForDetectTest)).toBe('text'); + }); + }); + + describe('processSingleFileContent', () => { + beforeEach(() => { + // Ensure files exist for statSync checks before readFile might be mocked + if (actualNodeFs.existsSync(testTextFilePath)) + actualNodeFs.unlinkSync(testTextFilePath); + if (actualNodeFs.existsSync(testImageFilePath)) + actualNodeFs.unlinkSync(testImageFilePath); + if 
(actualNodeFs.existsSync(testPdfFilePath)) + actualNodeFs.unlinkSync(testPdfFilePath); + if (actualNodeFs.existsSync(testBinaryFilePath)) + actualNodeFs.unlinkSync(testBinaryFilePath); + }); + + it('should read a text file successfully', async () => { + const content = 'Line 1\\nLine 2\\nLine 3'; + actualNodeFs.writeFileSync(testTextFilePath, content); + const result = await processSingleFileContent( + testTextFilePath, + tempRootDir, + ); + expect(result.llmContent).toBe(content); + expect(result.returnDisplay).toContain('Read text file: test.txt'); + expect(result.error).toBeUndefined(); + }); + + it('should handle file not found', async () => { + const result = await processSingleFileContent( + nonExistentFilePath, + tempRootDir, + ); + expect(result.error).toContain('File not found'); + expect(result.returnDisplay).toContain('File not found'); + }); + + it('should handle read errors for text files', async () => { + actualNodeFs.writeFileSync(testTextFilePath, 'content'); // File must exist for initial statSync + const readError = new Error('Simulated read error'); + vi.spyOn(fsPromises, 'readFile').mockRejectedValueOnce(readError); + + const result = await processSingleFileContent( + testTextFilePath, + tempRootDir, + ); + expect(result.error).toContain('Simulated read error'); + expect(result.returnDisplay).toContain('Simulated read error'); + }); + + it('should handle read errors for image/pdf files', async () => { + actualNodeFs.writeFileSync(testImageFilePath, 'content'); // File must exist + mockMimeLookup.mockReturnValue('image/png'); + const readError = new Error('Simulated image read error'); + vi.spyOn(fsPromises, 'readFile').mockRejectedValueOnce(readError); + + const result = await processSingleFileContent( + testImageFilePath, + tempRootDir, + ); + expect(result.error).toContain('Simulated image read error'); + expect(result.returnDisplay).toContain('Simulated image read error'); + }); + + it('should process an image file', async () => { + const 
fakePngData = Buffer.from('fake png data'); + actualNodeFs.writeFileSync(testImageFilePath, fakePngData); + mockMimeLookup.mockReturnValue('image/png'); + const result = await processSingleFileContent( + testImageFilePath, + tempRootDir, + ); + expect( + (result.llmContent as { inlineData: unknown }).inlineData, + ).toBeDefined(); + expect( + (result.llmContent as { inlineData: { mimeType: string } }).inlineData + .mimeType, + ).toBe('image/png'); + expect( + (result.llmContent as { inlineData: { data: string } }).inlineData.data, + ).toBe(fakePngData.toString('base64')); + expect(result.returnDisplay).toContain('Read image file: image.png'); + }); + + it('should process a PDF file', async () => { + const fakePdfData = Buffer.from('fake pdf data'); + actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData); + mockMimeLookup.mockReturnValue('application/pdf'); + const result = await processSingleFileContent( + testPdfFilePath, + tempRootDir, + ); + expect( + (result.llmContent as { inlineData: unknown }).inlineData, + ).toBeDefined(); + expect( + (result.llmContent as { inlineData: { mimeType: string } }).inlineData + .mimeType, + ).toBe('application/pdf'); + expect( + (result.llmContent as { inlineData: { data: string } }).inlineData.data, + ).toBe(fakePdfData.toString('base64')); + expect(result.returnDisplay).toContain('Read pdf file: document.pdf'); + }); + + it('should skip binary files', async () => { + actualNodeFs.writeFileSync( + testBinaryFilePath, + Buffer.from([0x00, 0x01, 0x02]), + ); + mockMimeLookup.mockReturnValueOnce('application/octet-stream'); + // isBinaryFile will operate on the real file. 
+ + const result = await processSingleFileContent( + testBinaryFilePath, + tempRootDir, + ); + expect(result.llmContent).toContain( + 'Cannot display content of binary file', + ); + expect(result.returnDisplay).toContain('Skipped binary file: app.exe'); + }); + + it('should handle path being a directory', async () => { + const result = await processSingleFileContent(directoryPath, tempRootDir); + expect(result.error).toContain('Path is a directory'); + expect(result.returnDisplay).toContain('Path is a directory'); + }); + + it('should paginate text files correctly (offset and limit)', async () => { + const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`); + actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); + + const result = await processSingleFileContent( + testTextFilePath, + tempRootDir, + 5, + 5, + ); // Read lines 6-10 + const expectedContent = lines.slice(5, 10).join('\n'); + + expect(result.llmContent).toContain(expectedContent); + expect(result.llmContent).toContain( + '[File content truncated: showing lines 6-10 of 20 total lines. 
Use offset/limit parameters to view more.]', + ); + expect(result.returnDisplay).toContain( + 'Read text file: test.txt (truncated)', + ); + expect(result.isTruncated).toBe(true); + expect(result.originalLineCount).toBe(20); + expect(result.linesShown).toEqual([6, 10]); + }); + + it('should handle limit exceeding file length', async () => { + const lines = ['Line 1', 'Line 2']; + actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n')); + + const result = await processSingleFileContent( + testTextFilePath, + tempRootDir, + 0, + 10, + ); + const expectedContent = lines.join('\n'); + + expect(result.llmContent).toBe(expectedContent); + expect(result.returnDisplay).toContain('Read text file: test.txt'); + expect(result.isTruncated).toBe(false); + expect(result.originalLineCount).toBe(2); + expect(result.linesShown).toEqual([1, 2]); + }); + + it('should truncate long lines in text files', async () => { + const longLine = 'a'.repeat(2500); + actualNodeFs.writeFileSync( + testTextFilePath, + `Short line\n${longLine}\nAnother short line`, + ); + + const result = await processSingleFileContent( + testTextFilePath, + tempRootDir, + ); + + expect(result.llmContent).toContain('Short line'); + expect(result.llmContent).toContain( + longLine.substring(0, 2000) + '... 
[truncated]', + ); + expect(result.llmContent).toContain('Another short line'); + expect(result.llmContent).toContain( + '[File content partially truncated: some lines exceeded maximum length of 2000 characters.]', + ); + expect(result.isTruncated).toBe(true); + }); + }); +}); diff --git a/packages/server/src/utils/fileUtils.ts b/packages/server/src/utils/fileUtils.ts new file mode 100644 index 00000000..0e9eef4c --- /dev/null +++ b/packages/server/src/utils/fileUtils.ts @@ -0,0 +1,280 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'fs'; +import path from 'path'; +import { PartUnion } from '@google/genai'; +import mime from 'mime-types'; + +// Constants for text file processing +const DEFAULT_MAX_LINES_TEXT_FILE = 2000; +const MAX_LINE_LENGTH_TEXT_FILE = 2000; + +// Default values for encoding and separator format +export const DEFAULT_ENCODING: BufferEncoding = 'utf-8'; + +/** + * Checks if a path is within a given root directory. + * @param pathToCheck The absolute path to check. + * @param rootDirectory The absolute root directory. + * @returns True if the path is within the root directory, false otherwise. + */ +export function isWithinRoot( + pathToCheck: string, + rootDirectory: string, +): boolean { + const normalizedPathToCheck = path.normalize(pathToCheck); + const normalizedRootDirectory = path.normalize(rootDirectory); + + // Ensure the rootDirectory path ends with a separator for correct startsWith comparison, + // unless it's the root path itself (e.g., '/' or 'C:\'). + const rootWithSeparator = + normalizedRootDirectory === path.sep || + normalizedRootDirectory.endsWith(path.sep) + ? normalizedRootDirectory + : normalizedRootDirectory + path.sep; + + return ( + normalizedPathToCheck === normalizedRootDirectory || + normalizedPathToCheck.startsWith(rootWithSeparator) + ); +} + +/** + * Determines if a file is likely binary based on content sampling. + * @param filePath Path to the file. 
+ * @returns True if the file appears to be binary. + */ +export function isBinaryFile(filePath: string): boolean { + try { + const fd = fs.openSync(filePath, 'r'); + // Read up to 4KB or file size, whichever is smaller + const fileSize = fs.fstatSync(fd).size; + if (fileSize === 0) { + // Empty file is not considered binary for content checking + fs.closeSync(fd); + return false; + } + const bufferSize = Math.min(4096, fileSize); + const buffer = Buffer.alloc(bufferSize); + const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0); + fs.closeSync(fd); + + if (bytesRead === 0) return false; + + let nonPrintableCount = 0; + for (let i = 0; i < bytesRead; i++) { + if (buffer[i] === 0) return true; // Null byte is a strong indicator + if (buffer[i] < 9 || (buffer[i] > 13 && buffer[i] < 32)) { + nonPrintableCount++; + } + } + // If >30% non-printable characters, consider it binary + return nonPrintableCount / bytesRead > 0.3; + } catch { + // If any error occurs (e.g. file not found, permissions), + // treat as not binary here; let higher-level functions handle existence/access errors. + return false; + } +} + +/** + * Detects the type of file based on extension and content. + * @param filePath Path to the file. + * @returns 'text', 'image', 'pdf', or 'binary'. + */ +export function detectFileType( + filePath: string, +): 'text' | 'image' | 'pdf' | 'binary' { + const ext = path.extname(filePath).toLowerCase(); + const lookedUpMimeType = mime.lookup(filePath); // Returns false if not found, or the mime type string + + if (lookedUpMimeType && lookedUpMimeType.startsWith('image/')) { + return 'image'; + } + if (lookedUpMimeType && lookedUpMimeType === 'application/pdf') { + return 'pdf'; + } + + // Stricter binary check for common non-text extensions before content check + // These are often not well-covered by mime-types or might be misidentified. 
+ if ( + [ + '.zip', + '.tar', + '.gz', + '.exe', + '.dll', + '.so', + '.class', + '.jar', + '.war', + '.7z', + '.doc', + '.docx', + '.xls', + '.xlsx', + '.ppt', + '.pptx', + '.odt', + '.ods', + '.odp', + '.bin', + '.dat', + '.obj', + '.o', + '.a', + '.lib', + '.wasm', + '.pyc', + '.pyo', + ].includes(ext) + ) { + return 'binary'; + } + + // Fallback to content-based check if mime type wasn't conclusive for image/pdf + // and it's not a known binary extension. + if (isBinaryFile(filePath)) { + return 'binary'; + } + + return 'text'; +} + +export interface ProcessedFileReadResult { + llmContent: PartUnion; // string for text, Part for image/pdf/unreadable binary + returnDisplay: string; + error?: string; // Optional error message for the LLM if file processing failed + isTruncated?: boolean; // For text files, indicates if content was truncated + originalLineCount?: number; // For text files + linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display) +} + +/** + * Reads and processes a single file, handling text, images, and PDFs. + * @param filePath Absolute path to the file. + * @param rootDirectory Absolute path to the project root for relative path display. + * @param offset Optional offset for text files (0-based line number). + * @param limit Optional limit for text files (number of lines to read). + * @returns ProcessedFileReadResult object. 
+ */ +export async function processSingleFileContent( + filePath: string, + rootDirectory: string, + offset?: number, + limit?: number, +): Promise { + try { + if (!fs.existsSync(filePath)) { + // Sync check is acceptable before async read + return { + llmContent: '', + returnDisplay: 'File not found.', + error: `File not found: ${filePath}`, + }; + } + const stats = fs.statSync(filePath); // Sync check + if (stats.isDirectory()) { + return { + llmContent: '', + returnDisplay: 'Path is a directory.', + error: `Path is a directory, not a file: ${filePath}`, + }; + } + + const fileType = detectFileType(filePath); + const relativePathForDisplay = path + .relative(rootDirectory, filePath) + .replace(/\\/g, '/'); + + switch (fileType) { + case 'binary': { + return { + llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`, + returnDisplay: `Skipped binary file: ${relativePathForDisplay}`, + }; + } + case 'text': { + const content = await fs.promises.readFile(filePath, 'utf8'); + const lines = content.split('\n'); + const originalLineCount = lines.length; + + const startLine = offset || 0; + const effectiveLimit = + limit === undefined ? DEFAULT_MAX_LINES_TEXT_FILE : limit; + // Ensure endLine does not exceed originalLineCount + const endLine = Math.min(startLine + effectiveLimit, originalLineCount); + // Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high + const actualStartLine = Math.min(startLine, originalLineCount); + const selectedLines = lines.slice(actualStartLine, endLine); + + let linesWereTruncatedInLength = false; + const formattedLines = selectedLines.map((line) => { + if (line.length > MAX_LINE_LENGTH_TEXT_FILE) { + linesWereTruncatedInLength = true; + return ( + line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... 
[truncated]' + ); + } + return line; + }); + + const contentRangeTruncated = endLine < originalLineCount; + const isTruncated = contentRangeTruncated || linesWereTruncatedInLength; + + let llmTextContent = ''; + if (contentRangeTruncated) { + llmTextContent += `[File content truncated: showing lines ${actualStartLine + 1}-${endLine} of ${originalLineCount} total lines. Use offset/limit parameters to view more.]\n`; + } else if (linesWereTruncatedInLength) { + llmTextContent += `[File content partially truncated: some lines exceeded maximum length of ${MAX_LINE_LENGTH_TEXT_FILE} characters.]\n`; + } + llmTextContent += formattedLines.join('\n'); + + return { + llmContent: llmTextContent, + returnDisplay: `Read text file: ${relativePathForDisplay}${isTruncated ? ' (truncated)' : ''}`, + isTruncated, + originalLineCount, + linesShown: [actualStartLine + 1, endLine], + }; + } + case 'image': + case 'pdf': { + const contentBuffer = await fs.promises.readFile(filePath); + const base64Data = contentBuffer.toString('base64'); + return { + llmContent: { + inlineData: { + data: base64Data, + mimeType: mime.lookup(filePath) || 'application/octet-stream', + }, + }, + returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`, + }; + } + default: { + // Should not happen with current detectFileType logic + const exhaustiveCheck: never = fileType; + return { + llmContent: `Unhandled file type: ${exhaustiveCheck}`, + returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`, + error: `Unhandled file type for ${filePath}`, + }; + } + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + const displayPath = path + .relative(rootDirectory, filePath) + .replace(/\\/g, '/'); + return { + llmContent: `Error reading file ${displayPath}: ${errorMessage}`, + returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`, + error: `Error reading file ${filePath}: ${errorMessage}`, + }; + } +}