Refactor read-file and support images. (#480)

This commit is contained in:
Jacob Richman 2025-05-29 22:30:18 +00:00 committed by GitHub
parent f21abdd1f0
commit dab7517622
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 1475 additions and 260 deletions

80
.vscode/launch.json vendored
View File

@ -83,6 +83,66 @@
"internalConsoleOptions": "neverOpen",
"skipFiles": ["<node_internals>/**"]
},
{
"type": "node",
"request": "launch",
"name": "Debug Server Test: read-file",
"runtimeExecutable": "npm",
"runtimeArgs": [
"run",
"test",
"-w",
"packages/server",
"--",
"--inspect-brk=9229",
"--no-file-parallelism",
"${workspaceFolder}/packages/server/src/tools/read-file.test.ts"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
"skipFiles": ["<node_internals>/**"]
},
{
"type": "node",
"request": "launch",
"name": "Debug Server Test: turn",
"runtimeExecutable": "npm",
"runtimeArgs": [
"run",
"test",
"-w",
"packages/server",
"--",
"--inspect-brk=9229",
"--no-file-parallelism",
"${workspaceFolder}/packages/server/src/core/turn.test.ts"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
"skipFiles": ["<node_internals>/**"]
},
{
"type": "node",
"request": "launch",
"name": "Debug Server Test: fileUtils",
"runtimeExecutable": "npm",
"runtimeArgs": [
"run",
"test",
"-w",
"packages/server",
"--",
"--inspect-brk=9229",
"--no-file-parallelism",
"${workspaceFolder}/packages/server/src/utils/fileUtils.test.ts"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
"skipFiles": ["<node_internals>/**"]
},
{
"type": "node",
"request": "launch",
@ -122,6 +182,26 @@
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
"skipFiles": ["<node_internals>/**"]
},
{
"type": "node",
"request": "launch",
"name": "Debug CLI Test: useGeminiStream",
"runtimeExecutable": "npm",
"runtimeArgs": [
"run",
"test",
"-w",
"packages/cli",
"--",
"--inspect-brk=9229",
"--no-file-parallelism",
"${workspaceFolder}/packages/cli/src/ui/hooks/useGeminiStream.test.tsx"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
"skipFiles": ["<node_internals>/**"]
}
]
}

View File

@ -25,17 +25,20 @@ All file system tools operate within a `rootDirectory` (usually the current work
- **Tool Name:** `read_file`
- **Display Name:** ReadFile
- **File:** `read-file.ts`
- **Description:** Reads and returns the content of a specified file. It can handle large files by allowing reading of specific line ranges and will attempt to detect and skip binary files.
- **Description:** Reads and returns the content of a specified file. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges. Other binary file types are generally skipped.
- **Parameters:**
- `path` (string, required): The absolute path to the file to read.
- `offset` (number, optional): The 0-based line number to start reading from. Requires `limit` to be set.
- `limit` (number, optional): The maximum number of lines to read. If omitted, reads a default maximum (e.g., 2000 lines).
- `offset` (number, optional): For text files, the 0-based line number to start reading from. Requires `limit` to be set.
- `limit` (number, optional): For text files, the maximum number of lines to read. If omitted, reads a default maximum (e.g., 2000 lines) or the entire file if feasible.
- **Behavior:**
- Returns the content of the specified text file.
- If `offset` and `limit` are used, returns only that slice of lines.
- Indicates if the content was truncated due to line limits or line length limits.
- Attempts to identify binary files (images, executables) and returns a message indicating it's a binary file instead of its content.
- **Output (`llmContent`):** The file content, potentially prefixed with a truncation message (e.g., `[File truncated: showing lines 1-100 of 500 total lines...]\nActual file content...`). For binary files: `Binary file: /path/to/image.png (image)`.
- For text files: Returns the content. If `offset` and `limit` are used, returns only that slice of lines. Indicates if content was truncated due to line limits or line length limits.
- For image and PDF files: Returns the file content as a base64 encoded data structure suitable for model consumption.
- For other binary files: Attempts to identify and skip them, returning a message indicating it's a generic binary file.
- **Output (`llmContent`):**
- For text files: The file content, potentially prefixed with a truncation message (e.g., `[File content truncated: showing lines 1-100 of 500 total lines...]\nActual file content...`).
- For image/PDF files: An object containing `inlineData` with `mimeType` and base64 `data` (e.g., `{ inlineData: { mimeType: 'image/png', data: 'base64encodedstring' } }`).
- For other binary files: A message like `Cannot display content of binary file: /path/to/data.bin`.
- **Confirmation:** No.
## 3. `write_file` (WriteFile)

View File

@ -0,0 +1,137 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi } from 'vitest';
import { mergePartListUnions } from './useGeminiStream.js';
import { Part, PartListUnion } from '@google/genai';
// Mock useToolScheduler
vi.mock('./useToolScheduler', async () => {
const actual = await vi.importActual('./useToolScheduler');
return {
...actual, // We need mapToDisplay from actual
useToolScheduler: vi.fn(),
};
});
// Unit tests for mergePartListUnions (useGeminiStream.ts): verifies that a
// list of PartListUnion values — each either a single Part or an array of
// Parts — is flattened into one ordered list of Parts.
describe('mergePartListUnions', () => {
it('should merge multiple PartListUnion arrays', () => {
const list1: PartListUnion = [{ text: 'Hello' }];
const list2: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: 'abc' } },
];
const list3: PartListUnion = [{ text: 'World' }, { text: '!' }];
const result = mergePartListUnions([list1, list2, list3]);
expect(result).toEqual([
{ text: 'Hello' },
{ inlineData: { mimeType: 'image/png', data: 'abc' } },
{ text: 'World' },
{ text: '!' },
]);
});
it('should handle empty arrays in the input list', () => {
const list1: PartListUnion = [{ text: 'First' }];
const list2: PartListUnion = [];
const list3: PartListUnion = [{ text: 'Last' }];
const result = mergePartListUnions([list1, list2, list3]);
// Empty arrays contribute no parts and leave no holes in the output.
expect(result).toEqual([{ text: 'First' }, { text: 'Last' }]);
});
it('should handle a single PartListUnion array', () => {
const list1: PartListUnion = [
{ text: 'One' },
{ inlineData: { mimeType: 'image/jpeg', data: 'xyz' } },
];
const result = mergePartListUnions([list1]);
expect(result).toEqual(list1);
});
it('should return an empty array if all input arrays are empty', () => {
const list1: PartListUnion = [];
const list2: PartListUnion = [];
const result = mergePartListUnions([list1, list2]);
expect(result).toEqual([]);
});
it('should handle input list being empty', () => {
const result = mergePartListUnions([]);
expect(result).toEqual([]);
});
// A PartListUnion may also be a bare Part (not wrapped in an array); bare
// parts must be appended as-is while arrays are expanded.
it('should correctly merge when PartListUnion items are single Parts not in arrays', () => {
const part1: Part = { text: 'Single part 1' };
const part2: Part = { inlineData: { mimeType: 'image/gif', data: 'gif' } };
const listContainingSingleParts: PartListUnion[] = [
part1,
[part2],
{ text: 'Another single part' },
];
const result = mergePartListUnions(listContainingSingleParts);
expect(result).toEqual([
{ text: 'Single part 1' },
{ inlineData: { mimeType: 'image/gif', data: 'gif' } },
{ text: 'Another single part' },
]);
});
it('should handle a mix of arrays and single parts, including empty arrays and undefined/null parts if they were possible (though PartListUnion typing restricts this)', () => {
const list1: PartListUnion = [{ text: 'A' }];
const list2: PartListUnion = [];
const part3: Part = { text: 'B' };
const list4: PartListUnion = [
{ text: 'C' },
{ inlineData: { mimeType: 'text/plain', data: 'D' } },
];
const result = mergePartListUnions([list1, list2, part3, list4]);
expect(result).toEqual([
{ text: 'A' },
{ text: 'B' },
{ text: 'C' },
{ inlineData: { mimeType: 'text/plain', data: 'D' } },
]);
});
it('should preserve the order of parts from the input arrays', () => {
const listA: PartListUnion = [{ text: '1' }, { text: '2' }];
const listB: PartListUnion = [{ text: '3' }];
const listC: PartListUnion = [{ text: '4' }, { text: '5' }];
const result = mergePartListUnions([listA, listB, listC]);
expect(result).toEqual([
{ text: '1' },
{ text: '2' },
{ text: '3' },
{ text: '4' },
{ text: '5' },
]);
});
it('should handle cases where some PartListUnion items are single Parts and others are arrays of Parts', () => {
const singlePart1: Part = { text: 'First single' };
const arrayPart1: Part[] = [
{ text: 'Array item 1' },
{ text: 'Array item 2' },
];
const singlePart2: Part = {
inlineData: { mimeType: 'application/json', data: 'e30=' },
}; // {}
const arrayPart2: Part[] = [{ text: 'Last array item' }];
const result = mergePartListUnions([
singlePart1,
arrayPart1,
singlePart2,
arrayPart2,
]);
expect(result).toEqual([
{ text: 'First single' },
{ text: 'Array item 1' },
{ text: 'Array item 2' },
{ inlineData: { mimeType: 'application/json', data: 'e30=' } },
{ text: 'Last array item' },
]);
});
});

View File

@ -41,6 +41,18 @@ import { useLogger } from './useLogger.js';
import { useToolScheduler, mapToDisplay } from './useToolScheduler.js';
import { GeminiChat } from '@gemini-code/server/src/core/geminiChat.js';
/**
 * Merges several PartListUnion values into a single flat PartListUnion.
 *
 * Each entry may be either a single part or an array of parts; arrays are
 * expanded one level deep so the original ordering of parts is preserved.
 */
export function mergePartListUnions(list: PartListUnion[]): PartListUnion {
  // Array.prototype.flat() expands exactly one level of nesting, which
  // matches the `PartUnion | PartUnion[]` shape of each list entry.
  return list.flat();
}
enum StreamProcessingStatus {
Completed,
UserCancelled,
@ -74,16 +86,16 @@ export const useGeminiStream = (
(tools) => {
if (tools.length) {
addItem(mapToDisplay(tools), Date.now());
submitQuery(
tools
const toolResponses = tools
.filter(
(t) =>
t.status === 'error' ||
t.status === 'cancelled' ||
t.status === 'success',
)
.map((t) => t.response.responsePart),
);
.map((t) => t.response.responseParts);
submitQuery(mergePartListUnions(toolResponses));
}
},
config,
@ -313,7 +325,7 @@ export const useGeminiStream = (
};
const responseInfo: ToolCallResponseInfo = {
callId: request.callId,
responsePart: functionResponse,
responseParts: functionResponse,
resultDisplay,
error: new Error(declineMessage),
};

View File

@ -0,0 +1,126 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { formatLlmContentForFunctionResponse } from './useToolScheduler.js';
import { Part, PartListUnion } from '@google/genai';
// Unit tests for formatLlmContentForFunctionResponse (useToolScheduler.ts):
// verifies how tool results are split into the JSON payload for a Gemini
// FunctionResponse plus the additional Parts that follow it.
describe('formatLlmContentForFunctionResponse', () => {
it('should handle simple string llmContent', () => {
const llmContent = 'Simple text output';
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({ output: 'Simple text output' });
expect(additionalParts).toEqual([]);
});
it('should handle llmContent as a single Part with text', () => {
const llmContent: Part = { text: 'Text from Part object' };
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({ output: 'Text from Part object' });
expect(additionalParts).toEqual([]);
});
// Single-element arrays are unwrapped and treated like a bare Part.
it('should handle llmContent as a PartListUnion array with a single text Part', () => {
const llmContent: PartListUnion = [{ text: 'Text from array' }];
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({ output: 'Text from array' });
expect(additionalParts).toEqual([]);
});
// Binary parts are described in the JSON and emitted as additional parts.
it('should handle llmContent with inlineData', () => {
const llmContent: Part = {
inlineData: { mimeType: 'image/png', data: 'base64...' },
};
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Binary content of type image/png was processed.',
});
expect(additionalParts).toEqual([llmContent]);
});
it('should handle llmContent with fileData', () => {
const llmContent: Part = {
fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' },
};
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Binary content of type application/pdf was processed.',
});
expect(additionalParts).toEqual([llmContent]);
});
it('should handle llmContent as an array of multiple Parts (text and inlineData)', () => {
const llmContent: PartListUnion = [
{ text: 'Some textual description' },
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data...' } },
{ text: 'Another text part' },
];
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Tool execution succeeded.',
});
expect(additionalParts).toEqual(llmContent);
});
it('should handle llmContent as an array with a single inlineData Part', () => {
const llmContent: PartListUnion = [
{ inlineData: { mimeType: 'image/gif', data: 'gifdata...' } },
];
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
// When the array is a single Part and that part is inlineData
expect(functionResponseJson).toEqual({
status: 'Binary content of type image/gif was processed.',
});
expect(additionalParts).toEqual(llmContent);
});
it('should handle llmContent as a generic Part (not text, inlineData, or fileData)', () => {
// This case might represent a malformed or unexpected Part type.
// For example, a Part that is just an empty object or has other properties.
const llmContent: Part = { functionCall: { name: 'test', args: {} } }; // Example of a non-standard part for this context
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Tool execution succeeded.',
});
expect(additionalParts).toEqual([llmContent]);
});
it('should handle empty string llmContent', () => {
const llmContent = '';
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({ output: '' });
expect(additionalParts).toEqual([]);
});
it('should handle llmContent as an empty array', () => {
const llmContent: PartListUnion = [];
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Tool execution succeeded.',
});
expect(additionalParts).toEqual([]);
});
it('should handle llmContent as a Part with undefined inlineData/fileData/text', () => {
const llmContent: Part = {}; // An empty part object
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Tool execution succeeded.',
});
expect(additionalParts).toEqual([llmContent]);
});
});

View File

@ -13,7 +13,7 @@ import {
ToolCallConfirmationDetails,
ToolResult,
} from '@gemini-code/server';
import { Part } from '@google/genai';
import { Part, PartUnion, PartListUnion } from '@google/genai';
import { useCallback, useEffect, useState } from 'react';
import {
HistoryItemToolGroup,
@ -88,6 +88,60 @@ export type CompletedToolCall =
| CancelledToolCall
| ErroredToolCall;
/**
 * Converts a tool's PartListUnion result into the JSON payload for a Gemini
 * FunctionResponse plus any extra Parts that must follow that response.
 *
 * This is required because FunctionResponse appears to only support JSON
 * and not arbitrary parts. Including parts like inlineData or fileData
 * directly in a FunctionResponse confuses the model resulting in a failure
 * to interpret the multimodal content and context window exceeded errors.
 */
export function formatLlmContentForFunctionResponse(
  llmContent: PartListUnion,
): {
  functionResponseJson: Record<string, string>;
  additionalParts: PartUnion[];
} {
  // A one-element array is equivalent to its single Part; unwrap it so it
  // follows the single-Part path below.
  const content =
    Array.isArray(llmContent) && llmContent.length === 1
      ? llmContent[0]
      : llmContent;

  if (typeof content === 'string') {
    // Plain text maps straight onto the functionResponse output field.
    return { functionResponseJson: { output: content }, additionalParts: [] };
  }

  if (Array.isArray(content)) {
    // Multiple (or zero) parts: report generic success in the JSON and
    // forward every part after the response.
    return {
      functionResponseJson: { status: 'Tool execution succeeded.' },
      additionalParts: [...content],
    };
  }

  if (content.inlineData !== undefined || content.fileData !== undefined) {
    // Binary payloads cannot live inside the functionResponse JSON;
    // describe them there and emit the actual Part afterwards.
    const mimeType =
      content.inlineData?.mimeType || content.fileData?.mimeType || 'unknown';
    return {
      functionResponseJson: {
        status: `Binary content of type ${mimeType} was processed.`,
      },
      additionalParts: [content],
    };
  }

  if (content.text !== undefined) {
    return {
      functionResponseJson: { output: content.text },
      additionalParts: [],
    };
  }

  // Unrecognized Part shape: report success and pass the Part through as-is.
  return {
    functionResponseJson: { status: 'Tool execution succeeded.' },
    additionalParts: [content],
  };
}
export function useToolScheduler(
onComplete: (tools: CompletedToolCall[]) => void,
config: Config,
@ -201,7 +255,7 @@ export function useToolScheduler(
status: 'cancelled',
response: {
callId: c.request.callId,
responsePart: {
responseParts: {
functionResponse: {
id: c.request.callId,
name: c.request.name,
@ -276,21 +330,24 @@ export function useToolScheduler(
.execute(t.request.args, signal, onOutputChunk)
.then((result: ToolResult) => {
if (signal.aborted) {
// TODO(jacobr): avoid stringifying the LLM content.
setToolCalls(
setStatus(callId, 'cancelled', String(result.llmContent)),
);
return;
}
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(result.llmContent);
const functionResponse: Part = {
functionResponse: {
name: t.request.name,
id: callId,
response: { output: result.llmContent },
response: functionResponseJson,
},
};
const response: ToolCallResponseInfo = {
callId,
responsePart: functionResponse,
responseParts: [functionResponse, ...additionalParts],
resultDisplay: result.returnDisplay,
error: undefined,
};
@ -401,7 +458,7 @@ function setStatus(
status: 'cancelled',
response: {
callId: t.request.callId,
responsePart: {
responseParts: {
functionResponse: {
id: t.request.callId,
name: t.request.name,
@ -446,7 +503,7 @@ const toolErrorResponse = (
): ToolCallResponseInfo => ({
callId: request.callId,
error,
responsePart: {
responseParts: {
functionResponse: {
id: request.callId,
name: request.name,

View File

@ -5,7 +5,6 @@
*/
import {
Part,
PartListUnion,
GenerateContentResponse,
FunctionCall,
@ -57,7 +56,7 @@ export interface ToolCallRequestInfo {
export interface ToolCallResponseInfo {
callId: string;
responsePart: Part;
responseParts: PartListUnion;
resultDisplay: ToolResultDisplay | undefined;
error: Error | undefined;
}

View File

@ -0,0 +1,228 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { vi, describe, it, expect, beforeEach, afterEach, Mock } from 'vitest';
import { ReadFileTool, ReadFileToolParams } from './read-file.js';
import * as fileUtils from '../utils/fileUtils.js';
import path from 'path';
import os from 'os';
import fs from 'fs'; // For actual fs operations in setup
// Mock fileUtils.processSingleFileContent
vi.mock('../utils/fileUtils', async () => {
const actualFileUtils =
await vi.importActual<typeof fileUtils>('../utils/fileUtils');
return {
...actualFileUtils, // Spread actual implementations
processSingleFileContent: vi.fn(), // Mock specific function
};
});
const mockProcessSingleFileContent = fileUtils.processSingleFileContent as Mock;
// Unit tests for ReadFileTool. File reading itself is delegated to
// fileUtils.processSingleFileContent, which is mocked above, so these tests
// cover parameter validation, description rendering, and how execute()
// forwards arguments and maps the helper's result/error into a ToolResult.
describe('ReadFileTool', () => {
let tempRootDir: string;
let tool: ReadFileTool;
const abortSignal = new AbortController().signal;
beforeEach(() => {
// Create a unique temporary root directory for each test run
tempRootDir = fs.mkdtempSync(
path.join(os.tmpdir(), 'read-file-tool-root-'),
);
tool = new ReadFileTool(tempRootDir);
mockProcessSingleFileContent.mockReset();
});
afterEach(() => {
// Clean up the temporary root directory
if (fs.existsSync(tempRootDir)) {
fs.rmSync(tempRootDir, { recursive: true, force: true });
}
});
describe('validateToolParams', () => {
// Validation does not touch the filesystem, so the target files here do
// not need to exist.
it('should return null for valid params (absolute path within root)', () => {
const params: ReadFileToolParams = {
path: path.join(tempRootDir, 'test.txt'),
};
expect(tool.validateToolParams(params)).toBeNull();
});
it('should return null for valid params with offset and limit', () => {
const params: ReadFileToolParams = {
path: path.join(tempRootDir, 'test.txt'),
offset: 0,
limit: 10,
};
expect(tool.validateToolParams(params)).toBeNull();
});
it('should return error for relative path', () => {
const params: ReadFileToolParams = { path: 'test.txt' };
expect(tool.validateToolParams(params)).toMatch(
/File path must be absolute/,
);
});
it('should return error for path outside root', () => {
const outsidePath = path.resolve(os.tmpdir(), 'outside-root.txt');
const params: ReadFileToolParams = { path: outsidePath };
expect(tool.validateToolParams(params)).toMatch(
/File path must be within the root directory/,
);
});
it('should return error for negative offset', () => {
const params: ReadFileToolParams = {
path: path.join(tempRootDir, 'test.txt'),
offset: -1,
limit: 10,
};
expect(tool.validateToolParams(params)).toBe(
'Offset must be a non-negative number',
);
});
it('should return error for non-positive limit', () => {
const paramsZero: ReadFileToolParams = {
path: path.join(tempRootDir, 'test.txt'),
offset: 0,
limit: 0,
};
expect(tool.validateToolParams(paramsZero)).toBe(
'Limit must be a positive number',
);
const paramsNegative: ReadFileToolParams = {
path: path.join(tempRootDir, 'test.txt'),
offset: 0,
limit: -5,
};
expect(tool.validateToolParams(paramsNegative)).toBe(
'Limit must be a positive number',
);
});
it('should return error for schema validation failure (e.g. missing path)', () => {
const params = { offset: 0 } as unknown as ReadFileToolParams;
expect(tool.validateToolParams(params)).toBe(
'Parameters failed schema validation.',
);
});
});
describe('getDescription', () => {
it('should return a shortened, relative path', () => {
const filePath = path.join(tempRootDir, 'sub', 'dir', 'file.txt');
const params: ReadFileToolParams = { path: filePath };
// Assuming tempRootDir is something like /tmp/read-file-tool-root-XXXXXX
// The relative path would be sub/dir/file.txt
expect(tool.getDescription(params)).toBe('sub/dir/file.txt');
});
it('should return . if path is the root directory', () => {
const params: ReadFileToolParams = { path: tempRootDir };
expect(tool.getDescription(params)).toBe('.');
});
});
describe('execute', () => {
it('should return validation error if params are invalid', async () => {
const params: ReadFileToolParams = { path: 'relative/path.txt' };
const result = await tool.execute(params, abortSignal);
expect(result.llmContent).toMatch(/Error: Invalid parameters provided/);
expect(result.returnDisplay).toMatch(/File path must be absolute/);
});
it('should return error from processSingleFileContent if it fails', async () => {
const filePath = path.join(tempRootDir, 'error.txt');
const params: ReadFileToolParams = { path: filePath };
const errorMessage = 'Simulated read error';
mockProcessSingleFileContent.mockResolvedValue({
llmContent: `Error reading file ${filePath}: ${errorMessage}`,
returnDisplay: `Error reading file ${filePath}: ${errorMessage}`,
error: errorMessage,
});
const result = await tool.execute(params, abortSignal);
// offset/limit were not supplied, so undefined is forwarded for both.
expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
filePath,
tempRootDir,
undefined,
undefined,
);
expect(result.llmContent).toContain(errorMessage);
expect(result.returnDisplay).toContain(errorMessage);
});
it('should return success result for a text file', async () => {
const filePath = path.join(tempRootDir, 'textfile.txt');
const fileContent = 'This is a test file.';
const params: ReadFileToolParams = { path: filePath };
mockProcessSingleFileContent.mockResolvedValue({
llmContent: fileContent,
returnDisplay: `Read text file: ${path.basename(filePath)}`,
});
const result = await tool.execute(params, abortSignal);
expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
filePath,
tempRootDir,
undefined,
undefined,
);
expect(result.llmContent).toBe(fileContent);
expect(result.returnDisplay).toBe(
`Read text file: ${path.basename(filePath)}`,
);
});
// Image results flow through as an inlineData Part, not a string.
it('should return success result for an image file', async () => {
const filePath = path.join(tempRootDir, 'image.png');
const imageData = {
inlineData: { mimeType: 'image/png', data: 'base64...' },
};
const params: ReadFileToolParams = { path: filePath };
mockProcessSingleFileContent.mockResolvedValue({
llmContent: imageData,
returnDisplay: `Read image file: ${path.basename(filePath)}`,
});
const result = await tool.execute(params, abortSignal);
expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
filePath,
tempRootDir,
undefined,
undefined,
);
expect(result.llmContent).toEqual(imageData);
expect(result.returnDisplay).toBe(
`Read image file: ${path.basename(filePath)}`,
);
});
it('should pass offset and limit to processSingleFileContent', async () => {
const filePath = path.join(tempRootDir, 'paginated.txt');
const params: ReadFileToolParams = {
path: filePath,
offset: 10,
limit: 5,
};
mockProcessSingleFileContent.mockResolvedValue({
llmContent: 'some lines',
returnDisplay: 'Read text file (paginated)',
});
await tool.execute(params, abortSignal);
expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
filePath,
tempRootDir,
10,
5,
);
});
});
});

View File

@ -4,11 +4,11 @@
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'fs';
import path from 'path';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { makeRelative, shortenPath } from '../utils/paths.js';
import { BaseTool, ToolResult } from './tools.js';
import { isWithinRoot, processSingleFileContent } from '../utils/fileUtils.js';
/**
* Parameters for the ReadFile tool
@ -35,14 +35,12 @@ export interface ReadFileToolParams {
*/
export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
static readonly Name: string = 'read_file';
private static readonly DEFAULT_MAX_LINES = 2000;
private static readonly MAX_LINE_LENGTH = 2000;
constructor(private rootDirectory: string) {
super(
ReadFileTool.Name,
'ReadFile',
'Reads and returns the content of a specified file from the local filesystem. Handles large files by allowing reading specific line ranges.',
'Reads and returns the content of a specified file from the local filesystem. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges.',
{
properties: {
path: {
@ -52,12 +50,12 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
},
offset: {
description:
"Optional: The 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
"Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
type: 'number',
},
limit: {
description:
"Optional: Maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible).",
"Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).",
type: 'number',
},
},
@ -68,28 +66,6 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
this.rootDirectory = path.resolve(rootDirectory);
}
/**
 * Checks if a path is within the root directory.
 * @param pathToCheck The path to check
 * @returns True if the path is within the root directory, false otherwise
 */
private isWithinRoot(pathToCheck: string): boolean {
  const candidate = path.normalize(pathToCheck);
  const root = path.normalize(this.rootDirectory);
  if (candidate === root) {
    return true;
  }
  // Append a trailing separator before the prefix test so that sibling
  // paths sharing a prefix (e.g. /root-extra) are not mistaken for
  // children of /root.
  const rootPrefix = root.endsWith(path.sep) ? root : root + path.sep;
  return candidate.startsWith(rootPrefix);
}
/**
* Validates the parameters for the ReadFile tool
* @param params Parameters to validate
* @returns True if parameters are valid, false otherwise
*/
validateToolParams(params: ReadFileToolParams): string | null {
if (
this.schema.parameters &&
@ -104,7 +80,7 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
if (!path.isAbsolute(filePath)) {
return `File path must be absolute: ${filePath}`;
}
if (!this.isWithinRoot(filePath)) {
if (!isWithinRoot(filePath, this.rootDirectory)) {
return `File path must be within the root directory (${this.rootDirectory}): ${filePath}`;
}
if (params.offset !== undefined && params.offset < 0) {
@ -116,83 +92,11 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
return null;
}
/**
 * Determines if a file is likely binary based on content sampling.
 * @param filePath Path to the file
 * @returns True if the file appears to be binary
 */
private isBinaryFile(filePath: string): boolean {
  let fd: number | undefined;
  try {
    // Sample the first 4KB of the file.
    fd = fs.openSync(filePath, 'r');
    const buffer = Buffer.alloc(4096);
    const bytesRead = fs.readSync(fd, buffer, 0, 4096, 0);
    // Check for null bytes or a high concentration of non-printable characters.
    let nonPrintableCount = 0;
    for (let i = 0; i < bytesRead; i++) {
      // A null byte is a strong indicator of binary data.
      if (buffer[i] === 0) {
        return true;
      }
      // Count non-printable characters (outside tab..carriage-return and
      // below space).
      if (buffer[i] < 9 || (buffer[i] > 13 && buffer[i] < 32)) {
        nonPrintableCount++;
      }
    }
    // If more than 30% of the sample is non-printable, likely binary.
    // Note: when bytesRead is 0 the ratio is NaN, so empty files are
    // treated as text, matching the previous behavior.
    return nonPrintableCount / bytesRead > 0.3;
  } catch {
    // Unreadable files are assumed non-binary; the actual read error will
    // surface later when the file content is requested.
    return false;
  } finally {
    // Bug fix: previously the descriptor leaked if readSync threw after a
    // successful openSync. Always close it here instead.
    if (fd !== undefined) {
      fs.closeSync(fd);
    }
  }
}
/**
 * Detects the type of file based on extension and content.
 * @param filePath Path to the file
 * @returns File type description: 'image', 'binary', or 'text'
 */
private detectFileType(filePath: string): string {
  const extension = path.extname(filePath).toLowerCase();
  // Common image formats are classified by extension alone.
  const imageExtensions = [
    '.jpg',
    '.jpeg',
    '.png',
    '.gif',
    '.bmp',
    '.webp',
    '.svg',
  ];
  if (imageExtensions.includes(extension)) {
    return 'image';
  }
  // Other well-known binary formats, also by extension.
  const binaryExtensions = ['.pdf', '.zip', '.tar', '.gz', '.exe', '.dll', '.so'];
  if (binaryExtensions.includes(extension)) {
    return 'binary';
  }
  // Unknown extension: fall back to sampling the content for binary
  // indicators.
  return this.isBinaryFile(filePath) ? 'binary' : 'text';
}
/**
 * Gets a description of the file reading operation.
 * @param params Parameters for the file reading
 * @returns A shortened, root-relative form of the requested path
 */
getDescription(params: ReadFileToolParams): string {
  return shortenPath(makeRelative(params.path, this.rootDirectory));
}
/**
* Reads a file and returns its contents with line numbers
* @param params Parameters for the file reading
* @returns Result with file contents
*/
async execute(
params: ReadFileToolParams,
_signal: AbortSignal,
@ -205,75 +109,23 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
};
}
const filePath = params.path;
try {
if (!fs.existsSync(filePath)) {
const result = await processSingleFileContent(
params.path,
this.rootDirectory,
params.offset,
params.limit,
);
if (result.error) {
return {
llmContent: `File not found: ${filePath}`,
returnDisplay: `File not found.`,
llmContent: result.error, // The detailed error for LLM
returnDisplay: result.returnDisplay, // User-friendly error
};
}
const stats = fs.statSync(filePath);
if (stats.isDirectory()) {
return {
llmContent: `Path is a directory, not a file: ${filePath}`,
returnDisplay: `File is directory.`,
llmContent: result.llmContent,
returnDisplay: result.returnDisplay,
};
}
const fileType = this.detectFileType(filePath);
if (fileType !== 'text') {
return {
llmContent: `Binary file: ${filePath} (${fileType})`,
// For binary files, maybe returnDisplay should be empty or indicate binary?
// Keeping it empty for now.
returnDisplay: ``,
};
}
const content = fs.readFileSync(filePath, 'utf8');
const lines = content.split('\n');
const startLine = params.offset || 0;
const endLine = params.limit
? startLine + params.limit
: Math.min(startLine + ReadFileTool.DEFAULT_MAX_LINES, lines.length);
const selectedLines = lines.slice(startLine, endLine);
let truncated = false;
const formattedLines = selectedLines.map((line) => {
let processedLine = line;
if (line.length > ReadFileTool.MAX_LINE_LENGTH) {
processedLine =
line.substring(0, ReadFileTool.MAX_LINE_LENGTH) + '... [truncated]';
truncated = true;
}
return processedLine;
});
const contentTruncated = endLine < lines.length || truncated;
let llmContent = '';
if (contentTruncated) {
llmContent += `[File truncated: showing lines ${startLine + 1}-${endLine} of ${lines.length} total lines. Use offset parameter to view more.]\n`;
}
llmContent += formattedLines.join('\n');
// Here, returnDisplay could potentially be enhanced, but for now,
// it's kept empty as the LLM content itself is descriptive.
return {
llmContent,
returnDisplay: '',
};
} catch (error) {
const errorMsg = `Error reading file: ${error instanceof Error ? error.message : String(error)}`;
return {
llmContent: `Error reading file ${filePath}: ${errorMsg}`,
returnDisplay: `Failed to read file: ${errorMsg}`,
};
}
}
}

View File

@ -115,6 +115,33 @@ describe('ReadManyFilesTool', () => {
};
expect(tool.validateParams(params)).toBeNull();
});
// Each case feeds a deliberately malformed params object to validateParams
// and pins the exact user-facing validation message.
it('should return error if paths array contains an empty string', () => {
  const params = { paths: ['file1.txt', ''] };
  expect(tool.validateParams(params)).toBe(
    'Each item in "paths" must be a non-empty string/glob pattern.',
  );
});
it('should return error if include array contains non-string elements', () => {
  const params = {
    paths: ['file1.txt'],
    include: ['*.ts', 123] as string[], // cast smuggles an invalid element past the compiler
  };
  expect(tool.validateParams(params)).toBe(
    'If provided, "include" must be an array of strings/glob patterns.',
  );
});
it('should return error if exclude array contains non-string elements', () => {
  const params = {
    paths: ['file1.txt'],
    exclude: ['*.log', {}] as string[], // cast smuggles an invalid element past the compiler
  };
  expect(tool.validateParams(params)).toBe(
    'If provided, "exclude" must be an array of strings/glob patterns.',
  );
});
});
describe('execute', () => {

View File

@ -7,13 +7,16 @@
import { BaseTool, ToolResult } from './tools.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { getErrorMessage } from '../utils/errors.js';
import * as fs from 'fs/promises';
import * as path from 'path';
import fg from 'fast-glob';
import { GEMINI_MD_FILENAME } from './memoryTool.js';
import {
detectFileType,
processSingleFileContent,
DEFAULT_ENCODING,
} from '../utils/fileUtils.js';
import { PartListUnion } from '@google/genai';
import mime from 'mime-types';
/**
* Parameters for the ReadManyFilesTool.
*/
@ -98,8 +101,6 @@ const DEFAULT_EXCLUDES: string[] = [
`**/${GEMINI_MD_FILENAME}`,
];
// Default values for encoding and separator format
const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
const DEFAULT_OUTPUT_SEPARATOR_FORMAT = '--- {filePath} ---';
/**
@ -256,11 +257,10 @@ Use this tool when the user's query implies needing the content of several files
} = params;
const toolBaseDir = this.targetDir;
const filesToConsider = new Set<string>();
const skippedFiles: Array<{ path: string; reason: string }> = [];
const processedFilesRelativePaths: string[] = [];
const content: PartListUnion = [];
const contentParts: PartListUnion = [];
const effectiveExcludes = useDefaultExcludes
? [...DEFAULT_EXCLUDES, ...exclude]
@ -315,17 +315,12 @@ Use this tool when the user's query implies needing the content of several files
const relativePathForDisplay = path
.relative(toolBaseDir, filePath)
.replace(/\\/g, '/');
try {
const mimeType = mime.lookup(filePath);
if (
mimeType &&
(mimeType.startsWith('image/') || mimeType === 'application/pdf')
) {
const fileExtension = path.extname(filePath);
const fileNameWithoutExtension = path.basename(
filePath,
fileExtension,
);
const fileType = detectFileType(filePath);
if (fileType === 'image' || fileType === 'pdf') {
const fileExtension = path.extname(filePath).toLowerCase();
const fileNameWithoutExtension = path.basename(filePath, fileExtension);
const requestedExplicitly = inputPatterns.some(
(pattern: string) =>
pattern.toLowerCase().includes(fileExtension) ||
@ -340,44 +335,30 @@ Use this tool when the user's query implies needing the content of several files
});
continue;
}
const contentBuffer = await fs.readFile(filePath);
const base64Data = contentBuffer.toString('base64');
content.push({
inlineData: {
data: base64Data,
mimeType,
},
});
processedFilesRelativePaths.push(relativePathForDisplay);
} else {
const contentBuffer = await fs.readFile(filePath);
// Basic binary detection: check for null bytes in the first 1KB
const sample = contentBuffer.subarray(
0,
Math.min(contentBuffer.length, 1024),
}
// Use processSingleFileContent for all file types now
const fileReadResult = await processSingleFileContent(
filePath,
toolBaseDir,
);
if (sample.includes(0)) {
if (fileReadResult.error) {
skippedFiles.push({
path: relativePathForDisplay,
reason: 'appears to be binary',
reason: `Read error: ${fileReadResult.error}`,
});
continue;
}
// Using default encoding
const fileContent = contentBuffer.toString(DEFAULT_ENCODING);
// Using default separator format
} else {
if (typeof fileReadResult.llmContent === 'string') {
const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
'{filePath}',
relativePathForDisplay,
);
content.push(`${separator}\n\n${fileContent}\n\n`);
processedFilesRelativePaths.push(relativePathForDisplay);
contentParts.push(`${separator}\n\n${fileReadResult.llmContent}\n\n`);
} else {
contentParts.push(fileReadResult.llmContent); // This is a Part for image/pdf
}
} catch (error) {
skippedFiles.push({
path: relativePathForDisplay,
reason: `Read error: ${getErrorMessage(error)}`,
});
processedFilesRelativePaths.push(relativePathForDisplay);
}
}
@ -422,13 +403,13 @@ Use this tool when the user's query implies needing the content of several files
displayMessage += `No files were read and concatenated based on the criteria.\n`;
}
if (content.length === 0) {
content.push(
if (contentParts.length === 0) {
contentParts.push(
'No files matching the criteria were found or all were skipped.',
);
}
return {
llmContent: content,
llmContent: contentParts,
returnDisplay: displayMessage.trim(),
};
}

View File

@ -0,0 +1,433 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import {
describe,
it,
expect,
vi,
beforeEach,
afterEach,
type Mock,
} from 'vitest';
import * as actualNodeFs from 'node:fs'; // For setup/teardown
import fsPromises from 'node:fs/promises';
import path from 'node:path';
import os from 'node:os';
import mime from 'mime-types';
import {
isWithinRoot,
isBinaryFile,
detectFileType,
processSingleFileContent,
} from './fileUtils.js';
// Replace mime-types with a controllable stub so each test can dictate what
// mime.lookup reports for a given path. Both the default and named export
// shapes are mocked to match however fileUtils imports it.
vi.mock('mime-types', () => ({
  default: { lookup: vi.fn() },
  lookup: vi.fn(),
}));
// Typed handle to the mocked lookup for per-test return-value control.
const mockMimeLookup = mime.lookup as Mock;
describe('fileUtils', () => {
  // A fresh temp directory is created per test; every fixture path below
  // lives inside it.
  let tempRootDir: string;
  const originalProcessCwd = process.cwd;
  let testTextFilePath: string;
  let testImageFilePath: string;
  let testPdfFilePath: string;
  let testBinaryFilePath: string;
  let nonExistentFilePath: string;
  let directoryPath: string;
  beforeEach(() => {
    vi.resetAllMocks(); // Reset all mocks, including mime.lookup
    tempRootDir = actualNodeFs.mkdtempSync(
      path.join(os.tmpdir(), 'fileUtils-test-'),
    );
    process.cwd = vi.fn(() => tempRootDir); // Mock cwd if necessary for relative path logic within tests
    testTextFilePath = path.join(tempRootDir, 'test.txt');
    testImageFilePath = path.join(tempRootDir, 'image.png');
    testPdfFilePath = path.join(tempRootDir, 'document.pdf');
    testBinaryFilePath = path.join(tempRootDir, 'app.exe');
    nonExistentFilePath = path.join(tempRootDir, 'notfound.txt');
    directoryPath = path.join(tempRootDir, 'subdir');
    actualNodeFs.mkdirSync(directoryPath, { recursive: true }); // Ensure subdir exists
  });
  afterEach(() => {
    // Tear down the temp tree and undo the process.cwd mock.
    if (actualNodeFs.existsSync(tempRootDir)) {
      actualNodeFs.rmSync(tempRootDir, { recursive: true, force: true });
    }
    process.cwd = originalProcessCwd;
    vi.restoreAllMocks(); // Restore any spies
  });
  describe('isWithinRoot', () => {
    // path.resolve keeps these assertions valid on both POSIX and Windows.
    const root = path.resolve('/project/root');
    it('should return true for paths directly within the root', () => {
      expect(isWithinRoot(path.join(root, 'file.txt'), root)).toBe(true);
      expect(isWithinRoot(path.join(root, 'subdir', 'file.txt'), root)).toBe(
        true,
      );
    });
    it('should return true for the root path itself', () => {
      expect(isWithinRoot(root, root)).toBe(true);
    });
    it('should return false for paths outside the root', () => {
      expect(
        isWithinRoot(path.resolve('/project/other', 'file.txt'), root),
      ).toBe(false);
      expect(isWithinRoot(path.resolve('/unrelated', 'file.txt'), root)).toBe(
        false,
      );
    });
    it('should return false for paths that only partially match the root prefix', () => {
      // Guards against naive startsWith matching: '/project/root-...' must
      // not be treated as inside '/project/root'.
      expect(
        isWithinRoot(
          path.resolve('/project/root-but-actually-different'),
          root,
        ),
      ).toBe(false);
    });
    it('should handle paths with trailing slashes correctly', () => {
      expect(isWithinRoot(path.join(root, 'file.txt') + path.sep, root)).toBe(
        true,
      );
      expect(isWithinRoot(root + path.sep, root)).toBe(true);
    });
    it('should handle different path separators (POSIX vs Windows)', () => {
      const posixRoot = '/project/root';
      const posixPathInside = '/project/root/file.txt';
      const posixPathOutside = '/project/other/file.txt';
      expect(isWithinRoot(posixPathInside, posixRoot)).toBe(true);
      expect(isWithinRoot(posixPathOutside, posixRoot)).toBe(false);
    });
    it('should return false for a root path that is a sub-path of the path to check', () => {
      // Containment is directional: sub is inside root, but not vice versa.
      const pathToCheck = path.resolve('/project/root/sub');
      const rootSub = path.resolve('/project/root');
      expect(isWithinRoot(pathToCheck, rootSub)).toBe(true);
      const pathToCheckSuper = path.resolve('/project/root');
      const rootSuper = path.resolve('/project/root/sub');
      expect(isWithinRoot(pathToCheckSuper, rootSuper)).toBe(false);
    });
  });
  describe('isBinaryFile', () => {
    // Scratch file recreated with different content by each case.
    let filePathForBinaryTest: string;
    beforeEach(() => {
      filePathForBinaryTest = path.join(tempRootDir, 'binaryCheck.tmp');
    });
    afterEach(() => {
      if (actualNodeFs.existsSync(filePathForBinaryTest)) {
        actualNodeFs.unlinkSync(filePathForBinaryTest);
      }
    });
    it('should return false for an empty file', () => {
      actualNodeFs.writeFileSync(filePathForBinaryTest, '');
      expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
    });
    it('should return false for a typical text file', () => {
      actualNodeFs.writeFileSync(
        filePathForBinaryTest,
        'Hello, world!\nThis is a test file with normal text content.',
      );
      expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
    });
    it('should return true for a file with many null bytes', () => {
      // A single NUL byte is a decisive binary indicator in the heuristic.
      const binaryContent = Buffer.from([
        0x48, 0x65, 0x00, 0x6c, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00,
      ]); // "He\0llo\0\0\0\0\0"
      actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
      expect(isBinaryFile(filePathForBinaryTest)).toBe(true);
    });
    it('should return true for a file with high percentage of non-printable ASCII', () => {
      // 6 of 10 bytes are control characters, above the 30% threshold.
      const binaryContent = Buffer.from([
        0x41, 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x43, 0x44, 0x06,
      ]); // AB\x01\x02\x03\x04\x05CD\x06
      actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
      expect(isBinaryFile(filePathForBinaryTest)).toBe(true);
    });
    it('should return false if file access fails (e.g., ENOENT)', () => {
      // Ensure the file does not exist
      if (actualNodeFs.existsSync(filePathForBinaryTest)) {
        actualNodeFs.unlinkSync(filePathForBinaryTest);
      }
      expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
    });
  });
  describe('detectFileType', () => {
    // Scratch file used for the content-sniff fallback cases; extension-only
    // cases pass synthetic paths that never touch disk.
    let filePathForDetectTest: string;
    beforeEach(() => {
      filePathForDetectTest = path.join(tempRootDir, 'detectType.tmp');
      // Default: create as a text file for isBinaryFile fallback
      actualNodeFs.writeFileSync(filePathForDetectTest, 'Plain text content');
    });
    afterEach(() => {
      if (actualNodeFs.existsSync(filePathForDetectTest)) {
        actualNodeFs.unlinkSync(filePathForDetectTest);
      }
      vi.restoreAllMocks(); // Restore spies on actualNodeFs
    });
    it('should detect image type by extension (png)', () => {
      mockMimeLookup.mockReturnValueOnce('image/png');
      expect(detectFileType('file.png')).toBe('image');
    });
    it('should detect image type by extension (jpeg)', () => {
      mockMimeLookup.mockReturnValueOnce('image/jpeg');
      expect(detectFileType('file.jpg')).toBe('image');
    });
    it('should detect pdf type by extension', () => {
      mockMimeLookup.mockReturnValueOnce('application/pdf');
      expect(detectFileType('file.pdf')).toBe('pdf');
    });
    it('should detect known binary extensions as binary (e.g. .zip)', () => {
      mockMimeLookup.mockReturnValueOnce('application/zip');
      expect(detectFileType('archive.zip')).toBe('binary');
    });
    it('should detect known binary extensions as binary (e.g. .exe)', () => {
      mockMimeLookup.mockReturnValueOnce('application/octet-stream'); // Common for .exe
      expect(detectFileType('app.exe')).toBe('binary');
    });
    it('should use isBinaryFile for unknown extensions and detect as binary', () => {
      mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
      // Create a file that isBinaryFile will identify as binary
      const binaryContent = Buffer.from([
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
      ]);
      actualNodeFs.writeFileSync(filePathForDetectTest, binaryContent);
      expect(detectFileType(filePathForDetectTest)).toBe('binary');
    });
    it('should default to text if mime type is unknown and content is not binary', () => {
      mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
      // filePathForDetectTest is already a text file by default from beforeEach
      expect(detectFileType(filePathForDetectTest)).toBe('text');
    });
  });
  describe('processSingleFileContent', () => {
    beforeEach(() => {
      // Start each case with none of the fixture files present so each test
      // creates exactly the files it needs (statSync runs against the real
      // filesystem even when fs.promises.readFile is mocked later).
      if (actualNodeFs.existsSync(testTextFilePath))
        actualNodeFs.unlinkSync(testTextFilePath);
      if (actualNodeFs.existsSync(testImageFilePath))
        actualNodeFs.unlinkSync(testImageFilePath);
      if (actualNodeFs.existsSync(testPdfFilePath))
        actualNodeFs.unlinkSync(testPdfFilePath);
      if (actualNodeFs.existsSync(testBinaryFilePath))
        actualNodeFs.unlinkSync(testBinaryFilePath);
    });
it('should read a text file successfully', async () => {
const content = 'Line 1\\nLine 2\\nLine 3';
actualNodeFs.writeFileSync(testTextFilePath, content);
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
);
expect(result.llmContent).toBe(content);
expect(result.returnDisplay).toContain('Read text file: test.txt');
expect(result.error).toBeUndefined();
});
    it('should handle file not found', async () => {
      const result = await processSingleFileContent(
        nonExistentFilePath,
        tempRootDir,
      );
      expect(result.error).toContain('File not found');
      expect(result.returnDisplay).toContain('File not found');
    });
    it('should handle read errors for text files', async () => {
      actualNodeFs.writeFileSync(testTextFilePath, 'content'); // File must exist for initial statSync
      const readError = new Error('Simulated read error');
      vi.spyOn(fsPromises, 'readFile').mockRejectedValueOnce(readError);
      const result = await processSingleFileContent(
        testTextFilePath,
        tempRootDir,
      );
      expect(result.error).toContain('Simulated read error');
      expect(result.returnDisplay).toContain('Simulated read error');
    });
    it('should handle read errors for image/pdf files', async () => {
      actualNodeFs.writeFileSync(testImageFilePath, 'content'); // File must exist
      mockMimeLookup.mockReturnValue('image/png');
      const readError = new Error('Simulated image read error');
      vi.spyOn(fsPromises, 'readFile').mockRejectedValueOnce(readError);
      const result = await processSingleFileContent(
        testImageFilePath,
        tempRootDir,
      );
      expect(result.error).toContain('Simulated image read error');
      expect(result.returnDisplay).toContain('Simulated image read error');
    });
    it('should process an image file', async () => {
      // Images come back as an inlineData Part (base64 + mime type), not text.
      const fakePngData = Buffer.from('fake png data');
      actualNodeFs.writeFileSync(testImageFilePath, fakePngData);
      mockMimeLookup.mockReturnValue('image/png');
      const result = await processSingleFileContent(
        testImageFilePath,
        tempRootDir,
      );
      expect(
        (result.llmContent as { inlineData: unknown }).inlineData,
      ).toBeDefined();
      expect(
        (result.llmContent as { inlineData: { mimeType: string } }).inlineData
          .mimeType,
      ).toBe('image/png');
      expect(
        (result.llmContent as { inlineData: { data: string } }).inlineData.data,
      ).toBe(fakePngData.toString('base64'));
      expect(result.returnDisplay).toContain('Read image file: image.png');
    });
    it('should process a PDF file', async () => {
      // PDFs follow the same inlineData path as images.
      const fakePdfData = Buffer.from('fake pdf data');
      actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
      mockMimeLookup.mockReturnValue('application/pdf');
      const result = await processSingleFileContent(
        testPdfFilePath,
        tempRootDir,
      );
      expect(
        (result.llmContent as { inlineData: unknown }).inlineData,
      ).toBeDefined();
      expect(
        (result.llmContent as { inlineData: { mimeType: string } }).inlineData
          .mimeType,
      ).toBe('application/pdf');
      expect(
        (result.llmContent as { inlineData: { data: string } }).inlineData.data,
      ).toBe(fakePdfData.toString('base64'));
      expect(result.returnDisplay).toContain('Read pdf file: document.pdf');
    });
    it('should skip binary files', async () => {
      actualNodeFs.writeFileSync(
        testBinaryFilePath,
        Buffer.from([0x00, 0x01, 0x02]),
      );
      mockMimeLookup.mockReturnValueOnce('application/octet-stream');
      // isBinaryFile will operate on the real file.
      const result = await processSingleFileContent(
        testBinaryFilePath,
        tempRootDir,
      );
      expect(result.llmContent).toContain(
        'Cannot display content of binary file',
      );
      expect(result.returnDisplay).toContain('Skipped binary file: app.exe');
    });
    it('should handle path being a directory', async () => {
      const result = await processSingleFileContent(directoryPath, tempRootDir);
      expect(result.error).toContain('Path is a directory');
      expect(result.returnDisplay).toContain('Path is a directory');
    });
    it('should paginate text files correctly (offset and limit)', async () => {
      // offset/limit are 0-based internally but reported 1-based in the
      // truncation banner and linesShown.
      const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`);
      actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));
      const result = await processSingleFileContent(
        testTextFilePath,
        tempRootDir,
        5,
        5,
      ); // Read lines 6-10
      const expectedContent = lines.slice(5, 10).join('\n');
      expect(result.llmContent).toContain(expectedContent);
      expect(result.llmContent).toContain(
        '[File content truncated: showing lines 6-10 of 20 total lines. Use offset/limit parameters to view more.]',
      );
      expect(result.returnDisplay).toContain(
        'Read text file: test.txt (truncated)',
      );
      expect(result.isTruncated).toBe(true);
      expect(result.originalLineCount).toBe(20);
      expect(result.linesShown).toEqual([6, 10]);
    });
    it('should handle limit exceeding file length', async () => {
      const lines = ['Line 1', 'Line 2'];
      actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));
      const result = await processSingleFileContent(
        testTextFilePath,
        tempRootDir,
        0,
        10,
      );
      const expectedContent = lines.join('\n');
      expect(result.llmContent).toBe(expectedContent);
      expect(result.returnDisplay).toContain('Read text file: test.txt');
      expect(result.isTruncated).toBe(false);
      expect(result.originalLineCount).toBe(2);
      expect(result.linesShown).toEqual([1, 2]);
    });
    it('should truncate long lines in text files', async () => {
      // 2500 chars exceeds MAX_LINE_LENGTH_TEXT_FILE (2000), so only that
      // line is cut; surrounding lines survive intact.
      const longLine = 'a'.repeat(2500);
      actualNodeFs.writeFileSync(
        testTextFilePath,
        `Short line\n${longLine}\nAnother short line`,
      );
      const result = await processSingleFileContent(
        testTextFilePath,
        tempRootDir,
      );
      expect(result.llmContent).toContain('Short line');
      expect(result.llmContent).toContain(
        longLine.substring(0, 2000) + '... [truncated]',
      );
      expect(result.llmContent).toContain('Another short line');
      expect(result.llmContent).toContain(
        '[File content partially truncated: some lines exceeded maximum length of 2000 characters.]',
      );
      expect(result.isTruncated).toBe(true);
    });
  });
});

View File

@ -0,0 +1,280 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'fs';
import path from 'path';
import { PartUnion } from '@google/genai';
import mime from 'mime-types';
// Constants for text file processing
const DEFAULT_MAX_LINES_TEXT_FILE = 2000; // Lines returned when the caller gives no limit.
const MAX_LINE_LENGTH_TEXT_FILE = 2000; // Longer lines are cut and suffixed '... [truncated]'.
// Default values for encoding and separator format
export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
/**
 * Checks if a path is within a given root directory.
 * @param pathToCheck The absolute path to check.
 * @param rootDirectory The absolute root directory.
 * @returns True if the path equals the root or lives underneath it.
 */
export function isWithinRoot(
  pathToCheck: string,
  rootDirectory: string,
): boolean {
  const candidate = path.normalize(pathToCheck);
  const root = path.normalize(rootDirectory);
  if (candidate === root) {
    return true;
  }
  // Compare against the root with a trailing separator (already present for
  // filesystem roots like '/' or 'C:\') so that a sibling such as
  // '/project/root-other' is not mistaken for a child of '/project/root'.
  const rootPrefix = root.endsWith(path.sep) ? root : root + path.sep;
  return candidate.startsWith(rootPrefix);
}
/**
 * Determines if a file is likely binary based on content sampling.
 *
 * Samples up to the first 4KB and applies two heuristics: any NUL byte marks
 * the file as binary, and so does a >30% ratio of non-printable characters
 * (control codes outside the TAB..CR range).
 * @param filePath Path to the file.
 * @returns True if the file appears to be binary.
 */
export function isBinaryFile(filePath: string): boolean {
  let fd: number | undefined;
  try {
    fd = fs.openSync(filePath, 'r');
    // Read up to 4KB or file size, whichever is smaller
    const fileSize = fs.fstatSync(fd).size;
    if (fileSize === 0) {
      // Empty file is not considered binary for content checking
      return false;
    }
    const bufferSize = Math.min(4096, fileSize);
    const buffer = Buffer.alloc(bufferSize);
    const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
    if (bytesRead === 0) return false;
    let nonPrintableCount = 0;
    for (let i = 0; i < bytesRead; i++) {
      if (buffer[i] === 0) return true; // Null byte is a strong indicator
      if (buffer[i] < 9 || (buffer[i] > 13 && buffer[i] < 32)) {
        nonPrintableCount++;
      }
    }
    // If >30% non-printable characters, consider it binary
    return nonPrintableCount / bytesRead > 0.3;
  } catch {
    // If any error occurs (e.g. file not found, permissions),
    // treat as not binary here; let higher-level functions handle existence/access errors.
    return false;
  } finally {
    // Always release the descriptor — the previous version leaked it when
    // fstatSync/readSync threw, since the catch returned without closing.
    if (fd !== undefined) {
      try {
        fs.closeSync(fd);
      } catch {
        // Ignore close failures; nothing actionable at this level.
      }
    }
  }
}
/**
 * Detects the type of file based on extension and content.
 * @param filePath Path to the file.
 * @returns 'text', 'image', 'pdf', or 'binary'.
 */
export function detectFileType(
  filePath: string,
): 'text' | 'image' | 'pdf' | 'binary' {
  // mime.lookup yields the mime type string, or false when unknown.
  const mimeType = mime.lookup(filePath);
  if (mimeType) {
    if (mimeType.startsWith('image/')) {
      return 'image';
    }
    if (mimeType === 'application/pdf') {
      return 'pdf';
    }
  }
  // Known non-text extensions that mime-types may miss or misidentify;
  // checked before the (more expensive) content-based sniff.
  const knownBinaryExtensions = new Set([
    '.zip',
    '.tar',
    '.gz',
    '.exe',
    '.dll',
    '.so',
    '.class',
    '.jar',
    '.war',
    '.7z',
    '.doc',
    '.docx',
    '.xls',
    '.xlsx',
    '.ppt',
    '.pptx',
    '.odt',
    '.ods',
    '.odp',
    '.bin',
    '.dat',
    '.obj',
    '.o',
    '.a',
    '.lib',
    '.wasm',
    '.pyc',
    '.pyo',
  ]);
  if (knownBinaryExtensions.has(path.extname(filePath).toLowerCase())) {
    return 'binary';
  }
  // Extension was inconclusive: sample the file content as a last resort.
  return isBinaryFile(filePath) ? 'binary' : 'text';
}
/**
 * Result of reading one file via processSingleFileContent. Exactly one of
 * the success shapes applies: string llmContent for text, an inlineData
 * Part for image/pdf, or a placeholder string for skipped binaries.
 */
export interface ProcessedFileReadResult {
  llmContent: PartUnion; // string for text, Part for image/pdf/unreadable binary
  returnDisplay: string; // Short, user-facing summary of the read (or error).
  error?: string; // Optional error message for the LLM if file processing failed
  isTruncated?: boolean; // For text files, indicates if content was truncated
  originalLineCount?: number; // For text files
  linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display)
}
/**
 * Reads and processes a single file, handling text, images, and PDFs.
 *
 * Failures (missing file, directory path, read errors) are reported via the
 * `error` field rather than thrown, so callers can surface them uniformly.
 * @param filePath Absolute path to the file.
 * @param rootDirectory Absolute path to the project root for relative path display.
 * @param offset Optional offset for text files (0-based line number).
 * @param limit Optional limit for text files (number of lines to read).
 * @returns ProcessedFileReadResult object.
 */
export async function processSingleFileContent(
  filePath: string,
  rootDirectory: string,
  offset?: number,
  limit?: number,
): Promise<ProcessedFileReadResult> {
  try {
    if (!fs.existsSync(filePath)) {
      // Sync check is acceptable before async read
      return {
        llmContent: '',
        returnDisplay: 'File not found.',
        error: `File not found: ${filePath}`,
      };
    }
    const stats = fs.statSync(filePath); // Sync check
    if (stats.isDirectory()) {
      return {
        llmContent: '',
        returnDisplay: 'Path is a directory.',
        error: `Path is a directory, not a file: ${filePath}`,
      };
    }
    const fileType = detectFileType(filePath);
    // Display paths always use forward slashes, regardless of platform.
    const relativePathForDisplay = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');
    switch (fileType) {
      case 'binary': {
        // Binary content is never inlined; the LLM just gets a notice.
        return {
          llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`,
          returnDisplay: `Skipped binary file: ${relativePathForDisplay}`,
        };
      }
      case 'text': {
        const content = await fs.promises.readFile(filePath, 'utf8');
        const lines = content.split('\n');
        const originalLineCount = lines.length;
        // offset/limit window is 0-based; defaults cover the first
        // DEFAULT_MAX_LINES_TEXT_FILE lines.
        const startLine = offset || 0;
        const effectiveLimit =
          limit === undefined ? DEFAULT_MAX_LINES_TEXT_FILE : limit;
        // Ensure endLine does not exceed originalLineCount
        const endLine = Math.min(startLine + effectiveLimit, originalLineCount);
        // Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high
        const actualStartLine = Math.min(startLine, originalLineCount);
        const selectedLines = lines.slice(actualStartLine, endLine);
        // Cap per-line length independently of the line-range window.
        let linesWereTruncatedInLength = false;
        const formattedLines = selectedLines.map((line) => {
          if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
            linesWereTruncatedInLength = true;
            return (
              line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
            );
          }
          return line;
        });
        const contentRangeTruncated = endLine < originalLineCount;
        const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;
        // Prepend at most one banner; range truncation takes priority over
        // line-length truncation.
        let llmTextContent = '';
        if (contentRangeTruncated) {
          llmTextContent += `[File content truncated: showing lines ${actualStartLine + 1}-${endLine} of ${originalLineCount} total lines. Use offset/limit parameters to view more.]\n`;
        } else if (linesWereTruncatedInLength) {
          llmTextContent += `[File content partially truncated: some lines exceeded maximum length of ${MAX_LINE_LENGTH_TEXT_FILE} characters.]\n`;
        }
        llmTextContent += formattedLines.join('\n');
        return {
          llmContent: llmTextContent,
          returnDisplay: `Read text file: ${relativePathForDisplay}${isTruncated ? ' (truncated)' : ''}`,
          isTruncated,
          originalLineCount,
          linesShown: [actualStartLine + 1, endLine],
        };
      }
      case 'image':
      case 'pdf': {
        // Images and PDFs are sent to the model as base64 inlineData Parts.
        const contentBuffer = await fs.promises.readFile(filePath);
        const base64Data = contentBuffer.toString('base64');
        return {
          llmContent: {
            inlineData: {
              data: base64Data,
              mimeType: mime.lookup(filePath) || 'application/octet-stream',
            },
          },
          returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
        };
      }
      default: {
        // Should not happen with current detectFileType logic
        const exhaustiveCheck: never = fileType;
        return {
          llmContent: `Unhandled file type: ${exhaustiveCheck}`,
          returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`,
          error: `Unhandled file type for ${filePath}`,
        };
      }
    }
  } catch (error) {
    // Normalize any thrown value into the error-result shape.
    const errorMessage = error instanceof Error ? error.message : String(error);
    const displayPath = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');
    return {
      llmContent: `Error reading file ${displayPath}: ${errorMessage}`,
      returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`,
      error: `Error reading file ${filePath}: ${errorMessage}`,
    };
  }
}