Refactor read-file and support images. (#480)
parent f21abdd1f0
commit dab7517622
.vscode/launch.json

@@ -83,6 +83,66 @@
      "internalConsoleOptions": "neverOpen",
      "skipFiles": ["<node_internals>/**"]
    },
    {
      "type": "node",
      "request": "launch",
      "name": "Debug Server Test: read-file",
      "runtimeExecutable": "npm",
      "runtimeArgs": [
        "run",
        "test",
        "-w",
        "packages/server",
        "--",
        "--inspect-brk=9229",
        "--no-file-parallelism",
        "${workspaceFolder}/packages/server/src/tools/read-file.test.ts"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "internalConsoleOptions": "neverOpen",
      "skipFiles": ["<node_internals>/**"]
    },
    {
      "type": "node",
      "request": "launch",
      "name": "Debug Server Test: turn",
      "runtimeExecutable": "npm",
      "runtimeArgs": [
        "run",
        "test",
        "-w",
        "packages/server",
        "--",
        "--inspect-brk=9229",
        "--no-file-parallelism",
        "${workspaceFolder}/packages/server/src/core/turn.test.ts"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "internalConsoleOptions": "neverOpen",
      "skipFiles": ["<node_internals>/**"]
    },
    {
      "type": "node",
      "request": "launch",
      "name": "Debug Server Test: fileUtils",
      "runtimeExecutable": "npm",
      "runtimeArgs": [
        "run",
        "test",
        "-w",
        "packages/server",
        "--",
        "--inspect-brk=9229",
        "--no-file-parallelism",
        "${workspaceFolder}/packages/server/src/utils/fileUtils.test.ts"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "internalConsoleOptions": "neverOpen",
      "skipFiles": ["<node_internals>/**"]
    },
    {
      "type": "node",
      "request": "launch",

@@ -122,6 +182,26 @@
      "console": "integratedTerminal",
      "internalConsoleOptions": "neverOpen",
      "skipFiles": ["<node_internals>/**"]
    },
    {
      "type": "node",
      "request": "launch",
      "name": "Debug CLI Test: useGeminiStream",
      "runtimeExecutable": "npm",
      "runtimeArgs": [
        "run",
        "test",
        "-w",
        "packages/cli",
        "--",
        "--inspect-brk=9229",
        "--no-file-parallelism",
        "${workspaceFolder}/packages/cli/src/ui/hooks/useGeminiStream.test.tsx"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
      "internalConsoleOptions": "neverOpen",
      "skipFiles": ["<node_internals>/**"]
    }
  ]
}
docs: file system tools (read_file section)

@@ -25,17 +25,20 @@ All file system tools operate within a `rootDirectory` (usually the current working directory).
  - **Tool Name:** `read_file`
  - **Display Name:** ReadFile
  - **File:** `read-file.ts`
- - **Description:** Reads and returns the content of a specified file. It can handle large files by allowing reading of specific line ranges and will attempt to detect and skip binary files.
+ - **Description:** Reads and returns the content of a specified file. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges. Other binary file types are generally skipped.
  - **Parameters:**
    - `path` (string, required): The absolute path to the file to read.
-   - `offset` (number, optional): The 0-based line number to start reading from. Requires `limit` to be set.
-   - `limit` (number, optional): The maximum number of lines to read. If omitted, reads a default maximum (e.g., 2000 lines).
+   - `offset` (number, optional): For text files, the 0-based line number to start reading from. Requires `limit` to be set.
+   - `limit` (number, optional): For text files, the maximum number of lines to read. If omitted, reads a default maximum (e.g., 2000 lines) or the entire file if feasible.
  - **Behavior:**
-   - Returns the content of the specified text file.
-   - If `offset` and `limit` are used, returns only that slice of lines.
-   - Indicates if the content was truncated due to line limits or line length limits.
-   - Attempts to identify binary files (images, executables) and returns a message indicating it's a binary file instead of its content.
- - **Output (`llmContent`):** The file content, potentially prefixed with a truncation message (e.g., `[File truncated: showing lines 1-100 of 500 total lines...]\nActual file content...`). For binary files: `Binary file: /path/to/image.png (image)`.
+   - For text files: Returns the content. If `offset` and `limit` are used, returns only that slice of lines. Indicates if content was truncated due to line limits or line length limits.
+   - For image and PDF files: Returns the file content as a base64 encoded data structure suitable for model consumption.
+   - For other binary files: Attempts to identify and skip them, returning a message indicating it's a generic binary file.
+ - **Output (`llmContent`):**
+   - For text files: The file content, potentially prefixed with a truncation message (e.g., `[File content truncated: showing lines 1-100 of 500 total lines...]\nActual file content...`).
+   - For image/PDF files: An object containing `inlineData` with `mimeType` and base64 `data` (e.g., `{ inlineData: { mimeType: 'image/png', data: 'base64encodedstring' } }`).
+   - For other binary files: A message like `Cannot display content of binary file: /path/to/data.bin`.
- - **Confirmation:** No.
+ - **Confirmation:** No.

## 3. `write_file` (WriteFile)
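Aside on the `read_file` output documented above: since `llmContent` now varies by file type, downstream code has to branch on its shape. A minimal sketch of such a branch, assuming only the shapes documented above (the `ReadFileResult` type and `describeLlmContent` helper are illustrative, not part of the codebase):

import { PartListUnion } from '@google/genai';

// Hypothetical wrapper mirroring the documented output shapes.
type ReadFileResult = { llmContent: PartListUnion };

function describeLlmContent({ llmContent }: ReadFileResult): string {
  if (typeof llmContent === 'string') {
    // Text file: plain string, possibly prefixed with a truncation notice.
    return `text (${llmContent.length} chars)`;
  }
  const parts = Array.isArray(llmContent) ? llmContent : [llmContent];
  return parts
    .map((part) =>
      typeof part === 'string'
        ? `text (${part.length} chars)`
        : part.inlineData
          ? `binary ${part.inlineData.mimeType}` // image/PDF as base64 inlineData
          : (part.text ?? 'unknown part'),
    )
    .join(', ');
}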
packages/cli/src/ui/hooks/useGeminiStream.test.tsx (new file)

@@ -0,0 +1,137 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect, vi } from 'vitest';
import { mergePartListUnions } from './useGeminiStream.js';
import { Part, PartListUnion } from '@google/genai';

// Mock useToolScheduler
vi.mock('./useToolScheduler', async () => {
  const actual = await vi.importActual('./useToolScheduler');
  return {
    ...actual, // We need mapToDisplay from actual
    useToolScheduler: vi.fn(),
  };
});

describe('mergePartListUnions', () => {
  it('should merge multiple PartListUnion arrays', () => {
    const list1: PartListUnion = [{ text: 'Hello' }];
    const list2: PartListUnion = [
      { inlineData: { mimeType: 'image/png', data: 'abc' } },
    ];
    const list3: PartListUnion = [{ text: 'World' }, { text: '!' }];
    const result = mergePartListUnions([list1, list2, list3]);
    expect(result).toEqual([
      { text: 'Hello' },
      { inlineData: { mimeType: 'image/png', data: 'abc' } },
      { text: 'World' },
      { text: '!' },
    ]);
  });

  it('should handle empty arrays in the input list', () => {
    const list1: PartListUnion = [{ text: 'First' }];
    const list2: PartListUnion = [];
    const list3: PartListUnion = [{ text: 'Last' }];
    const result = mergePartListUnions([list1, list2, list3]);
    expect(result).toEqual([{ text: 'First' }, { text: 'Last' }]);
  });

  it('should handle a single PartListUnion array', () => {
    const list1: PartListUnion = [
      { text: 'One' },
      { inlineData: { mimeType: 'image/jpeg', data: 'xyz' } },
    ];
    const result = mergePartListUnions([list1]);
    expect(result).toEqual(list1);
  });

  it('should return an empty array if all input arrays are empty', () => {
    const list1: PartListUnion = [];
    const list2: PartListUnion = [];
    const result = mergePartListUnions([list1, list2]);
    expect(result).toEqual([]);
  });

  it('should handle input list being empty', () => {
    const result = mergePartListUnions([]);
    expect(result).toEqual([]);
  });

  it('should correctly merge when PartListUnion items are single Parts not in arrays', () => {
    const part1: Part = { text: 'Single part 1' };
    const part2: Part = { inlineData: { mimeType: 'image/gif', data: 'gif' } };
    const listContainingSingleParts: PartListUnion[] = [
      part1,
      [part2],
      { text: 'Another single part' },
    ];
    const result = mergePartListUnions(listContainingSingleParts);
    expect(result).toEqual([
      { text: 'Single part 1' },
      { inlineData: { mimeType: 'image/gif', data: 'gif' } },
      { text: 'Another single part' },
    ]);
  });

  it('should handle a mix of arrays and single parts, including empty arrays and undefined/null parts if they were possible (though PartListUnion typing restricts this)', () => {
    const list1: PartListUnion = [{ text: 'A' }];
    const list2: PartListUnion = [];
    const part3: Part = { text: 'B' };
    const list4: PartListUnion = [
      { text: 'C' },
      { inlineData: { mimeType: 'text/plain', data: 'D' } },
    ];
    const result = mergePartListUnions([list1, list2, part3, list4]);
    expect(result).toEqual([
      { text: 'A' },
      { text: 'B' },
      { text: 'C' },
      { inlineData: { mimeType: 'text/plain', data: 'D' } },
    ]);
  });

  it('should preserve the order of parts from the input arrays', () => {
    const listA: PartListUnion = [{ text: '1' }, { text: '2' }];
    const listB: PartListUnion = [{ text: '3' }];
    const listC: PartListUnion = [{ text: '4' }, { text: '5' }];
    const result = mergePartListUnions([listA, listB, listC]);
    expect(result).toEqual([
      { text: '1' },
      { text: '2' },
      { text: '3' },
      { text: '4' },
      { text: '5' },
    ]);
  });

  it('should handle cases where some PartListUnion items are single Parts and others are arrays of Parts', () => {
    const singlePart1: Part = { text: 'First single' };
    const arrayPart1: Part[] = [
      { text: 'Array item 1' },
      { text: 'Array item 2' },
    ];
    const singlePart2: Part = {
      inlineData: { mimeType: 'application/json', data: 'e30=' },
    }; // {}
    const arrayPart2: Part[] = [{ text: 'Last array item' }];

    const result = mergePartListUnions([
      singlePart1,
      arrayPart1,
      singlePart2,
      arrayPart2,
    ]);
    expect(result).toEqual([
      { text: 'First single' },
      { text: 'Array item 1' },
      { text: 'Array item 2' },
      { inlineData: { mimeType: 'application/json', data: 'e30=' } },
      { text: 'Last array item' },
    ]);
  });
});
packages/cli/src/ui/hooks/useGeminiStream.ts

@@ -41,6 +41,18 @@ import { useLogger } from './useLogger.js';
import { useToolScheduler, mapToDisplay } from './useToolScheduler.js';
import { GeminiChat } from '@gemini-code/server/src/core/geminiChat.js';

export function mergePartListUnions(list: PartListUnion[]): PartListUnion {
  const resultParts: PartListUnion = [];
  for (const item of list) {
    if (Array.isArray(item)) {
      resultParts.push(...item);
    } else {
      resultParts.push(item);
    }
  }
  return resultParts;
}

enum StreamProcessingStatus {
  Completed,
  UserCancelled,

@@ -74,16 +86,16 @@ export const useGeminiStream = (
    (tools) => {
      if (tools.length) {
        addItem(mapToDisplay(tools), Date.now());
-       submitQuery(
-         tools
+       const toolResponses = tools
          .filter(
            (t) =>
              t.status === 'error' ||
              t.status === 'cancelled' ||
              t.status === 'success',
          )
-         .map((t) => t.response.responsePart),
-       );
+         .map((t) => t.response.responseParts);
+
+       submitQuery(mergePartListUnions(toolResponses));
      }
    },
    config,

@@ -313,7 +325,7 @@ export const useGeminiStream = (
      };
      const responseInfo: ToolCallResponseInfo = {
        callId: request.callId,
-       responsePart: functionResponse,
+       responseParts: functionResponse,
        resultDisplay,
        error: new Error(declineMessage),
      };
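Aside on the change above: each completed tool call now yields a `PartListUnion` (a single `Part` or a list of parts) instead of a single `Part`, so the responses are flattened with `mergePartListUnions` before being resubmitted. A minimal sketch of the shape involved, with illustrative values:

import { PartListUnion } from '@google/genai';
import { mergePartListUnions } from './useGeminiStream.js';

// Two completed tool calls: a plain text result, and an image result whose
// binary Part trails its functionResponse Part (values are illustrative).
const toolResponses: PartListUnion[] = [
  [{ functionResponse: { name: 'read_file', id: '1', response: { output: 'hi' } } }],
  [
    {
      functionResponse: {
        name: 'read_file',
        id: '2',
        response: { status: 'Binary content of type image/png was processed.' },
      },
    },
    { inlineData: { mimeType: 'image/png', data: 'iVBORw0KGgo=' } },
  ],
];
// One flat Part list, ready to hand to submitQuery().
console.log(mergePartListUnions(toolResponses));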
packages/cli/src/ui/hooks/useToolScheduler.test.ts (new file)

@@ -0,0 +1,126 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect } from 'vitest';
import { formatLlmContentForFunctionResponse } from './useToolScheduler.js';
import { Part, PartListUnion } from '@google/genai';

describe('formatLlmContentForFunctionResponse', () => {
  it('should handle simple string llmContent', () => {
    const llmContent = 'Simple text output';
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    expect(functionResponseJson).toEqual({ output: 'Simple text output' });
    expect(additionalParts).toEqual([]);
  });

  it('should handle llmContent as a single Part with text', () => {
    const llmContent: Part = { text: 'Text from Part object' };
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    expect(functionResponseJson).toEqual({ output: 'Text from Part object' });
    expect(additionalParts).toEqual([]);
  });

  it('should handle llmContent as a PartListUnion array with a single text Part', () => {
    const llmContent: PartListUnion = [{ text: 'Text from array' }];
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    expect(functionResponseJson).toEqual({ output: 'Text from array' });
    expect(additionalParts).toEqual([]);
  });

  it('should handle llmContent with inlineData', () => {
    const llmContent: Part = {
      inlineData: { mimeType: 'image/png', data: 'base64...' },
    };
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    expect(functionResponseJson).toEqual({
      status: 'Binary content of type image/png was processed.',
    });
    expect(additionalParts).toEqual([llmContent]);
  });

  it('should handle llmContent with fileData', () => {
    const llmContent: Part = {
      fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' },
    };
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    expect(functionResponseJson).toEqual({
      status: 'Binary content of type application/pdf was processed.',
    });
    expect(additionalParts).toEqual([llmContent]);
  });

  it('should handle llmContent as an array of multiple Parts (text and inlineData)', () => {
    const llmContent: PartListUnion = [
      { text: 'Some textual description' },
      { inlineData: { mimeType: 'image/jpeg', data: 'base64data...' } },
      { text: 'Another text part' },
    ];
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    expect(functionResponseJson).toEqual({
      status: 'Tool execution succeeded.',
    });
    expect(additionalParts).toEqual(llmContent);
  });

  it('should handle llmContent as an array with a single inlineData Part', () => {
    const llmContent: PartListUnion = [
      { inlineData: { mimeType: 'image/gif', data: 'gifdata...' } },
    ];
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    // When the array is a single Part and that part is inlineData
    expect(functionResponseJson).toEqual({
      status: 'Binary content of type image/gif was processed.',
    });
    expect(additionalParts).toEqual(llmContent);
  });

  it('should handle llmContent as a generic Part (not text, inlineData, or fileData)', () => {
    // This case might represent a malformed or unexpected Part type.
    // For example, a Part that is just an empty object or has other properties.
    const llmContent: Part = { functionCall: { name: 'test', args: {} } }; // Example of a non-standard part for this context
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    expect(functionResponseJson).toEqual({
      status: 'Tool execution succeeded.',
    });
    expect(additionalParts).toEqual([llmContent]);
  });

  it('should handle empty string llmContent', () => {
    const llmContent = '';
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    expect(functionResponseJson).toEqual({ output: '' });
    expect(additionalParts).toEqual([]);
  });

  it('should handle llmContent as an empty array', () => {
    const llmContent: PartListUnion = [];
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    expect(functionResponseJson).toEqual({
      status: 'Tool execution succeeded.',
    });
    expect(additionalParts).toEqual([]);
  });

  it('should handle llmContent as a Part with undefined inlineData/fileData/text', () => {
    const llmContent: Part = {}; // An empty part object
    const { functionResponseJson, additionalParts } =
      formatLlmContentForFunctionResponse(llmContent);
    expect(functionResponseJson).toEqual({
      status: 'Tool execution succeeded.',
    });
    expect(additionalParts).toEqual([llmContent]);
  });
});
packages/cli/src/ui/hooks/useToolScheduler.ts

@@ -13,7 +13,7 @@ import {
  ToolCallConfirmationDetails,
  ToolResult,
} from '@gemini-code/server';
- import { Part } from '@google/genai';
+ import { Part, PartUnion, PartListUnion } from '@google/genai';
import { useCallback, useEffect, useState } from 'react';
import {
  HistoryItemToolGroup,

@@ -88,6 +88,60 @@ export type CompletedToolCall =
  | CancelledToolCall
  | ErroredToolCall;

/**
 * Formats a PartListUnion response from a tool into JSON suitable for a Gemini
 * FunctionResponse and additional Parts to include after that response.
 *
 * This is required because FunctionResponse appears to only support JSON
 * and not arbitrary parts. Including parts like inlineData or fileData
 * directly in a FunctionResponse confuses the model, resulting in a failure
 * to interpret the multimodal content and in context-window-exceeded errors.
 */

export function formatLlmContentForFunctionResponse(
  llmContent: PartListUnion,
): {
  functionResponseJson: Record<string, string>;
  additionalParts: PartUnion[];
} {
  const additionalParts: PartUnion[] = [];
  let functionResponseJson: Record<string, string>;

  if (Array.isArray(llmContent) && llmContent.length === 1) {
    // Ensure that length 1 arrays are treated as a single Part.
    llmContent = llmContent[0];
  }

  if (typeof llmContent === 'string') {
    functionResponseJson = { output: llmContent };
  } else if (Array.isArray(llmContent)) {
    functionResponseJson = { status: 'Tool execution succeeded.' };
    additionalParts.push(...llmContent);
  } else {
    if (
      llmContent.inlineData !== undefined ||
      llmContent.fileData !== undefined
    ) {
      // For Parts like inlineData or fileData, use the returnDisplay as the textual output for the functionResponse.
      // The actual Part will be added to additionalParts.
      functionResponseJson = {
        status: `Binary content of type ${llmContent.inlineData?.mimeType || llmContent.fileData?.mimeType || 'unknown'} was processed.`,
      };
      additionalParts.push(llmContent);
    } else if (llmContent.text !== undefined) {
      functionResponseJson = { output: llmContent.text };
    } else {
      functionResponseJson = { status: 'Tool execution succeeded.' };
      additionalParts.push(llmContent);
    }
  }

  return {
    functionResponseJson,
    additionalParts,
  };
}

export function useToolScheduler(
  onComplete: (tools: CompletedToolCall[]) => void,
  config: Config,

@@ -201,7 +255,7 @@ export function useToolScheduler(
          status: 'cancelled',
          response: {
            callId: c.request.callId,
-           responsePart: {
+           responseParts: {
              functionResponse: {
                id: c.request.callId,
                name: c.request.name,

@@ -276,21 +330,24 @@ export function useToolScheduler(
        .execute(t.request.args, signal, onOutputChunk)
        .then((result: ToolResult) => {
          if (signal.aborted) {
            // TODO(jacobr): avoid stringifying the LLM content.
            setToolCalls(
              setStatus(callId, 'cancelled', String(result.llmContent)),
            );
            return;
          }
          const { functionResponseJson, additionalParts } =
            formatLlmContentForFunctionResponse(result.llmContent);
          const functionResponse: Part = {
            functionResponse: {
              name: t.request.name,
              id: callId,
-             response: { output: result.llmContent },
+             response: functionResponseJson,
            },
          };
          const response: ToolCallResponseInfo = {
            callId,
-           responsePart: functionResponse,
+           responseParts: [functionResponse, ...additionalParts],
            resultDisplay: result.returnDisplay,
            error: undefined,
          };

@@ -401,7 +458,7 @@ function setStatus(
        status: 'cancelled',
        response: {
          callId: t.request.callId,
-         responsePart: {
+         responseParts: {
            functionResponse: {
              id: t.request.callId,
              name: t.request.name,

@@ -446,7 +503,7 @@ const toolErrorResponse = (
): ToolCallResponseInfo => ({
  callId: request.callId,
  error,
- responsePart: {
+ responseParts: {
    functionResponse: {
      id: request.callId,
      name: request.name,
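Aside on `formatLlmContentForFunctionResponse` above: for a binary tool result, the JSON summary goes inside the `functionResponse` Part and the raw binary Part follows it, as the comment explains. A minimal sketch of that assembly (the tool name and call id are illustrative):

import { Part, PartUnion } from '@google/genai';
import { formatLlmContentForFunctionResponse } from './useToolScheduler.js';

// An image result from a tool such as read_file (data shortened).
const llmContent: Part = {
  inlineData: { mimeType: 'image/png', data: 'iVBORw0KGgo=' },
};
const { functionResponseJson, additionalParts } =
  formatLlmContentForFunctionResponse(llmContent);

const responseParts: PartUnion[] = [
  {
    functionResponse: {
      name: 'read_file', // illustrative tool name
      id: 'call-1', // illustrative call id
      // JSON only, e.g. { status: 'Binary content of type image/png was processed.' }
      response: functionResponseJson,
    },
  },
  // The binary Part rides along after the FunctionResponse, not inside it.
  ...additionalParts,
];
console.log(responseParts);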
packages/server/src/core/turn.ts

@@ -5,7 +5,6 @@
 */

import {
- Part,
  PartListUnion,
  GenerateContentResponse,
  FunctionCall,

@@ -57,7 +56,7 @@ export interface ToolCallRequestInfo {

export interface ToolCallResponseInfo {
  callId: string;
- responsePart: Part;
+ responseParts: PartListUnion;
  resultDisplay: ToolResultDisplay | undefined;
  error: Error | undefined;
}
packages/server/src/tools/read-file.test.ts (new file)

@@ -0,0 +1,228 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { vi, describe, it, expect, beforeEach, afterEach, Mock } from 'vitest';
import { ReadFileTool, ReadFileToolParams } from './read-file.js';
import * as fileUtils from '../utils/fileUtils.js';
import path from 'path';
import os from 'os';
import fs from 'fs'; // For actual fs operations in setup

// Mock fileUtils.processSingleFileContent
vi.mock('../utils/fileUtils', async () => {
  const actualFileUtils =
    await vi.importActual<typeof fileUtils>('../utils/fileUtils');
  return {
    ...actualFileUtils, // Spread actual implementations
    processSingleFileContent: vi.fn(), // Mock specific function
  };
});

const mockProcessSingleFileContent = fileUtils.processSingleFileContent as Mock;

describe('ReadFileTool', () => {
  let tempRootDir: string;
  let tool: ReadFileTool;
  const abortSignal = new AbortController().signal;

  beforeEach(() => {
    // Create a unique temporary root directory for each test run
    tempRootDir = fs.mkdtempSync(
      path.join(os.tmpdir(), 'read-file-tool-root-'),
    );
    tool = new ReadFileTool(tempRootDir);
    mockProcessSingleFileContent.mockReset();
  });

  afterEach(() => {
    // Clean up the temporary root directory
    if (fs.existsSync(tempRootDir)) {
      fs.rmSync(tempRootDir, { recursive: true, force: true });
    }
  });

  describe('validateToolParams', () => {
    it('should return null for valid params (absolute path within root)', () => {
      const params: ReadFileToolParams = {
        path: path.join(tempRootDir, 'test.txt'),
      };
      expect(tool.validateToolParams(params)).toBeNull();
    });

    it('should return null for valid params with offset and limit', () => {
      const params: ReadFileToolParams = {
        path: path.join(tempRootDir, 'test.txt'),
        offset: 0,
        limit: 10,
      };
      expect(tool.validateToolParams(params)).toBeNull();
    });

    it('should return error for relative path', () => {
      const params: ReadFileToolParams = { path: 'test.txt' };
      expect(tool.validateToolParams(params)).toMatch(
        /File path must be absolute/,
      );
    });

    it('should return error for path outside root', () => {
      const outsidePath = path.resolve(os.tmpdir(), 'outside-root.txt');
      const params: ReadFileToolParams = { path: outsidePath };
      expect(tool.validateToolParams(params)).toMatch(
        /File path must be within the root directory/,
      );
    });

    it('should return error for negative offset', () => {
      const params: ReadFileToolParams = {
        path: path.join(tempRootDir, 'test.txt'),
        offset: -1,
        limit: 10,
      };
      expect(tool.validateToolParams(params)).toBe(
        'Offset must be a non-negative number',
      );
    });

    it('should return error for non-positive limit', () => {
      const paramsZero: ReadFileToolParams = {
        path: path.join(tempRootDir, 'test.txt'),
        offset: 0,
        limit: 0,
      };
      expect(tool.validateToolParams(paramsZero)).toBe(
        'Limit must be a positive number',
      );
      const paramsNegative: ReadFileToolParams = {
        path: path.join(tempRootDir, 'test.txt'),
        offset: 0,
        limit: -5,
      };
      expect(tool.validateToolParams(paramsNegative)).toBe(
        'Limit must be a positive number',
      );
    });

    it('should return error for schema validation failure (e.g. missing path)', () => {
      const params = { offset: 0 } as unknown as ReadFileToolParams;
      expect(tool.validateToolParams(params)).toBe(
        'Parameters failed schema validation.',
      );
    });
  });

  describe('getDescription', () => {
    it('should return a shortened, relative path', () => {
      const filePath = path.join(tempRootDir, 'sub', 'dir', 'file.txt');
      const params: ReadFileToolParams = { path: filePath };
      // Assuming tempRootDir is something like /tmp/read-file-tool-root-XXXXXX
      // The relative path would be sub/dir/file.txt
      expect(tool.getDescription(params)).toBe('sub/dir/file.txt');
    });

    it('should return . if path is the root directory', () => {
      const params: ReadFileToolParams = { path: tempRootDir };
      expect(tool.getDescription(params)).toBe('.');
    });
  });

  describe('execute', () => {
    it('should return validation error if params are invalid', async () => {
      const params: ReadFileToolParams = { path: 'relative/path.txt' };
      const result = await tool.execute(params, abortSignal);
      expect(result.llmContent).toMatch(/Error: Invalid parameters provided/);
      expect(result.returnDisplay).toMatch(/File path must be absolute/);
    });

    it('should return error from processSingleFileContent if it fails', async () => {
      const filePath = path.join(tempRootDir, 'error.txt');
      const params: ReadFileToolParams = { path: filePath };
      const errorMessage = 'Simulated read error';
      mockProcessSingleFileContent.mockResolvedValue({
        llmContent: `Error reading file ${filePath}: ${errorMessage}`,
        returnDisplay: `Error reading file ${filePath}: ${errorMessage}`,
        error: errorMessage,
      });

      const result = await tool.execute(params, abortSignal);
      expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
        filePath,
        tempRootDir,
        undefined,
        undefined,
      );
      expect(result.llmContent).toContain(errorMessage);
      expect(result.returnDisplay).toContain(errorMessage);
    });

    it('should return success result for a text file', async () => {
      const filePath = path.join(tempRootDir, 'textfile.txt');
      const fileContent = 'This is a test file.';
      const params: ReadFileToolParams = { path: filePath };
      mockProcessSingleFileContent.mockResolvedValue({
        llmContent: fileContent,
        returnDisplay: `Read text file: ${path.basename(filePath)}`,
      });

      const result = await tool.execute(params, abortSignal);
      expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
        filePath,
        tempRootDir,
        undefined,
        undefined,
      );
      expect(result.llmContent).toBe(fileContent);
      expect(result.returnDisplay).toBe(
        `Read text file: ${path.basename(filePath)}`,
      );
    });

    it('should return success result for an image file', async () => {
      const filePath = path.join(tempRootDir, 'image.png');
      const imageData = {
        inlineData: { mimeType: 'image/png', data: 'base64...' },
      };
      const params: ReadFileToolParams = { path: filePath };
      mockProcessSingleFileContent.mockResolvedValue({
        llmContent: imageData,
        returnDisplay: `Read image file: ${path.basename(filePath)}`,
      });

      const result = await tool.execute(params, abortSignal);
      expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
        filePath,
        tempRootDir,
        undefined,
        undefined,
      );
      expect(result.llmContent).toEqual(imageData);
      expect(result.returnDisplay).toBe(
        `Read image file: ${path.basename(filePath)}`,
      );
    });

    it('should pass offset and limit to processSingleFileContent', async () => {
      const filePath = path.join(tempRootDir, 'paginated.txt');
      const params: ReadFileToolParams = {
        path: filePath,
        offset: 10,
        limit: 5,
      };
      mockProcessSingleFileContent.mockResolvedValue({
        llmContent: 'some lines',
        returnDisplay: 'Read text file (paginated)',
      });

      await tool.execute(params, abortSignal);
      expect(mockProcessSingleFileContent).toHaveBeenCalledWith(
        filePath,
        tempRootDir,
        10,
        5,
      );
    });
  });
});
packages/server/src/tools/read-file.ts

@@ -4,11 +4,11 @@
 * SPDX-License-Identifier: Apache-2.0
 */

- import fs from 'fs';
import path from 'path';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { makeRelative, shortenPath } from '../utils/paths.js';
import { BaseTool, ToolResult } from './tools.js';
+ import { isWithinRoot, processSingleFileContent } from '../utils/fileUtils.js';

/**
 * Parameters for the ReadFile tool

@@ -35,14 +35,12 @@ export interface ReadFileToolParams {
 */
export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
  static readonly Name: string = 'read_file';
- private static readonly DEFAULT_MAX_LINES = 2000;
- private static readonly MAX_LINE_LENGTH = 2000;

  constructor(private rootDirectory: string) {
    super(
      ReadFileTool.Name,
      'ReadFile',
-     'Reads and returns the content of a specified file from the local filesystem. Handles large files by allowing reading specific line ranges.',
+     'Reads and returns the content of a specified file from the local filesystem. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges.',
      {
        properties: {
          path: {

@@ -52,12 +50,12 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
          },
          offset: {
            description:
-             "Optional: The 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
+             "Optional: For text files, the 0-based line number to start reading from. Requires 'limit' to be set. Use for paginating through large files.",
            type: 'number',
          },
          limit: {
            description:
-             "Optional: Maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible).",
+             "Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).",
            type: 'number',
          },
        },

@@ -68,28 +66,6 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
    this.rootDirectory = path.resolve(rootDirectory);
  }

- /**
-  * Checks if a path is within the root directory
-  * @param pathToCheck The path to check
-  * @returns True if the path is within the root directory, false otherwise
-  */
- private isWithinRoot(pathToCheck: string): boolean {
-   const normalizedPath = path.normalize(pathToCheck);
-   const normalizedRoot = path.normalize(this.rootDirectory);
-   const rootWithSep = normalizedRoot.endsWith(path.sep)
-     ? normalizedRoot
-     : normalizedRoot + path.sep;
-   return (
-     normalizedPath === normalizedRoot ||
-     normalizedPath.startsWith(rootWithSep)
-   );
- }
-
  /**
   * Validates the parameters for the ReadFile tool
   * @param params Parameters to validate
   * @returns True if parameters are valid, false otherwise
   */
  validateToolParams(params: ReadFileToolParams): string | null {
    if (
      this.schema.parameters &&

@@ -104,7 +80,7 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
    if (!path.isAbsolute(filePath)) {
      return `File path must be absolute: ${filePath}`;
    }
-   if (!this.isWithinRoot(filePath)) {
+   if (!isWithinRoot(filePath, this.rootDirectory)) {
      return `File path must be within the root directory (${this.rootDirectory}): ${filePath}`;
    }
    if (params.offset !== undefined && params.offset < 0) {

@@ -116,83 +92,11 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
    return null;
  }

- /**
-  * Determines if a file is likely binary based on content sampling
-  * @param filePath Path to the file
-  * @returns True if the file appears to be binary
-  */
- private isBinaryFile(filePath: string): boolean {
-   try {
-     // Read the first 4KB of the file
-     const fd = fs.openSync(filePath, 'r');
-     const buffer = Buffer.alloc(4096);
-     const bytesRead = fs.readSync(fd, buffer, 0, 4096, 0);
-     fs.closeSync(fd);
-
-     // Check for null bytes or high concentration of non-printable characters
-     let nonPrintableCount = 0;
-     for (let i = 0; i < bytesRead; i++) {
-       // Null byte is a strong indicator of binary data
-       if (buffer[i] === 0) {
-         return true;
-       }
-
-       // Count non-printable characters
-       if (buffer[i] < 9 || (buffer[i] > 13 && buffer[i] < 32)) {
-         nonPrintableCount++;
-       }
-     }
-
-     // If more than 30% are non-printable, likely binary
-     return nonPrintableCount / bytesRead > 0.3;
-   } catch {
-     return false;
-   }
- }
-
- /**
-  * Detects the type of file based on extension and content
-  * @param filePath Path to the file
-  * @returns File type description
-  */
- private detectFileType(filePath: string): string {
-   const ext = path.extname(filePath).toLowerCase();
-
-   // Common image formats
-   if (
-     ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg'].includes(ext)
-   ) {
-     return 'image';
-   }
-
-   // Other known binary formats
-   if (['.pdf', '.zip', '.tar', '.gz', '.exe', '.dll', '.so'].includes(ext)) {
-     return 'binary';
-   }
-
-   // Check content for binary indicators
-   if (this.isBinaryFile(filePath)) {
-     return 'binary';
-   }
-
-   return 'text';
- }
-
  /**
   * Gets a description of the file reading operation
   * @param params Parameters for the file reading
   * @returns A string describing the file being read
   */
  getDescription(params: ReadFileToolParams): string {
    const relativePath = makeRelative(params.path, this.rootDirectory);
    return shortenPath(relativePath);
  }

  /**
   * Reads a file and returns its contents with line numbers
   * @param params Parameters for the file reading
   * @returns Result with file contents
   */
  async execute(
    params: ReadFileToolParams,
    _signal: AbortSignal,

@@ -205,75 +109,23 @@ export class ReadFileTool extends BaseTool<ReadFileToolParams, ToolResult> {
      };
    }

-   const filePath = params.path;
-   try {
-     if (!fs.existsSync(filePath)) {
+   const result = await processSingleFileContent(
+     params.path,
+     this.rootDirectory,
+     params.offset,
+     params.limit,
+   );
+
+   if (result.error) {
      return {
-       llmContent: `File not found: ${filePath}`,
-       returnDisplay: `File not found.`,
+       llmContent: result.error, // The detailed error for LLM
+       returnDisplay: result.returnDisplay, // User-friendly error
      };
    }

-   const stats = fs.statSync(filePath);
-   if (stats.isDirectory()) {
-     return {
-       llmContent: `Path is a directory, not a file: ${filePath}`,
-       returnDisplay: `File is directory.`,
-     };
-   }
-
-   const fileType = this.detectFileType(filePath);
-   if (fileType !== 'text') {
-     return {
-       llmContent: `Binary file: ${filePath} (${fileType})`,
-       // For binary files, maybe returnDisplay should be empty or indicate binary?
-       // Keeping it empty for now.
-       returnDisplay: ``,
-     };
-   }
-
-   const content = fs.readFileSync(filePath, 'utf8');
-   const lines = content.split('\n');
-
-   const startLine = params.offset || 0;
-   const endLine = params.limit
-     ? startLine + params.limit
-     : Math.min(startLine + ReadFileTool.DEFAULT_MAX_LINES, lines.length);
-   const selectedLines = lines.slice(startLine, endLine);
-
-   let truncated = false;
-   const formattedLines = selectedLines.map((line) => {
-     let processedLine = line;
-     if (line.length > ReadFileTool.MAX_LINE_LENGTH) {
-       processedLine =
-         line.substring(0, ReadFileTool.MAX_LINE_LENGTH) + '... [truncated]';
-       truncated = true;
-     }
-
-     return processedLine;
-   });
-
-   const contentTruncated = endLine < lines.length || truncated;
-
-   let llmContent = '';
-   if (contentTruncated) {
-     llmContent += `[File truncated: showing lines ${startLine + 1}-${endLine} of ${lines.length} total lines. Use offset parameter to view more.]\n`;
-   }
-   llmContent += formattedLines.join('\n');
-
-   // Here, returnDisplay could potentially be enhanced, but for now,
-   // it's kept empty as the LLM content itself is descriptive.
    return {
-     llmContent,
-     returnDisplay: '',
-   };
- } catch (error) {
-   const errorMsg = `Error reading file: ${error instanceof Error ? error.message : String(error)}`;
-
-   return {
-     llmContent: `Error reading file ${filePath}: ${errorMsg}`,
-     returnDisplay: `Failed to read file: ${errorMsg}`,
+     llmContent: result.llmContent,
+     returnDisplay: result.returnDisplay,
    };
  }
}
packages/server/src/tools/read-many-files.test.ts

@@ -115,6 +115,33 @@ describe('ReadManyFilesTool', () => {
      };
      expect(tool.validateParams(params)).toBeNull();
    });

    it('should return error if paths array contains an empty string', () => {
      const params = { paths: ['file1.txt', ''] };
      expect(tool.validateParams(params)).toBe(
        'Each item in "paths" must be a non-empty string/glob pattern.',
      );
    });

    it('should return error if include array contains non-string elements', () => {
      const params = {
        paths: ['file1.txt'],
        include: ['*.ts', 123] as string[],
      };
      expect(tool.validateParams(params)).toBe(
        'If provided, "include" must be an array of strings/glob patterns.',
      );
    });

    it('should return error if exclude array contains non-string elements', () => {
      const params = {
        paths: ['file1.txt'],
        exclude: ['*.log', {}] as string[],
      };
      expect(tool.validateParams(params)).toBe(
        'If provided, "exclude" must be an array of strings/glob patterns.',
      );
    });
  });

  describe('execute', () => {
packages/server/src/tools/read-many-files.ts

@@ -7,13 +7,16 @@
import { BaseTool, ToolResult } from './tools.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { getErrorMessage } from '../utils/errors.js';
import * as fs from 'fs/promises';
import * as path from 'path';
import fg from 'fast-glob';
import { GEMINI_MD_FILENAME } from './memoryTool.js';

import {
  detectFileType,
  processSingleFileContent,
  DEFAULT_ENCODING,
} from '../utils/fileUtils.js';
import { PartListUnion } from '@google/genai';
import mime from 'mime-types';

/**
 * Parameters for the ReadManyFilesTool.
 */

@@ -98,8 +101,6 @@ const DEFAULT_EXCLUDES: string[] = [
  `**/${GEMINI_MD_FILENAME}`,
];

- // Default values for encoding and separator format
- const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
const DEFAULT_OUTPUT_SEPARATOR_FORMAT = '--- {filePath} ---';

/**

@@ -256,11 +257,10 @@ Use this tool when the user's query implies needing the content of several files
    } = params;

    const toolBaseDir = this.targetDir;

    const filesToConsider = new Set<string>();
    const skippedFiles: Array<{ path: string; reason: string }> = [];
    const processedFilesRelativePaths: string[] = [];
-   const content: PartListUnion = [];
+   const contentParts: PartListUnion = [];

    const effectiveExcludes = useDefaultExcludes
      ? [...DEFAULT_EXCLUDES, ...exclude]

@@ -315,17 +315,12 @@ Use this tool when the user's query implies needing the content of several files
      const relativePathForDisplay = path
        .relative(toolBaseDir, filePath)
        .replace(/\\/g, '/');
-     try {
-       const mimeType = mime.lookup(filePath);
-       if (
-         mimeType &&
-         (mimeType.startsWith('image/') || mimeType === 'application/pdf')
-       ) {
-         const fileExtension = path.extname(filePath);
-         const fileNameWithoutExtension = path.basename(
-           filePath,
-           fileExtension,
-         );
+
+     const fileType = detectFileType(filePath);
+
+     if (fileType === 'image' || fileType === 'pdf') {
+       const fileExtension = path.extname(filePath).toLowerCase();
+       const fileNameWithoutExtension = path.basename(filePath, fileExtension);
        const requestedExplicitly = inputPatterns.some(
          (pattern: string) =>
            pattern.toLowerCase().includes(fileExtension) ||

@@ -340,44 +335,30 @@ Use this tool when the user's query implies needing the content of several files
          });
          continue;
        }
-       const contentBuffer = await fs.readFile(filePath);
-       const base64Data = contentBuffer.toString('base64');
-       content.push({
-         inlineData: {
-           data: base64Data,
-           mimeType,
-         },
-       });
-       processedFilesRelativePaths.push(relativePathForDisplay);
-     } else {
-       const contentBuffer = await fs.readFile(filePath);
-       // Basic binary detection: check for null bytes in the first 1KB
-       const sample = contentBuffer.subarray(
-         0,
-         Math.min(contentBuffer.length, 1024),
+     }
+
+     // Use processSingleFileContent for all file types now
+     const fileReadResult = await processSingleFileContent(
+       filePath,
+       toolBaseDir,
      );
-     if (sample.includes(0)) {
+
+     if (fileReadResult.error) {
        skippedFiles.push({
          path: relativePathForDisplay,
-         reason: 'appears to be binary',
+         reason: `Read error: ${fileReadResult.error}`,
        });
-       continue;
      }
-     // Using default encoding
-     const fileContent = contentBuffer.toString(DEFAULT_ENCODING);
-     // Using default separator format
+     } else {
+       if (typeof fileReadResult.llmContent === 'string') {
        const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
          '{filePath}',
          relativePathForDisplay,
        );
-       content.push(`${separator}\n\n${fileContent}\n\n`);
-       processedFilesRelativePaths.push(relativePathForDisplay);
+       contentParts.push(`${separator}\n\n${fileReadResult.llmContent}\n\n`);
+       } else {
+         contentParts.push(fileReadResult.llmContent); // This is a Part for image/pdf
+       }
-     } catch (error) {
-       skippedFiles.push({
-         path: relativePathForDisplay,
-         reason: `Read error: ${getErrorMessage(error)}`,
-       });
+       processedFilesRelativePaths.push(relativePathForDisplay);
      }
    }

@@ -422,13 +403,13 @@ Use this tool when the user's query implies needing the content of several files
      displayMessage += `No files were read and concatenated based on the criteria.\n`;
    }

-   if (content.length === 0) {
-     content.push(
+   if (contentParts.length === 0) {
+     contentParts.push(
        'No files matching the criteria were found or all were skipped.',
      );
    }
    return {
-     llmContent: content,
+     llmContent: contentParts,
      returnDisplay: displayMessage.trim(),
    };
  }
@ -0,0 +1,433 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import {
|
||||
describe,
|
||||
it,
|
||||
expect,
|
||||
vi,
|
||||
beforeEach,
|
||||
afterEach,
|
||||
type Mock,
|
||||
} from 'vitest';
|
||||
|
||||
import * as actualNodeFs from 'node:fs'; // For setup/teardown
|
||||
import fsPromises from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import os from 'node:os';
|
||||
import mime from 'mime-types';
|
||||
|
||||
import {
|
||||
isWithinRoot,
|
||||
isBinaryFile,
|
||||
detectFileType,
|
||||
processSingleFileContent,
|
||||
} from './fileUtils.js';
|
||||
|
||||
vi.mock('mime-types', () => ({
|
||||
default: { lookup: vi.fn() },
|
||||
lookup: vi.fn(),
|
||||
}));
|
||||
|
||||
const mockMimeLookup = mime.lookup as Mock;
|
||||
|
||||
describe('fileUtils', () => {
|
||||
let tempRootDir: string;
|
||||
const originalProcessCwd = process.cwd;
|
||||
|
||||
let testTextFilePath: string;
|
||||
let testImageFilePath: string;
|
||||
let testPdfFilePath: string;
|
||||
let testBinaryFilePath: string;
|
||||
let nonExistentFilePath: string;
|
||||
let directoryPath: string;
|
||||
|
||||
beforeEach(() => {
|
||||
vi.resetAllMocks(); // Reset all mocks, including mime.lookup
|
||||
|
||||
tempRootDir = actualNodeFs.mkdtempSync(
|
||||
path.join(os.tmpdir(), 'fileUtils-test-'),
|
||||
);
|
||||
process.cwd = vi.fn(() => tempRootDir); // Mock cwd if necessary for relative path logic within tests
|
||||
|
||||
testTextFilePath = path.join(tempRootDir, 'test.txt');
|
||||
testImageFilePath = path.join(tempRootDir, 'image.png');
|
||||
testPdfFilePath = path.join(tempRootDir, 'document.pdf');
|
||||
testBinaryFilePath = path.join(tempRootDir, 'app.exe');
|
||||
nonExistentFilePath = path.join(tempRootDir, 'notfound.txt');
|
||||
directoryPath = path.join(tempRootDir, 'subdir');
|
||||
|
||||
actualNodeFs.mkdirSync(directoryPath, { recursive: true }); // Ensure subdir exists
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (actualNodeFs.existsSync(tempRootDir)) {
|
||||
actualNodeFs.rmSync(tempRootDir, { recursive: true, force: true });
|
||||
}
|
||||
process.cwd = originalProcessCwd;
|
||||
vi.restoreAllMocks(); // Restore any spies
|
||||
});
|
||||
|
||||
describe('isWithinRoot', () => {
|
||||
const root = path.resolve('/project/root');
|
||||
|
||||
it('should return true for paths directly within the root', () => {
|
||||
expect(isWithinRoot(path.join(root, 'file.txt'), root)).toBe(true);
|
||||
expect(isWithinRoot(path.join(root, 'subdir', 'file.txt'), root)).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return true for the root path itself', () => {
|
||||
expect(isWithinRoot(root, root)).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false for paths outside the root', () => {
|
||||
expect(
|
||||
isWithinRoot(path.resolve('/project/other', 'file.txt'), root),
|
||||
).toBe(false);
|
||||
expect(isWithinRoot(path.resolve('/unrelated', 'file.txt'), root)).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return false for paths that only partially match the root prefix', () => {
|
||||
expect(
|
||||
isWithinRoot(
|
||||
path.resolve('/project/root-but-actually-different'),
|
||||
root,
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle paths with trailing slashes correctly', () => {
|
||||
expect(isWithinRoot(path.join(root, 'file.txt') + path.sep, root)).toBe(
|
||||
true,
|
||||
);
|
||||
expect(isWithinRoot(root + path.sep, root)).toBe(true);
|
||||
});
|
||||
|
||||
it('should handle different path separators (POSIX vs Windows)', () => {
|
||||
const posixRoot = '/project/root';
|
||||
const posixPathInside = '/project/root/file.txt';
|
||||
const posixPathOutside = '/project/other/file.txt';
|
||||
expect(isWithinRoot(posixPathInside, posixRoot)).toBe(true);
|
||||
expect(isWithinRoot(posixPathOutside, posixRoot)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false for a root path that is a sub-path of the path to check', () => {
|
||||
const pathToCheck = path.resolve('/project/root/sub');
|
||||
const rootSub = path.resolve('/project/root');
|
||||
expect(isWithinRoot(pathToCheck, rootSub)).toBe(true);
|
||||
|
||||
const pathToCheckSuper = path.resolve('/project/root');
|
||||
const rootSuper = path.resolve('/project/root/sub');
|
||||
expect(isWithinRoot(pathToCheckSuper, rootSuper)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('isBinaryFile', () => {
|
||||
let filePathForBinaryTest: string;
|
||||
|
||||
beforeEach(() => {
|
||||
filePathForBinaryTest = path.join(tempRootDir, 'binaryCheck.tmp');
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (actualNodeFs.existsSync(filePathForBinaryTest)) {
|
||||
actualNodeFs.unlinkSync(filePathForBinaryTest);
|
||||
}
|
||||
});
|
||||
|
||||
it('should return false for an empty file', () => {
|
||||
actualNodeFs.writeFileSync(filePathForBinaryTest, '');
|
||||
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false for a typical text file', () => {
|
||||
actualNodeFs.writeFileSync(
|
||||
filePathForBinaryTest,
|
||||
'Hello, world!\nThis is a test file with normal text content.',
|
||||
);
|
||||
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return true for a file with many null bytes', () => {
|
||||
const binaryContent = Buffer.from([
|
||||
0x48, 0x65, 0x00, 0x6c, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
]); // "He\0llo\0\0\0\0\0"
|
||||
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
|
||||
expect(isBinaryFile(filePathForBinaryTest)).toBe(true);
|
||||
});
|
||||
|
||||
it('should return true for a file with high percentage of non-printable ASCII', () => {
|
||||
const binaryContent = Buffer.from([
|
||||
0x41, 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x43, 0x44, 0x06,
|
||||
]); // AB\x01\x02\x03\x04\x05CD\x06
|
||||
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
|
||||
expect(isBinaryFile(filePathForBinaryTest)).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false if file access fails (e.g., ENOENT)', () => {
|
||||
// Ensure the file does not exist
|
||||
if (actualNodeFs.existsSync(filePathForBinaryTest)) {
|
||||
actualNodeFs.unlinkSync(filePathForBinaryTest);
|
||||
}
|
||||
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('detectFileType', () => {
|
||||
let filePathForDetectTest: string;
|
||||
|
||||
beforeEach(() => {
|
||||
filePathForDetectTest = path.join(tempRootDir, 'detectType.tmp');
|
||||
// Default: create as a text file for isBinaryFile fallback
|
||||
actualNodeFs.writeFileSync(filePathForDetectTest, 'Plain text content');
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (actualNodeFs.existsSync(filePathForDetectTest)) {
|
||||
actualNodeFs.unlinkSync(filePathForDetectTest);
|
||||
}
|
||||
vi.restoreAllMocks(); // Restore spies on actualNodeFs
|
||||
});
|
||||
|
||||
it('should detect image type by extension (png)', () => {
|
||||
mockMimeLookup.mockReturnValueOnce('image/png');
|
||||
expect(detectFileType('file.png')).toBe('image');
|
||||
});
|
||||
|
||||
it('should detect image type by extension (jpeg)', () => {
|
||||
mockMimeLookup.mockReturnValueOnce('image/jpeg');
|
||||
expect(detectFileType('file.jpg')).toBe('image');
|
||||
});
|
||||
|
||||
it('should detect pdf type by extension', () => {
|
||||
mockMimeLookup.mockReturnValueOnce('application/pdf');
|
||||
expect(detectFileType('file.pdf')).toBe('pdf');
|
||||
});
|
||||
|
||||
it('should detect known binary extensions as binary (e.g. .zip)', () => {
|
||||
mockMimeLookup.mockReturnValueOnce('application/zip');
|
||||
expect(detectFileType('archive.zip')).toBe('binary');
|
||||
});
|
||||
it('should detect known binary extensions as binary (e.g. .exe)', () => {
|
||||
mockMimeLookup.mockReturnValueOnce('application/octet-stream'); // Common for .exe
|
||||
expect(detectFileType('app.exe')).toBe('binary');
|
||||
});
|
||||
|
||||
it('should use isBinaryFile for unknown extensions and detect as binary', () => {
|
||||
mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
|
||||
// Create a file that isBinaryFile will identify as binary
|
||||
const binaryContent = Buffer.from([
|
||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
|
||||
]);
|
||||
actualNodeFs.writeFileSync(filePathForDetectTest, binaryContent);
|
||||
expect(detectFileType(filePathForDetectTest)).toBe('binary');
|
||||
});
|
||||
|
||||
it('should default to text if mime type is unknown and content is not binary', () => {
|
||||
mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
|
||||
// filePathForDetectTest is already a text file by default from beforeEach
|
||||
expect(detectFileType(filePathForDetectTest)).toBe('text');
|
||||
});
|
||||
});

  describe('processSingleFileContent', () => {
    beforeEach(() => {
      // Remove any leftover fixture files; each test creates the files it
      // needs, and they must really exist for the statSync checks even when
      // readFile is mocked.
      if (actualNodeFs.existsSync(testTextFilePath))
        actualNodeFs.unlinkSync(testTextFilePath);
      if (actualNodeFs.existsSync(testImageFilePath))
        actualNodeFs.unlinkSync(testImageFilePath);
      if (actualNodeFs.existsSync(testPdfFilePath))
        actualNodeFs.unlinkSync(testPdfFilePath);
      if (actualNodeFs.existsSync(testBinaryFilePath))
        actualNodeFs.unlinkSync(testBinaryFilePath);
    });

    it('should read a text file successfully', async () => {
      const content = 'Line 1\nLine 2\nLine 3';
      actualNodeFs.writeFileSync(testTextFilePath, content);
      const result = await processSingleFileContent(
        testTextFilePath,
        tempRootDir,
      );
      expect(result.llmContent).toBe(content);
      expect(result.returnDisplay).toContain('Read text file: test.txt');
      expect(result.error).toBeUndefined();
    });

    it('should handle file not found', async () => {
      const result = await processSingleFileContent(
        nonExistentFilePath,
        tempRootDir,
      );
      expect(result.error).toContain('File not found');
      expect(result.returnDisplay).toContain('File not found');
    });

    it('should handle read errors for text files', async () => {
      actualNodeFs.writeFileSync(testTextFilePath, 'content'); // File must exist for initial statSync
      const readError = new Error('Simulated read error');
      vi.spyOn(fsPromises, 'readFile').mockRejectedValueOnce(readError);

      const result = await processSingleFileContent(
        testTextFilePath,
        tempRootDir,
      );
      expect(result.error).toContain('Simulated read error');
      expect(result.returnDisplay).toContain('Simulated read error');
    });

    it('should handle read errors for image/pdf files', async () => {
      actualNodeFs.writeFileSync(testImageFilePath, 'content'); // File must exist
      mockMimeLookup.mockReturnValue('image/png');
      const readError = new Error('Simulated image read error');
      vi.spyOn(fsPromises, 'readFile').mockRejectedValueOnce(readError);

      const result = await processSingleFileContent(
        testImageFilePath,
        tempRootDir,
      );
      expect(result.error).toContain('Simulated image read error');
      expect(result.returnDisplay).toContain('Simulated image read error');
    });

    it('should process an image file', async () => {
      const fakePngData = Buffer.from('fake png data');
      actualNodeFs.writeFileSync(testImageFilePath, fakePngData);
      mockMimeLookup.mockReturnValue('image/png');
      const result = await processSingleFileContent(
        testImageFilePath,
        tempRootDir,
      );
      expect(
        (result.llmContent as { inlineData: unknown }).inlineData,
      ).toBeDefined();
      expect(
        (result.llmContent as { inlineData: { mimeType: string } }).inlineData
          .mimeType,
      ).toBe('image/png');
      expect(
        (result.llmContent as { inlineData: { data: string } }).inlineData.data,
      ).toBe(fakePngData.toString('base64'));
      expect(result.returnDisplay).toContain('Read image file: image.png');
    });

    it('should process a PDF file', async () => {
      const fakePdfData = Buffer.from('fake pdf data');
      actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
      mockMimeLookup.mockReturnValue('application/pdf');
      const result = await processSingleFileContent(
        testPdfFilePath,
        tempRootDir,
      );
      expect(
        (result.llmContent as { inlineData: unknown }).inlineData,
      ).toBeDefined();
      expect(
        (result.llmContent as { inlineData: { mimeType: string } }).inlineData
          .mimeType,
      ).toBe('application/pdf');
      expect(
        (result.llmContent as { inlineData: { data: string } }).inlineData.data,
      ).toBe(fakePdfData.toString('base64'));
      expect(result.returnDisplay).toContain('Read pdf file: document.pdf');
    });

    it('should skip binary files', async () => {
      actualNodeFs.writeFileSync(
        testBinaryFilePath,
        Buffer.from([0x00, 0x01, 0x02]),
      );
      mockMimeLookup.mockReturnValueOnce('application/octet-stream');
      // isBinaryFile will operate on the real file.

      const result = await processSingleFileContent(
        testBinaryFilePath,
        tempRootDir,
      );
      expect(result.llmContent).toContain(
        'Cannot display content of binary file',
      );
      expect(result.returnDisplay).toContain('Skipped binary file: app.exe');
    });

    it('should handle path being a directory', async () => {
      const result = await processSingleFileContent(directoryPath, tempRootDir);
      expect(result.error).toContain('Path is a directory');
      expect(result.returnDisplay).toContain('Path is a directory');
    });

    it('should paginate text files correctly (offset and limit)', async () => {
      const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`);
      actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));

      const result = await processSingleFileContent(
        testTextFilePath,
        tempRootDir,
        5,
        5,
      ); // Read lines 6-10
      const expectedContent = lines.slice(5, 10).join('\n');

      expect(result.llmContent).toContain(expectedContent);
      expect(result.llmContent).toContain(
        '[File content truncated: showing lines 6-10 of 20 total lines. Use offset/limit parameters to view more.]',
      );
      expect(result.returnDisplay).toContain(
        'Read text file: test.txt (truncated)',
      );
      expect(result.isTruncated).toBe(true);
      expect(result.originalLineCount).toBe(20);
      expect(result.linesShown).toEqual([6, 10]);
    });
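
    // Note on the assertions above: offset and limit are 0-based inputs, while
    // the truncation message and linesShown are 1-based for display, so
    // offset 5 with limit 5 surfaces as lines 6-10.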

    it('should handle limit exceeding file length', async () => {
      const lines = ['Line 1', 'Line 2'];
      actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));

      const result = await processSingleFileContent(
        testTextFilePath,
        tempRootDir,
        0,
        10,
      );
      const expectedContent = lines.join('\n');

      expect(result.llmContent).toBe(expectedContent);
      expect(result.returnDisplay).toContain('Read text file: test.txt');
      expect(result.isTruncated).toBe(false);
      expect(result.originalLineCount).toBe(2);
      expect(result.linesShown).toEqual([1, 2]);
    });

    it('should truncate long lines in text files', async () => {
      const longLine = 'a'.repeat(2500);
      actualNodeFs.writeFileSync(
        testTextFilePath,
        `Short line\n${longLine}\nAnother short line`,
      );

      const result = await processSingleFileContent(
        testTextFilePath,
        tempRootDir,
      );

      expect(result.llmContent).toContain('Short line');
      expect(result.llmContent).toContain(
        longLine.substring(0, 2000) + '... [truncated]',
      );
      expect(result.llmContent).toContain('Another short line');
      expect(result.llmContent).toContain(
        '[File content partially truncated: some lines exceeded maximum length of 2000 characters.]',
      );
      expect(result.isTruncated).toBe(true);
    });
  });
});

@@ -0,0 +1,280 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import fs from 'fs';
import path from 'path';
import { PartUnion } from '@google/genai';
import mime from 'mime-types';

// Constants for text file processing
const DEFAULT_MAX_LINES_TEXT_FILE = 2000;
const MAX_LINE_LENGTH_TEXT_FILE = 2000;

// Default encoding used when reading text files
export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';

/**
 * Checks if a path is within a given root directory.
 * @param pathToCheck The absolute path to check.
 * @param rootDirectory The absolute root directory.
 * @returns True if the path is within the root directory, false otherwise.
 */
export function isWithinRoot(
  pathToCheck: string,
  rootDirectory: string,
): boolean {
  const normalizedPathToCheck = path.normalize(pathToCheck);
  const normalizedRootDirectory = path.normalize(rootDirectory);

  // Ensure the rootDirectory path ends with a separator for correct startsWith comparison,
  // unless it's the root path itself (e.g., '/' or 'C:\').
  const rootWithSeparator =
    normalizedRootDirectory === path.sep ||
    normalizedRootDirectory.endsWith(path.sep)
      ? normalizedRootDirectory
      : normalizedRootDirectory + path.sep;

  return (
    normalizedPathToCheck === normalizedRootDirectory ||
    normalizedPathToCheck.startsWith(rootWithSeparator)
  );
}
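
// Illustrative behavior of the check above (paths here are hypothetical):
//   isWithinRoot('/repo/src/index.ts', '/repo')  -> true
//   isWithinRoot('/repo', '/repo')               -> true  (the root itself)
//   isWithinRoot('/repository/a.ts', '/repo')    -> false (prefix alone is not enough)
// The trailing-separator handling is what keeps '/repository' from being
// treated as inside '/repo'.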

/**
 * Determines if a file is likely binary based on content sampling.
 * @param filePath Path to the file.
 * @returns True if the file appears to be binary.
 */
export function isBinaryFile(filePath: string): boolean {
  try {
    const fd = fs.openSync(filePath, 'r');
    // Read up to 4KB or file size, whichever is smaller
    const fileSize = fs.fstatSync(fd).size;
    if (fileSize === 0) {
      // Empty file is not considered binary for content checking
      fs.closeSync(fd);
      return false;
    }
    const bufferSize = Math.min(4096, fileSize);
    const buffer = Buffer.alloc(bufferSize);
    const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
    fs.closeSync(fd);

    if (bytesRead === 0) return false;

    let nonPrintableCount = 0;
    for (let i = 0; i < bytesRead; i++) {
      if (buffer[i] === 0) return true; // Null byte is a strong indicator
      if (buffer[i] < 9 || (buffer[i] > 13 && buffer[i] < 32)) {
        nonPrintableCount++;
      }
    }
    // If >30% non-printable characters, consider it binary
    return nonPrintableCount / bytesRead > 0.3;
  } catch {
    // If any error occurs (e.g. file not found, permissions),
    // treat as not binary here; let higher-level functions handle existence/access errors.
    return false;
  }
}
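
// Note on the heuristic above: a single NUL byte anywhere in the sampled 4KB
// window is treated as conclusive, which catches most real binary formats
// (compressed streams almost always contain 0x00 early on). The 30% threshold
// is a fallback for NUL-free data dominated by control characters.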

/**
 * Detects the type of file based on extension and content.
 * @param filePath Path to the file.
 * @returns 'text', 'image', 'pdf', or 'binary'.
 */
export function detectFileType(
  filePath: string,
): 'text' | 'image' | 'pdf' | 'binary' {
  const ext = path.extname(filePath).toLowerCase();
  const lookedUpMimeType = mime.lookup(filePath); // Returns false if not found, or the mime type string

  if (lookedUpMimeType && lookedUpMimeType.startsWith('image/')) {
    return 'image';
  }
  if (lookedUpMimeType === 'application/pdf') {
    return 'pdf';
  }

  // Stricter binary check for common non-text extensions before content check.
  // These are often not well-covered by mime-types or might be misidentified.
  if (
    [
      '.zip',
      '.tar',
      '.gz',
      '.exe',
      '.dll',
      '.so',
      '.class',
      '.jar',
      '.war',
      '.7z',
      '.doc',
      '.docx',
      '.xls',
      '.xlsx',
      '.ppt',
      '.pptx',
      '.odt',
      '.ods',
      '.odp',
      '.bin',
      '.dat',
      '.obj',
      '.o',
      '.a',
      '.lib',
      '.wasm',
      '.pyc',
      '.pyo',
    ].includes(ext)
  ) {
    return 'binary';
  }

  // Fallback to content-based check if mime type wasn't conclusive for image/pdf
  // and it's not a known binary extension.
  if (isBinaryFile(filePath)) {
    return 'binary';
  }

  return 'text';
}
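
// Illustrative decision order for the detector above (file names are
// hypothetical):
//   detectFileType('photo.jpeg') -> 'image'  (mime lookup wins first)
//   detectFileType('report.pdf') -> 'pdf'
//   detectFileType('bundle.jar') -> 'binary' (extension deny-list)
//   detectFileType('notes.xyz')  -> 'text' or 'binary', depending on what
//                                   isBinaryFile finds in the sampled content.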

export interface ProcessedFileReadResult {
  llmContent: PartUnion; // string for text, Part for image/pdf/unreadable binary
  returnDisplay: string;
  error?: string; // Optional error message for the LLM if file processing failed
  isTruncated?: boolean; // For text files, indicates if content was truncated
  originalLineCount?: number; // For text files
  linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display)
}
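
// Example of a populated result for a truncated text read (values are
// illustrative, mirroring the unit tests above):
//   {
//     llmContent: '[File content truncated: showing lines 6-10 of 20 ...]...',
//     returnDisplay: 'Read text file: test.txt (truncated)',
//     isTruncated: true,
//     originalLineCount: 20,
//     linesShown: [6, 10],
//   }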

/**
 * Reads and processes a single file, handling text, images, and PDFs.
 * @param filePath Absolute path to the file.
 * @param rootDirectory Absolute path to the project root for relative path display.
 * @param offset Optional offset for text files (0-based line number).
 * @param limit Optional limit for text files (number of lines to read).
 * @returns ProcessedFileReadResult object.
 */
export async function processSingleFileContent(
  filePath: string,
  rootDirectory: string,
  offset?: number,
  limit?: number,
): Promise<ProcessedFileReadResult> {
  try {
    if (!fs.existsSync(filePath)) {
      // Sync check is acceptable before async read
      return {
        llmContent: '',
        returnDisplay: 'File not found.',
        error: `File not found: ${filePath}`,
      };
    }
    const stats = fs.statSync(filePath); // Sync check
    if (stats.isDirectory()) {
      return {
        llmContent: '',
        returnDisplay: 'Path is a directory.',
        error: `Path is a directory, not a file: ${filePath}`,
      };
    }

    const fileType = detectFileType(filePath);
    const relativePathForDisplay = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');

    switch (fileType) {
      case 'binary': {
        return {
          llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`,
          returnDisplay: `Skipped binary file: ${relativePathForDisplay}`,
        };
      }
      case 'text': {
        const content = await fs.promises.readFile(filePath, 'utf8');
        const lines = content.split('\n');
        const originalLineCount = lines.length;

        const startLine = offset || 0;
        const effectiveLimit =
          limit === undefined ? DEFAULT_MAX_LINES_TEXT_FILE : limit;
        // Ensure endLine does not exceed originalLineCount
        const endLine = Math.min(startLine + effectiveLimit, originalLineCount);
        // Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high
        const actualStartLine = Math.min(startLine, originalLineCount);
        const selectedLines = lines.slice(actualStartLine, endLine);

        let linesWereTruncatedInLength = false;
        const formattedLines = selectedLines.map((line) => {
          if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
            linesWereTruncatedInLength = true;
            return (
              line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
            );
          }
          return line;
        });

        const contentRangeTruncated = endLine < originalLineCount;
        const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;

        let llmTextContent = '';
        if (contentRangeTruncated) {
          llmTextContent += `[File content truncated: showing lines ${actualStartLine + 1}-${endLine} of ${originalLineCount} total lines. Use offset/limit parameters to view more.]\n`;
        } else if (linesWereTruncatedInLength) {
          llmTextContent += `[File content partially truncated: some lines exceeded maximum length of ${MAX_LINE_LENGTH_TEXT_FILE} characters.]\n`;
        }
        llmTextContent += formattedLines.join('\n');

        return {
          llmContent: llmTextContent,
          returnDisplay: `Read text file: ${relativePathForDisplay}${isTruncated ? ' (truncated)' : ''}`,
          isTruncated,
          originalLineCount,
          linesShown: [actualStartLine + 1, endLine],
        };
      }
      case 'image':
      case 'pdf': {
        const contentBuffer = await fs.promises.readFile(filePath);
        const base64Data = contentBuffer.toString('base64');
        return {
          llmContent: {
            inlineData: {
              data: base64Data,
              mimeType: mime.lookup(filePath) || 'application/octet-stream',
            },
          },
          returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
        };
      }
      default: {
        // Should not happen with current detectFileType logic
        const exhaustiveCheck: never = fileType;
        return {
          llmContent: `Unhandled file type: ${exhaustiveCheck}`,
          returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`,
          error: `Unhandled file type for ${filePath}`,
        };
      }
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    const displayPath = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');
    return {
      llmContent: `Error reading file ${displayPath}: ${errorMessage}`,
      returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`,
      error: `Error reading file ${filePath}: ${errorMessage}`,
    };
  }
}
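
// Minimal usage sketch (comment only; the absolute paths are hypothetical):
//
//   const result = await processSingleFileContent(
//     '/abs/project/src/app.ts', // must be an existing absolute path
//     '/abs/project',            // root used for display-relative paths
//     0,                         // optional 0-based start line (text files)
//     100,                       // optional max lines to return (text files)
//   );
//   if (result.error) {
//     console.error(result.returnDisplay);
//   } else {
//     console.log(result.returnDisplay); // e.g. 'Read text file: src/app.ts'
//   }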