Web fetch tool (#31)

* Adding a web fetch tool.
This commit is contained in:
Allen Hutchison 2025-04-18 13:20:39 -07:00 committed by GitHub
parent 56d4a35d05
commit 3ed61f1ff2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 177 additions and 0 deletions

View File

@ -9,6 +9,7 @@ import { GlobTool } from './tools/glob.tool.js';
import { EditTool } from './tools/edit.tool.js';
import { TerminalTool } from './tools/terminal.tool.js';
import { WriteFileTool } from './tools/write-file.tool.js';
import { WebFetchTool } from './tools/web-fetch.tool.js';
import { globalConfig } from './config/config.js';
async function main() {
@ -77,6 +78,7 @@ function registerTools(targetDir: string) {
const editTool = new EditTool(targetDir);
const terminalTool = new TerminalTool(targetDir);
const writeFileTool = new WriteFileTool(targetDir);
const webFetchTool = new WebFetchTool();
toolRegistry.registerTool(lsTool);
toolRegistry.registerTool(readFileTool);
@ -85,4 +87,5 @@ function registerTools(targetDir: string) {
toolRegistry.registerTool(editTool);
toolRegistry.registerTool(terminalTool);
toolRegistry.registerTool(writeFileTool);
toolRegistry.registerTool(webFetchTool);
}

View File

@ -0,0 +1,174 @@
import { SchemaValidator } from '../utils/schemaValidator.js';
import { BaseTool, ToolResult } from './tools.js';
import { ToolCallConfirmationDetails } from '../ui/types.js'; // Added for shouldConfirmExecute
/**
* Input parameters accepted by the WebFetch tool.
*/
export interface WebFetchToolParams {
/**
* The URL to fetch content from.
* Must parse as an absolute URL whose protocol is `http:` or `https:`;
* anything else is rejected by `WebFetchTool.invalidParams` before a request is made.
*/
url: string;
}
/**
* Result returned by the WebFetch tool.
* Declares no fields of its own beyond those inherited from `ToolResult`;
* the tool fills in `llmContent` (text handed to the model) and
* `returnDisplay` (short message shown to the user).
*/
export interface WebFetchToolResult extends ToolResult {}
/**
 * Tool that downloads the body of an HTTP(S) URL and returns it as text.
 *
 * Failures (invalid parameters, non-success status codes, network errors and
 * timeouts) are never thrown to the caller; they are reported through the
 * `llmContent` / `returnDisplay` fields of the returned result instead.
 */
export class WebFetchTool extends BaseTool<
  WebFetchToolParams,
  WebFetchToolResult
> {
  static readonly Name: string = 'web_fetch';

  /** Milliseconds after which an in-flight request is aborted. */
  private static readonly FETCH_TIMEOUT_MS = 15000;

  /** Maximum number of characters of the response body forwarded to the LLM. */
  private static readonly MAX_LLM_CONTENT_LENGTH = 100000;

  /**
   * Creates a new instance of the WebFetchTool.
   * Unlike the file-based tools, no root directory is needed.
   */
  constructor() {
    super(
      WebFetchTool.Name,
      'WebFetch',
      'Fetches text content from a given URL. Handles potential network errors and non-success HTTP status codes.',
      {
        properties: {
          url: {
            description:
              "The URL to fetch. Must be an absolute URL (e.g., 'https://example.com/file.txt').",
            type: 'string',
          },
        },
        required: ['url'],
        type: 'object',
      },
    );
  }

  /**
   * Validates the parameters for the WebFetch tool.
   *
   * @param params Parameters to validate.
   * @returns An error message string if invalid, `null` otherwise.
   */
  invalidParams(params: WebFetchToolParams): string | null {
    // 1. Validate against the declared parameter schema first.
    if (
      this.schema.parameters &&
      !SchemaValidator.validate(
        this.schema.parameters as Record<string, unknown>,
        params,
      )
    ) {
      return 'Parameters failed schema validation.';
    }

    // 2. Validate the URL format and protocol.
    try {
      const parsedUrl = new URL(params.url);
      // Only plain web URLs are allowed (rules out file:, ftp:, data:, ...).
      if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
        return `Invalid URL protocol: "${parsedUrl.protocol}". Only 'http:' and 'https:' are supported.`;
      }
    } catch {
      // The URL constructor throws when the value is not a valid absolute URL.
      return `Invalid URL format: "${params.url}". Please provide a valid absolute URL (e.g., 'https://example.com').`;
    }

    return null;
  }

  /**
   * Gets a short, human-readable description of the web fetch operation.
   *
   * @param params Parameters for the web fetch.
   * @returns A string describing the operation; URLs longer than 80
   *   characters are shortened to 77 characters plus an ellipsis.
   */
  getDescription(params: WebFetchToolParams): string {
    const displayUrl =
      params.url.length > 80
        ? params.url.substring(0, 77) + '...'
        : params.url;
    return `Fetching content from ${displayUrl}`;
  }

  /**
   * Determines if the tool should prompt for confirmation before execution.
   * Currently always resolves to `false`, i.e. fetches run without a prompt.
   *
   * NOTE(review): `invalidParams` only restricts the protocol, so requests to
   * localhost or private-network hosts are not blocked or confirmed — decide
   * whether a per-domain confirmation is wanted here.
   *
   * @param params Parameters for the tool execution (currently unused).
   * @returns Confirmation details, or `false` when no confirmation is needed.
   */
  async shouldConfirmExecute(
    params: WebFetchToolParams,
  ): Promise<ToolCallConfirmationDetails | false> {
    return false;
  }

  /**
   * Fetches content from the specified URL.
   *
   * @param params Parameters for the web fetch operation.
   * @returns Result with the fetched content (truncated to
   *   `MAX_LLM_CONTENT_LENGTH` characters) or an error message.
   */
  async execute(params: WebFetchToolParams): Promise<WebFetchToolResult> {
    const validationError = this.invalidParams(params);
    if (validationError) {
      return {
        llmContent: `Error: Invalid parameters provided. Reason: ${validationError}`,
        returnDisplay: `**Error:** Invalid parameters. ${validationError}`,
      };
    }

    const url = params.url;
    try {
      const response = await fetch(url, {
        headers: {
          'User-Agent': 'GeminiCode-CLI/1.0',
        },
        signal: AbortSignal.timeout(WebFetchTool.FETCH_TIMEOUT_MS),
      });

      if (!response.ok) {
        // fetch doesn't throw on bad HTTP status codes (4xx, 5xx).
        // The body is not needed, so cancel it to release the connection
        // instead of leaving it open until garbage collection.
        await response.body?.cancel().catch(() => undefined);
        const errorText = `Failed to fetch data from ${url}. Status: ${response.status} ${response.statusText}`;
        return {
          llmContent: `Error: ${errorText}`,
          returnDisplay: `**Error:** ${errorText}`,
        };
      }

      // Assuming the response is text. Add checks for content-type if needed.
      const maxLength = WebFetchTool.MAX_LLM_CONTENT_LENGTH;
      const data = await WebFetchTool.readTextUpTo(response, maxLength);

      let llmContent: string;
      if (data) {
        // Anything longer than the limit is cut so it fits the LLM context.
        const body =
          data.length > maxLength
            ? data.substring(0, maxLength) + '\n... [Content truncated]'
            : data;
        llmContent = `Fetched data from ${url}:\n\n${body}`;
      } else {
        llmContent = `No data fetched from ${url}. Status: ${response.status}`;
      }

      return {
        llmContent,
        returnDisplay: `Fetched content from ${url}`, // Simple display message
      };
    } catch (error: unknown) {
      // Network errors (DNS resolution, connection refused, ...), the timeout
      // abort, and failures while reading the body all end up here.
      const errorMessage = `Failed to fetch data from ${url}. Error: ${error instanceof Error ? error.message : String(error)}`;
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `**Error:** ${errorMessage}`,
      };
    }
  }

  /**
   * Reads the response body as UTF-8 text, stopping the download as soon as
   * more than `limit` characters have been decoded.
   *
   * This avoids buffering an arbitrarily large body in memory when only the
   * first `limit` characters are going to be used.
   *
   * @param response Response whose body has not been read yet.
   * @param limit Number of characters the caller intends to keep.
   * @returns The full body when it is at most `limit` characters long;
   *   otherwise a prefix that is strictly longer than `limit`, so the caller
   *   can tell that truncation is required.
   */
  private static async readTextUpTo(
    response: Response,
    limit: number,
  ): Promise<string> {
    const body = response.body;
    if (!body) {
      // No stream available (e.g. a bodiless response); nothing to bound.
      return response.text();
    }

    const reader = body.getReader();
    const decoder = new TextDecoder();
    let text = '';
    try {
      while (text.length <= limit) {
        const { done, value } = await reader.read();
        if (done) {
          // Flush bytes held back from an incomplete multi-byte sequence.
          return text + decoder.decode();
        }
        text += decoder.decode(value, { stream: true });
      }
      return text;
    } finally {
      // Stops the transfer when we bailed out early; harmless once the
      // stream has already ended or errored.
      await reader.cancel().catch(() => undefined);
    }
  }
}