From 5097b5a6563a4d514242580495c5974b7e665c3f Mon Sep 17 00:00:00 2001 From: Taylor Mullen Date: Sun, 25 May 2025 14:41:37 -0700 Subject: [PATCH] Fix(write-file): Correct over-escaping and improve content generation - Leveraged existing edit correction technology from `edit.ts` to address over-escaping issues in `write-file.ts`. - Introduced `ensureCorrectFileContent` for correcting content in new files, where a simple "replace" isnt applicable. This uses a new LLM prompt tailored for correcting potentially problematic string escaping. - Added caching for `ensureCorrectFileContent` to optimize performance. - Refactored `write-file.ts` to integrate these corrections, improving the reliability of file content generation and modification. Part of https://github.com/google-gemini/gemini-cli/issues/484 --- packages/server/src/tools/write-file.ts | 145 +++++++++++++++++---- packages/server/src/utils/editCorrector.ts | 85 ++++++++++++ 2 files changed, 202 insertions(+), 28 deletions(-) diff --git a/packages/server/src/tools/write-file.ts b/packages/server/src/tools/write-file.ts index 1b6b6dc8..6c0b7d72 100644 --- a/packages/server/src/tools/write-file.ts +++ b/packages/server/src/tools/write-file.ts @@ -18,7 +18,12 @@ import { } from './tools.js'; import { SchemaValidator } from '../utils/schemaValidator.js'; import { makeRelative, shortenPath } from '../utils/paths.js'; -import { isNodeError } from '../utils/errors.js'; +import { getErrorMessage, isNodeError } from '../utils/errors.js'; +import { + ensureCorrectEdit, + ensureCorrectFileContent, +} from '../utils/editCorrector.js'; +import { GeminiClient } from '../core/client.js'; /** * Parameters for the WriteFile tool @@ -35,11 +40,19 @@ export interface WriteFileToolParams { content: string; } +interface GetCorrectedFileContentResult { + originalContent: string; + correctedContent: string; + fileExists: boolean; + error?: { message: string; code?: string }; +} + /** * Implementation of the WriteFile tool logic */ export class WriteFileTool extends BaseTool { static readonly Name: string = 'write_file'; + private readonly client: GeminiClient; constructor(private readonly config: Config) { super( @@ -62,6 +75,8 @@ export class WriteFileTool extends BaseTool { type: 'object', }, ); + + this.client = new GeminiClient(this.config); } private isWithinRoot(pathToCheck: string): boolean { @@ -135,23 +150,27 @@ export class WriteFileTool extends BaseTool { return false; } + const correctedContentResult = await this._getCorrectedFileContent( + params.file_path, + params.content, + ); + + if (correctedContentResult.error) { + // If file exists but couldn't be read, we can't show a diff for confirmation. + return false; + } + + const { originalContent, correctedContent } = correctedContentResult; const relativePath = makeRelative( params.file_path, this.config.getTargetDir(), ); const fileName = path.basename(params.file_path); - let currentContent = ''; - try { - currentContent = fs.readFileSync(params.file_path, 'utf8'); - } catch { - // File might not exist, that's okay for write/create - } - const fileDiff = Diff.createPatch( fileName, - currentContent, - params.content, + originalContent, // Original content (empty if new file or unreadable) + correctedContent, // Content after potential correction 'Current', 'Proposed', { context: 3 }, @@ -183,37 +202,53 @@ export class WriteFileTool extends BaseTool { }; } - let currentContent = ''; - let isNewFile = false; - try { - currentContent = fs.readFileSync(params.file_path, 'utf8'); - } catch (err: unknown) { - if (isNodeError(err) && err.code === 'ENOENT') { - isNewFile = true; - } else { - // Rethrow other read errors (permissions etc.) - const errorMsg = `Error checking existing file: ${err instanceof Error ? err.message : String(err)}`; - return { - llmContent: `Error checking existing file ${params.file_path}: ${errorMsg}`, - returnDisplay: `Error: ${errorMsg}`, - }; - } + const correctedContentResult = await this._getCorrectedFileContent( + params.file_path, + params.content, + ); + + if (correctedContentResult.error) { + const errDetails = correctedContentResult.error; + const errorMsg = `Error checking existing file: ${errDetails.message}`; + return { + llmContent: `Error checking existing file ${params.file_path}: ${errDetails.message}`, + returnDisplay: errorMsg, + }; } + const { + originalContent, + correctedContent: fileContent, + fileExists, + } = correctedContentResult; + // fileExists is true if the file existed (and was readable or unreadable but caught by readError). + // fileExists is false if the file did not exist (ENOENT). + const isNewFile = + !fileExists || + (correctedContentResult.error !== undefined && + !correctedContentResult.fileExists); + try { const dirName = path.dirname(params.file_path); if (!fs.existsSync(dirName)) { fs.mkdirSync(dirName, { recursive: true }); } - fs.writeFileSync(params.file_path, params.content, 'utf8'); + fs.writeFileSync(params.file_path, fileContent, 'utf8'); // Generate diff for display result const fileName = path.basename(params.file_path); + // If there was a readError, originalContent in correctedContentResult is '', + // but for the diff, we want to show the original content as it was before the write if possible. + // However, if it was unreadable, currentContentForDiff will be empty. + const currentContentForDiff = correctedContentResult.error + ? '' // Or some indicator of unreadable content + : originalContent; + const fileDiff = Diff.createPatch( fileName, - currentContent, // Empty if it was a new file - params.content, + currentContentForDiff, + fileContent, 'Original', 'Written', { context: 3 }, @@ -237,4 +272,58 @@ export class WriteFileTool extends BaseTool { }; } } + + private async _getCorrectedFileContent( + filePath: string, + proposedContent: string, + ): Promise { + let originalContent = ''; + let fileExists = false; + let correctedContent = proposedContent; + + try { + originalContent = fs.readFileSync(filePath, 'utf8'); + fileExists = true; // File exists and was read + } catch (err) { + if (isNodeError(err) && err.code === 'ENOENT') { + fileExists = false; + originalContent = ''; + } else { + // File exists but could not be read (permissions, etc.) + fileExists = true; // Mark as existing but problematic + originalContent = ''; // Can't use its content + const error = { + message: getErrorMessage(err), + code: isNodeError(err) ? err.code : undefined, + }; + // Return early as we can't proceed with content correction meaningfully + return { originalContent, correctedContent, fileExists, error }; + } + } + + // If readError is set, we have returned. + // So, file was either read successfully (fileExists=true, originalContent set) + // or it was ENOENT (fileExists=false, originalContent=''). + + if (fileExists) { + // This implies originalContent is available + const { params: correctedParams } = await ensureCorrectEdit( + originalContent, + { + old_string: originalContent, // Treat entire current content as old_string + new_string: proposedContent, + file_path: filePath, + }, + this.client, + ); + correctedContent = correctedParams.new_string; + } else { + // This implies new file (ENOENT) + correctedContent = await ensureCorrectFileContent( + proposedContent, + this.client, + ); + } + return { originalContent, correctedContent, fileExists }; + } } diff --git a/packages/server/src/utils/editCorrector.ts b/packages/server/src/utils/editCorrector.ts index 91aa85a3..92551478 100644 --- a/packages/server/src/utils/editCorrector.ts +++ b/packages/server/src/utils/editCorrector.ts @@ -28,6 +28,9 @@ const editCorrectionCache = new LruCache( MAX_CACHE_SIZE, ); +// Cache for ensureCorrectFileContent results +const fileContentCorrectionCache = new LruCache(MAX_CACHE_SIZE); + /** * Defines the structure of the parameters within CorrectedEditResult */ @@ -174,6 +177,27 @@ export async function ensureCorrectEdit( return result; } +export async function ensureCorrectFileContent( + content: string, + client: GeminiClient, +): Promise { + const cachedResult = fileContentCorrectionCache.get(content); + if (cachedResult) { + return cachedResult; + } + + const contentPotentiallyEscaped = + unescapeStringForGeminiBug(content) !== content; + if (!contentPotentiallyEscaped) { + fileContentCorrectionCache.set(content, content); + return content; + } + + const correctedContent = await correctStringEscaping(content, client); + fileContentCorrectionCache.set(content, correctedContent); + return correctedContent; +} + // Define the expected JSON schema for the LLM response for old_string correction const OLD_STRING_CORRECTION_SCHEMA: SchemaUnion = { type: Type.OBJECT, @@ -385,6 +409,66 @@ Return ONLY the corrected string in the specified JSON format with the key 'corr } } +const CORRECT_STRING_ESCAPING_SCHEMA: SchemaUnion = { + type: Type.OBJECT, + properties: { + corrected_string_escaping: { + type: Type.STRING, + description: + 'The string with corrected escaping, ensuring it is valid, specially considering potential over-escaping issues from previous LLM generations.', + }, + }, + required: ['corrected_string_escaping'], +}; + +export async function correctStringEscaping( + potentiallyProblematicString: string, + client: GeminiClient, +): Promise { + const prompt = ` +Context: An LLM has just generated potentially_problematic_string and the text might have been improperly escaped (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello"). + +potentially_problematic_string (this text MIGHT have bad escaping, or might be entirely correct): +\`\`\` +${potentiallyProblematicString} +\`\`\` + +Task: Analyze the potentially_problematic_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the text will be a valid and correctly interpreted. + +For example, if potentially_problematic_string is "bar\\nbaz", the corrected_new_string_escaping should be "bar\nbaz". +If potentially_problematic_string is console.log(\\"Hello World\\"), it should be console.log("Hello World"). + +Return ONLY the corrected string in the specified JSON format with the key 'corrected_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_string. + `.trim(); + + const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; + + try { + const result = await client.generateJson( + contents, + CORRECT_STRING_ESCAPING_SCHEMA, + EditModel, + EditConfig, + ); + + if ( + result && + typeof result.corrected_new_string_escaping === 'string' && + result.corrected_new_string_escaping.length > 0 + ) { + return result.corrected_new_string_escaping; + } else { + return potentiallyProblematicString; + } + } catch (error) { + console.error( + 'Error during LLM call for string escaping correction:', + error, + ); + return potentiallyProblematicString; + } +} + function trimPairIfPossible( target: string, trimIfTargetTrims: string, @@ -470,4 +554,5 @@ export function countOccurrences(str: string, substr: string): number { export function resetEditCorrectorCaches_TEST_ONLY() { editCorrectionCache.clear(); + fileContentCorrectionCache.clear(); }