refactor: Correct Gemini's over-escaped new_string in replace tool

- Implement a heuristic to detect when Gemini appears to have over-escaped `new_string` even though `old_string` is correctly formatted, and correct the escaping of `new_string` in that case.
- This improves the reliability of the replace tool when the model generates an incorrectly escaped replacement string.

Part of https://github.com/google-gemini/gemini-cli/issues/484
2025-05-25 13:26:58 -07:00 · 2025-05-25 13:26:58 -07:00 · 24da7b3ca6
parent fa4a04157f
commit 24da7b3ca6
1 changed file with 73 additions and 1 deletion
--- a/packages/server/src/utils/editCorrector.ts
+++ b/packages/server/src/utils/editCorrector.ts
@@ -62,7 +62,13 @@ export async function ensureCorrectEdit(
  let occurrences = countOccurrences(currentContent, finalOldString);

  if (occurrences === 1) {
-    return { params: originalParams, occurrences };
+    if (newStringPotentiallyEscaped) {
+      finalNewString = await correctNewStringEscaping(
+        client,
+        finalOldString,
+        originalParams.new_string,
+      );
+    }
  } else {
    // occurrences is 0 or some other unexpected state initially
    const unescapedOldStringAttempt = unescapeStringForGeminiBug(
@@ -274,6 +280,72 @@ Return ONLY the corrected string in the specified JSON format with the key 'corr
  }
 }

+const CORRECT_NEW_STRING_ESCAPING_SCHEMA: SchemaUnion = {
+  type: Type.OBJECT,
+  properties: {
+    corrected_new_string_escaping: {
+      type: Type.STRING,
+      description:
+        'The new_string with corrected escaping, ensuring it is a proper replacement for the old_string, especially considering potential over-escaping issues from previous LLM generations.',
+    },
+  },
+  required: ['corrected_new_string_escaping'],
+};
+
+export async function correctNewStringEscaping(
+  geminiClient: GeminiClient,
+  oldString: string,
+  potentiallyProblematicNewString: string,
+): Promise<string> {
+  const prompt = `
+Context: A text replacement operation is planned. The text to be replaced (old_string) has been correctly identified in the file. However, the replacement text (new_string) might have been improperly escaped by a previous LLM generation (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello").
+
+old_string (this is the exact text that will be replaced):
+\`\`\`
+${oldString}
+\`\`\`
+
+potentially_problematic_new_string (this is the text that should replace old_string, but MIGHT have bad escaping, or might be entirely correct):
+\`\`\`
+${potentiallyProblematicNewString}
+\`\`\`
+
+Task: Analyze the potentially_problematic_new_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the new_string, when inserted into the code, will be a valid and correctly interpreted.
+
+For example, if old_string is "foo" and potentially_problematic_new_string is "bar\\nbaz", the corrected_new_string_escaping should be "bar\nbaz".
+If potentially_problematic_new_string is console.log(\\"Hello World\\"), it should be console.log("Hello World").
+
+Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_new_string.
+  `.trim();
+
+  const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }];
+
+  try {
+    const result = await geminiClient.generateJson(
+      contents,
+      CORRECT_NEW_STRING_ESCAPING_SCHEMA,
+      EditModel,
+      EditConfig,
+    );
+
+    if (
+      result &&
+      typeof result.corrected_new_string_escaping === 'string' &&
+      result.corrected_new_string_escaping.length > 0
+    ) {
+      return result.corrected_new_string_escaping;
+    } else {
+      return potentiallyProblematicNewString;
+    }
+  } catch (error) {
+    console.error(
+      'Error during LLM call for new_string escaping correction:',
+      error,
+    );
+    return potentiallyProblematicNewString;
+  }
+}
+
 /**
 * Unescapes a string that might have been overly escaped by an LLM.
 */