Use structured prompt for compression. (#2747)

2025-06-30 17:04:33 -07:00 · 2025-06-30 17:04:33 -07:00 · 0ca5c07135
parent dbd626054f
commit 0ca5c07135
3 changed files with 103 additions and 55 deletions
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@ -22,7 +22,7 @@ import {
  ChatCompressionInfo,
 } from './turn.js';
 import { Config } from '../config/config.js';
-import { getCoreSystemPrompt } from './prompts.js';
+import { getCoreSystemPrompt, getCompressionPrompt } from './prompts.js';
 import { ReadManyFilesTool } from '../tools/read-many-files.js';
 import { getResponseText } from '../utils/generateContentResponseUtilities.js';
 import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
@ -171,7 +171,7 @@ export class GeminiClient {
    const toolRegistry = await this.config.getToolRegistry();
    const toolDeclarations = toolRegistry.getFunctionDeclarations();
    const tools: Tool[] = [{ functionDeclarations: toolDeclarations }];
-    const initialHistory: Content[] = [
+    const history: Content[] = [
      {
        role: 'user',
        parts: envParts,
@ -180,8 +180,8 @@ export class GeminiClient {
        role: 'model',
        parts: [{ text: 'Got it. Thanks for the context!' }],
      },
+      ...(extraHistory ?? []),
    ];
-    const history = initialHistory.concat(extraHistory ?? []);
    try {
      const userMemory = this.config.getUserMemory();
      const systemInstruction = getCoreSystemPrompt(userMemory);
@ -428,74 +428,61 @@ export class GeminiClient {
  async tryCompressChat(
    force: boolean = false,
  ): Promise<ChatCompressionInfo | null> {
-    const history = this.getChat().getHistory(true); // Get curated history
+    const curatedHistory = this.getChat().getHistory(true); // `true` requests the curated history

    // Regardless of `force`, don't do anything if the history is empty.
-    if (history.length === 0) {
+    if (curatedHistory.length === 0) {
      return null;
    }

-    const { totalTokens: originalTokenCount } =
+    let { totalTokens: originalTokenCount } =
      await this.getContentGenerator().countTokens({
        model: this.model,
-        contents: history,
+        contents: curatedHistory,
      });
-
-    // If not forced, check if we should compress based on context size.
-    if (!force) {
-      if (originalTokenCount === undefined) {
-        // If token count is undefined, we can't determine if we need to compress.
-        console.warn(
-          `Could not determine token count for model ${this.model}. Skipping compression check.`,
-        );
-        return null;
-      }
-      const tokenCount = originalTokenCount; // Now guaranteed to be a number
-
-      const limit = tokenLimit(this.model);
-      if (!limit) {
-        // If no limit is defined for the model, we can't compress.
-        console.warn(
-          `No token limit defined for model ${this.model}. Skipping compression check.`,
-        );
-        return null;
-      }
-
-      if (tokenCount < 0.95 * limit) {
-        return null;
-      }
+    if (originalTokenCount === undefined) {
+      console.warn(`Could not determine token count for model ${this.model}.`);
+      originalTokenCount = 0; // fall back to 0; a forced call still compresses and reports 0 as the original count
    }

-    const summarizationRequestMessage = {
-      text: 'Summarize our conversation up to this point. The summary should be a concise yet comprehensive overview of all key topics, questions, answers, and important details discussed. This summary will replace the current chat history to conserve tokens, so it must capture everything essential to understand the context and continue our conversation effectively as if no information was lost.',
-    };
-    const response = await this.getChat().sendMessage({
-      message: summarizationRequestMessage,
+    // Skip compression unless it is forced or the history has reached 95% of the model's token limit.
+    if (!force && originalTokenCount < 0.95 * tokenLimit(this.model)) {
+      return null;
+    }
+
+    const { text: summary } = await this.getChat().sendMessage({
+      message: {
+        text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
+      },
+      config: {
+        systemInstruction: { text: getCompressionPrompt() },
+      },
    });
-    const newHistory = [
+    this.chat = await this.startChat([ // replace the current chat with one seeded from the summary
      {
        role: 'user',
-        parts: [summarizationRequestMessage],
+        parts: [{ text: summary }],
      },
      {
        role: 'model',
-        parts: [{ text: response.text }],
+        parts: [{ text: 'Got it. Thanks for the additional context!' }],
      },
-    ];
-    this.chat = await this.startChat(newHistory);
-    const newTokenCount = (
+    ]);
+
+    const { totalTokens: newTokenCount } =
      await this.getContentGenerator().countTokens({
        model: this.model,
-        contents: newHistory,
-      })
-    ).totalTokens;
+        contents: this.getChat().getHistory(), // history of the chat started just above
+      });
+    if (newTokenCount === undefined) {
+      console.warn('Could not determine compressed history token count.');
+      return null; // no compression info is reported, although this.chat was already replaced above
+    }

-    return originalTokenCount && newTokenCount
-      ? {
-          originalTokenCount,
-          newTokenCount,
-        }
-      : null;
+    return {
+      originalTokenCount,
+      newTokenCount,
+    };
  }

  /**
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@ -72,10 +72,6 @@ function isValidContent(content: Content): boolean {
 * @throws Error if the history contains an invalid role.
 */
 function validateHistory(history: Content[]) {
-  // Empty history is valid.
-  if (history.length === 0) {
-    return;
-  }
  for (const content of history) {
    if (content.role !== 'user' && content.role !== 'model') {
      throw new Error(`Role must be user or model, but got ${content.role}.`);
--- a/packages/core/src/core/prompts.ts
+++ b/packages/core/src/core/prompts.ts
@ -271,3 +271,68 @@ Your core function is efficient and safe assistance. Balance extreme conciseness

  return `${basePrompt}${memorySuffix}`;
 }
+
+/**
+ * Returns the system prompt for history compression. It tells the model to
+ * reason in a private scratchpad and then emit a structured XML
+ * <state_snapshot>, the same tag the request in tryCompressChat asks for.
+ */
+export function getCompressionPrompt(): string {
+  return `
+You are the component that summarizes internal chat history into a given structure.
+
+When the conversation history grows too large, you will be invoked to distill the entire history into a concise, structured XML snapshot. This snapshot is CRITICAL, as it will become the agent's *only* memory of the past. The agent will resume its work based solely on this snapshot. All crucial details, plans, errors, and user directives MUST be preserved.
+
+First, you will think through the entire history in a private <scratchpad>. Review the user's overall goal, the agent's actions, tool outputs, file modifications, and any unresolved questions. Identify every piece of information that is essential for future actions.
+
+After your reasoning is complete, generate the final <state_snapshot> XML object. Be incredibly dense with information. Omit any irrelevant conversational filler.
+
+The structure MUST be as follows:
+
+<state_snapshot>
+    <overall_goal>
+        <!-- A single, concise sentence describing the user's high-level objective. -->
+        <!-- Example: "Refactor the authentication service to use a new JWT library." -->
+    </overall_goal>
+
+    <key_knowledge>
+        <!-- Crucial facts, conventions, and constraints the agent must remember based on the conversation history and interaction with the user. Use bullet points. -->
+        <!-- Example:
+         - Build Command: \`npm run build\`
+         - Testing: Tests are run with \`npm test\`. Test files must end in \`.test.ts\`.
+         - API Endpoint: The primary API endpoint is \`https://api.example.com/v2\`.
+         
+        -->
+    </key_knowledge>
+
+    <file_system_state>
+        <!-- List files that have been created, read, modified, or deleted. Note their status and critical learnings. -->
+        <!-- Example:
+         - CWD: \`/home/user/project/src\`
+         - READ: \`package.json\` - Confirmed 'axios' is a dependency.
+         - MODIFIED: \`services/auth.ts\` - Replaced 'jsonwebtoken' with 'jose'.
+         - CREATED: \`tests/new-feature.test.ts\` - Initial test structure for the new feature.
+        -->
+    </file_system_state>
+
+    <recent_actions>
+        <!-- A summary of the last few significant agent actions and their outcomes. Focus on facts. -->
+        <!-- Example:
+         - Ran \`grep 'old_function'\` which returned 3 results in 2 files.
+         - Ran \`npm run test\`, which failed due to a snapshot mismatch in \`UserProfile.test.ts\`.
+         - Ran \`ls -F static/\` and discovered image assets are stored as \`.webp\`.
+        -->
+    </recent_actions>
+
+    <current_plan>
+        <!-- The agent's step-by-step plan. Mark completed steps. -->
+        <!-- Example:
+         1. [DONE] Identify all files using the deprecated 'UserAPI'.
+         2. [IN PROGRESS] Refactor \`src/components/UserProfile.tsx\` to use the new 'ProfileAPI'.
+         3. [TODO] Refactor the remaining files.
+         4. [TODO] Update tests to reflect the API change.
+        -->
+    </current_plan>
+</state_snapshot>
+`.trim();
+}