From fa5b616a10497802f0af5b6c9534eef66fff7159 Mon Sep 17 00:00:00 2001
From: Brandon Keiji
Date: Tue, 3 Jun 2025 18:54:11 +0000
Subject: [PATCH] feat: compress chat history when we approach token limit (#711)

---
 packages/core/src/core/client.ts      | 51 +++++++++++++++++++++++++++
 packages/core/src/core/tokenLimits.ts | 28 +++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 packages/core/src/core/tokenLimits.ts

diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 311f53bf..fa093b30 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -26,6 +26,7 @@ import { reportError } from '../utils/errorReporting.js';
 import { GeminiChat } from './geminiChat.js';
 import { retryWithBackoff } from '../utils/retry.js';
 import { getErrorMessage } from '../utils/errors.js';
+import { tokenLimit } from './tokenLimits.js';
 
 export class GeminiClient {
   private chat: Promise<GeminiChat>;
@@ -172,6 +173,7 @@
       return;
     }
 
+    await this.tryCompressChat();
     const chat = await this.chat;
     const turn = new Turn(chat);
     const resultStream = turn.run(request, signal);
@@ -322,4 +324,53 @@
       );
     }
   }
+
+  private async tryCompressChat(): Promise<void> {
+    const chat = await this.chat;
+    const history = chat.getHistory(true); // Get curated history
+
+    // Count tokens using the models module from the GoogleGenAI client instance
+    const { totalTokens } = await this.client.models.countTokens({
+      model: this.model,
+      contents: history,
+    });
+
+    if (totalTokens === undefined) {
+      // If token count is undefined, we can't determine if we need to compress.
+      console.warn(
+        `Could not determine token count for model ${this.model}. Skipping compression check.`,
+      );
+      return;
+    }
+    const tokenCount = totalTokens; // Now guaranteed to be a number
+
+    const limit = tokenLimit(this.model);
+    if (!limit) {
+      // If no limit is defined for the model, we can't compress.
+      console.warn(
+        `No token limit defined for model ${this.model}. Skipping compression check.`,
+      );
+      return;
+    }
+
+    if (tokenCount < 0.95 * limit) {
+      return;
+    }
+    const summarizationRequestMessage = {
+      text: 'Summarize our conversation up to this point. The summary should be a concise yet comprehensive overview of all key topics, questions, answers, and important details discussed. This summary will replace the current chat history to conserve tokens, so it must capture everything essential to understand the context and continue our conversation effectively as if no information was lost.',
+    };
+    const response = await chat.sendMessage({
+      message: summarizationRequestMessage,
+    });
+    this.chat = this.startChat([
+      {
+        role: 'user',
+        parts: [summarizationRequestMessage],
+      },
+      {
+        role: 'model',
+        parts: [{ text: response.text }],
+      },
+    ]);
+  }
 }
diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts
new file mode 100644
index 00000000..56882c21
--- /dev/null
+++ b/packages/core/src/core/tokenLimits.ts
@@ -0,0 +1,28 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+type Model = string;
+type TokenCount = number;
+
+export const DEFAULT_TOKEN_LIMIT = 1_048_576;
+
+export function tokenLimit(model: Model): TokenCount {
+  // Add other models as they become relevant or if specified by config
+  // Pulled from https://ai.google.dev/gemini-api/docs/models
+  switch (model) {
+    case 'gemini-1.5-pro':
+      return 2_097_152;
+    case 'gemini-1.5-flash':
+    case 'gemini-2.5-pro-preview-05-06':
+    case 'gemini-2.5-flash-preview-05-20':
+    case 'gemini-2.0-flash':
+      return 1_048_576;
+    case 'gemini-2.0-flash-preview-image-generation':
+      return 32_000;
+    default:
+      return DEFAULT_TOKEN_LIMIT;
+  }
+}
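
Note: the trigger condition this patch adds is easy to restate in isolation: compression only runs once the curated history reaches 95% of the model's context window. The helper below is a hypothetical sketch (shouldCompress is not part of the diff) that mirrors the arithmetic in tryCompressChat:

import { tokenLimit } from './tokenLimits.js';

// Hypothetical helper restating the check in tryCompressChat:
// compression fires once tokenCount reaches 95% of the model's limit.
export function shouldCompress(model: string, tokenCount: number): boolean {
  return tokenCount >= 0.95 * tokenLimit(model);
}

// For gemini-2.0-flash (1,048,576-token window) the threshold is
// 0.95 * 1_048_576 = 996,147.2 tokens:
//   shouldCompress('gemini-2.0-flash', 1_000_000); // true  -> summarize, restart chat
//   shouldCompress('gemini-2.0-flash', 900_000);   // false -> history left untouched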
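
A minimal test sketch for the new tokenLimits module could look like the following, assuming the package's Vitest setup (the file name tokenLimits.test.ts is hypothetical; no tests ship with this patch):

import { describe, expect, it } from 'vitest';
import { DEFAULT_TOKEN_LIMIT, tokenLimit } from './tokenLimits.js';

describe('tokenLimit', () => {
  it('returns the documented window for known models', () => {
    expect(tokenLimit('gemini-1.5-pro')).toBe(2_097_152);
    expect(tokenLimit('gemini-2.0-flash')).toBe(1_048_576);
    expect(tokenLimit('gemini-2.0-flash-preview-image-generation')).toBe(32_000);
  });

  it('falls back to DEFAULT_TOKEN_LIMIT for unknown models', () => {
    expect(tokenLimit('some-unlisted-model')).toBe(DEFAULT_TOKEN_LIMIT);
  });
});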