feat: compress chat history when we approach token limit (#711)

Brandon Keiji 2025-06-03 18:54:11 +00:00 committed by GitHub
parent 080af01715
commit fa5b616a10
2 changed files with 79 additions and 0 deletions
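
With this change, GeminiClient counts the tokens in the curated chat history before running each turn. Once the count reaches 95% of the model's token limit, it asks the model to summarize the conversation so far and restarts the chat with that summary standing in for the full history.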

@@ -26,6 +26,7 @@ import { reportError } from '../utils/errorReporting.js';
import { GeminiChat } from './geminiChat.js';
import { retryWithBackoff } from '../utils/retry.js';
import { getErrorMessage } from '../utils/errors.js';
import { tokenLimit } from './tokenLimits.js';

export class GeminiClient {
  private chat: Promise<GeminiChat>;
@@ -172,6 +173,7 @@ export class GeminiClient {
      return;
    }

    await this.tryCompressChat();
    const chat = await this.chat;
    const turn = new Turn(chat);
    const resultStream = turn.run(request, signal);
@@ -322,4 +324,53 @@ export class GeminiClient {
      );
    }
  }

  private async tryCompressChat(): Promise<void> {
    const chat = await this.chat;
    const history = chat.getHistory(true); // Get curated history

    // Count tokens using the models module from the GoogleGenAI client instance
    const { totalTokens } = await this.client.models.countTokens({
      model: this.model,
      contents: history,
    });

    if (totalTokens === undefined) {
      // If token count is undefined, we can't determine if we need to compress.
      console.warn(
        `Could not determine token count for model ${this.model}. Skipping compression check.`,
      );
      return;
    }
    const tokenCount = totalTokens; // Now guaranteed to be a number

    const limit = tokenLimit(this.model);
    if (!limit) {
      // If no limit is defined for the model, we can't compress.
      console.warn(
        `No token limit defined for model ${this.model}. Skipping compression check.`,
      );
      return;
    }

    // Only compress once the history reaches 95% of the model's token limit.
    if (tokenCount < 0.95 * limit) {
      return;
    }

    const summarizationRequestMessage = {
      text: 'Summarize our conversation up to this point. The summary should be a concise yet comprehensive overview of all key topics, questions, answers, and important details discussed. This summary will replace the current chat history to conserve tokens, so it must capture everything essential to understand the context and continue our conversation effectively as if no information was lost.',
    };
    const response = await chat.sendMessage({
      message: summarizationRequestMessage,
    });

    // Restart the chat seeded with the summary as a two-turn history.
    this.chat = this.startChat([
      {
        role: 'user',
        parts: [summarizationRequestMessage],
      },
      {
        role: 'model',
        parts: [{ text: response.text }],
      },
    ]);
  }
}
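
For reference, a minimal sketch of the threshold math tryCompressChat applies, assuming the tokenLimit helper from the new module below; shouldCompress is a hypothetical name used only for illustration, not part of the commit:

import { tokenLimit } from './tokenLimits.js';

// Hypothetical helper mirroring the 95% check in tryCompressChat.
function shouldCompress(model: string, tokenCount: number): boolean {
  return tokenCount >= 0.95 * tokenLimit(model);
}

// gemini-2.0-flash has a 1_048_576-token limit and 0.95 * 1_048_576 = 996_147.2,
// so compression kicks in once the curated history reaches 996_148 tokens.
console.log(shouldCompress('gemini-2.0-flash', 996_148)); // true
console.log(shouldCompress('gemini-2.0-flash', 500_000)); // false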

@@ -0,0 +1,28 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

type Model = string;
type TokenCount = number;

export const DEFAULT_TOKEN_LIMIT = 1_048_576;

export function tokenLimit(model: Model): TokenCount {
  // Add other models as they become relevant or if specified by config
  // Pulled from https://ai.google.dev/gemini-api/docs/models
  switch (model) {
    case 'gemini-1.5-pro':
      return 2_097_152;
    case 'gemini-1.5-flash':
    case 'gemini-2.5-pro-preview-05-06':
    case 'gemini-2.5-flash-preview-05-20':
    case 'gemini-2.0-flash':
      return 1_048_576;
    case 'gemini-2.0-flash-preview-image-generation':
      return 32_000;
    default:
      return DEFAULT_TOKEN_LIMIT;
  }
}
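
A quick usage sketch of the new module: listed models get the limits published in the Gemini API docs, and any other model string (the one below is made up) falls back to DEFAULT_TOKEN_LIMIT:

import { tokenLimit, DEFAULT_TOKEN_LIMIT } from './tokenLimits.js';

console.log(tokenLimit('gemini-1.5-pro')); // 2097152
console.log(tokenLimit('gemini-2.0-flash-preview-image-generation')); // 32000
console.log(tokenLimit('some-unlisted-model') === DEFAULT_TOKEN_LIMIT); // true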