feat: compress chat history when we approach token limit (#711)
This commit is contained in:
parent 080af01715
commit fa5b616a10
@@ -26,6 +26,7 @@ import { reportError } from '../utils/errorReporting.js';
 import { GeminiChat } from './geminiChat.js';
 import { retryWithBackoff } from '../utils/retry.js';
 import { getErrorMessage } from '../utils/errors.js';
+import { tokenLimit } from './tokenLimits.js';
 
 export class GeminiClient {
   private chat: Promise<GeminiChat>;
@@ -172,6 +173,7 @@ export class GeminiClient {
       return;
     }
 
+    await this.tryCompressChat();
     const chat = await this.chat;
     const turn = new Turn(chat);
     const resultStream = turn.run(request, signal);
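The one-line call above has an ordering subtlety: tryCompressChat() may replace this.chat with a brand-new session promise, so the surrounding method must resolve this.chat only after the compression step has been awaited. A minimal runnable sketch of that pattern (FakeChat, ClientSketch, and send() are hypothetical stand-ins, not the real GeminiChat API):

class FakeChat {
  // Hypothetical stand-in for GeminiChat; only history matters here.
  constructor(public history: string[]) {}
}

class ClientSketch {
  private chat: Promise<FakeChat> = Promise.resolve(
    new FakeChat(['q1', 'a1', 'q2', 'a2']),
  );

  // Mirrors the diff: compression may swap this.chat out entirely.
  private async tryCompressChat(): Promise<void> {
    const old = await this.chat;
    if (old.history.length > 2) {
      // Seed a fresh session with a summary instead of the full history.
      this.chat = Promise.resolve(new FakeChat(['summary of q1..a2']));
    }
  }

  // Mirrors the new call order: compress first, then resolve this.chat
  // so the turn runs against the (possibly new) session.
  async send(): Promise<FakeChat> {
    await this.tryCompressChat();
    return this.chat;
  }
}

new ClientSketch().send().then((chat) => console.log(chat.history));
// -> ['summary of q1..a2']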
@@ -322,4 +324,53 @@ export class GeminiClient {
       );
     }
   }
+
+  private async tryCompressChat(): Promise<void> {
+    const chat = await this.chat;
+    const history = chat.getHistory(true); // Get curated history
+
+    // Count tokens using the models module from the GoogleGenAI client instance
+    const { totalTokens } = await this.client.models.countTokens({
+      model: this.model,
+      contents: history,
+    });
+
+    if (totalTokens === undefined) {
+      // If token count is undefined, we can't determine if we need to compress.
+      console.warn(
+        `Could not determine token count for model ${this.model}. Skipping compression check.`,
+      );
+      return;
+    }
+    const tokenCount = totalTokens; // Now guaranteed to be a number
+
+    const limit = tokenLimit(this.model);
+    if (!limit) {
+      // If no limit is defined for the model, we can't compress.
+      console.warn(
+        `No token limit defined for model ${this.model}. Skipping compression check.`,
+      );
+      return;
+    }
+
+    if (tokenCount < 0.95 * limit) {
+      return;
+    }
+    const summarizationRequestMessage = {
+      text: 'Summarize our conversation up to this point. The summary should be a concise yet comprehensive overview of all key topics, questions, answers, and important details discussed. This summary will replace the current chat history to conserve tokens, so it must capture everything essential to understand the context and continue our conversation effectively as if no information was lost.',
+    };
+    const response = await chat.sendMessage({
+      message: summarizationRequestMessage,
+    });
+    this.chat = this.startChat([
+      {
+        role: 'user',
+        parts: [summarizationRequestMessage],
+      },
+      {
+        role: 'model',
+        parts: [{ text: response.text }],
+      },
+    ]);
+  }
 }
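To make the 95% trigger concrete: for gemini-2.0-flash the limit defined below is 1,048,576 tokens, so compression fires once the curated history counts at 0.95 × 1,048,576 = 996,147.2 tokens or more. A small sketch of just that check, with the countTokens result replaced by a hard-coded number for illustration:

import { tokenLimit } from './tokenLimits.js';

// Pretend the countTokens call returned this; a real run asks the API.
const totalTokens = 1_000_000;

const limit = tokenLimit('gemini-2.0-flash'); // 1_048_576
const threshold = 0.95 * limit; // 996_147.2

if (totalTokens >= threshold) {
  // This is the branch where tryCompressChat summarizes and restarts the chat.
  console.log(`would compress: ${totalTokens} >= ${threshold}`);
}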
New file: tokenLimits.ts (name inferred from the './tokenLimits.js' import above)

@@ -0,0 +1,28 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+type Model = string;
+type TokenCount = number;
+
+export const DEFAULT_TOKEN_LIMIT = 1_048_576;
+
+export function tokenLimit(model: Model): TokenCount {
+  // Add other models as they become relevant or if specified by config
+  // Pulled from https://ai.google.dev/gemini-api/docs/models
+  switch (model) {
+    case 'gemini-1.5-pro':
+      return 2_097_152;
+    case 'gemini-1.5-flash':
+    case 'gemini-2.5-pro-preview-05-06':
+    case 'gemini-2.5-flash-preview-05-20':
+    case 'gemini-2.0-flash':
+      return 1_048_576;
+    case 'gemini-2.0-flash-preview-image-generation':
+      return 32_000;
+    default:
+      return DEFAULT_TOKEN_LIMIT;
+  }
+}
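Every branch of the switch, including default, returns a positive number, so tokenLimit() is never falsy; the if (!limit) guard in tryCompressChat above is therefore purely defensive today. A quick usage sketch:

import { DEFAULT_TOKEN_LIMIT, tokenLimit } from './tokenLimits.js';

console.log(tokenLimit('gemini-1.5-pro')); // 2_097_152
console.log(tokenLimit('gemini-2.0-flash-preview-image-generation')); // 32_000

// Unknown models fall back to the 1M-token default rather than failing,
// which keeps the compression check usable for models added later.
console.log(tokenLimit('some-future-model') === DEFAULT_TOKEN_LIMIT); // true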