diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index c50d4c43..64e39e68 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -356,6 +356,21 @@ export const useGeminiStream = (
     [addItem, pendingHistoryItemRef, setPendingHistoryItem],
   );
 
+  // Adds an info item to the history telling the user that the conversation
+  // neared the model's input token limit and that later messages will be sent
+  // with a compressed context.
+  const handleChatCompressionEvent = useCallback(
+    () =>
+      addItem(
+        {
+          type: 'info',
+          text: `IMPORTANT: this conversation approached the input token limit for ${config.getModel()}. We'll send a compressed context to the model for any future messages.`,
+        },
+        Date.now(),
+      ),
+    [addItem, config],
+  );
+
   const processGeminiStreamEvents = useCallback(
     async (
       stream: AsyncIterable,
@@ -364,20 +379,37 @@ export const useGeminiStream = (
       let geminiMessageBuffer = '';
       const toolCallRequests: ToolCallRequestInfo[] = [];
       for await (const event of stream) {
-        if (event.type === ServerGeminiEventType.Content) {
-          geminiMessageBuffer = handleContentEvent(
-            event.value,
-            geminiMessageBuffer,
-            userMessageTimestamp,
-          );
-        } else if (event.type === ServerGeminiEventType.ToolCallRequest) {
-          toolCallRequests.push(event.value);
-        } else if (event.type === ServerGeminiEventType.UserCancelled) {
-          handleUserCancelledEvent(userMessageTimestamp);
-          return StreamProcessingStatus.UserCancelled;
-        } else if (event.type === ServerGeminiEventType.Error) {
-          handleErrorEvent(event.value, userMessageTimestamp);
-          return StreamProcessingStatus.Error;
+        switch (event.type) {
+          case ServerGeminiEventType.Content:
+            geminiMessageBuffer = handleContentEvent(
+              event.value,
+              geminiMessageBuffer,
+              userMessageTimestamp,
+            );
+            break;
+          case ServerGeminiEventType.ToolCallRequest:
+            toolCallRequests.push(event.value);
+            break;
+          case ServerGeminiEventType.UserCancelled:
+            handleUserCancelledEvent(userMessageTimestamp);
+            // Stop reading the stream and report the status, as before.
+            return StreamProcessingStatus.UserCancelled;
+          case ServerGeminiEventType.Error:
+            handleErrorEvent(event.value, userMessageTimestamp);
+            // Stop reading the stream and report the status, as before.
+            return StreamProcessingStatus.Error;
+          case ServerGeminiEventType.ChatCompressed:
+            handleChatCompressionEvent();
+            break;
+          case ServerGeminiEventType.ToolCallConfirmation:
+          case ServerGeminiEventType.ToolCallResponse:
+            // do nothing
+            break;
+          default: {
+            // enforces exhaustive switch-case
+            const unreachable: never = event;
+            return unreachable;
+          }
         }
       }
       if (toolCallRequests.length > 0) {
@@ -390,6 +422,7 @@ export const useGeminiStream = (
       handleUserCancelledEvent,
       handleErrorEvent,
       scheduleToolCalls,
+      handleChatCompressionEvent,
     ],
   );
 
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index fa093b30..d795f1d2 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -16,7 +16,7 @@ import {
 } from '@google/genai';
 import process from 'node:process';
 import { getFolderStructure } from '../utils/getFolderStructure.js';
-import { Turn, ServerGeminiStreamEvent } from './turn.js';
+import { Turn, ServerGeminiStreamEvent, GeminiEventType } from './turn.js';
 import { Config } from '../config/config.js';
 import { getCoreSystemPrompt } from './prompts.js';
 import { ReadManyFilesTool } from '../tools/read-many-files.js';
@@ -173,7 +173,10 @@ export class GeminiClient {
       return;
     }
 
-    await this.tryCompressChat();
+    const compressed = await this.tryCompressChat();
+    if (compressed) {
+      yield { type: GeminiEventType.ChatCompressed };
+    }
     const chat = await this.chat;
     const turn = new Turn(chat);
     const resultStream = turn.run(request, signal);
@@ -325,7 +328,15 @@ export class GeminiClient {
     }
   }
 
+  /**
+   * Checks the chat's token count against the model's limit and, once it
+   * reaches 95% of that limit, asks the model for a summary to stand in for
+   * the history.
+   *
+   * @returns `true` when the history was compressed; `false` when the check
+   *   was skipped or the count is still below the threshold.
+   */
-  private async tryCompressChat(): Promise<void> {
+  private async tryCompressChat(): Promise<boolean> {
     const chat = await this.chat;
     const history = chat.getHistory(true); // Get curated history
 
@@ -340,7 +351,7 @@ export class GeminiClient {
       console.warn(
         `Could not determine token count for model ${this.model}. Skipping compression check.`,
       );
-      return;
+      return false;
     }
 
     const tokenCount = totalTokens; // Now guaranteed to be a number
@@ -350,11 +361,11 @@ export class GeminiClient {
       console.warn(
         `No token limit defined for model ${this.model}. Skipping compression check.`,
       );
-      return;
+      return false;
     }
 
     if (tokenCount < 0.95 * limit) {
-      return;
+      return false;
     }
     const summarizationRequestMessage = {
       text: 'Summarize our conversation up to this point. The summary should be a concise yet comprehensive overview of all key topics, questions, answers, and important details discussed. This summary will replace the current chat history to conserve tokens, so it must capture everything essential to understand the context and continue our conversation effectively as if no information was lost.',
@@ -372,5 +383,7 @@ export class GeminiClient {
         parts: [{ text: response.text }],
       },
     ]);
+
+    return true;
   }
 }
diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts
index 22b01cce..637fc19d 100644
--- a/packages/core/src/core/turn.ts
+++ b/packages/core/src/core/turn.ts
@@ -42,6 +42,7 @@ export enum GeminiEventType {
   ToolCallConfirmation = 'tool_call_confirmation',
   UserCancelled = 'user_cancelled',
   Error = 'error',
+  ChatCompressed = 'chat_compressed',
 }
 
 export interface GeminiErrorEventValue {
@@ -95,6 +96,11 @@ export type ServerGeminiErrorEvent = {
   value: GeminiErrorEventValue;
 };
 
+// Emitted when the chat history was compressed before the turn ran.
+export type ServerGeminiChatCompressedEvent = {
+  type: GeminiEventType.ChatCompressed;
+};
+
 // The original union type, now composed of the individual types
 export type ServerGeminiStreamEvent =
   | ServerGeminiContentEvent
@@ -102,7 +108,8 @@ export type ServerGeminiStreamEvent =
   | ServerGeminiToolCallResponseEvent
   | ServerGeminiToolCallConfirmationEvent
   | ServerGeminiUserCancelledEvent
-  | ServerGeminiErrorEvent;
+  | ServerGeminiErrorEvent
+  | ServerGeminiChatCompressedEvent;
 
 // A turn manages the agentic loop turn within the server context.
 export class Turn {