From d5c6bb9740a52d87b71d812e698d0e88abf10caa Mon Sep 17 00:00:00 2001 From: Jacob MacDonald Date: Fri, 13 Jun 2025 21:21:40 -0700 Subject: [PATCH] Add `/compress` command to force a compression of the context (#986) Related to https://b.corp.google.com/issues/423605555 - I figured this might be a simpler solution to start with, while still also being useful on its own even if we do implement that. --- docs/cli/commands.md | 6 ++ packages/cli/src/ui/App.tsx | 36 +++++---- .../src/ui/components/HistoryItemDisplay.tsx | 4 + .../messages/CompressionMessage.tsx | 48 ++++++++++++ .../ui/hooks/slashCommandProcessor.test.ts | 39 ++++++++++ .../cli/src/ui/hooks/slashCommandProcessor.ts | 68 ++++++++++++++++- packages/cli/src/ui/hooks/useGeminiStream.ts | 10 ++- packages/cli/src/ui/types.ts | 20 ++++- packages/core/src/core/client.ts | 73 ++++++++++++------- packages/core/src/core/turn.ts | 6 ++ 10 files changed, 267 insertions(+), 43 deletions(-) create mode 100644 packages/cli/src/ui/components/messages/CompressionMessage.tsx diff --git a/docs/cli/commands.md b/docs/cli/commands.md index f9e229a0..d5d8bc18 100644 --- a/docs/cli/commands.md +++ b/docs/cli/commands.md @@ -58,9 +58,15 @@ Slash commands provide meta-level control over the CLI itself. They can typicall - **Action:** Terminates the CLI process. - **`/tools`** + - **Description:** Displays a list of all the tools that are currently available to the model. - **Action:** Outputs a list of the available tools. +- **`/compress`** + + - **Description:** Compresses the current context. This will save on tokens used for future tasks while retaining a high-level summary of what has happened. + - **Action:** Replaces the entire chat context with a summary. + ## At Commands (`@`) At commands are used to quickly include the content of files or directories as part of your prompt to Gemini. These commands now feature git-aware filtering. 
diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx index 7d8ef463..40935f93 100644 --- a/packages/cli/src/ui/App.tsx +++ b/packages/cli/src/ui/App.tsx @@ -170,7 +170,11 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { } }, [config, addItem]); - const { handleSlashCommand, slashCommands } = useSlashCommandProcessor( + const { + handleSlashCommand, + slashCommands, + pendingHistoryItems: pendingSlashCommandHistoryItems, + } = useSlashCommandProcessor( config, history, addItem, @@ -186,6 +190,7 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { showToolDescriptions, setQuittingMessages, ); + const pendingHistoryItems = [...pendingSlashCommandHistoryItems]; const { rows: terminalHeight, columns: terminalWidth } = useTerminalSize(); const { stdin, setRawMode } = useStdin(); @@ -286,18 +291,23 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { return editorType as EditorType; }, [settings, openEditorDialog]); - const { streamingState, submitQuery, initError, pendingHistoryItems } = - useGeminiStream( - config.getGeminiClient(), - history, - addItem, - setShowHelp, - config, - setDebugMessage, - handleSlashCommand, - shellModeActive, - getPreferredEditor, - ); + const { + streamingState, + submitQuery, + initError, + pendingHistoryItems: pendingGeminiHistoryItems, + } = useGeminiStream( + config.getGeminiClient(), + history, + addItem, + setShowHelp, + config, + setDebugMessage, + handleSlashCommand, + shellModeActive, + getPreferredEditor, + ); + pendingHistoryItems.push(...pendingGeminiHistoryItems); const { elapsedTime, currentLoadingPhrase } = useLoadingIndicator(streamingState); const showAutoAcceptIndicator = useAutoAcceptIndicator({ config }); diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx index fc1b128d..d99ad503 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx +++ 
b/packages/cli/src/ui/components/HistoryItemDisplay.tsx @@ -13,6 +13,7 @@ import { InfoMessage } from './messages/InfoMessage.js'; import { ErrorMessage } from './messages/ErrorMessage.js'; import { ToolGroupMessage } from './messages/ToolGroupMessage.js'; import { GeminiMessageContent } from './messages/GeminiMessageContent.js'; +import { CompressionMessage } from './messages/CompressionMessage.js'; import { Box } from 'ink'; import { AboutBox } from './AboutBox.js'; import { StatsDisplay } from './StatsDisplay.js'; @@ -81,5 +82,8 @@ export const HistoryItemDisplay: React.FC = ({ isFocused={isFocused} /> )} + {item.type === 'compression' && ( + + )} ); diff --git a/packages/cli/src/ui/components/messages/CompressionMessage.tsx b/packages/cli/src/ui/components/messages/CompressionMessage.tsx new file mode 100644 index 00000000..aaa56149 --- /dev/null +++ b/packages/cli/src/ui/components/messages/CompressionMessage.tsx @@ -0,0 +1,48 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import React from 'react'; +import { Box, Text } from 'ink'; +import { CompressionProps } from '../../types.js'; +import Spinner from 'ink-spinner'; +import { Colors } from '../../colors.js'; + +export interface CompressionDisplayProps { + compression: CompressionProps; +} + +/* + * Compression messages appear when the /compress command is run, and show a loading spinner + * while compression is in progress, followed by some compression stats. + */ +export const CompressionMessage: React.FC = ({ + compression, +}) => { + const text = compression.isPending + ? 'Compressing chat history' + : `Chat history compressed from ${compression.originalTokenCount} to ${compression.newTokenCount} tokens.`; + + return ( + + + {compression.isPending ? 
( + + ) : ( + + )} + + + + {text} + + + + ); +}; diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts b/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts index c2873bd6..73669651 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts @@ -62,6 +62,7 @@ import { getMCPServerStatus, MCPDiscoveryState, getMCPDiscoveryState, + GeminiClient, } from '@gemini-cli/core'; import { useSessionStats } from '../contexts/SessionContext.js'; @@ -100,6 +101,8 @@ describe('useSlashCommandProcessor', () => { let mockOpenEditorDialog: ReturnType; let mockPerformMemoryRefresh: ReturnType; let mockSetQuittingMessages: ReturnType; + let mockTryCompressChat: ReturnType; + let mockGeminiClient: GeminiClient; let mockConfig: Config; let mockCorgiMode: ReturnType; const mockUseSessionStats = useSessionStats as Mock; @@ -115,8 +118,13 @@ describe('useSlashCommandProcessor', () => { mockOpenEditorDialog = vi.fn(); mockPerformMemoryRefresh = vi.fn().mockResolvedValue(undefined); mockSetQuittingMessages = vi.fn(); + mockTryCompressChat = vi.fn(); + mockGeminiClient = { + tryCompressChat: mockTryCompressChat, + } as unknown as GeminiClient; mockConfig = { getDebugMode: vi.fn(() => false), + getGeminiClient: () => mockGeminiClient, getSandbox: vi.fn(() => 'test-sandbox'), getModel: vi.fn(() => 'test-model'), getProjectRoot: vi.fn(() => '/test/dir'), @@ -944,4 +952,35 @@ Add any other context about the problem here. expect(commandResult).toBe(true); }); }); + + describe('/compress command', () => { + it('should call tryCompressChat(true)', async () => { + const { handleSlashCommand } = getProcessor(); + mockTryCompressChat.mockImplementationOnce(async (force?: boolean) => { + // TODO: Check that we have a pending compression item in the history. 
+ expect(force).toBe(true); + return { + originalTokenCount: 100, + newTokenCount: 50, + }; + }); + + await act(async () => { + handleSlashCommand('/compress'); + }); + expect(mockGeminiClient.tryCompressChat).toHaveBeenCalledWith(true); + expect(mockAddItem).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + type: MessageType.COMPRESSION, + compression: { + isPending: false, + originalTokenCount: 100, + newTokenCount: 50, + }, + }), + expect.any(Number), + ); + }); + }); }); diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.ts b/packages/cli/src/ui/hooks/slashCommandProcessor.ts index 861d7bd9..97374e4f 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.ts +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.ts @@ -9,6 +9,7 @@ import { type PartListUnion } from '@google/genai'; import open from 'open'; import process from 'node:process'; import { UseHistoryManagerReturn } from './useHistoryManager.js'; +import { useStateAndRef } from './useStateAndRef.js'; import { Config, GitService, @@ -80,6 +81,13 @@ export const useSlashCommandProcessor = ( return new GitService(config.getProjectRoot()); }, [config]); + const pendingHistoryItems: HistoryItemWithoutId[] = []; + const [pendingCompressionItemRef, setPendingCompressionItem] = + useStateAndRef(null); + if (pendingCompressionItemRef.current != null) { + pendingHistoryItems.push(pendingCompressionItemRef.current); + } + const addMessage = useCallback( (message: Message) => { // Convert Message to HistoryItemWithoutId @@ -105,6 +113,11 @@ export const useSlashCommandProcessor = ( stats: message.stats, duration: message.duration, }; + } else if (message.type === MessageType.COMPRESSION) { + historyItemContent = { + type: 'compression', + compression: message.compression, + }; } else { historyItemContent = { type: message.type as @@ -641,6 +654,57 @@ Add any other context about the problem here. 
}, 100); }, }, + { + name: 'compress', + altName: 'summarize', + description: 'Compresses the context by replacing it with a summary.', + action: async (_mainCommand, _subCommand, _args) => { + if (pendingCompressionItemRef.current !== null) { + addMessage({ + type: MessageType.ERROR, + content: + 'Already compressing, wait for previous request to complete', + timestamp: new Date(), + }); + return; + } + setPendingCompressionItem({ + type: MessageType.COMPRESSION, + compression: { + isPending: true, + }, + }); + try { + const compressed = await config! + .getGeminiClient()! + .tryCompressChat(true); + if (compressed) { + addMessage({ + type: MessageType.COMPRESSION, + compression: { + isPending: false, + originalTokenCount: compressed.originalTokenCount, + newTokenCount: compressed.newTokenCount, + }, + timestamp: new Date(), + }); + } else { + addMessage({ + type: MessageType.ERROR, + content: 'Failed to compress chat history.', + timestamp: new Date(), + }); + } + } catch (e) { + addMessage({ + type: MessageType.ERROR, + content: `Failed to compress chat history: ${e instanceof Error ? e.message : String(e)}`, + timestamp: new Date(), + }); + } + setPendingCompressionItem(null); + }, + }, ]; if (config?.getCheckpointEnabled()) { @@ -767,6 +831,8 @@ Add any other context about the problem here. loadHistory, addItem, setQuittingMessages, + pendingCompressionItemRef, + setPendingCompressionItem, ]); const handleSlashCommand = useCallback( @@ -830,5 +896,5 @@ Add any other context about the problem here. 
[addItem, slashCommands, addMessage], ); - return { handleSlashCommand, slashCommands }; + return { handleSlashCommand, slashCommands, pendingHistoryItems }; }; diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 920ec490..bff38a2b 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -13,6 +13,7 @@ import { ServerGeminiStreamEvent as GeminiEvent, ServerGeminiContentEvent as ContentEvent, ServerGeminiErrorEvent as ErrorEvent, + ServerGeminiChatCompressedEvent, getErrorMessage, isNodeError, MessageSenderType, @@ -368,11 +369,14 @@ export const useGeminiStream = ( ); const handleChatCompressionEvent = useCallback( - () => + (eventValue: ServerGeminiChatCompressedEvent['value']) => addItem( { type: 'info', - text: `IMPORTANT: this conversation approached the input token limit for ${config.getModel()}. We'll send a compressed context to the model for any future messages.`, + text: + `IMPORTANT: This conversation approached the input token limit for ${config.getModel()}. 
` + + `A compressed context will be sent for future messages (compressed from: ` + + `${eventValue.originalTokenCount} to ${eventValue.newTokenCount} tokens).`, }, Date.now(), ), @@ -406,7 +410,7 @@ export const useGeminiStream = ( handleErrorEvent(event.value, userMessageTimestamp); break; case ServerGeminiEventType.ChatCompressed: - handleChatCompressionEvent(); + handleChatCompressionEvent(event.value); break; case ServerGeminiEventType.UsageMetadata: addUsage(event.value); diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index 728b3476..3c0ec616 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -53,6 +53,12 @@ export interface IndividualToolCallDisplay { renderOutputAsMarkdown?: boolean; } +export interface CompressionProps { + isPending: boolean; + originalTokenCount?: number; + newTokenCount?: number; +} + export interface HistoryItemBase { text?: string; // Text content for user/gemini/info/error messages } @@ -113,6 +119,11 @@ export type HistoryItemUserShell = HistoryItemBase & { text: string; }; +export type HistoryItemCompression = HistoryItemBase & { + type: 'compression'; + compression: CompressionProps; +}; + // Using Omit seems to have some issues with typescript's // type inference e.g. historyItem.type === 'tool_group' isn't auto-inferring that // 'tools' in historyItem. 
@@ -127,7 +138,8 @@ export type HistoryItemWithoutId = | HistoryItemAbout | HistoryItemToolGroup | HistoryItemStats - | HistoryItemQuit; + | HistoryItemQuit + | HistoryItemCompression; export type HistoryItem = HistoryItemWithoutId & { id: number }; @@ -140,6 +152,7 @@ export enum MessageType { STATS = 'stats', QUIT = 'quit', GEMINI = 'gemini', + COMPRESSION = 'compression', } // Simplified message structure for internal feedback @@ -172,6 +185,11 @@ export type Message = stats: CumulativeStats; duration: string; content?: string; + } + | { + type: MessageType.COMPRESSION; + compression: CompressionProps; + timestamp: Date; }; export interface ConsoleMessageItem { diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 7cfec9d6..fd44bf03 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -15,7 +15,12 @@ import { GenerateContentResponse, } from '@google/genai'; import { getFolderStructure } from '../utils/getFolderStructure.js'; -import { Turn, ServerGeminiStreamEvent, GeminiEventType } from './turn.js'; +import { + Turn, + ServerGeminiStreamEvent, + GeminiEventType, + ChatCompressionInfo, +} from './turn.js'; import { Config } from '../config/config.js'; import { getCoreSystemPrompt } from './prompts.js'; import { ReadManyFilesTool } from '../tools/read-many-files.js'; @@ -194,7 +199,7 @@ export class GeminiClient { const compressed = await this.tryCompressChat(); if (compressed) { - yield { type: GeminiEventType.ChatCompressed }; + yield { type: GeminiEventType.ChatCompressed, value: compressed }; } const chat = await this.chat; const turn = new Turn(chat); @@ -390,44 +395,55 @@ export class GeminiClient { }); } - private async tryCompressChat(): Promise { + async tryCompressChat( + force: boolean = false, + ): Promise { const chat = await this.chat; const history = chat.getHistory(true); // Get curated history + // Regardless of `force`, don't do anything if the history is empty. 
+ if (history.length === 0) { + return null; + } + const cg = await this.contentGenerator; - const { totalTokens } = await cg.countTokens({ + const { totalTokens: originalTokenCount } = await cg.countTokens({ model: this.model, contents: history, }); - if (totalTokens === undefined) { - // If token count is undefined, we can't determine if we need to compress. - console.warn( - `Could not determine token count for model ${this.model}. Skipping compression check.`, - ); - return false; - } - const tokenCount = totalTokens; // Now guaranteed to be a number + // If not forced, check if we should compress based on context size. + if (!force) { + if (originalTokenCount === undefined) { + // If token count is undefined, we can't determine if we need to compress. + console.warn( + `Could not determine token count for model ${this.model}. Skipping compression check.`, + ); + return null; + } + const tokenCount = originalTokenCount; // Now guaranteed to be a number - const limit = tokenLimit(this.model); - if (!limit) { - // If no limit is defined for the model, we can't compress. - console.warn( - `No token limit defined for model ${this.model}. Skipping compression check.`, - ); - return false; + const limit = tokenLimit(this.model); + if (!limit) { + // If no limit is defined for the model, we can't compress. + console.warn( + `No token limit defined for model ${this.model}. Skipping compression check.`, + ); + return null; + } + + if (tokenCount < 0.95 * limit) { + return null; + } } - if (tokenCount < 0.95 * limit) { - return false; - } const summarizationRequestMessage = { text: 'Summarize our conversation up to this point. The summary should be a concise yet comprehensive overview of all key topics, questions, answers, and important details discussed. 
This summary will replace the current chat history to conserve tokens, so it must capture everything essential to understand the context and continue our conversation effectively as if no information was lost.', }; const response = await chat.sendMessage({ message: summarizationRequestMessage, }); - this.chat = this.startChat([ + const newHistory = [ { role: 'user', parts: [summarizationRequestMessage], @@ -436,8 +452,15 @@ export class GeminiClient { role: 'model', parts: [{ text: response.text }], }, - ]); + ]; + this.chat = this.startChat(newHistory); + const newTokenCount = ( + await cg.countTokens({ model: this.model, contents: newHistory }) + ).totalTokens; - return true; + return { + originalTokenCount, + newTokenCount, + }; } } diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts index 71c02d83..87533c28 100644 --- a/packages/core/src/core/turn.ts +++ b/packages/core/src/core/turn.ts @@ -98,8 +98,14 @@ export type ServerGeminiErrorEvent = { value: GeminiErrorEventValue; }; +export interface ChatCompressionInfo { + originalTokenCount: number | undefined; + newTokenCount: number | undefined; +} + export type ServerGeminiChatCompressedEvent = { type: GeminiEventType.ChatCompressed; + value: ChatCompressionInfo; }; export type ServerGeminiUsageMetadataEvent = {