Add `/compress` command to force a compression of the context (#986)

Related to https://b.corp.google.com/issues/423605555 - I figured this might be a simpler solution to start with, while still also being useful on its own even if we do implement that.
This commit is contained in:
Jacob MacDonald 2025-06-13 21:21:40 -07:00 committed by GitHub
parent 1452bb4ca4
commit d5c6bb9740
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 267 additions and 43 deletions

View File

@ -58,9 +58,15 @@ Slash commands provide meta-level control over the CLI itself. They can typicall
- **Action:** Terminates the CLI process.
- **`/tools`**
- **Description:** Displays a list of all the tools that are currently available to the model.
- **Action:** Outputs a list of the available tools.
- **`/compress`**
- **Description:** Compresses the current context. This will save on tokens used for future tasks while retaining a high-level summary of what has happened.
- **Action:** Replaces the entire chat context with a summary.
## At Commands (`@`)
At commands are used to quickly include the content of files or directories as part of your prompt to Gemini. These commands now feature git-aware filtering.

View File

@ -170,7 +170,11 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
}
}, [config, addItem]);
const { handleSlashCommand, slashCommands } = useSlashCommandProcessor(
const {
handleSlashCommand,
slashCommands,
pendingHistoryItems: pendingSlashCommandHistoryItems,
} = useSlashCommandProcessor(
config,
history,
addItem,
@ -186,6 +190,7 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
showToolDescriptions,
setQuittingMessages,
);
const pendingHistoryItems = [...pendingSlashCommandHistoryItems];
const { rows: terminalHeight, columns: terminalWidth } = useTerminalSize();
const { stdin, setRawMode } = useStdin();
@ -286,18 +291,23 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
return editorType as EditorType;
}, [settings, openEditorDialog]);
const { streamingState, submitQuery, initError, pendingHistoryItems } =
useGeminiStream(
config.getGeminiClient(),
history,
addItem,
setShowHelp,
config,
setDebugMessage,
handleSlashCommand,
shellModeActive,
getPreferredEditor,
);
const {
streamingState,
submitQuery,
initError,
pendingHistoryItems: pendingGeminiHistoryItems,
} = useGeminiStream(
config.getGeminiClient(),
history,
addItem,
setShowHelp,
config,
setDebugMessage,
handleSlashCommand,
shellModeActive,
getPreferredEditor,
);
pendingHistoryItems.push(...pendingGeminiHistoryItems);
const { elapsedTime, currentLoadingPhrase } =
useLoadingIndicator(streamingState);
const showAutoAcceptIndicator = useAutoAcceptIndicator({ config });

View File

@ -13,6 +13,7 @@ import { InfoMessage } from './messages/InfoMessage.js';
import { ErrorMessage } from './messages/ErrorMessage.js';
import { ToolGroupMessage } from './messages/ToolGroupMessage.js';
import { GeminiMessageContent } from './messages/GeminiMessageContent.js';
import { CompressionMessage } from './messages/CompressionMessage.js';
import { Box } from 'ink';
import { AboutBox } from './AboutBox.js';
import { StatsDisplay } from './StatsDisplay.js';
@ -81,5 +82,8 @@ export const HistoryItemDisplay: React.FC<HistoryItemDisplayProps> = ({
isFocused={isFocused}
/>
)}
{item.type === 'compression' && (
<CompressionMessage compression={item.compression} />
)}
</Box>
);

View File

@ -0,0 +1,48 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import React from 'react';
import { Box, Text } from 'ink';
import { CompressionProps } from '../../types.js';
import Spinner from 'ink-spinner';
import { Colors } from '../../colors.js';
/** Props accepted by the CompressionMessage component. */
export interface CompressionDisplayProps {
// State of the compression to render: its pending flag plus the optional token counts.
compression: CompressionProps;
}
/*
 * Compression messages appear when the /compress command is run. They show a
 * loading spinner while compression is in progress, followed by the
 * compression stats (token counts before and after) once it has finished.
 */
export const CompressionMessage: React.FC<CompressionDisplayProps> = ({
  compression,
}) => {
  // Both token counts are optional on CompressionProps. Fall back to a
  // readable placeholder instead of interpolating the string "undefined".
  const originalTokenCount = compression.originalTokenCount ?? 'unknown';
  const newTokenCount = compression.newTokenCount ?? 'unknown';

  const text = compression.isPending
    ? 'Compressing chat history'
    : `Chat history compressed from ${originalTokenCount} to ${newTokenCount} tokens.`;

  return (
    <Box flexDirection="row">
      <Box marginRight={1}>
        {compression.isPending ? (
          <Spinner type="dots" />
        ) : (
          // Static marker occupying the spinner's slot once compression is done.
          <Text color={Colors.AccentPurple}>✦</Text>
        )}
      </Box>
      <Box>
        <Text
          color={
            compression.isPending ? Colors.AccentPurple : Colors.AccentGreen
          }
        >
          {text}
        </Text>
      </Box>
    </Box>
  );
};

View File

@ -62,6 +62,7 @@ import {
getMCPServerStatus,
MCPDiscoveryState,
getMCPDiscoveryState,
GeminiClient,
} from '@gemini-cli/core';
import { useSessionStats } from '../contexts/SessionContext.js';
@ -100,6 +101,8 @@ describe('useSlashCommandProcessor', () => {
let mockOpenEditorDialog: ReturnType<typeof vi.fn>;
let mockPerformMemoryRefresh: ReturnType<typeof vi.fn>;
let mockSetQuittingMessages: ReturnType<typeof vi.fn>;
let mockTryCompressChat: ReturnType<typeof vi.fn>;
let mockGeminiClient: GeminiClient;
let mockConfig: Config;
let mockCorgiMode: ReturnType<typeof vi.fn>;
const mockUseSessionStats = useSessionStats as Mock;
@ -115,8 +118,13 @@ describe('useSlashCommandProcessor', () => {
mockOpenEditorDialog = vi.fn();
mockPerformMemoryRefresh = vi.fn().mockResolvedValue(undefined);
mockSetQuittingMessages = vi.fn();
mockTryCompressChat = vi.fn();
mockGeminiClient = {
tryCompressChat: mockTryCompressChat,
} as unknown as GeminiClient;
mockConfig = {
getDebugMode: vi.fn(() => false),
getGeminiClient: () => mockGeminiClient,
getSandbox: vi.fn(() => 'test-sandbox'),
getModel: vi.fn(() => 'test-model'),
getProjectRoot: vi.fn(() => '/test/dir'),
@ -944,4 +952,35 @@ Add any other context about the problem here.
expect(commandResult).toBe(true);
});
});
describe('/compress command', () => {
  it('should call tryCompressChat(true)', async () => {
    // Token counts the mocked client reports back; reused for the expectation.
    const reportedCounts = { originalTokenCount: 100, newTokenCount: 50 };
    const { handleSlashCommand } = getProcessor();

    mockTryCompressChat.mockImplementationOnce(async (force?: boolean) => {
      // TODO: Check that we have a pending compression item in the history.
      expect(force).toBe(true);
      return { ...reportedCounts };
    });

    await act(async () => {
      handleSlashCommand('/compress');
    });

    expect(mockGeminiClient.tryCompressChat).toHaveBeenCalledWith(true);

    // The second addItem call must carry the finished compression stats.
    const finishedCompressionItem = expect.objectContaining({
      type: MessageType.COMPRESSION,
      compression: {
        isPending: false,
        ...reportedCounts,
      },
    });
    expect(mockAddItem).toHaveBeenNthCalledWith(
      2,
      finishedCompressionItem,
      expect.any(Number),
    );
  });
});
});

View File

@ -9,6 +9,7 @@ import { type PartListUnion } from '@google/genai';
import open from 'open';
import process from 'node:process';
import { UseHistoryManagerReturn } from './useHistoryManager.js';
import { useStateAndRef } from './useStateAndRef.js';
import {
Config,
GitService,
@ -80,6 +81,13 @@ export const useSlashCommandProcessor = (
return new GitService(config.getProjectRoot());
}, [config]);
// In-progress history items owned by this hook. The array is rebuilt every time
// the hook body runs and is handed back to the caller in the hook's return value.
const pendingHistoryItems: HistoryItemWithoutId[] = [];
// Holds the placeholder item for a /compress request that is still running
// (null when idle). NOTE(review): useStateAndRef presumably exposes the state
// through a ref so callbacks can read the latest value — confirm against its definition.
const [pendingCompressionItemRef, setPendingCompressionItem] =
useStateAndRef<HistoryItemWithoutId | null>(null);
// Surface the pending compression item, if there is one, to the caller.
if (pendingCompressionItemRef.current != null) {
pendingHistoryItems.push(pendingCompressionItemRef.current);
}
const addMessage = useCallback(
(message: Message) => {
// Convert Message to HistoryItemWithoutId
@ -105,6 +113,11 @@ export const useSlashCommandProcessor = (
stats: message.stats,
duration: message.duration,
};
} else if (message.type === MessageType.COMPRESSION) {
historyItemContent = {
type: 'compression',
compression: message.compression,
};
} else {
historyItemContent = {
type: message.type as
@ -641,6 +654,57 @@ Add any other context about the problem here.
}, 100);
},
},
{
  name: 'compress',
  altName: 'summarize',
  description: 'Compresses the context by replacing it with a summary.',
  // Forces a compression of the chat context through the Gemini client and
  // reports the outcome (token counts on success, an error otherwise).
  action: async (_mainCommand, _subCommand, _args) => {
    // Only one compression may be in flight at a time; a non-null pending
    // item means an earlier /compress has not finished yet.
    if (pendingCompressionItemRef.current !== null) {
      addMessage({
        type: MessageType.ERROR,
        content:
          'Already compressing, wait for previous request to complete',
        timestamp: new Date(),
      });
      return;
    }

    // Publish a pending item so the caller can show progress while we wait.
    setPendingCompressionItem({
      type: MessageType.COMPRESSION,
      compression: {
        isPending: true,
      },
    });

    try {
      // `true` forces compression regardless of the current context size.
      const compressed = await config!
        .getGeminiClient()!
        .tryCompressChat(true);
      if (compressed) {
        addMessage({
          type: MessageType.COMPRESSION,
          compression: {
            isPending: false,
            originalTokenCount: compressed.originalTokenCount,
            newTokenCount: compressed.newTokenCount,
          },
          timestamp: new Date(),
        });
      } else {
        addMessage({
          type: MessageType.ERROR,
          content: 'Failed to compress chat history.',
          timestamp: new Date(),
        });
      }
    } catch (e) {
      addMessage({
        type: MessageType.ERROR,
        content: `Failed to compress chat history: ${e instanceof Error ? e.message : String(e)}`,
        timestamp: new Date(),
      });
    } finally {
      // Always clear the pending marker, even if reporting the result throws;
      // otherwise every later /compress would be rejected as "Already compressing".
      setPendingCompressionItem(null);
    }
  },
},
];
if (config?.getCheckpointEnabled()) {
@ -767,6 +831,8 @@ Add any other context about the problem here.
loadHistory,
addItem,
setQuittingMessages,
pendingCompressionItemRef,
setPendingCompressionItem,
]);
const handleSlashCommand = useCallback(
@ -830,5 +896,5 @@ Add any other context about the problem here.
[addItem, slashCommands, addMessage],
);
return { handleSlashCommand, slashCommands };
return { handleSlashCommand, slashCommands, pendingHistoryItems };
};

View File

@ -13,6 +13,7 @@ import {
ServerGeminiStreamEvent as GeminiEvent,
ServerGeminiContentEvent as ContentEvent,
ServerGeminiErrorEvent as ErrorEvent,
ServerGeminiChatCompressedEvent,
getErrorMessage,
isNodeError,
MessageSenderType,
@ -368,11 +369,14 @@ export const useGeminiStream = (
);
const handleChatCompressionEvent = useCallback(
() =>
(eventValue: ServerGeminiChatCompressedEvent['value']) =>
addItem(
{
type: 'info',
text: `IMPORTANT: this conversation approached the input token limit for ${config.getModel()}. We'll send a compressed context to the model for any future messages.`,
text:
`IMPORTANT: This conversation approached the input token limit for ${config.getModel()}. ` +
`A compressed context will be sent for future messages (compressed from: ` +
`${eventValue.originalTokenCount} to ${eventValue.newTokenCount} tokens).`,
},
Date.now(),
),
@ -406,7 +410,7 @@ export const useGeminiStream = (
handleErrorEvent(event.value, userMessageTimestamp);
break;
case ServerGeminiEventType.ChatCompressed:
handleChatCompressionEvent();
handleChatCompressionEvent(event.value);
break;
case ServerGeminiEventType.UsageMetadata:
addUsage(event.value);

View File

@ -53,6 +53,12 @@ export interface IndividualToolCallDisplay {
renderOutputAsMarkdown?: boolean;
}
/** Display state for a chat-history compression shown in the history view. */
export interface CompressionProps {
// True while the compression request is still running.
isPending: boolean;
// Token count of the chat history before compression; may be absent (e.g. while pending).
originalTokenCount?: number;
// Token count of the chat history after compression; may be absent (e.g. while pending).
newTokenCount?: number;
}
export interface HistoryItemBase {
text?: string; // Text content for user/gemini/info/error messages
}
@ -113,6 +119,11 @@ export type HistoryItemUserShell = HistoryItemBase & {
text: string;
};
/** History entry describing a chat-history compression (pending or finished). */
export type HistoryItemCompression = HistoryItemBase & {
// Discriminant used to pick the compression renderer for this item.
type: 'compression';
compression: CompressionProps;
};
// Using Omit<HistoryItem, 'id'> seems to have some issues with typescript's
// type inference e.g. historyItem.type === 'tool_group' isn't auto-inferring that
// 'tools' in historyItem.
@ -127,7 +138,8 @@ export type HistoryItemWithoutId =
| HistoryItemAbout
| HistoryItemToolGroup
| HistoryItemStats
| HistoryItemQuit;
| HistoryItemQuit
| HistoryItemCompression;
export type HistoryItem = HistoryItemWithoutId & { id: number };
@ -140,6 +152,7 @@ export enum MessageType {
STATS = 'stats',
QUIT = 'quit',
GEMINI = 'gemini',
COMPRESSION = 'compression',
}
// Simplified message structure for internal feedback
@ -172,6 +185,11 @@ export type Message =
stats: CumulativeStats;
duration: string;
content?: string;
}
| {
type: MessageType.COMPRESSION;
compression: CompressionProps;
timestamp: Date;
};
export interface ConsoleMessageItem {

View File

@ -15,7 +15,12 @@ import {
GenerateContentResponse,
} from '@google/genai';
import { getFolderStructure } from '../utils/getFolderStructure.js';
import { Turn, ServerGeminiStreamEvent, GeminiEventType } from './turn.js';
import {
Turn,
ServerGeminiStreamEvent,
GeminiEventType,
ChatCompressionInfo,
} from './turn.js';
import { Config } from '../config/config.js';
import { getCoreSystemPrompt } from './prompts.js';
import { ReadManyFilesTool } from '../tools/read-many-files.js';
@ -194,7 +199,7 @@ export class GeminiClient {
const compressed = await this.tryCompressChat();
if (compressed) {
yield { type: GeminiEventType.ChatCompressed };
yield { type: GeminiEventType.ChatCompressed, value: compressed };
}
const chat = await this.chat;
const turn = new Turn(chat);
@ -390,44 +395,55 @@ export class GeminiClient {
});
}
private async tryCompressChat(): Promise<boolean> {
async tryCompressChat(
force: boolean = false,
): Promise<ChatCompressionInfo | null> {
const chat = await this.chat;
const history = chat.getHistory(true); // Get curated history
// Regardless of `force`, don't do anything if the history is empty.
if (history.length === 0) {
return null;
}
const cg = await this.contentGenerator;
const { totalTokens } = await cg.countTokens({
const { totalTokens: originalTokenCount } = await cg.countTokens({
model: this.model,
contents: history,
});
if (totalTokens === undefined) {
// If token count is undefined, we can't determine if we need to compress.
console.warn(
`Could not determine token count for model ${this.model}. Skipping compression check.`,
);
return false;
}
const tokenCount = totalTokens; // Now guaranteed to be a number
// If not forced, check if we should compress based on context size.
if (!force) {
if (originalTokenCount === undefined) {
// If token count is undefined, we can't determine if we need to compress.
console.warn(
`Could not determine token count for model ${this.model}. Skipping compression check.`,
);
return null;
}
const tokenCount = originalTokenCount; // Now guaranteed to be a number
const limit = tokenLimit(this.model);
if (!limit) {
// If no limit is defined for the model, we can't compress.
console.warn(
`No token limit defined for model ${this.model}. Skipping compression check.`,
);
return false;
const limit = tokenLimit(this.model);
if (!limit) {
// If no limit is defined for the model, we can't compress.
console.warn(
`No token limit defined for model ${this.model}. Skipping compression check.`,
);
return null;
}
if (tokenCount < 0.95 * limit) {
return null;
}
}
if (tokenCount < 0.95 * limit) {
return false;
}
const summarizationRequestMessage = {
text: 'Summarize our conversation up to this point. The summary should be a concise yet comprehensive overview of all key topics, questions, answers, and important details discussed. This summary will replace the current chat history to conserve tokens, so it must capture everything essential to understand the context and continue our conversation effectively as if no information was lost.',
};
const response = await chat.sendMessage({
message: summarizationRequestMessage,
});
this.chat = this.startChat([
const newHistory = [
{
role: 'user',
parts: [summarizationRequestMessage],
@ -436,8 +452,15 @@ export class GeminiClient {
role: 'model',
parts: [{ text: response.text }],
},
]);
];
this.chat = this.startChat(newHistory);
const newTokenCount = (
await cg.countTokens({ model: this.model, contents: newHistory })
).totalTokens;
return true;
return {
originalTokenCount,
newTokenCount,
};
}
}

View File

@ -98,8 +98,14 @@ export type ServerGeminiErrorEvent = {
value: GeminiErrorEventValue;
};
/**
 * Token counts reported for a chat compression. Either count is `undefined`
 * when no total token count was available for that history.
 */
export interface ChatCompressionInfo {
// Token count of the history before it was replaced by the summary.
originalTokenCount: number | undefined;
// Token count of the new, summarized history.
newTokenCount: number | undefined;
}
/** Stream event emitted when the chat history has been compressed. */
export type ServerGeminiChatCompressedEvent = {
type: GeminiEventType.ChatCompressed;
// Token counts before and after the compression.
value: ChatCompressionInfo;
};
export type ServerGeminiUsageMetadataEvent = {