From cd069fd436c7a209f5192bebf6e242796c185c66 Mon Sep 17 00:00:00 2001
From: Vachan <52260220+vachan-shetty@users.noreply.github.com>
Date: Tue, 1 Jul 2025 17:18:13 -0700
Subject: [PATCH] Reduce the threshold for when we compress history. (#2898)

---
 packages/core/src/core/client.test.ts | 118 ++++++++++++++++++++++++--
 packages/core/src/core/client.ts      |   7 +-
 2 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index b5f64543..0adbf986 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -21,6 +21,7 @@ import { getCoreSystemPrompt } from './prompts.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
 import { setSimulate429 } from '../utils/testUtils.js';
+import { tokenLimit } from './tokenLimits.js';
 
 // --- Mocks ---
 const mockChatCreateFn = vi.fn();
@@ -82,8 +83,7 @@ describe('Gemini Client (client.ts)', () => {
           embedContent: mockEmbedContentFn,
         },
       };
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      return mock as any;
+      return mock as unknown as GoogleGenAI;
     });
 
     mockChatCreateFn.mockResolvedValue({} as Chat);
@@ -130,14 +130,12 @@ describe('Gemini Client (client.ts)', () => {
         getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
         getFileService: vi.fn().mockReturnValue(fileService),
       };
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      return mock as any;
+      return mock as unknown as Config;
     });
 
     // We can instantiate the client here since Config is mocked
     // and the constructor will use the mocked GoogleGenAI
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const mockConfig = new Config({} as any);
+    const mockConfig = new Config({} as never);
     client = new GeminiClient(mockConfig);
     await client.initialize(contentGeneratorConfig);
   });
@@ -364,6 +362,114 @@ describe('Gemini Client (client.ts)', () => {
     });
   });
 
+  describe('tryCompressChat', () => {
+    const mockCountTokens = vi.fn();
+    const mockSendMessage = vi.fn();
+
+    beforeEach(() => {
+      vi.mock('./tokenLimits', () => ({
+        tokenLimit: vi.fn(),
+      }));
+
+      const mockGenerator: Partial<ContentGenerator> = {
+        countTokens: mockCountTokens,
+      };
+      client['contentGenerator'] = mockGenerator as ContentGenerator;
+
+      // Mock the chat's sendMessage method
+      const mockChat: Partial<GeminiChat> = {
+        getHistory: vi
+          .fn()
+          .mockReturnValue([
+            { role: 'user', parts: [{ text: '...history...' }] },
+          ]),
+        addHistory: vi.fn(),
+        sendMessage: mockSendMessage,
+      };
+      client['chat'] = mockChat as GeminiChat;
+    });
+
+    it('should not trigger summarization if token count is below threshold', async () => {
+      const MOCKED_TOKEN_LIMIT = 1000;
+      vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
+
+      mockCountTokens.mockResolvedValue({
+        totalTokens: MOCKED_TOKEN_LIMIT * 0.699, // TOKEN_THRESHOLD_FOR_SUMMARIZATION = 0.7
+      });
+
+      const initialChat = client.getChat();
+      const result = await client.tryCompressChat();
+      const newChat = client.getChat();
+
+      expect(tokenLimit).toHaveBeenCalled();
+      expect(result).toBeNull();
+      expect(newChat).toBe(initialChat);
+    });
+
+    it('should trigger summarization if token count is at threshold', async () => {
+      const MOCKED_TOKEN_LIMIT = 1000;
+      vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
+
+      const originalTokenCount = 1000 * 0.7;
+      const newTokenCount = 100;
+
+      mockCountTokens
+        .mockResolvedValueOnce({ totalTokens: originalTokenCount }) // First call for the check
+        .mockResolvedValueOnce({ totalTokens: newTokenCount }); // Second call for the new history
+
+      // Mock the summary response from the chat
+      mockSendMessage.mockResolvedValue({
+        role: 'model',
+        parts: [{ text: 'This is a summary.' }],
+      });
+
+      const initialChat = client.getChat();
+      const result = await client.tryCompressChat();
+      const newChat = client.getChat();
+
+      expect(tokenLimit).toHaveBeenCalled();
+      expect(mockSendMessage).toHaveBeenCalled();
+
+      // Assert that summarization happened and returned the correct stats
+      expect(result).toEqual({
+        originalTokenCount,
+        newTokenCount,
+      });
+
+      // Assert that the chat was reset
+      expect(newChat).not.toBe(initialChat);
+    });
+
+    it('should always trigger summarization when force is true, regardless of token count', async () => {
+      const originalTokenCount = 10; // Well below threshold
+      const newTokenCount = 5;
+
+      mockCountTokens
+        .mockResolvedValueOnce({ totalTokens: originalTokenCount })
+        .mockResolvedValueOnce({ totalTokens: newTokenCount });
+
+      // Mock the summary response from the chat
+      mockSendMessage.mockResolvedValue({
+        role: 'model',
+        parts: [{ text: 'This is a summary.' }],
+      });
+
+      const initialChat = client.getChat();
+      const result = await client.tryCompressChat(true); // force = true
+      const newChat = client.getChat();
+
+      expect(mockSendMessage).toHaveBeenCalled();
+
+      expect(result).toEqual({
+        originalTokenCount,
+        newTokenCount,
+      });
+
+      // Assert that the chat was reset
+      expect(newChat).not.toBe(initialChat);
+    });
+  });
+
   describe('sendMessageStream', () => {
     it('should return the turn instance after the stream is complete', async () => {
       // Arrange
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index fe60112d..b39b10a0 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -55,6 +55,7 @@ export class GeminiClient {
     topP: 1,
   };
   private readonly MAX_TURNS = 100;
+  private readonly TOKEN_THRESHOLD_FOR_SUMMARIZATION = 0.7;
 
   constructor(private config: Config) {
     if (config.getProxy()) {
@@ -449,7 +450,11 @@ export class GeminiClient {
     }
 
     // Don't compress if not forced and we are under the limit.
-    if (!force && originalTokenCount < 0.95 * tokenLimit(this.model)) {
+    if (
+      !force &&
+      originalTokenCount <
+        this.TOKEN_THRESHOLD_FOR_SUMMARIZATION * tokenLimit(this.model)
+    ) {
       return null;
     }
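
Note on the change: the patch lowers the point at which chat history is
compressed from 95% to 70% of the model's token limit, presumably to leave
headroom for the summarization request itself and for subsequent turns before
the hard context limit is reached. The sketch below is a minimal standalone
illustration of the new predicate, not the project's implementation:
shouldCompress and the model names/limits inside tokenLimit are hypothetical
stand-ins invented for illustration (the real per-model limits live in
tokenLimits.ts, which this patch does not show), while force,
originalTokenCount, and TOKEN_THRESHOLD_FOR_SUMMARIZATION mirror the patched
code.

// threshold-sketch.ts -- illustrative only, assumptions noted above.
const TOKEN_THRESHOLD_FOR_SUMMARIZATION = 0.7; // previously 0.95

// Hypothetical stand-in for the tokenLimit helper from './tokenLimits.js';
// the model names and limits here are NOT values from the repository.
function tokenLimit(model: string): number {
  const limits: Record<string, number> = {
    'example-small-model': 32_000,
    'example-large-model': 1_000_000,
  };
  return limits[model] ?? 32_000;
}

// Mirrors the patched condition: compression is skipped only when not
// forced and the history is still under 70% of the model's token limit.
function shouldCompress(
  originalTokenCount: number,
  model: string,
  force = false,
): boolean {
  return (
    force ||
    originalTokenCount >= TOKEN_THRESHOLD_FOR_SUMMARIZATION * tokenLimit(model)
  );
}

// With a 32k-token limit, compression now kicks in at 22,400 tokens
// (0.7 * 32,000) instead of 30,400 (0.95 * 32,000):
console.log(shouldCompress(25_000, 'example-small-model')); // true
console.log(shouldCompress(20_000, 'example-small-model')); // false
console.log(shouldCompress(20_000, 'example-small-model', true)); // true (forced)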