From cd069fd436c7a209f5192bebf6e242796c185c66 Mon Sep 17 00:00:00 2001
From: Vachan <52260220+vachan-shetty@users.noreply.github.com>
Date: Tue, 1 Jul 2025 17:18:13 -0700
Subject: [PATCH] Reduce the threshold for when we compress history. (#2898)

---
 packages/core/src/core/client.test.ts | 118 ++++++++++++++++++++++++--
 packages/core/src/core/client.ts      |   7 +-
 2 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index b5f64543..0adbf986 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -21,6 +21,7 @@ import { getCoreSystemPrompt } from './prompts.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
 import { setSimulate429 } from '../utils/testUtils.js';
+import { tokenLimit } from './tokenLimits.js';
 
 // --- Mocks ---
 const mockChatCreateFn = vi.fn();
@@ -82,8 +83,7 @@ describe('Gemini Client (client.ts)', () => {
           embedContent: mockEmbedContentFn,
         },
       };
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      return mock as any;
+      return mock as unknown as GoogleGenAI;
     });
 
     mockChatCreateFn.mockResolvedValue({} as Chat);
@@ -130,14 +130,12 @@ describe('Gemini Client (client.ts)', () => {
         getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
         getFileService: vi.fn().mockReturnValue(fileService),
       };
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      return mock as any;
+      return mock as unknown as Config;
     });
 
     // We can instantiate the client here since Config is mocked
     // and the constructor will use the mocked GoogleGenAI
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const mockConfig = new Config({} as any);
+    const mockConfig = new Config({} as never);
     client = new GeminiClient(mockConfig);
     await client.initialize(contentGeneratorConfig);
   });
@@ -364,6 +362,114 @@ describe('Gemini Client (client.ts)', () => {
     });
   });
 
+  describe('tryCompressChat', () => {
+    const mockCountTokens = vi.fn();
+    const mockSendMessage = vi.fn();
+
+    beforeEach(() => {
+      vi.mock('./tokenLimits', () => ({
+        tokenLimit: vi.fn(),
+      }));
+
+      const mockGenerator: Partial<ContentGenerator> = {
+        countTokens: mockCountTokens,
+      };
+      client['contentGenerator'] = mockGenerator as ContentGenerator;
+
+      // Mock the chat's sendMessage method
+      const mockChat: Partial<GeminiChat> = {
+        getHistory: vi
+          .fn()
+          .mockReturnValue([
+            { role: 'user', parts: [{ text: '...history...' }] },
+          ]),
+        addHistory: vi.fn(),
+        sendMessage: mockSendMessage,
+      };
+      client['chat'] = mockChat as GeminiChat;
+    });
+
+    it('should not trigger summarization if token count is below threshold', async () => {
+      const MOCKED_TOKEN_LIMIT = 1000;
+      vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
+
+      mockCountTokens.mockResolvedValue({
+        totalTokens: MOCKED_TOKEN_LIMIT * 0.699, // TOKEN_THRESHOLD_FOR_SUMMARIZATION = 0.7
+      });
+
+      const initialChat = client.getChat();
+      const result = await client.tryCompressChat();
+      const newChat = client.getChat();
+
+      expect(tokenLimit).toHaveBeenCalled();
+      expect(result).toBeNull();
+      expect(newChat).toBe(initialChat);
+    });
+
+    it('should trigger summarization if token count is at threshold', async () => {
+      const MOCKED_TOKEN_LIMIT = 1000;
+      vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
+
+      const originalTokenCount = 1000 * 0.7;
+      const newTokenCount = 100;
+
+      mockCountTokens
+        .mockResolvedValueOnce({ totalTokens: originalTokenCount }) // First call for the check
+        .mockResolvedValueOnce({ totalTokens: newTokenCount }); // Second call for the new history
+
+      // Mock the summary response from the chat
+      mockSendMessage.mockResolvedValue({
+        role: 'model',
+        parts: [{ text: 'This is a summary.' }],
+      });
+
+      const initialChat = client.getChat();
+      const result = await client.tryCompressChat();
+      const newChat = client.getChat();
+
+      expect(tokenLimit).toHaveBeenCalled();
+      expect(mockSendMessage).toHaveBeenCalled();
+
+      // Assert that summarization happened and returned the correct stats
+      expect(result).toEqual({
+        originalTokenCount,
+        newTokenCount,
+      });
+
+      // Assert that the chat was reset
+      expect(newChat).not.toBe(initialChat);
+    });
+
+    it('should always trigger summarization when force is true, regardless of token count', async () => {
+      const originalTokenCount = 10; // Well below threshold
+      const newTokenCount = 5;
+
+      mockCountTokens
+        .mockResolvedValueOnce({ totalTokens: originalTokenCount })
+        .mockResolvedValueOnce({ totalTokens: newTokenCount });
+
+      // Mock the summary response from the chat
+      mockSendMessage.mockResolvedValue({
+        role: 'model',
+        parts: [{ text: 'This is a summary.' }],
+      });
+
+      const initialChat = client.getChat();
+      const result = await client.tryCompressChat(true); // force = true
+      const newChat = client.getChat();
+
+      expect(mockSendMessage).toHaveBeenCalled();
+
+      expect(result).toEqual({
+        originalTokenCount,
+        newTokenCount,
+      });
+
+      // Assert that the chat was reset
+      expect(newChat).not.toBe(initialChat);
+    });
+  });
+
   describe('sendMessageStream', () => {
     it('should return the turn instance after the stream is complete', async () => {
       // Arrange
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index fe60112d..b39b10a0 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -55,6 +55,7 @@ export class GeminiClient {
     topP: 1,
   };
   private readonly MAX_TURNS = 100;
+  private readonly TOKEN_THRESHOLD_FOR_SUMMARIZATION = 0.7;
 
   constructor(private config: Config) {
     if (config.getProxy()) {
@@ -449,7 +450,11 @@ export class GeminiClient {
     }
 
     // Don't compress if not forced and we are under the limit.
-    if (!force && originalTokenCount < 0.95 * tokenLimit(this.model)) {
+    if (
+      !force &&
+      originalTokenCount <
+        this.TOKEN_THRESHOLD_FOR_SUMMARIZATION * tokenLimit(this.model)
+    ) {
       return null;
     }
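
Note on the change: the patch lowers the point at which chat history is
compressed from 95% to 70% of the model's token limit, presumably to leave
headroom for the summarization request itself and for subsequent turns before
the hard context limit is reached. The sketch below is a minimal standalone
illustration of the new predicate, not the project's implementation:
shouldCompress and the model names/limits inside tokenLimit are hypothetical
stand-ins invented for illustration (the real per-model limits live in
tokenLimits.ts, which this patch does not show), while force,
originalTokenCount, and TOKEN_THRESHOLD_FOR_SUMMARIZATION mirror the patched
code.

// threshold-sketch.ts -- illustrative only, assumptions noted above.
const TOKEN_THRESHOLD_FOR_SUMMARIZATION = 0.7; // previously 0.95

// Hypothetical stand-in for the tokenLimit helper from './tokenLimits.js';
// the model names and limits here are NOT values from the repository.
function tokenLimit(model: string): number {
  const limits: Record<string, number> = {
    'example-small-model': 32_000,
    'example-large-model': 1_000_000,
  };
  return limits[model] ?? 32_000;
}

// Mirrors the patched condition: compression is skipped only when not
// forced and the history is still under 70% of the model's token limit.
function shouldCompress(
  originalTokenCount: number,
  model: string,
  force = false,
): boolean {
  return (
    force ||
    originalTokenCount >= TOKEN_THRESHOLD_FOR_SUMMARIZATION * tokenLimit(model)
  );
}

// With a 32k-token limit, compression now kicks in at 22,400 tokens
// (0.7 * 32,000) instead of 30,400 (0.95 * 32,000):
console.log(shouldCompress(25_000, 'example-small-model')); // true
console.log(shouldCompress(20_000, 'example-small-model')); // false
console.log(shouldCompress(20_000, 'example-small-model', true)); // true (forced)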