Reduce the threshold for when we compress history. (#2898)
parent 38445f63f0
commit cd069fd436

@@ -21,6 +21,7 @@ import { getCoreSystemPrompt } from './prompts.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
 import { setSimulate429 } from '../utils/testUtils.js';
+import { tokenLimit } from './tokenLimits.js';
 
 // --- Mocks ---
 const mockChatCreateFn = vi.fn();
@@ -82,8 +83,7 @@ describe('Gemini Client (client.ts)', () => {
       embedContent: mockEmbedContentFn,
     },
   };
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  return mock as any;
+  return mock as unknown as GoogleGenAI;
 });
 
 mockChatCreateFn.mockResolvedValue({} as Chat);
@@ -130,14 +130,12 @@ describe('Gemini Client (client.ts)', () => {
       getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
       getFileService: vi.fn().mockReturnValue(fileService),
     };
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    return mock as any;
+    return mock as unknown as Config;
   });
 
   // We can instantiate the client here since Config is mocked
   // and the constructor will use the mocked GoogleGenAI
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  const mockConfig = new Config({} as any);
+  const mockConfig = new Config({} as never);
   client = new GeminiClient(mockConfig);
   await client.initialize(contentGeneratorConfig);
 });
@@ -364,6 +362,114 @@ describe('Gemini Client (client.ts)', () => {
     });
   });
 
+  describe('tryCompressChat', () => {
+    const mockCountTokens = vi.fn();
+    const mockSendMessage = vi.fn();
+
+    beforeEach(() => {
+      vi.mock('./tokenLimits', () => ({
+        tokenLimit: vi.fn(),
+      }));
+
+      const mockGenerator: Partial<ContentGenerator> = {
+        countTokens: mockCountTokens,
+      };
+      client['contentGenerator'] = mockGenerator as ContentGenerator;
+
+      // Mock the chat's sendMessage method
+      const mockChat: Partial<GeminiChat> = {
+        getHistory: vi
+          .fn()
+          .mockReturnValue([
+            { role: 'user', parts: [{ text: '...history...' }] },
+          ]),
+        addHistory: vi.fn(),
+        sendMessage: mockSendMessage,
+      };
+      client['chat'] = mockChat as GeminiChat;
+    });
+
+    it('should not trigger summarization if token count is below threshold', async () => {
+      const MOCKED_TOKEN_LIMIT = 1000;
+      vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
+
+      mockCountTokens.mockResolvedValue({
+        totalTokens: MOCKED_TOKEN_LIMIT * 0.699, // TOKEN_THRESHOLD_FOR_SUMMARIZATION = 0.7
+      });
+
+      const initialChat = client.getChat();
+      const result = await client.tryCompressChat();
+      const newChat = client.getChat();
+
+      expect(tokenLimit).toHaveBeenCalled();
+      expect(result).toBeNull();
+      expect(newChat).toBe(initialChat);
+    });
+
+    it('should trigger summarization if token count is at threshold', async () => {
+      const MOCKED_TOKEN_LIMIT = 1000;
+      vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
+
+      const originalTokenCount = 1000 * 0.7;
+      const newTokenCount = 100;
+
+      mockCountTokens
+        .mockResolvedValueOnce({ totalTokens: originalTokenCount }) // First call for the check
+        .mockResolvedValueOnce({ totalTokens: newTokenCount }); // Second call for the new history
+
+      // Mock the summary response from the chat
+      mockSendMessage.mockResolvedValue({
+        role: 'model',
+        parts: [{ text: 'This is a summary.' }],
+      });
+
+      const initialChat = client.getChat();
+      const result = await client.tryCompressChat();
+      const newChat = client.getChat();
+
+      expect(tokenLimit).toHaveBeenCalled();
+      expect(mockSendMessage).toHaveBeenCalled();
+
+      // Assert that summarization happened and returned the correct stats
+      expect(result).toEqual({
+        originalTokenCount,
+        newTokenCount,
+      });
+
+      // Assert that the chat was reset
+      expect(newChat).not.toBe(initialChat);
+    });
+
+    it('should always trigger summarization when force is true, regardless of token count', async () => {
+      const originalTokenCount = 10; // Well below threshold
+      const newTokenCount = 5;
+
+      mockCountTokens
+        .mockResolvedValueOnce({ totalTokens: originalTokenCount })
+        .mockResolvedValueOnce({ totalTokens: newTokenCount });
+
+      // Mock the summary response from the chat
+      mockSendMessage.mockResolvedValue({
+        role: 'model',
+        parts: [{ text: 'This is a summary.' }],
+      });
+
+      const initialChat = client.getChat();
+      const result = await client.tryCompressChat(true); // force = true
+      const newChat = client.getChat();
+
+      expect(mockSendMessage).toHaveBeenCalled();
+
+      expect(result).toEqual({
+        originalTokenCount,
+        newTokenCount,
+      });
+
+      // Assert that the chat was reset
+      expect(newChat).not.toBe(initialChat);
+    });
+  });
+
   describe('sendMessageStream', () => {
     it('should return the turn instance after the stream is complete', async () => {
       // Arrange

@@ -55,6 +55,7 @@ export class GeminiClient {
     topP: 1,
   };
   private readonly MAX_TURNS = 100;
+  private readonly TOKEN_THRESHOLD_FOR_SUMMARIZATION = 0.7;
 
   constructor(private config: Config) {
     if (config.getProxy()) {
@@ -449,7 +450,11 @@ export class GeminiClient {
     }
 
     // Don't compress if not forced and we are under the limit.
-    if (!force && originalTokenCount < 0.95 * tokenLimit(this.model)) {
+    if (
+      !force &&
+      originalTokenCount <
+        this.TOKEN_THRESHOLD_FOR_SUMMARIZATION * tokenLimit(this.model)
+    ) {
       return null;
     }
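
For context, and not part of the diff above: a minimal TypeScript sketch of the trigger logic this commit changes. The shouldCompress helper, its parameters, and the sample 1000-token limit are hypothetical illustrations; `limit` stands in for tokenLimit(this.model) and `force` mirrors the tryCompressChat(force) parameter.

// Hypothetical sketch of the new compression trigger (not code from this commit).
const TOKEN_THRESHOLD_FOR_SUMMARIZATION = 0.7; // was an inline 0.95 before this change

function shouldCompress(
  originalTokenCount: number,
  limit: number, // stands in for tokenLimit(this.model)
  force = false, // mirrors tryCompressChat(force)
): boolean {
  // tryCompressChat bails out only when not forced and still under the threshold,
  // so compression runs when forced, or at or above 70% of the model's token limit.
  return (
    force || originalTokenCount >= TOKEN_THRESHOLD_FOR_SUMMARIZATION * limit
  );
}

// With the 1000-token limit used in the tests above:
console.log(shouldCompress(699, 1000)); // false: below 70%, history kept as-is
console.log(shouldCompress(700, 1000)); // true: at the threshold, summarize
console.log(shouldCompress(10, 1000, true)); // true: force always compresses

The practical effect is that history gets summarized much earlier, at 70% of the context window rather than 95%, leaving more headroom before the model's hard token limit is reached.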