Don't prematurely end conversations with Gemini

- There appears to be a root model bug where the model preemptively bails out of conversations instead of trying harder. The stops are typically very obvious and bug-like, and the user has to prompt the model to "continue".
- This PR attempts to fix the above by running a gemini-2.0-flash request (nothing more powerful is needed) at the end of every full interaction to determine who should speak next: the user or the model.
- Add tests for nextSpeakerChecker.

Fixes https://b.corp.google.com/issues/416826051
parent c0eab31c02
commit d159a1507e
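For reference, the checker's verdict is a small structured object. A value satisfying the NextSpeakerResponse interface added in utils/nextSpeakerChecker.ts below might look like this (a minimal illustrative sketch; the values and the import path are assumptions, not part of the commit):

```ts
// Illustrative only: the shape of a checkNextSpeaker verdict.
// The import path assumes this snippet sits next to the utils/ directory.
import { NextSpeakerResponse } from './utils/nextSpeakerChecker.js';

const exampleVerdict: NextSpeakerResponse = {
  reasoning: 'The last response ends with a direct question to the user.',
  next_speaker: 'user',
};
```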
core/client.ts:

@@ -21,6 +21,7 @@ import { Config } from '../config/config.js';
 import { getCoreSystemPrompt } from './prompts.js';
 import { ReadManyFilesTool } from '../tools/read-many-files.js';
 import { getResponseText } from '../utils/generateContentResponseUtilities.js';
+import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
 
 export class GeminiClient {
   private client: GoogleGenAI;
@@ -103,6 +104,16 @@ export class GeminiClient {
       .getToolRegistry()
       .getFunctionDeclarations();
     const tools: Tool[] = [{ functionDeclarations: toolDeclarations }];
+    const history: Content[] = [
+      {
+        role: 'user',
+        parts: envParts,
+      },
+      {
+        role: 'model',
+        parts: [{ text: 'Got it. Thanks for the context!' }],
+      },
+    ];
     try {
       return this.client.chats.create({
         model: this.model,
@@ -111,16 +122,7 @@ export class GeminiClient {
           ...this.generateContentConfig,
           tools,
         },
-        history: [
-          {
-            role: 'user',
-            parts: envParts,
-          },
-          {
-            role: 'model',
-            parts: [{ text: 'Got it. Thanks for the context!' }],
-          },
-        ],
+        history,
       });
     } catch (error) {
       console.error('Error initializing Gemini chat session:', error);
@@ -149,10 +151,15 @@ export class GeminiClient {
         break;
       }
 
-      // What do we do when we have both function responses and confirmations?
       const fnResponses = turn.getFunctionResponses();
       if (fnResponses.length === 0) {
-        break; // user's turn to respond
+        const nextSpeakerCheck = await checkNextSpeaker(chat, this);
+        if (nextSpeakerCheck?.next_speaker === 'model') {
+          request = [{ text: 'Please continue.' }];
+          continue;
+        } else {
+          break;
+        }
       }
       request = fnResponses;
     }
@@ -167,7 +174,7 @@ export class GeminiClient {
   ): Promise<Record<string, unknown>> {
     try {
       const result = await this.client.models.generateContent({
-        model: this.model,
+        model: 'gemini-2.0-flash',
         config: {
           ...this.generateContentConfig,
           systemInstruction: getCoreSystemPrompt(),
utils/nextSpeakerChecker.test.ts (new file, 186 lines):

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect, vi, beforeEach, Mock, afterEach } from 'vitest';
import { Chat, Content } from '@google/genai';
import { GeminiClient } from '../core/client.js';
import { Config } from '../config/config.js'; // Added Config import
import { checkNextSpeaker, NextSpeakerResponse } from './nextSpeakerChecker.js';

// Mock GeminiClient and Config constructor
vi.mock('../core/client.js');
vi.mock('../config/config.js');

// Mock @google/genai
const mockGetHistory = vi.fn();
const mockCreateChat = vi.fn(() => ({
  getHistory: mockGetHistory,
}));

vi.mock('@google/genai', async () => {
  const actualGenAI =
    await vi.importActual<typeof import('@google/genai')>('@google/genai');
  return {
    ...actualGenAI,
    GoogleGenAI: vi.fn().mockImplementation(() => ({
      chats: {
        create: mockCreateChat,
      },
    })),
    // Keep Chat constructor mock for type safety if direct instantiation is attempted,
    // but primary path is via client.chats.create
    Chat: vi.fn().mockImplementation(() => ({
      getHistory: mockGetHistory,
    })),
  };
});

describe('checkNextSpeaker', () => {
  let mockChat: Chat;
  let mockGeminiClient: GeminiClient;
  let MockConfig: Mock;

  beforeEach(() => {
    // Dynamically import and assign the mock
    // Must be done within beforeEach or test to ensure mocks are reset
    MockConfig = vi.mocked(Config);
    // Create a mock instance of Config
    const mockConfigInstance = new MockConfig(
      'test-api-key',
      'gemini-pro',
      false,
      '.',
      false,
      undefined,
      false,
      undefined,
      undefined,
      undefined,
    );
    // Mock any methods on mockConfigInstance if needed, e.g., mockConfigInstance.getToolRegistry = vi.fn()...

    mockGeminiClient = new GeminiClient(mockConfigInstance);
    // Simulate chat creation as done in GeminiClient
    mockChat = { getHistory: mockGetHistory } as unknown as Chat;
  });

  afterEach(() => {
    vi.clearAllMocks();
  });

  it('should return null if history is empty', async () => {
    (mockChat.getHistory as Mock).mockResolvedValue([]);
    const result = await checkNextSpeaker(mockChat, mockGeminiClient);
    expect(result).toBeNull();
    expect(mockGeminiClient.generateJson).not.toHaveBeenCalled();
  });

  it('should return null if the last speaker was the user', async () => {
    (mockChat.getHistory as Mock).mockResolvedValue([
      { role: 'user', parts: [{ text: 'Hello' }] },
    ] as Content[]);
    const result = await checkNextSpeaker(mockChat, mockGeminiClient);
    expect(result).toBeNull();
    expect(mockGeminiClient.generateJson).not.toHaveBeenCalled();
  });

  it("should return { next_speaker: 'model' } when model intends to continue", async () => {
    (mockChat.getHistory as Mock).mockResolvedValue([
      { role: 'model', parts: [{ text: 'I will now do something.' }] },
    ] as Content[]);
    const mockApiResponse: NextSpeakerResponse = {
      reasoning: 'Model stated it will do something.',
      next_speaker: 'model',
    };
    (mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);

    const result = await checkNextSpeaker(mockChat, mockGeminiClient);
    expect(result).toEqual(mockApiResponse);
    expect(mockGeminiClient.generateJson).toHaveBeenCalledTimes(1);
  });

  it("should return { next_speaker: 'user' } when model asks a question", async () => {
    (mockChat.getHistory as Mock).mockResolvedValue([
      { role: 'model', parts: [{ text: 'What would you like to do?' }] },
    ] as Content[]);
    const mockApiResponse: NextSpeakerResponse = {
      reasoning: 'Model asked a question.',
      next_speaker: 'user',
    };
    (mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);

    const result = await checkNextSpeaker(mockChat, mockGeminiClient);
    expect(result).toEqual(mockApiResponse);
  });

  it("should return { next_speaker: 'user' } when model makes a statement", async () => {
    (mockChat.getHistory as Mock).mockResolvedValue([
      { role: 'model', parts: [{ text: 'This is a statement.' }] },
    ] as Content[]);
    const mockApiResponse: NextSpeakerResponse = {
      reasoning: 'Model made a statement, awaiting user input.',
      next_speaker: 'user',
    };
    (mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);

    const result = await checkNextSpeaker(mockChat, mockGeminiClient);
    expect(result).toEqual(mockApiResponse);
  });

  it('should return null if geminiClient.generateJson throws an error', async () => {
    const consoleWarnSpy = vi
      .spyOn(console, 'warn')
      .mockImplementation(() => {});
    (mockChat.getHistory as Mock).mockResolvedValue([
      { role: 'model', parts: [{ text: 'Some model output.' }] },
    ] as Content[]);
    (mockGeminiClient.generateJson as Mock).mockRejectedValue(
      new Error('API Error'),
    );

    const result = await checkNextSpeaker(mockChat, mockGeminiClient);
    expect(result).toBeNull();
    consoleWarnSpy.mockRestore();
  });

  it('should return null if geminiClient.generateJson returns invalid JSON (missing next_speaker)', async () => {
    (mockChat.getHistory as Mock).mockResolvedValue([
      { role: 'model', parts: [{ text: 'Some model output.' }] },
    ] as Content[]);
    (mockGeminiClient.generateJson as Mock).mockResolvedValue({
      reasoning: 'This is incomplete.',
    } as unknown as NextSpeakerResponse); // Type assertion to simulate invalid response

    const result = await checkNextSpeaker(mockChat, mockGeminiClient);
    expect(result).toBeNull();
  });

  it('should return null if geminiClient.generateJson returns a non-string next_speaker', async () => {
    (mockChat.getHistory as Mock).mockResolvedValue([
      { role: 'model', parts: [{ text: 'Some model output.' }] },
    ] as Content[]);
    (mockGeminiClient.generateJson as Mock).mockResolvedValue({
      reasoning: 'Model made a statement, awaiting user input.',
      next_speaker: 123, // Invalid type
    } as unknown as NextSpeakerResponse);

    const result = await checkNextSpeaker(mockChat, mockGeminiClient);
    expect(result).toBeNull();
  });

  it('should return null if geminiClient.generateJson returns an invalid next_speaker string value', async () => {
    (mockChat.getHistory as Mock).mockResolvedValue([
      { role: 'model', parts: [{ text: 'Some model output.' }] },
    ] as Content[]);
    (mockGeminiClient.generateJson as Mock).mockResolvedValue({
      reasoning: 'Model made a statement, awaiting user input.',
      next_speaker: 'neither', // Invalid enum value
    } as unknown as NextSpeakerResponse);

    const result = await checkNextSpeaker(mockChat, mockGeminiClient);
    expect(result).toBeNull();
  });
});
utils/nextSpeakerChecker.ts (new file, 97 lines):

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { Chat, Content, SchemaUnion, Type } from '@google/genai';
import { GeminiClient } from '../core/client.js';

const CHECK_PROMPT = `Analyze *only* the content and structure of your immediately preceding response (your last turn in the conversation history). Based *strictly* on that response, determine who should logically speak next: the 'user' or the 'model' (you).
**Decision Rules (apply in order):**
1. **Model Continues:** If your last response explicitly states an immediate next action *you* intend to take (e.g., "Next, I will...", "Now I'll process...", "Moving on to analyze...", indicates an intended tool call that didn't execute), OR if the response seems clearly incomplete (cut off mid-thought without a natural conclusion), then the **'model'** should speak next.
2. **Question to User:** If your last response ends with a direct question specifically addressed *to the user*, then the **'user'** should speak next.
3. **Waiting for User:** If your last response completed a thought, statement, or task *and* does not meet the criteria for Rule 1 (Model Continues) or Rule 2 (Question to User), it implies a pause expecting user input or reaction. In this case, the **'user'** should speak next.
**Output Format:**
Respond *only* in JSON format according to the following schema. Do not include any text outside the JSON structure.
\`\`\`json
{
  "type": "object",
  "properties": {
    "reasoning": {
      "type": "string",
      "description": "Brief explanation justifying the 'next_speaker' choice based *strictly* on the applicable rule and the content/structure of the preceding turn."
    },
    "next_speaker": {
      "type": "string",
      "enum": ["user", "model"],
      "description": "Who should speak next based *only* on the preceding turn and the decision rules."
    }
  },
  "required": ["next_speaker", "reasoning"]
}
\`\`\`
`;

const RESPONSE_SCHEMA: SchemaUnion = {
  type: Type.OBJECT,
  properties: {
    reasoning: {
      type: Type.STRING,
      description:
        "Brief explanation justifying the 'next_speaker' choice based *strictly* on the applicable rule and the content/structure of the preceding turn.",
    },
    next_speaker: {
      type: Type.STRING,
      enum: ['user', 'model'],
      description:
        'Who should speak next based *only* on the preceding turn and the decision rules',
    },
  },
  required: ['reasoning', 'next_speaker'],
};

export interface NextSpeakerResponse {
  reasoning: string;
  next_speaker: 'user' | 'model';
}

export async function checkNextSpeaker(
  chat: Chat,
  geminiClient: GeminiClient,
): Promise<NextSpeakerResponse | null> {
  const history = await chat.getHistory();
  // Ensure there's a model response to analyze
  if (history.length === 0 || history[history.length - 1].role !== 'model') {
    // Cannot determine next speaker if the last turn wasn't from the model
    // or if history is empty.
    return null;
  }

  const contents: Content[] = [
    ...history,
    { role: 'user', parts: [{ text: CHECK_PROMPT }] },
  ];

  try {
    const parsedResponse = (await geminiClient.generateJson(
      contents,
      RESPONSE_SCHEMA,
    )) as unknown as NextSpeakerResponse;

    if (
      parsedResponse &&
      parsedResponse.next_speaker &&
      ['user', 'model'].includes(parsedResponse.next_speaker)
    ) {
      return parsedResponse;
    }
    return null;
  } catch (error) {
    console.warn(
      'Failed to talk to Gemini endpoint when seeing if conversation should continue.',
      error,
    );
    return null;
  }
}
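Taken together with the core/client.ts hunks above, the control flow reduces to roughly the following (a condensed sketch, not the literal method body; driveConversation and runTurn are hypothetical stand-ins for GeminiClient.sendMessageStream and its streaming turn, and the import paths are assumed):

```ts
import { Chat, Part, PartListUnion } from '@google/genai';
import { GeminiClient } from './core/client.js'; // path assumed
import { checkNextSpeaker } from './utils/nextSpeakerChecker.js'; // path assumed

// Condensed sketch of the loop this commit adds to sendMessageStream.
// `runTurn` stands in for one full streaming turn against the chat.
async function driveConversation(
  chat: Chat,
  client: GeminiClient,
  runTurn: (req: PartListUnion) => Promise<{ getFunctionResponses(): Part[] }>,
  initialRequest: PartListUnion,
): Promise<void> {
  let request = initialRequest;
  while (true) {
    const turn = await runTurn(request);
    const fnResponses = turn.getFunctionResponses();
    if (fnResponses.length === 0) {
      // No pending tool calls: ask a cheap model who should speak next.
      const nextSpeakerCheck = await checkNextSpeaker(chat, client);
      if (nextSpeakerCheck?.next_speaker === 'model') {
        request = [{ text: 'Please continue.' }]; // nudge the model to finish
        continue;
      }
      break; // genuinely the user's turn
    }
    request = fnResponses; // feed tool results into the next turn
  }
}
```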