From 123ad20e9bfc5cf47eca4fe0e073ecef67a639f9 Mon Sep 17 00:00:00 2001 From: Asad Memon Date: Sun, 15 Jun 2025 11:19:05 -0700 Subject: [PATCH] feat: Show model thoughts while loading (#992) --- packages/cli/src/ui/App.tsx | 7 +++ .../ui/components/LoadingIndicator.test.tsx | 52 +++++++++++++++++++ .../src/ui/components/LoadingIndicator.tsx | 42 +++++++++------ packages/cli/src/ui/hooks/useGeminiStream.ts | 6 +++ packages/core/src/core/client.ts | 16 +++++- packages/core/src/core/geminiChat.ts | 18 +++++-- packages/core/src/core/turn.ts | 36 ++++++++++++- 7 files changed, 153 insertions(+), 24 deletions(-) diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx index 52c286dc..9a4ecbd3 100644 --- a/packages/cli/src/ui/App.tsx +++ b/packages/cli/src/ui/App.tsx @@ -300,6 +300,7 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { submitQuery, initError, pendingHistoryItems: pendingGeminiHistoryItems, + thought, } = useGeminiStream( config.getGeminiClient(), history, @@ -542,6 +543,12 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { ) : ( <> ', () => { ); expect(lastFrame()).toBe(''); }); + + it('should display fallback phrase if thought is empty', () => { + const props = { + thought: null, + currentLoadingPhrase: 'Loading...', + elapsedTime: 5, + }; + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + ); + const output = lastFrame(); + expect(output).toContain('Loading...'); + }); + + it('should display the subject of a thought', () => { + const props = { + thought: { + subject: 'Thinking about something...', + description: 'and other stuff.', + }, + elapsedTime: 5, + }; + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + ); + const output = lastFrame(); + expect(output).toBeDefined(); + if (output) { + expect(output).toContain('Thinking about something...'); + expect(output).not.toContain('and other stuff.'); + } + }); + + it('should prioritize thought.subject over currentLoadingPhrase', () => { + const props = { + thought: { + subject: 'This should be displayed', + description: 'A description', + }, + currentLoadingPhrase: 'This should not be displayed', + elapsedTime: 5, + }; + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + ); + const output = lastFrame(); + expect(output).toContain('This should be displayed'); + expect(output).not.toContain('This should not be displayed'); + }); }); diff --git a/packages/cli/src/ui/components/LoadingIndicator.tsx b/packages/cli/src/ui/components/LoadingIndicator.tsx index 61b74b89..855894e6 100644 --- a/packages/cli/src/ui/components/LoadingIndicator.tsx +++ b/packages/cli/src/ui/components/LoadingIndicator.tsx @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { ThoughtSummary } from '@gemini-cli/core'; import React from 'react'; import { Box, Text } from 'ink'; import { Colors } from '../colors.js'; @@ -15,12 +16,14 @@ interface LoadingIndicatorProps { currentLoadingPhrase?: string; elapsedTime: number; rightContent?: React.ReactNode; + thought?: ThoughtSummary | null; } export const LoadingIndicator: React.FC = ({ currentLoadingPhrase, elapsedTime, rightContent, + thought, }) => { const streamingState = useStreamingContext(); @@ -28,25 +31,30 @@ export const LoadingIndicator: React.FC = ({ return null; } + const primaryText = thought?.subject || currentLoadingPhrase; + return ( - - - + + {/* Main loading line */} + + + + + {primaryText && {primaryText}} + + {streamingState === StreamingState.WaitingForConfirmation + ? '' + : ` (esc to cancel, ${elapsedTime}s)`} + + {/* Spacer */} + {rightContent && {rightContent}} - {currentLoadingPhrase && ( - {currentLoadingPhrase} - )} - - {streamingState === StreamingState.WaitingForConfirmation - ? '' - : ` (esc to cancel, ${elapsedTime}s)`} - - {/* Spacer */} - {rightContent && {rightContent}} ); }; diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index bff38a2b..51d32506 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -21,6 +21,7 @@ import { logUserPrompt, GitService, EditorType, + ThoughtSummary, } from '@gemini-cli/core'; import { type Part, type PartListUnion } from '@google/genai'; import { @@ -90,6 +91,7 @@ export const useGeminiStream = ( const [initError, setInitError] = useState(null); const abortControllerRef = useRef(null); const [isResponding, setIsResponding] = useState(false); + const [thought, setThought] = useState(null); const [pendingHistoryItemRef, setPendingHistoryItem] = useStateAndRef(null); const logger = useLogger(); @@ -393,6 +395,9 @@ export const useGeminiStream = ( const toolCallRequests: ToolCallRequestInfo[] = []; for await (const event of stream) { switch (event.type) { + case ServerGeminiEventType.Thought: + setThought(event.value); + break; case ServerGeminiEventType.Content: geminiMessageBuffer = handleContentEvent( event.value, @@ -730,5 +735,6 @@ export const useGeminiStream = ( submitQuery, initError, pendingHistoryItems, + thought, }; }; diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 682d9461..7cfb6b53 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -38,6 +38,11 @@ import { import { ProxyAgent, setGlobalDispatcher } from 'undici'; import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js'; +function isThinkingSupported(model: string) { + if (model.startsWith('gemini-2.5')) return true; + return false; +} + export class GeminiClient { private chat: Promise; private contentGenerator: Promise; @@ -164,14 +169,21 @@ export class GeminiClient { try { const userMemory = this.config.getUserMemory(); const systemInstruction = getCoreSystemPrompt(userMemory); - + const generateContentConfigWithThinking = isThinkingSupported(this.model) + ? { + ...this.generateContentConfig, + thinkingConfig: { + includeThoughts: true, + }, + } + : this.generateContentConfig; return new GeminiChat( this.config, await this.contentGenerator, this.model, { systemInstruction, - ...this.generateContentConfig, + ...generateContentConfigWithThinking, tools, }, history, diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 8a9fceab..cb0dd07b 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -417,6 +417,10 @@ export class GeminiChat { chunks.push(chunk); const content = chunk.candidates?.[0]?.content; if (content !== undefined) { + if (this.isThoughtContent(content)) { + yield chunk; + continue; + } outputContent.push(content); } } @@ -452,12 +456,19 @@ export class GeminiChat { modelOutput: Content[], automaticFunctionCallingHistory?: Content[], ) { + const nonThoughtModelOutput = modelOutput.filter( + (content) => !this.isThoughtContent(content), + ); + let outputContents: Content[] = []; if ( - modelOutput.length > 0 && - modelOutput.every((content) => content.role !== undefined) + nonThoughtModelOutput.length > 0 && + nonThoughtModelOutput.every((content) => content.role !== undefined) ) { - outputContents = modelOutput; + outputContents = nonThoughtModelOutput; + } else if (nonThoughtModelOutput.length === 0 && modelOutput.length > 0) { + // This case handles when the model returns only a thought. + // We don't want to add an empty model response in this case. } else { // When not a function response appends an empty content when model returns empty response, so that the // history is always alternating between user and model. @@ -486,7 +497,6 @@ export class GeminiChat { if (this.isThoughtContent(content)) { continue; } - const lastContent = consolidatedOutputContents[consolidatedOutputContents.length - 1]; if (this.isTextContent(lastContent) && this.isTextContent(content)) { diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts index 87533c28..4c0a297e 100644 --- a/packages/core/src/core/turn.ts +++ b/packages/core/src/core/turn.ts @@ -45,6 +45,7 @@ export enum GeminiEventType { Error = 'error', ChatCompressed = 'chat_compressed', UsageMetadata = 'usage_metadata', + Thought = 'thought', } export interface GeminiErrorEventValue { @@ -69,11 +70,21 @@ export interface ServerToolCallConfirmationDetails { details: ToolCallConfirmationDetails; } +export type ThoughtSummary = { + subject: string; + description: string; +}; + export type ServerGeminiContentEvent = { type: GeminiEventType.Content; value: string; }; +export type ServerGeminiThoughtEvent = { + type: GeminiEventType.Thought; + value: ThoughtSummary; +}; + export type ServerGeminiToolCallRequestEvent = { type: GeminiEventType.ToolCallRequest; value: ToolCallRequestInfo; @@ -122,7 +133,8 @@ export type ServerGeminiStreamEvent = | ServerGeminiUserCancelledEvent | ServerGeminiErrorEvent | ServerGeminiChatCompressedEvent - | ServerGeminiUsageMetadataEvent; + | ServerGeminiUsageMetadataEvent + | ServerGeminiThoughtEvent; // A turn manages the agentic loop turn within the server context. export class Turn { @@ -160,6 +172,28 @@ export class Turn { } this.debugResponses.push(resp); + const thoughtPart = resp.candidates?.[0]?.content?.parts?.[0]; + if (thoughtPart?.thought) { + // Thought always has a bold "subject" part enclosed in double asterisks + // (e.g., **Subject**). The rest of the string is considered the description. + const rawText = thoughtPart.text ?? ''; + const subjectStringMatches = rawText.match(/\*\*(.*?)\*\*/s); + const subject = subjectStringMatches + ? subjectStringMatches[1].trim() + : ''; + const description = rawText.replace(/\*\*(.*?)\*\*/s, '').trim(); + const thought: ThoughtSummary = { + subject, + description, + }; + + yield { + type: GeminiEventType.Thought, + value: thought, + }; + continue; + } + const text = getResponseText(resp); if (text) { yield { type: GeminiEventType.Content, value: text };