feat: Show model thoughts while loading (#992)

This commit is contained in:
Asad Memon 2025-06-15 11:19:05 -07:00 committed by GitHub
parent b3d89a1075
commit 123ad20e9b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 153 additions and 24 deletions

View File

@ -300,6 +300,7 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
submitQuery,
initError,
pendingHistoryItems: pendingGeminiHistoryItems,
thought,
} = useGeminiStream(
config.getGeminiClient(),
history,
@ -542,6 +543,12 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
) : (
<>
<LoadingIndicator
thought={
streamingState === StreamingState.WaitingForConfirmation ||
config.getAccessibility()?.disableLoadingPhrases
? undefined
: thought
}
currentLoadingPhrase={
config.getAccessibility()?.disableLoadingPhrases
? undefined

View File

@ -159,4 +159,56 @@ describe('<LoadingIndicator />', () => {
);
expect(lastFrame()).toBe('');
});
it('should display fallback phrase if thought is empty', () => {
  // With no thought available, the loading phrase is the visible text.
  const emptyThoughtProps = {
    thought: null,
    currentLoadingPhrase: 'Loading...',
    elapsedTime: 5,
  };
  const { lastFrame } = renderWithContext(
    <LoadingIndicator {...emptyThoughtProps} />,
    StreamingState.Responding,
  );
  expect(lastFrame()).toContain('Loading...');
});
it('should display the subject of a thought', () => {
const props = {
thought: {
subject: 'Thinking about something...',
description: 'and other stuff.',
},
elapsedTime: 5,
};
const { lastFrame } = renderWithContext(
<LoadingIndicator {...props} />,
StreamingState.Responding,
);
const output = lastFrame();
expect(output).toBeDefined();
if (output) {
expect(output).toContain('Thinking about something...');
expect(output).not.toContain('and other stuff.');
}
});
it('should prioritize thought.subject over currentLoadingPhrase', () => {
const props = {
thought: {
subject: 'This should be displayed',
description: 'A description',
},
currentLoadingPhrase: 'This should not be displayed',
elapsedTime: 5,
};
const { lastFrame } = renderWithContext(
<LoadingIndicator {...props} />,
StreamingState.Responding,
);
const output = lastFrame();
expect(output).toContain('This should be displayed');
expect(output).not.toContain('This should not be displayed');
});
});

View File

@ -4,6 +4,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { ThoughtSummary } from '@gemini-cli/core';
import React from 'react';
import { Box, Text } from 'ink';
import { Colors } from '../colors.js';
@ -15,12 +16,14 @@ interface LoadingIndicatorProps {
currentLoadingPhrase?: string;
elapsedTime: number;
rightContent?: React.ReactNode;
thought?: ThoughtSummary | null;
}
export const LoadingIndicator: React.FC<LoadingIndicatorProps> = ({
currentLoadingPhrase,
elapsedTime,
rightContent,
thought,
}) => {
const streamingState = useStreamingContext();
@ -28,25 +31,30 @@ export const LoadingIndicator: React.FC<LoadingIndicatorProps> = ({
return null;
}
const primaryText = thought?.subject || currentLoadingPhrase;
return (
<Box marginTop={1} paddingLeft={0}>
<Box marginRight={1}>
<GeminiRespondingSpinner
nonRespondingDisplay={
streamingState === StreamingState.WaitingForConfirmation ? '⠏' : ''
}
/>
<Box marginTop={1} paddingLeft={0} flexDirection="column">
{/* Main loading line */}
<Box>
<Box marginRight={1}>
<GeminiRespondingSpinner
nonRespondingDisplay={
streamingState === StreamingState.WaitingForConfirmation
? '⠏'
: ''
}
/>
</Box>
{primaryText && <Text color={Colors.AccentPurple}>{primaryText}</Text>}
<Text color={Colors.Gray}>
{streamingState === StreamingState.WaitingForConfirmation
? ''
: ` (esc to cancel, ${elapsedTime}s)`}
</Text>
<Box flexGrow={1}>{/* Spacer */}</Box>
{rightContent && <Box>{rightContent}</Box>}
</Box>
{currentLoadingPhrase && (
<Text color={Colors.AccentPurple}>{currentLoadingPhrase}</Text>
)}
<Text color={Colors.Gray}>
{streamingState === StreamingState.WaitingForConfirmation
? ''
: ` (esc to cancel, ${elapsedTime}s)`}
</Text>
<Box flexGrow={1}>{/* Spacer */}</Box>
{rightContent && <Box>{rightContent}</Box>}
</Box>
);
};

View File

@ -21,6 +21,7 @@ import {
logUserPrompt,
GitService,
EditorType,
ThoughtSummary,
} from '@gemini-cli/core';
import { type Part, type PartListUnion } from '@google/genai';
import {
@ -90,6 +91,7 @@ export const useGeminiStream = (
const [initError, setInitError] = useState<string | null>(null);
const abortControllerRef = useRef<AbortController | null>(null);
const [isResponding, setIsResponding] = useState<boolean>(false);
const [thought, setThought] = useState<ThoughtSummary | null>(null);
const [pendingHistoryItemRef, setPendingHistoryItem] =
useStateAndRef<HistoryItemWithoutId | null>(null);
const logger = useLogger();
@ -393,6 +395,9 @@ export const useGeminiStream = (
const toolCallRequests: ToolCallRequestInfo[] = [];
for await (const event of stream) {
switch (event.type) {
case ServerGeminiEventType.Thought:
setThought(event.value);
break;
case ServerGeminiEventType.Content:
geminiMessageBuffer = handleContentEvent(
event.value,
@ -730,5 +735,6 @@ export const useGeminiStream = (
submitQuery,
initError,
pendingHistoryItems,
thought,
};
};

View File

@ -38,6 +38,11 @@ import {
import { ProxyAgent, setGlobalDispatcher } from 'undici';
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
/**
 * Whether the given model supports thought output (`thinkingConfig`).
 *
 * Only models in the Gemini 2.5 family are treated as thinking-capable.
 *
 * @param model - Model identifier, e.g. "gemini-2.5-pro".
 * @returns True if thinking should be enabled for this model.
 */
function isThinkingSupported(model: string): boolean {
  // The predicate is the answer — no need for an if/return-true/return-false pair.
  return model.startsWith('gemini-2.5');
}
export class GeminiClient {
private chat: Promise<GeminiChat>;
private contentGenerator: Promise<ContentGenerator>;
@ -164,14 +169,21 @@ export class GeminiClient {
try {
const userMemory = this.config.getUserMemory();
const systemInstruction = getCoreSystemPrompt(userMemory);
const generateContentConfigWithThinking = isThinkingSupported(this.model)
? {
...this.generateContentConfig,
thinkingConfig: {
includeThoughts: true,
},
}
: this.generateContentConfig;
return new GeminiChat(
this.config,
await this.contentGenerator,
this.model,
{
systemInstruction,
...this.generateContentConfig,
...generateContentConfigWithThinking,
tools,
},
history,

View File

@ -417,6 +417,10 @@ export class GeminiChat {
chunks.push(chunk);
const content = chunk.candidates?.[0]?.content;
if (content !== undefined) {
if (this.isThoughtContent(content)) {
yield chunk;
continue;
}
outputContent.push(content);
}
}
@ -452,12 +456,19 @@ export class GeminiChat {
modelOutput: Content[],
automaticFunctionCallingHistory?: Content[],
) {
const nonThoughtModelOutput = modelOutput.filter(
(content) => !this.isThoughtContent(content),
);
let outputContents: Content[] = [];
if (
modelOutput.length > 0 &&
modelOutput.every((content) => content.role !== undefined)
nonThoughtModelOutput.length > 0 &&
nonThoughtModelOutput.every((content) => content.role !== undefined)
) {
outputContents = modelOutput;
outputContents = nonThoughtModelOutput;
} else if (nonThoughtModelOutput.length === 0 && modelOutput.length > 0) {
// This case handles when the model returns only a thought.
// We don't want to add an empty model response in this case.
} else {
// When the output is not a function response, append an empty model content if the model
// returned an empty response, so that the history always alternates between user and model.
@ -486,7 +497,6 @@ export class GeminiChat {
if (this.isThoughtContent(content)) {
continue;
}
const lastContent =
consolidatedOutputContents[consolidatedOutputContents.length - 1];
if (this.isTextContent(lastContent) && this.isTextContent(content)) {

View File

@ -45,6 +45,7 @@ export enum GeminiEventType {
Error = 'error',
ChatCompressed = 'chat_compressed',
UsageMetadata = 'usage_metadata',
Thought = 'thought',
}
export interface GeminiErrorEventValue {
@ -69,11 +70,21 @@ export interface ServerToolCallConfirmationDetails {
details: ToolCallConfirmationDetails;
}
/**
 * A parsed summary of a model "thought" emitted during streaming.
 *
 * Raw thought text carries a bold subject in double asterisks
 * (e.g. `**Subject**`); the remainder of the text is the description.
 */
export type ThoughtSummary = {
  subject: string;
  description: string;
};
/** Streaming event carrying a chunk of model response text. */
export type ServerGeminiContentEvent = {
  type: GeminiEventType.Content;
  value: string;
};
/** Streaming event carrying a parsed thought summary from the model. */
export type ServerGeminiThoughtEvent = {
  type: GeminiEventType.Thought;
  value: ThoughtSummary;
};
export type ServerGeminiToolCallRequestEvent = {
type: GeminiEventType.ToolCallRequest;
value: ToolCallRequestInfo;
@ -122,7 +133,8 @@ export type ServerGeminiStreamEvent =
| ServerGeminiUserCancelledEvent
| ServerGeminiErrorEvent
| ServerGeminiChatCompressedEvent
| ServerGeminiUsageMetadataEvent;
| ServerGeminiUsageMetadataEvent
| ServerGeminiThoughtEvent;
// A turn manages the agentic loop turn within the server context.
export class Turn {
@ -160,6 +172,28 @@ export class Turn {
}
this.debugResponses.push(resp);
const thoughtPart = resp.candidates?.[0]?.content?.parts?.[0];
if (thoughtPart?.thought) {
// Thought always has a bold "subject" part enclosed in double asterisks
// (e.g., **Subject**). The rest of the string is considered the description.
const rawText = thoughtPart.text ?? '';
const subjectStringMatches = rawText.match(/\*\*(.*?)\*\*/s);
const subject = subjectStringMatches
? subjectStringMatches[1].trim()
: '';
const description = rawText.replace(/\*\*(.*?)\*\*/s, '').trim();
const thought: ThoughtSummary = {
subject,
description,
};
yield {
type: GeminiEventType.Thought,
value: thought,
};
continue;
}
const text = getResponseText(resp);
if (text) {
yield { type: GeminiEventType.Content, value: text };