fix(core): Remove JSON output schema from the next speaker check prompt (#5325)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-07-31 18:17:52 -07:00 · 2025-07-31 18:17:52 -07:00 · f21ff09389
parent 6c3fb18ef6
commit f21ff09389
5 changed files with 60 additions and 25 deletions
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@ -44,7 +44,11 @@ import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { LoopDetectionService } from '../services/loopDetectionService.js';
 import { ideContext } from '../ide/ideContext.js';
 import { logNextSpeakerCheck } from '../telemetry/loggers.js';
-import { NextSpeakerCheckEvent } from '../telemetry/types.js';
+import {
+  MalformedJsonResponseEvent,
+  NextSpeakerCheckEvent,
+} from '../telemetry/types.js';
+import { ClearcutLogger } from '../telemetry/clearcut-logger/clearcut-logger.js';

 function isThinkingSupported(model: string) {
  if (model.startsWith('gemini-2.5')) return true;
@ -506,7 +510,7 @@ export class GeminiClient {
        authType: this.config.getContentGeneratorConfig()?.authType,
      });

-      const text = getResponseText(result);
+      let text = getResponseText(result);
      if (!text) {
        const error = new Error(
          'API returned an empty response for generateJson.',
@ -519,6 +523,18 @@ export class GeminiClient {
        );
        throw error;
      }
+
+      const prefix = '```json';
+      const suffix = '```';
+      if (text.startsWith(prefix) && text.endsWith(suffix)) {
+        ClearcutLogger.getInstance(this.config)?.logMalformedJsonResponseEvent(
+          new MalformedJsonResponseEvent(modelToUse),
+        );
+        text = text
+          .substring(prefix.length, text.length - suffix.length)
+          .trim();
+      }
+
      try {
        return JSON.parse(text);
      } catch (parseError) {
@ -532,7 +548,9 @@ export class GeminiClient {
          'generateJson-parse',
        );
        throw new Error(
-          `Failed to parse API response as JSON: ${getErrorMessage(parseError)}`,
+          `Failed to parse API response as JSON: ${getErrorMessage(
+            parseError,
+          )}`,
        );
      }
    } catch (error) {
--- a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
+++ b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
@ -20,6 +20,7 @@ import {
  LoopDetectedEvent,
  NextSpeakerCheckEvent,
  SlashCommandEvent,
+  MalformedJsonResponseEvent,
 } from '../types.js';
 import { EventMetadataKey } from './event-metadata-key.js';
 import { Config } from '../../config/config.js';
@ -42,6 +43,7 @@ const flash_fallback_event_name = 'flash_fallback';
 const loop_detected_event_name = 'loop_detected';
 const next_speaker_check_event_name = 'next_speaker_check';
 const slash_command_event_name = 'slash_command';
+const malformed_json_response_event_name = 'malformed_json_response';

 export interface LogResponse {
  nextRequestWaitMs?: number;
@ -557,6 +559,21 @@ export class ClearcutLogger {
    this.flushIfNeeded();
  }

+  logMalformedJsonResponseEvent(event: MalformedJsonResponseEvent): void {
+    const data = [
+      {
+        gemini_cli_key:
+          EventMetadataKey.GEMINI_CLI_MALFORMED_JSON_RESPONSE_MODEL,
+        value: JSON.stringify(event.model),
+      },
+    ];
+
+    this.enqueueLogEvent(
+      this.createLogEvent(malformed_json_response_event_name, data),
+    );
+    this.flushIfNeeded();
+  }
+
  logEndSessionEvent(event: EndSessionEvent): void {
    const data = [
      {
--- a/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts
+++ b/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts
@ -183,6 +183,13 @@ export enum EventMetadataKey {

  // Logs the result of the next speaker check
  GEMINI_CLI_NEXT_SPEAKER_CHECK_RESULT = 44,
+
+  // ==========================================================================
+  // Malformed JSON Response Event Keys
+  // ==========================================================================
+
+  // Logs the model that produced the malformed JSON response.
+  GEMINI_CLI_MALFORMED_JSON_RESPONSE_MODEL = 45,
 }

 export function getEventMetadataKey(
--- a/packages/core/src/telemetry/types.ts
+++ b/packages/core/src/telemetry/types.ts
@ -296,6 +296,18 @@ export class SlashCommandEvent {
  }
 }

+export class MalformedJsonResponseEvent {
+  'event.name': 'malformed_json_response';
+  'event.timestamp': string; // ISO 8601
+  model: string;
+
+  constructor(model: string) {
+    this['event.name'] = 'malformed_json_response';
+    this['event.timestamp'] = new Date().toISOString();
+    this.model = model;
+  }
+}
+
 export type TelemetryEvent =
  | StartSessionEvent
  | EndSessionEvent
@ -307,4 +319,5 @@ export type TelemetryEvent =
  | FlashFallbackEvent
  | LoopDetectedEvent
  | NextSpeakerCheckEvent
-  | SlashCommandEvent;
+  | SlashCommandEvent
+  | MalformedJsonResponseEvent;
--- a/packages/core/src/utils/nextSpeakerChecker.ts
+++ b/packages/core/src/utils/nextSpeakerChecker.ts
@ -14,27 +14,7 @@ const CHECK_PROMPT = `Analyze *only* the content and structure of your immediate
 **Decision Rules (apply in order):**
 1.  **Model Continues:** If your last response explicitly states an immediate next action *you* intend to take (e.g., "Next, I will...", "Now I'll process...", "Moving on to analyze...", indicates an intended tool call that didn't execute), OR if the response seems clearly incomplete (cut off mid-thought without a natural conclusion), then the **'model'** should speak next.
 2.  **Question to User:** If your last response ends with a direct question specifically addressed *to the user*, then the **'user'** should speak next.
-3.  **Waiting for User:** If your last response completed a thought, statement, or task *and* does not meet the criteria for Rule 1 (Model Continues) or Rule 2 (Question to User), it implies a pause expecting user input or reaction. In this case, the **'user'** should speak next.
-**Output Format:**
-Respond *only* in JSON format according to the following schema. Do not include any text outside the JSON structure.
-\`\`\`json
-{
-  "type": "object",
-  "properties": {
-    "reasoning": {
-        "type": "string",
-        "description": "Brief explanation justifying the 'next_speaker' choice based *strictly* on the applicable rule and the content/structure of the preceding turn."
-    },
-    "next_speaker": {
-      "type": "string",
-      "enum": ["user", "model"],
-      "description": "Who should speak next based *only* on the preceding turn and the decision rules."
-    }
-  },
-  "required": ["next_speaker", "reasoning"]
-}
-\`\`\`
-`;
+3.  **Waiting for User:** If your last response completed a thought, statement, or task *and* does not meet the criteria for Rule 1 (Model Continues) or Rule 2 (Question to User), it implies a pause expecting user input or reaction. In this case, the **'user'** should speak next.`;

 const RESPONSE_SCHEMA: SchemaUnion = {
  type: Type.OBJECT,