From 6723c72fa5468be713c05205c75be532729e8f92 Mon Sep 17 00:00:00 2001 From: Jerop Kipruto Date: Thu, 12 Jun 2025 16:48:10 -0400 Subject: [PATCH] telemetry: include user decisions in tool call logs (#966) Add the user's decision (accept, reject, modify) to tool call telemetry to better understand user intent. The decision provides crucial context to the `success` metric, as a user can reject a call that would have succeeded or accept one that fails. Also prettify the arguments json. Example: ![image](https://github.com/user-attachments/assets/251cb9fc-ceaa-4cdd-929c-8de47031aca8) #750 --- docs/core/telemetry.md | 3 + .../cli/src/ui/hooks/useReactToolScheduler.ts | 28 +-- .../core/src/core/coreToolScheduler.test.ts | 6 + packages/core/src/core/coreToolScheduler.ts | 52 ++++ packages/core/src/telemetry/loggers.test.ts | 238 ++++++++++++++++++ packages/core/src/telemetry/loggers.ts | 36 ++- packages/core/src/telemetry/metrics.ts | 2 + packages/core/src/telemetry/types.ts | 3 + 8 files changed, 339 insertions(+), 29 deletions(-) diff --git a/docs/core/telemetry.md b/docs/core/telemetry.md index e6498d3b..8d6468c6 100644 --- a/docs/core/telemetry.md +++ b/docs/core/telemetry.md @@ -288,6 +288,7 @@ These are timestamped records of specific events. - `function_args` - `duration_ms` - `success` (boolean) + - `decision` (string: "accept", "reject", or "modify", optional) - `error` (optional) - `error_type` (optional) @@ -332,11 +333,13 @@ These are numerical measurements of behavior over time. - **Attributes**: - `function_name` - `success` (boolean) + - `decision` (string: "accept", "reject", or "modify", optional) - `gemini_cli.tool.call.latency` (Histogram, ms): Measures tool call latency. - **Attributes**: - `function_name` + - `decision` (string: "accept", "reject", or "modify", optional) - `gemini_cli.api.request.count` (Counter, Int): Counts all API requests. diff --git a/packages/cli/src/ui/hooks/useReactToolScheduler.ts b/packages/cli/src/ui/hooks/useReactToolScheduler.ts index 0faccb2a..7f08c81f 100644 --- a/packages/cli/src/ui/hooks/useReactToolScheduler.ts +++ b/packages/cli/src/ui/hooks/useReactToolScheduler.ts @@ -20,7 +20,6 @@ import { Tool, ToolCall, Status as CoreStatus, - logToolCall, EditorType, } from '@gemini-cli/core'; import { useCallback, useState, useMemo } from 'react'; @@ -108,33 +107,9 @@ export function useReactToolScheduler( const allToolCallsCompleteHandler: AllToolCallsCompleteHandler = useCallback( (completedToolCalls) => { - completedToolCalls.forEach((call) => { - let success = false; - let errorMessage: string | undefined; - let duration = 0; - - if (call.status === 'success') { - success = true; - } - if ( - call.status === 'error' && - typeof call.response.resultDisplay === 'string' - ) { - errorMessage = call.response.resultDisplay; - } - duration = call.durationMs || 0; - - logToolCall(config, { - function_name: call.request.name, - function_args: call.request.args, - duration_ms: duration, - success, - error: errorMessage, - }); - }); onComplete(completedToolCalls); }, - [onComplete, config], + [onComplete], ); const toolCallsUpdateHandler: ToolCallsUpdateHandler = useCallback( @@ -165,6 +140,7 @@ export function useReactToolScheduler( onToolCallsUpdate: toolCallsUpdateHandler, approvalMode: config.getApprovalMode(), getPreferredEditor, + config, }), [ config, diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index d9d30f94..8e09a2af 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -16,6 +16,7 @@ import { ToolCallConfirmationDetails, ToolConfirmationOutcome, ToolResult, + Config, } from '../index.js'; import { Part, PartListUnion } from '@google/genai'; import { convertToFunctionResponse } from './coreToolScheduler.js'; @@ -74,7 +75,12 @@ describe('CoreToolScheduler', () => { const onAllToolCallsComplete = vi.fn(); const onToolCallsUpdate = vi.fn(); + const mockConfig = { + getSessionId: () => 'test-session-id', + } as Config; + const scheduler = new CoreToolScheduler({ + config: mockConfig, toolRegistry: Promise.resolve(toolRegistry as any), onAllToolCallsComplete, onToolCallsUpdate, diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index f8688aeb..d17e1511 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -16,6 +16,8 @@ import { EditTool, EditToolParams, EditorType, + Config, + logToolCall, } from '../index.js'; import { Part, PartListUnion } from '@google/genai'; import { getResponseTextFromParts } from '../utils/generateContentResponseUtilities.js'; @@ -25,6 +27,7 @@ export type ValidatingToolCall = { request: ToolCallRequestInfo; tool: Tool; startTime?: number; + outcome?: ToolConfirmationOutcome; }; export type ScheduledToolCall = { @@ -32,6 +35,7 @@ export type ScheduledToolCall = { request: ToolCallRequestInfo; tool: Tool; startTime?: number; + outcome?: ToolConfirmationOutcome; }; export type ErroredToolCall = { @@ -39,6 +43,7 @@ export type ErroredToolCall = { request: ToolCallRequestInfo; response: ToolCallResponseInfo; durationMs?: number; + outcome?: ToolConfirmationOutcome; }; export type SuccessfulToolCall = { @@ -47,6 +52,7 @@ export type SuccessfulToolCall = { tool: Tool; response: ToolCallResponseInfo; durationMs?: number; + outcome?: ToolConfirmationOutcome; }; export type ExecutingToolCall = { @@ -55,6 +61,7 @@ export type ExecutingToolCall = { tool: Tool; liveOutput?: string; startTime?: number; + outcome?: ToolConfirmationOutcome; }; export type CancelledToolCall = { @@ -63,6 +70,7 @@ export type CancelledToolCall = { response: ToolCallResponseInfo; tool: Tool; durationMs?: number; + outcome?: ToolConfirmationOutcome; }; export type WaitingToolCall = { @@ -71,6 +79,7 @@ export type WaitingToolCall = { tool: Tool; confirmationDetails: ToolCallConfirmationDetails; startTime?: number; + outcome?: ToolConfirmationOutcome; }; export type Status = ToolCall['status']; @@ -205,6 +214,7 @@ interface CoreToolSchedulerOptions { onToolCallsUpdate?: ToolCallsUpdateHandler; approvalMode?: ApprovalMode; getPreferredEditor: () => EditorType | undefined; + config: Config; } export class CoreToolScheduler { @@ -215,8 +225,10 @@ export class CoreToolScheduler { private onToolCallsUpdate?: ToolCallsUpdateHandler; private approvalMode: ApprovalMode; private getPreferredEditor: () => EditorType | undefined; + private config: Config; constructor(options: CoreToolSchedulerOptions) { + this.config = options.config; this.toolRegistry = options.toolRegistry; this.outputUpdateHandler = options.outputUpdateHandler; this.onAllToolCallsComplete = options.onAllToolCallsComplete; @@ -274,6 +286,14 @@ export class CoreToolScheduler { | WaitingToolCall ).tool; + const outcome = ( + currentCall as + | ValidatingToolCall + | ScheduledToolCall + | ExecutingToolCall + | WaitingToolCall + ).outcome; + switch (newStatus) { case 'success': { const durationMs = existingStartTime @@ -285,6 +305,7 @@ export class CoreToolScheduler { status: 'success', response: auxiliaryData as ToolCallResponseInfo, durationMs, + outcome, } as SuccessfulToolCall; } case 'error': { @@ -296,6 +317,7 @@ export class CoreToolScheduler { status: 'error', response: auxiliaryData as ToolCallResponseInfo, durationMs, + outcome, } as ErroredToolCall; } case 'awaiting_approval': @@ -305,6 +327,7 @@ export class CoreToolScheduler { status: 'awaiting_approval', confirmationDetails: auxiliaryData as ToolCallConfirmationDetails, startTime: existingStartTime, + outcome, } as WaitingToolCall; case 'scheduled': return { @@ -312,6 +335,7 @@ export class CoreToolScheduler { tool: toolInstance, status: 'scheduled', startTime: existingStartTime, + outcome, } as ScheduledToolCall; case 'cancelled': { const durationMs = existingStartTime @@ -336,6 +360,7 @@ export class CoreToolScheduler { error: undefined, }, durationMs, + outcome, } as CancelledToolCall; } case 'validating': @@ -344,6 +369,7 @@ export class CoreToolScheduler { tool: toolInstance, status: 'validating', startTime: existingStartTime, + outcome, } as ValidatingToolCall; case 'executing': return { @@ -351,6 +377,7 @@ export class CoreToolScheduler { tool: toolInstance, status: 'executing', startTime: existingStartTime, + outcome, } as ExecutingToolCall; default: { const exhaustiveCheck: never = newStatus; @@ -482,6 +509,14 @@ export class CoreToolScheduler { await originalOnConfirm(outcome); } + this.toolCalls = this.toolCalls.map((call) => { + if (call.request.callId !== callId) return call; + return { + ...call, + outcome, + }; + }); + if (outcome === ToolConfirmationOutcome.Cancel || signal.aborted) { this.setStatusInternal( callId, @@ -613,6 +648,23 @@ export class CoreToolScheduler { const completedCalls = [...this.toolCalls] as CompletedToolCall[]; this.toolCalls = []; + for (const call of completedCalls) { + logToolCall( + this.config, + { + function_name: call.request.name, + function_args: call.request.args, + duration_ms: call.durationMs ?? 0, + success: call.status === 'success', + error: + call.status === 'error' + ? call.response.error?.message + : undefined, + }, + call.outcome, + ); + } + if (this.onAllToolCallsComplete) { this.onAllToolCallsComplete(completedCalls); } diff --git a/packages/core/src/telemetry/loggers.test.ts b/packages/core/src/telemetry/loggers.test.ts index 2153ef48..a09f3eaf 100644 --- a/packages/core/src/telemetry/loggers.test.ts +++ b/packages/core/src/telemetry/loggers.test.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { ToolConfirmationOutcome } from '../index.js'; import { logs } from '@opentelemetry/api-logs'; import { SemanticAttributes } from '@opentelemetry/semantic-conventions'; import { Config } from '../config/config.js'; @@ -12,6 +13,8 @@ import { logApiResponse, logCliConfiguration, logUserPrompt, + logToolCall, + ToolCallDecision, } from './loggers.js'; import * as metrics from './metrics.js'; import * as sdk from './sdk.js'; @@ -236,4 +239,239 @@ describe('loggers', () => { }); }); }); + + describe('logToolCall', () => { + const mockConfig = { + getSessionId: () => 'test-session-id', + } as Config; + + const mockMetrics = { + recordToolCallMetrics: vi.fn(), + }; + + beforeEach(() => { + vi.spyOn(metrics, 'recordToolCallMetrics').mockImplementation( + mockMetrics.recordToolCallMetrics, + ); + mockLogger.emit.mockReset(); + }); + + it('should log a tool call with all fields', () => { + const event = { + function_name: 'test-function', + function_args: { + arg1: 'value1', + arg2: 2, + }, + duration_ms: 100, + success: true, + }; + + logToolCall(mockConfig, event, ToolConfirmationOutcome.ProceedOnce); + + expect(mockLogger.emit).toHaveBeenCalledWith({ + body: 'Tool call: test-function. Decision: accept. Success: true. Duration: 100ms.', + attributes: { + 'session.id': 'test-session-id', + 'event.name': 'gemini_cli.tool_call', + 'event.timestamp': '2025-01-01T00:00:00.000Z', + function_name: 'test-function', + function_args: JSON.stringify( + { + arg1: 'value1', + arg2: 2, + }, + null, + 2, + ), + duration_ms: 100, + success: true, + decision: ToolCallDecision.ACCEPT, + }, + }); + + expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith( + mockConfig, + 'test-function', + 100, + true, + ToolCallDecision.ACCEPT, + ); + }); + it('should log a tool call with a reject decision', () => { + const event = { + function_name: 'test-function', + function_args: { + arg1: 'value1', + arg2: 2, + }, + duration_ms: 100, + success: false, + }; + + logToolCall(mockConfig, event, ToolConfirmationOutcome.Cancel); + + expect(mockLogger.emit).toHaveBeenCalledWith({ + body: 'Tool call: test-function. Decision: reject. Success: false. Duration: 100ms.', + attributes: { + 'session.id': 'test-session-id', + 'event.name': 'gemini_cli.tool_call', + 'event.timestamp': '2025-01-01T00:00:00.000Z', + function_name: 'test-function', + function_args: JSON.stringify( + { + arg1: 'value1', + arg2: 2, + }, + null, + 2, + ), + duration_ms: 100, + success: false, + decision: ToolCallDecision.REJECT, + }, + }); + + expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith( + mockConfig, + 'test-function', + 100, + false, + ToolCallDecision.REJECT, + ); + }); + + it('should log a tool call with a modify decision', () => { + const event = { + function_name: 'test-function', + function_args: { + arg1: 'value1', + arg2: 2, + }, + duration_ms: 100, + success: true, + }; + + logToolCall(mockConfig, event, ToolConfirmationOutcome.ModifyWithEditor); + + expect(mockLogger.emit).toHaveBeenCalledWith({ + body: 'Tool call: test-function. Decision: modify. Success: true. Duration: 100ms.', + attributes: { + 'session.id': 'test-session-id', + 'event.name': 'gemini_cli.tool_call', + 'event.timestamp': '2025-01-01T00:00:00.000Z', + function_name: 'test-function', + function_args: JSON.stringify( + { + arg1: 'value1', + arg2: 2, + }, + null, + 2, + ), + duration_ms: 100, + success: true, + decision: ToolCallDecision.MODIFY, + }, + }); + + expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith( + mockConfig, + 'test-function', + 100, + true, + ToolCallDecision.MODIFY, + ); + }); + + it('should log a tool call without a decision', () => { + const event = { + function_name: 'test-function', + function_args: { + arg1: 'value1', + arg2: 2, + }, + duration_ms: 100, + success: true, + }; + + logToolCall(mockConfig, event); + + expect(mockLogger.emit).toHaveBeenCalledWith({ + body: 'Tool call: test-function. Success: true. Duration: 100ms.', + attributes: { + 'session.id': 'test-session-id', + 'event.name': 'gemini_cli.tool_call', + 'event.timestamp': '2025-01-01T00:00:00.000Z', + function_name: 'test-function', + function_args: JSON.stringify( + { + arg1: 'value1', + arg2: 2, + }, + null, + 2, + ), + duration_ms: 100, + success: true, + }, + }); + + expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith( + mockConfig, + 'test-function', + 100, + true, + undefined, + ); + }); + + it('should log a failed tool call with an error', () => { + const event = { + function_name: 'test-function', + function_args: { + arg1: 'value1', + arg2: 2, + }, + duration_ms: 100, + success: false, + error: 'test-error', + error_type: 'test-error-type', + }; + + logToolCall(mockConfig, event); + + expect(mockLogger.emit).toHaveBeenCalledWith({ + body: 'Tool call: test-function. Success: false. Duration: 100ms.', + attributes: { + 'session.id': 'test-session-id', + 'event.name': 'gemini_cli.tool_call', + 'event.timestamp': '2025-01-01T00:00:00.000Z', + function_name: 'test-function', + function_args: JSON.stringify( + { + arg1: 'value1', + arg2: 2, + }, + null, + 2, + ), + duration_ms: 100, + success: false, + error: 'test-error', + 'error.message': 'test-error', + error_type: 'test-error-type', + 'error.type': 'test-error-type', + }, + }); + + expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith( + mockConfig, + 'test-function', + 100, + false, + undefined, + ); + }); + }); }); diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts index 49a7019a..66f584e7 100644 --- a/packages/core/src/telemetry/loggers.ts +++ b/packages/core/src/telemetry/loggers.ts @@ -30,6 +30,7 @@ import { recordToolCallMetrics, } from './metrics.js'; import { isTelemetrySdkInitialized } from './sdk.js'; +import { ToolConfirmationOutcome } from '../index.js'; const shouldLogUserPrompts = (config: Config): boolean => config.getTelemetryLogUserPromptsEnabled() ?? false; @@ -40,6 +41,29 @@ function getCommonAttributes(config: Config): LogAttributes { }; } +export enum ToolCallDecision { + ACCEPT = 'accept', + REJECT = 'reject', + MODIFY = 'modify', +} + +export function getDecisionFromOutcome( + outcome: ToolConfirmationOutcome, +): ToolCallDecision { + switch (outcome) { + case ToolConfirmationOutcome.ProceedOnce: + case ToolConfirmationOutcome.ProceedAlways: + case ToolConfirmationOutcome.ProceedAlwaysServer: + case ToolConfirmationOutcome.ProceedAlwaysTool: + return ToolCallDecision.ACCEPT; + case ToolConfirmationOutcome.ModifyWithEditor: + return ToolCallDecision.MODIFY; + case ToolConfirmationOutcome.Cancel: + default: + return ToolCallDecision.REJECT; + } +} + export function logCliConfiguration(config: Config): void { if (!isTelemetrySdkInitialized()) return; @@ -103,15 +127,20 @@ export function logUserPrompt( export function logToolCall( config: Config, - event: Omit, + event: Omit, + outcome?: ToolConfirmationOutcome, ): void { if (!isTelemetrySdkInitialized()) return; + + const decision = outcome ? getDecisionFromOutcome(outcome) : undefined; + const attributes: LogAttributes = { ...getCommonAttributes(config), ...event, 'event.name': EVENT_TOOL_CALL, 'event.timestamp': new Date().toISOString(), - function_args: JSON.stringify(event.function_args), + function_args: JSON.stringify(event.function_args, null, 2), + decision, }; if (event.error) { attributes['error.message'] = event.error; @@ -121,7 +150,7 @@ export function logToolCall( } const logger = logs.getLogger(SERVICE_NAME); const logRecord: LogRecord = { - body: `Tool call: ${event.function_name}. Success: ${event.success}. Duration: ${event.duration_ms}ms.`, + body: `Tool call: ${event.function_name}${decision ? `. Decision: ${decision}` : ''}. Success: ${event.success}. Duration: ${event.duration_ms}ms.`, attributes, }; logger.emit(logRecord); @@ -130,6 +159,7 @@ export function logToolCall( event.function_name, event.duration_ms, event.success, + decision, ); } diff --git a/packages/core/src/telemetry/metrics.ts b/packages/core/src/telemetry/metrics.ts index 93aa2189..59979ef3 100644 --- a/packages/core/src/telemetry/metrics.ts +++ b/packages/core/src/telemetry/metrics.ts @@ -89,6 +89,7 @@ export function recordToolCallMetrics( functionName: string, durationMs: number, success: boolean, + decision?: 'accept' | 'reject' | 'modify', ): void { if (!toolCallCounter || !toolCallLatencyHistogram || !isMetricsInitialized) return; @@ -97,6 +98,7 @@ export function recordToolCallMetrics( ...getCommonAttributes(config), function_name: functionName, success, + decision, }; toolCallCounter.add(1, metricAttributes); toolCallLatencyHistogram.record(durationMs, { diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts index da352862..926f1384 100644 --- a/packages/core/src/telemetry/types.ts +++ b/packages/core/src/telemetry/types.ts @@ -4,6 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { ToolCallDecision } from './loggers.js'; + export interface UserPromptEvent { 'event.name': 'user_prompt'; 'event.timestamp': string; // ISO 8601 @@ -18,6 +20,7 @@ export interface ToolCallEvent { function_args: Record; duration_ms: number; success: boolean; + decision?: ToolCallDecision; error?: string; error_type?: string; }