telemetry: include user decisions in tool call logs (#966)

Add the user's decision (accept, reject, modify) to tool call telemetry to better understand user intent. The decision provides crucial context to the `success` metric, as a user can reject a call that would have succeeded or accept one that fails. 

Also prettify the arguments json.

Example: 
![image](https://github.com/user-attachments/assets/251cb9fc-ceaa-4cdd-929c-8de47031aca8)

#750
This commit is contained in:
Jerop Kipruto 2025-06-12 16:48:10 -04:00 committed by GitHub
parent f8863f4d00
commit 6723c72fa5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 339 additions and 29 deletions

View File

@ -288,6 +288,7 @@ These are timestamped records of specific events.
- `function_args` - `function_args`
- `duration_ms` - `duration_ms`
- `success` (boolean) - `success` (boolean)
- `decision` (string: "accept", "reject", or "modify", optional)
- `error` (optional) - `error` (optional)
- `error_type` (optional) - `error_type` (optional)
@ -332,11 +333,13 @@ These are numerical measurements of behavior over time.
- **Attributes**: - **Attributes**:
- `function_name` - `function_name`
- `success` (boolean) - `success` (boolean)
- `decision` (string: "accept", "reject", or "modify", optional)
- `gemini_cli.tool.call.latency` (Histogram, ms): Measures tool call latency. - `gemini_cli.tool.call.latency` (Histogram, ms): Measures tool call latency.
- **Attributes**: - **Attributes**:
- `function_name` - `function_name`
- `decision` (string: "accept", "reject", or "modify", optional)
- `gemini_cli.api.request.count` (Counter, Int): Counts all API requests. - `gemini_cli.api.request.count` (Counter, Int): Counts all API requests.

View File

@ -20,7 +20,6 @@ import {
Tool, Tool,
ToolCall, ToolCall,
Status as CoreStatus, Status as CoreStatus,
logToolCall,
EditorType, EditorType,
} from '@gemini-cli/core'; } from '@gemini-cli/core';
import { useCallback, useState, useMemo } from 'react'; import { useCallback, useState, useMemo } from 'react';
@ -108,33 +107,9 @@ export function useReactToolScheduler(
const allToolCallsCompleteHandler: AllToolCallsCompleteHandler = useCallback( const allToolCallsCompleteHandler: AllToolCallsCompleteHandler = useCallback(
(completedToolCalls) => { (completedToolCalls) => {
completedToolCalls.forEach((call) => {
let success = false;
let errorMessage: string | undefined;
let duration = 0;
if (call.status === 'success') {
success = true;
}
if (
call.status === 'error' &&
typeof call.response.resultDisplay === 'string'
) {
errorMessage = call.response.resultDisplay;
}
duration = call.durationMs || 0;
logToolCall(config, {
function_name: call.request.name,
function_args: call.request.args,
duration_ms: duration,
success,
error: errorMessage,
});
});
onComplete(completedToolCalls); onComplete(completedToolCalls);
}, },
[onComplete, config], [onComplete],
); );
const toolCallsUpdateHandler: ToolCallsUpdateHandler = useCallback( const toolCallsUpdateHandler: ToolCallsUpdateHandler = useCallback(
@ -165,6 +140,7 @@ export function useReactToolScheduler(
onToolCallsUpdate: toolCallsUpdateHandler, onToolCallsUpdate: toolCallsUpdateHandler,
approvalMode: config.getApprovalMode(), approvalMode: config.getApprovalMode(),
getPreferredEditor, getPreferredEditor,
config,
}), }),
[ [
config, config,

View File

@ -16,6 +16,7 @@ import {
ToolCallConfirmationDetails, ToolCallConfirmationDetails,
ToolConfirmationOutcome, ToolConfirmationOutcome,
ToolResult, ToolResult,
Config,
} from '../index.js'; } from '../index.js';
import { Part, PartListUnion } from '@google/genai'; import { Part, PartListUnion } from '@google/genai';
import { convertToFunctionResponse } from './coreToolScheduler.js'; import { convertToFunctionResponse } from './coreToolScheduler.js';
@ -74,7 +75,12 @@ describe('CoreToolScheduler', () => {
const onAllToolCallsComplete = vi.fn(); const onAllToolCallsComplete = vi.fn();
const onToolCallsUpdate = vi.fn(); const onToolCallsUpdate = vi.fn();
const mockConfig = {
getSessionId: () => 'test-session-id',
} as Config;
const scheduler = new CoreToolScheduler({ const scheduler = new CoreToolScheduler({
config: mockConfig,
toolRegistry: Promise.resolve(toolRegistry as any), toolRegistry: Promise.resolve(toolRegistry as any),
onAllToolCallsComplete, onAllToolCallsComplete,
onToolCallsUpdate, onToolCallsUpdate,

View File

@ -16,6 +16,8 @@ import {
EditTool, EditTool,
EditToolParams, EditToolParams,
EditorType, EditorType,
Config,
logToolCall,
} from '../index.js'; } from '../index.js';
import { Part, PartListUnion } from '@google/genai'; import { Part, PartListUnion } from '@google/genai';
import { getResponseTextFromParts } from '../utils/generateContentResponseUtilities.js'; import { getResponseTextFromParts } from '../utils/generateContentResponseUtilities.js';
@ -25,6 +27,7 @@ export type ValidatingToolCall = {
request: ToolCallRequestInfo; request: ToolCallRequestInfo;
tool: Tool; tool: Tool;
startTime?: number; startTime?: number;
outcome?: ToolConfirmationOutcome;
}; };
export type ScheduledToolCall = { export type ScheduledToolCall = {
@ -32,6 +35,7 @@ export type ScheduledToolCall = {
request: ToolCallRequestInfo; request: ToolCallRequestInfo;
tool: Tool; tool: Tool;
startTime?: number; startTime?: number;
outcome?: ToolConfirmationOutcome;
}; };
export type ErroredToolCall = { export type ErroredToolCall = {
@ -39,6 +43,7 @@ export type ErroredToolCall = {
request: ToolCallRequestInfo; request: ToolCallRequestInfo;
response: ToolCallResponseInfo; response: ToolCallResponseInfo;
durationMs?: number; durationMs?: number;
outcome?: ToolConfirmationOutcome;
}; };
export type SuccessfulToolCall = { export type SuccessfulToolCall = {
@ -47,6 +52,7 @@ export type SuccessfulToolCall = {
tool: Tool; tool: Tool;
response: ToolCallResponseInfo; response: ToolCallResponseInfo;
durationMs?: number; durationMs?: number;
outcome?: ToolConfirmationOutcome;
}; };
export type ExecutingToolCall = { export type ExecutingToolCall = {
@ -55,6 +61,7 @@ export type ExecutingToolCall = {
tool: Tool; tool: Tool;
liveOutput?: string; liveOutput?: string;
startTime?: number; startTime?: number;
outcome?: ToolConfirmationOutcome;
}; };
export type CancelledToolCall = { export type CancelledToolCall = {
@ -63,6 +70,7 @@ export type CancelledToolCall = {
response: ToolCallResponseInfo; response: ToolCallResponseInfo;
tool: Tool; tool: Tool;
durationMs?: number; durationMs?: number;
outcome?: ToolConfirmationOutcome;
}; };
export type WaitingToolCall = { export type WaitingToolCall = {
@ -71,6 +79,7 @@ export type WaitingToolCall = {
tool: Tool; tool: Tool;
confirmationDetails: ToolCallConfirmationDetails; confirmationDetails: ToolCallConfirmationDetails;
startTime?: number; startTime?: number;
outcome?: ToolConfirmationOutcome;
}; };
export type Status = ToolCall['status']; export type Status = ToolCall['status'];
@ -205,6 +214,7 @@ interface CoreToolSchedulerOptions {
onToolCallsUpdate?: ToolCallsUpdateHandler; onToolCallsUpdate?: ToolCallsUpdateHandler;
approvalMode?: ApprovalMode; approvalMode?: ApprovalMode;
getPreferredEditor: () => EditorType | undefined; getPreferredEditor: () => EditorType | undefined;
config: Config;
} }
export class CoreToolScheduler { export class CoreToolScheduler {
@ -215,8 +225,10 @@ export class CoreToolScheduler {
private onToolCallsUpdate?: ToolCallsUpdateHandler; private onToolCallsUpdate?: ToolCallsUpdateHandler;
private approvalMode: ApprovalMode; private approvalMode: ApprovalMode;
private getPreferredEditor: () => EditorType | undefined; private getPreferredEditor: () => EditorType | undefined;
private config: Config;
constructor(options: CoreToolSchedulerOptions) { constructor(options: CoreToolSchedulerOptions) {
this.config = options.config;
this.toolRegistry = options.toolRegistry; this.toolRegistry = options.toolRegistry;
this.outputUpdateHandler = options.outputUpdateHandler; this.outputUpdateHandler = options.outputUpdateHandler;
this.onAllToolCallsComplete = options.onAllToolCallsComplete; this.onAllToolCallsComplete = options.onAllToolCallsComplete;
@ -274,6 +286,14 @@ export class CoreToolScheduler {
| WaitingToolCall | WaitingToolCall
).tool; ).tool;
const outcome = (
currentCall as
| ValidatingToolCall
| ScheduledToolCall
| ExecutingToolCall
| WaitingToolCall
).outcome;
switch (newStatus) { switch (newStatus) {
case 'success': { case 'success': {
const durationMs = existingStartTime const durationMs = existingStartTime
@ -285,6 +305,7 @@ export class CoreToolScheduler {
status: 'success', status: 'success',
response: auxiliaryData as ToolCallResponseInfo, response: auxiliaryData as ToolCallResponseInfo,
durationMs, durationMs,
outcome,
} as SuccessfulToolCall; } as SuccessfulToolCall;
} }
case 'error': { case 'error': {
@ -296,6 +317,7 @@ export class CoreToolScheduler {
status: 'error', status: 'error',
response: auxiliaryData as ToolCallResponseInfo, response: auxiliaryData as ToolCallResponseInfo,
durationMs, durationMs,
outcome,
} as ErroredToolCall; } as ErroredToolCall;
} }
case 'awaiting_approval': case 'awaiting_approval':
@ -305,6 +327,7 @@ export class CoreToolScheduler {
status: 'awaiting_approval', status: 'awaiting_approval',
confirmationDetails: auxiliaryData as ToolCallConfirmationDetails, confirmationDetails: auxiliaryData as ToolCallConfirmationDetails,
startTime: existingStartTime, startTime: existingStartTime,
outcome,
} as WaitingToolCall; } as WaitingToolCall;
case 'scheduled': case 'scheduled':
return { return {
@ -312,6 +335,7 @@ export class CoreToolScheduler {
tool: toolInstance, tool: toolInstance,
status: 'scheduled', status: 'scheduled',
startTime: existingStartTime, startTime: existingStartTime,
outcome,
} as ScheduledToolCall; } as ScheduledToolCall;
case 'cancelled': { case 'cancelled': {
const durationMs = existingStartTime const durationMs = existingStartTime
@ -336,6 +360,7 @@ export class CoreToolScheduler {
error: undefined, error: undefined,
}, },
durationMs, durationMs,
outcome,
} as CancelledToolCall; } as CancelledToolCall;
} }
case 'validating': case 'validating':
@ -344,6 +369,7 @@ export class CoreToolScheduler {
tool: toolInstance, tool: toolInstance,
status: 'validating', status: 'validating',
startTime: existingStartTime, startTime: existingStartTime,
outcome,
} as ValidatingToolCall; } as ValidatingToolCall;
case 'executing': case 'executing':
return { return {
@ -351,6 +377,7 @@ export class CoreToolScheduler {
tool: toolInstance, tool: toolInstance,
status: 'executing', status: 'executing',
startTime: existingStartTime, startTime: existingStartTime,
outcome,
} as ExecutingToolCall; } as ExecutingToolCall;
default: { default: {
const exhaustiveCheck: never = newStatus; const exhaustiveCheck: never = newStatus;
@ -482,6 +509,14 @@ export class CoreToolScheduler {
await originalOnConfirm(outcome); await originalOnConfirm(outcome);
} }
this.toolCalls = this.toolCalls.map((call) => {
if (call.request.callId !== callId) return call;
return {
...call,
outcome,
};
});
if (outcome === ToolConfirmationOutcome.Cancel || signal.aborted) { if (outcome === ToolConfirmationOutcome.Cancel || signal.aborted) {
this.setStatusInternal( this.setStatusInternal(
callId, callId,
@ -613,6 +648,23 @@ export class CoreToolScheduler {
const completedCalls = [...this.toolCalls] as CompletedToolCall[]; const completedCalls = [...this.toolCalls] as CompletedToolCall[];
this.toolCalls = []; this.toolCalls = [];
for (const call of completedCalls) {
logToolCall(
this.config,
{
function_name: call.request.name,
function_args: call.request.args,
duration_ms: call.durationMs ?? 0,
success: call.status === 'success',
error:
call.status === 'error'
? call.response.error?.message
: undefined,
},
call.outcome,
);
}
if (this.onAllToolCallsComplete) { if (this.onAllToolCallsComplete) {
this.onAllToolCallsComplete(completedCalls); this.onAllToolCallsComplete(completedCalls);
} }

View File

@ -4,6 +4,7 @@
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
import { ToolConfirmationOutcome } from '../index.js';
import { logs } from '@opentelemetry/api-logs'; import { logs } from '@opentelemetry/api-logs';
import { SemanticAttributes } from '@opentelemetry/semantic-conventions'; import { SemanticAttributes } from '@opentelemetry/semantic-conventions';
import { Config } from '../config/config.js'; import { Config } from '../config/config.js';
@ -12,6 +13,8 @@ import {
logApiResponse, logApiResponse,
logCliConfiguration, logCliConfiguration,
logUserPrompt, logUserPrompt,
logToolCall,
ToolCallDecision,
} from './loggers.js'; } from './loggers.js';
import * as metrics from './metrics.js'; import * as metrics from './metrics.js';
import * as sdk from './sdk.js'; import * as sdk from './sdk.js';
@ -236,4 +239,239 @@ describe('loggers', () => {
}); });
}); });
}); });
describe('logToolCall', () => {
const mockConfig = {
getSessionId: () => 'test-session-id',
} as Config;
const mockMetrics = {
recordToolCallMetrics: vi.fn(),
};
beforeEach(() => {
vi.spyOn(metrics, 'recordToolCallMetrics').mockImplementation(
mockMetrics.recordToolCallMetrics,
);
mockLogger.emit.mockReset();
});
it('should log a tool call with all fields', () => {
const event = {
function_name: 'test-function',
function_args: {
arg1: 'value1',
arg2: 2,
},
duration_ms: 100,
success: true,
};
logToolCall(mockConfig, event, ToolConfirmationOutcome.ProceedOnce);
expect(mockLogger.emit).toHaveBeenCalledWith({
body: 'Tool call: test-function. Decision: accept. Success: true. Duration: 100ms.',
attributes: {
'session.id': 'test-session-id',
'event.name': 'gemini_cli.tool_call',
'event.timestamp': '2025-01-01T00:00:00.000Z',
function_name: 'test-function',
function_args: JSON.stringify(
{
arg1: 'value1',
arg2: 2,
},
null,
2,
),
duration_ms: 100,
success: true,
decision: ToolCallDecision.ACCEPT,
},
});
expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
mockConfig,
'test-function',
100,
true,
ToolCallDecision.ACCEPT,
);
});
it('should log a tool call with a reject decision', () => {
const event = {
function_name: 'test-function',
function_args: {
arg1: 'value1',
arg2: 2,
},
duration_ms: 100,
success: false,
};
logToolCall(mockConfig, event, ToolConfirmationOutcome.Cancel);
expect(mockLogger.emit).toHaveBeenCalledWith({
body: 'Tool call: test-function. Decision: reject. Success: false. Duration: 100ms.',
attributes: {
'session.id': 'test-session-id',
'event.name': 'gemini_cli.tool_call',
'event.timestamp': '2025-01-01T00:00:00.000Z',
function_name: 'test-function',
function_args: JSON.stringify(
{
arg1: 'value1',
arg2: 2,
},
null,
2,
),
duration_ms: 100,
success: false,
decision: ToolCallDecision.REJECT,
},
});
expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
mockConfig,
'test-function',
100,
false,
ToolCallDecision.REJECT,
);
});
it('should log a tool call with a modify decision', () => {
const event = {
function_name: 'test-function',
function_args: {
arg1: 'value1',
arg2: 2,
},
duration_ms: 100,
success: true,
};
logToolCall(mockConfig, event, ToolConfirmationOutcome.ModifyWithEditor);
expect(mockLogger.emit).toHaveBeenCalledWith({
body: 'Tool call: test-function. Decision: modify. Success: true. Duration: 100ms.',
attributes: {
'session.id': 'test-session-id',
'event.name': 'gemini_cli.tool_call',
'event.timestamp': '2025-01-01T00:00:00.000Z',
function_name: 'test-function',
function_args: JSON.stringify(
{
arg1: 'value1',
arg2: 2,
},
null,
2,
),
duration_ms: 100,
success: true,
decision: ToolCallDecision.MODIFY,
},
});
expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
mockConfig,
'test-function',
100,
true,
ToolCallDecision.MODIFY,
);
});
it('should log a tool call without a decision', () => {
const event = {
function_name: 'test-function',
function_args: {
arg1: 'value1',
arg2: 2,
},
duration_ms: 100,
success: true,
};
logToolCall(mockConfig, event);
expect(mockLogger.emit).toHaveBeenCalledWith({
body: 'Tool call: test-function. Success: true. Duration: 100ms.',
attributes: {
'session.id': 'test-session-id',
'event.name': 'gemini_cli.tool_call',
'event.timestamp': '2025-01-01T00:00:00.000Z',
function_name: 'test-function',
function_args: JSON.stringify(
{
arg1: 'value1',
arg2: 2,
},
null,
2,
),
duration_ms: 100,
success: true,
},
});
expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
mockConfig,
'test-function',
100,
true,
undefined,
);
});
it('should log a failed tool call with an error', () => {
const event = {
function_name: 'test-function',
function_args: {
arg1: 'value1',
arg2: 2,
},
duration_ms: 100,
success: false,
error: 'test-error',
error_type: 'test-error-type',
};
logToolCall(mockConfig, event);
expect(mockLogger.emit).toHaveBeenCalledWith({
body: 'Tool call: test-function. Success: false. Duration: 100ms.',
attributes: {
'session.id': 'test-session-id',
'event.name': 'gemini_cli.tool_call',
'event.timestamp': '2025-01-01T00:00:00.000Z',
function_name: 'test-function',
function_args: JSON.stringify(
{
arg1: 'value1',
arg2: 2,
},
null,
2,
),
duration_ms: 100,
success: false,
error: 'test-error',
'error.message': 'test-error',
error_type: 'test-error-type',
'error.type': 'test-error-type',
},
});
expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
mockConfig,
'test-function',
100,
false,
undefined,
);
});
});
}); });

View File

@ -30,6 +30,7 @@ import {
recordToolCallMetrics, recordToolCallMetrics,
} from './metrics.js'; } from './metrics.js';
import { isTelemetrySdkInitialized } from './sdk.js'; import { isTelemetrySdkInitialized } from './sdk.js';
import { ToolConfirmationOutcome } from '../index.js';
const shouldLogUserPrompts = (config: Config): boolean => const shouldLogUserPrompts = (config: Config): boolean =>
config.getTelemetryLogUserPromptsEnabled() ?? false; config.getTelemetryLogUserPromptsEnabled() ?? false;
@ -40,6 +41,29 @@ function getCommonAttributes(config: Config): LogAttributes {
}; };
} }
export enum ToolCallDecision {
ACCEPT = 'accept',
REJECT = 'reject',
MODIFY = 'modify',
}
export function getDecisionFromOutcome(
outcome: ToolConfirmationOutcome,
): ToolCallDecision {
switch (outcome) {
case ToolConfirmationOutcome.ProceedOnce:
case ToolConfirmationOutcome.ProceedAlways:
case ToolConfirmationOutcome.ProceedAlwaysServer:
case ToolConfirmationOutcome.ProceedAlwaysTool:
return ToolCallDecision.ACCEPT;
case ToolConfirmationOutcome.ModifyWithEditor:
return ToolCallDecision.MODIFY;
case ToolConfirmationOutcome.Cancel:
default:
return ToolCallDecision.REJECT;
}
}
export function logCliConfiguration(config: Config): void { export function logCliConfiguration(config: Config): void {
if (!isTelemetrySdkInitialized()) return; if (!isTelemetrySdkInitialized()) return;
@ -103,15 +127,20 @@ export function logUserPrompt(
export function logToolCall( export function logToolCall(
config: Config, config: Config,
event: Omit<ToolCallEvent, 'event.name' | 'event.timestamp'>, event: Omit<ToolCallEvent, 'event.name' | 'event.timestamp' | 'decision'>,
outcome?: ToolConfirmationOutcome,
): void { ): void {
if (!isTelemetrySdkInitialized()) return; if (!isTelemetrySdkInitialized()) return;
const decision = outcome ? getDecisionFromOutcome(outcome) : undefined;
const attributes: LogAttributes = { const attributes: LogAttributes = {
...getCommonAttributes(config), ...getCommonAttributes(config),
...event, ...event,
'event.name': EVENT_TOOL_CALL, 'event.name': EVENT_TOOL_CALL,
'event.timestamp': new Date().toISOString(), 'event.timestamp': new Date().toISOString(),
function_args: JSON.stringify(event.function_args), function_args: JSON.stringify(event.function_args, null, 2),
decision,
}; };
if (event.error) { if (event.error) {
attributes['error.message'] = event.error; attributes['error.message'] = event.error;
@ -121,7 +150,7 @@ export function logToolCall(
} }
const logger = logs.getLogger(SERVICE_NAME); const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = { const logRecord: LogRecord = {
body: `Tool call: ${event.function_name}. Success: ${event.success}. Duration: ${event.duration_ms}ms.`, body: `Tool call: ${event.function_name}${decision ? `. Decision: ${decision}` : ''}. Success: ${event.success}. Duration: ${event.duration_ms}ms.`,
attributes, attributes,
}; };
logger.emit(logRecord); logger.emit(logRecord);
@ -130,6 +159,7 @@ export function logToolCall(
event.function_name, event.function_name,
event.duration_ms, event.duration_ms,
event.success, event.success,
decision,
); );
} }

View File

@ -89,6 +89,7 @@ export function recordToolCallMetrics(
functionName: string, functionName: string,
durationMs: number, durationMs: number,
success: boolean, success: boolean,
decision?: 'accept' | 'reject' | 'modify',
): void { ): void {
if (!toolCallCounter || !toolCallLatencyHistogram || !isMetricsInitialized) if (!toolCallCounter || !toolCallLatencyHistogram || !isMetricsInitialized)
return; return;
@ -97,6 +98,7 @@ export function recordToolCallMetrics(
...getCommonAttributes(config), ...getCommonAttributes(config),
function_name: functionName, function_name: functionName,
success, success,
decision,
}; };
toolCallCounter.add(1, metricAttributes); toolCallCounter.add(1, metricAttributes);
toolCallLatencyHistogram.record(durationMs, { toolCallLatencyHistogram.record(durationMs, {

View File

@ -4,6 +4,8 @@
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
import { ToolCallDecision } from './loggers.js';
export interface UserPromptEvent { export interface UserPromptEvent {
'event.name': 'user_prompt'; 'event.name': 'user_prompt';
'event.timestamp': string; // ISO 8601 'event.timestamp': string; // ISO 8601
@ -18,6 +20,7 @@ export interface ToolCallEvent {
function_args: Record<string, unknown>; function_args: Record<string, unknown>;
duration_ms: number; duration_ms: number;
success: boolean; success: boolean;
decision?: ToolCallDecision;
error?: string; error?: string;
error_type?: string; error_type?: string;
} }