diff --git a/docs/core/telemetry.md b/docs/core/telemetry.md
index 8d6468c6..41bd9fb7 100644
--- a/docs/core/telemetry.md
+++ b/docs/core/telemetry.md
@@ -296,8 +296,8 @@ These are timestamped records of specific events.
 
   - **Attributes**:
     - `model`
-    - `duration_ms`
     - `input_token_count`
+    - `request_text` (optional)
 
 - `gemini_cli.api_error`: Fired if the API request fails.
@@ -307,7 +307,6 @@ These are timestamped records of specific events.
     - `error_type`
     - `status_code`
     - `duration_ms`
-    - `attempt`
 
 - `gemini_cli.api_response`: Fired upon receiving a response from the Gemini API.
   - **Attributes**:
@@ -315,7 +314,7 @@ These are timestamped records of specific events.
     - `status_code`
    - `duration_ms`
     - `error` (optional)
-    - `attempt`
+    - `input_token_count`
     - `output_token_count`
     - `cached_content_token_count`
     - `thoughts_token_count`
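The net effect of the documentation changes above: `gemini_cli.api_request` loses `duration_ms` (which is unknowable at request time) and gains an optional `request_text`; `gemini_cli.api_error` and `gemini_cli.api_response` drop `attempt`, and the response event gains `input_token_count`. A sketch of events conforming to the updated shapes (values and model name are hypothetical; the interfaces are the ones amended in `packages/core/src/telemetry/types.ts` later in this diff):

```ts
// Hypothetical example events matching the updated attribute lists.
const requestEvent = {
  'event.name': 'api_request' as const,
  'event.timestamp': new Date().toISOString(), // ISO 8601
  model: 'gemini-pro', // hypothetical model name
  input_token_count: 123,
  request_text: 'Hello', // optional; only attached when prompt logging is enabled
};

const responseEvent = {
  'event.name': 'api_response' as const,
  'event.timestamp': new Date().toISOString(),
  model: 'gemini-pro',
  status_code: 200,
  duration_ms: 1840,
  input_token_count: 123, // new; replaces the removed `attempt`
  output_token_count: 50,
  cached_content_token_count: 10,
  thoughts_token_count: 5,
  tool_token_count: 2,
};
```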
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 4e4dc55e..94cdf0e5 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -27,11 +27,6 @@ import { GeminiChat } from './geminiChat.js';
 import { retryWithBackoff } from '../utils/retry.js';
 import { getErrorMessage } from '../utils/errors.js';
 import { tokenLimit } from './tokenLimits.js';
-import {
-  logApiRequest,
-  logApiResponse,
-  logApiError,
-} from '../telemetry/index.js';
 import {
   ContentGenerator,
   createContentGenerator,
@@ -223,80 +218,6 @@ export class GeminiClient {
     return turn;
   }
 
-  private _logApiRequest(model: string, inputTokenCount: number): void {
-    logApiRequest(this.config, {
-      model,
-      input_token_count: inputTokenCount,
-      duration_ms: 0, // Duration is not known at request time
-    });
-  }
-
-  private _logApiResponse(
-    model: string,
-    durationMs: number,
-    attempt: number,
-    response: GenerateContentResponse,
-  ): void {
-    const promptFeedback = response.promptFeedback;
-    const finishReason = response.candidates?.[0]?.finishReason;
-    let responseError;
-    if (promptFeedback?.blockReason) {
-      responseError = `Blocked: ${promptFeedback.blockReason}${promptFeedback.blockReasonMessage ? ' - ' + promptFeedback.blockReasonMessage : ''}`;
-    } else if (
-      finishReason &&
-      !['STOP', 'MAX_TOKENS', 'UNSPECIFIED'].includes(finishReason)
-    ) {
-      responseError = `Finished with reason: ${finishReason}`;
-    }
-
-    logApiResponse(this.config, {
-      model,
-      duration_ms: durationMs,
-      attempt,
-      status_code: undefined,
-      error: responseError,
-      output_token_count: response.usageMetadata?.candidatesTokenCount ?? 0,
-      cached_content_token_count:
-        response.usageMetadata?.cachedContentTokenCount ?? 0,
-      thoughts_token_count: response.usageMetadata?.thoughtsTokenCount ?? 0,
-      tool_token_count: response.usageMetadata?.toolUsePromptTokenCount ?? 0,
-      response_text: getResponseText(response),
-    });
-  }
-
-  private _logApiError(
-    model: string,
-    error: unknown,
-    durationMs: number,
-    attempt: number,
-    isAbort: boolean = false,
-  ): void {
-    let statusCode: number | string | undefined;
-    let errorMessage = getErrorMessage(error);
-
-    if (isAbort) {
-      errorMessage = 'Request aborted by user';
-      statusCode = 'ABORTED'; // Custom S
-    } else if (typeof error === 'object' && error !== null) {
-      if ('status' in error) {
-        statusCode = (error as { status: number | string }).status;
-      } else if ('code' in error) {
-        statusCode = (error as { code: number | string }).code;
-      } else if ('httpStatusCode' in error) {
-        statusCode = (error as { httpStatusCode: number | string })
-          .httpStatusCode;
-      }
-    }
-
-    logApiError(this.config, {
-      model,
-      error: errorMessage,
-      status_code: statusCode,
-      duration_ms: durationMs,
-      attempt,
-    });
-  }
-
   async generateJson(
     contents: Content[],
     schema: SchemaUnion,
@@ -305,8 +226,6 @@ export class GeminiClient {
     config: GenerateContentConfig = {},
   ): Promise<Record<string, unknown>> {
     const cg = await this.contentGenerator;
-    const attempt = 1;
-    const startTime = Date.now();
     try {
       const userMemory = this.config.getUserMemory();
       const systemInstruction = getCoreSystemPrompt(userMemory);
@@ -316,22 +235,6 @@ export class GeminiClient {
         ...config,
       };
 
-      let inputTokenCount = 0;
-      try {
-        const { totalTokens } = await cg.countTokens({
-          model,
-          contents,
-        });
-        inputTokenCount = totalTokens || 0;
-      } catch (_e) {
-        console.warn(
-          `Failed to count tokens for model ${model}. Proceeding with inputTokenCount = 0. Error: ${getErrorMessage(_e)}`,
-        );
-        inputTokenCount = 0;
-      }
-
-      this._logApiRequest(model, inputTokenCount);
-
       const apiCall = () =>
         cg.generateContent({
           model,
@@ -345,7 +248,6 @@ export class GeminiClient {
         });
 
       const result = await retryWithBackoff(apiCall);
-      const durationMs = Date.now() - startTime;
 
       const text = getResponseText(result);
       if (!text) {
@@ -358,12 +260,10 @@ export class GeminiClient {
          contents,
          'generateJson-empty-response',
        );
-        this._logApiError(model, error, durationMs, attempt);
         throw error;
       }
 
       try {
         const parsedJson = JSON.parse(text);
-        this._logApiResponse(model, durationMs, attempt, result);
         return parsedJson;
       } catch (parseError) {
         await reportError(
           parseError,
           'Failed to parse JSON response from generateJson.',
           {
             responseTextFailedToParse: text,
             originalRequestContents: contents,
           },
           'generateJson-parse',
         );
-        this._logApiError(model, parseError, durationMs, attempt);
         throw new Error(
           `Failed to parse API response as JSON: ${getErrorMessage(parseError)}`,
         );
       }
     } catch (error) {
-      const durationMs = Date.now() - startTime;
       if (abortSignal.aborted) {
-        this._logApiError(model, error, durationMs, attempt, true);
         throw error;
       }
@@ -394,7 +291,6 @@ export class GeminiClient {
       ) {
         throw error;
       }
-      this._logApiError(model, error, durationMs, attempt);
 
       await reportError(
         error,
@@ -419,8 +315,6 @@ export class GeminiClient {
       ...this.generateContentConfig,
       ...generationConfig,
     };
-    const attempt = 1;
-    const startTime = Date.now();
 
     try {
       const userMemory = this.config.getUserMemory();
@@ -432,22 +326,6 @@ export class GeminiClient {
         systemInstruction,
       };
 
-      let inputTokenCount = 0;
-      try {
-        const { totalTokens } = await cg.countTokens({
-          model: modelToUse,
-          contents,
-        });
-        inputTokenCount = totalTokens || 0;
-      } catch (_e) {
-        console.warn(
-          `Failed to count tokens for model ${modelToUse}. Proceeding with inputTokenCount = 0. Error: ${getErrorMessage(_e)}`,
-        );
-        inputTokenCount = 0;
-      }
-
-      this._logApiRequest(modelToUse, inputTokenCount);
-
       const apiCall = () =>
         cg.generateContent({
           model: modelToUse,
@@ -460,18 +338,12 @@ export class GeminiClient {
         'Raw API Response in client.ts:',
         JSON.stringify(result, null, 2),
       );
-      const durationMs = Date.now() - startTime;
-      this._logApiResponse(modelToUse, durationMs, attempt, result);
       return result;
     } catch (error: unknown) {
-      const durationMs = Date.now() - startTime;
       if (abortSignal.aborted) {
-        this._logApiError(modelToUse, error, durationMs, attempt, true);
         throw error;
       }
 
-      this._logApiError(modelToUse, error, durationMs, attempt);
-
       await reportError(
         error,
         `Error generating content via API with model ${modelToUse}.`,
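The removals above take all telemetry out of `GeminiClient`: no more per-call `attempt`/`startTime` bookkeeping, no inline `countTokens` fallback, and no `_logApi*` helpers. The next file shows where this logic lands. In outline, the new flow is the following (a simplified sketch with invented names, not code from this change):

```ts
// Sketch: measure duration around a call, log success or failure, rethrow.
// GeminiChat applies this same try/catch shape in sendMessage below.
async function withApiTelemetry<T>(
  call: () => Promise<T>,
  logResponse: (durationMs: number, result: T) => Promise<void>,
  logError: (durationMs: number, error: unknown) => void,
): Promise<T> {
  const startTime = Date.now();
  try {
    const result = await call();
    await logResponse(Date.now() - startTime, result);
    return result;
  } catch (error) {
    logError(Date.now() - startTime, error);
    throw error; // telemetry must never swallow the failure
  }
}
```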
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 3bba1805..e1f2738d 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -19,6 +19,17 @@ import { retryWithBackoff } from '../utils/retry.js';
 import { isFunctionResponse } from '../utils/messageInspectors.js';
 import { ContentGenerator } from './contentGenerator.js';
 import { Config } from '../config/config.js';
+import {
+  logApiRequest,
+  logApiResponse,
+  logApiError,
+} from '../telemetry/loggers.js';
+import {
+  getResponseText,
+  getResponseTextFromParts,
+} from '../utils/generateContentResponseUtilities.js';
+import { getErrorMessage } from '../utils/errors.js';
+import { getRequestTextFromContents } from '../utils/requestUtils.js';
 
 /**
  * Returns true if the response is valid, false otherwise.
  */
@@ -129,6 +140,69 @@ export class GeminiChat {
     validateHistory(history);
   }
 
+  private async _logApiRequest(
+    contents: Content[],
+    model: string,
+  ): Promise<void> {
+    let inputTokenCount = 0;
+    try {
+      const { totalTokens } = await this.contentGenerator.countTokens({
+        model,
+        contents,
+      });
+      inputTokenCount = totalTokens || 0;
+    } catch (_e) {
+      console.warn(
+        `Failed to count tokens for model ${model}. Proceeding with inputTokenCount = 0. Error: ${getErrorMessage(_e)}`,
+      );
+      inputTokenCount = 0;
+    }
+
+    const shouldLogUserPrompts = (config: Config): boolean =>
+      config.getTelemetryLogUserPromptsEnabled() ?? false;
+
+    const requestText = getRequestTextFromContents(contents);
+    logApiRequest(this.config, {
+      model,
+      input_token_count: inputTokenCount,
+      request_text: shouldLogUserPrompts(this.config) ? requestText : undefined,
+    });
+  }
+
+  private async _logApiResponse(
+    durationMs: number,
+    response: GenerateContentResponse,
+    fullStreamedText?: string,
+  ): Promise<void> {
+    const responseText = fullStreamedText ?? getResponseText(response);
+    logApiResponse(this.config, {
+      model: this.model,
+      duration_ms: durationMs,
+      status_code: 200, // Assuming 200 for success
+      input_token_count: response.usageMetadata?.promptTokenCount ?? 0,
+      output_token_count: response.usageMetadata?.candidatesTokenCount ?? 0,
+      cached_content_token_count:
+        response.usageMetadata?.cachedContentTokenCount ?? 0,
+      thoughts_token_count: response.usageMetadata?.thoughtsTokenCount ?? 0,
+      tool_token_count: response.usageMetadata?.toolUsePromptTokenCount ?? 0,
+      response_text: responseText,
+    });
+  }
+
+  private _logApiError(durationMs: number, error: unknown): void {
+    const errorMessage = error instanceof Error ? error.message : String(error);
+    const errorType = error instanceof Error ? error.name : 'unknown';
+    const statusCode = 'unknown';
+
+    logApiError(this.config, {
+      model: this.model,
+      error: errorMessage,
+      status_code: statusCode,
+      error_type: errorType,
+      duration_ms: durationMs,
+    });
+  }
+
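Two details of `_logApiRequest` above are easy to miss: token counting is best-effort (a failed `countTokens` call logs a warning and reports `input_token_count: 0` instead of failing the request), and `request_text` is gated on the user-prompt telemetry setting. The gate in isolation (a sketch; the structural type is inferred from the `Config` usage above):

```ts
// Prompt text is attached only when explicitly enabled; otherwise the
// attribute is left undefined (omitted), not logged as an empty string.
type PromptLoggingConfig = {
  getTelemetryLogUserPromptsEnabled(): boolean | undefined;
};

function gatedRequestText(
  config: PromptLoggingConfig,
  requestText: string,
): string | undefined {
  return (config.getTelemetryLogUserPromptsEnabled() ?? false)
    ? requestText
    : undefined;
}
```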
   /**
    * Sends a message to the model and returns the response.
    *
@@ -154,46 +228,56 @@ export class GeminiChat {
   ): Promise<GenerateContentResponse> {
     await this.sendPromise;
     const userContent = createUserContent(params.message);
+    const requestContents = this.getHistory(true).concat(userContent);
 
-    const apiCall = () =>
-      this.contentGenerator.generateContent({
-        model: this.model,
-        contents: this.getHistory(true).concat(userContent),
-        config: { ...this.generationConfig, ...params.config },
+    this._logApiRequest(requestContents, this.model);
+
+    const startTime = Date.now();
+    let response: GenerateContentResponse;
+
+    try {
+      const apiCall = () =>
+        this.contentGenerator.generateContent({
+          model: this.model,
+          contents: requestContents,
+          config: { ...this.generationConfig, ...params.config },
+        });
+
+      response = await retryWithBackoff(apiCall);
+      const durationMs = Date.now() - startTime;
+      await this._logApiResponse(durationMs, response);
+
+      this.sendPromise = (async () => {
+        const outputContent = response.candidates?.[0]?.content;
+        // Because the AFC input contains the entire curated chat history in
+        // addition to the new user input, we need to truncate the AFC history
+        // to deduplicate the existing chat history.
+        const fullAutomaticFunctionCallingHistory =
+          response.automaticFunctionCallingHistory;
+        const index = this.getHistory(true).length;
+        let automaticFunctionCallingHistory: Content[] = [];
+        if (fullAutomaticFunctionCallingHistory != null) {
+          automaticFunctionCallingHistory =
+            fullAutomaticFunctionCallingHistory.slice(index) ?? [];
+        }
+        const modelOutput = outputContent ? [outputContent] : [];
+        this.recordHistory(
+          userContent,
+          modelOutput,
+          automaticFunctionCallingHistory,
+        );
+      })();
+      await this.sendPromise.catch(() => {
+        // Resets sendPromise to avoid subsequent calls failing
+        this.sendPromise = Promise.resolve();
       });
-
-    const responsePromise = retryWithBackoff(apiCall);
-
-    this.sendPromise = (async () => {
-      const response = await responsePromise;
-      const outputContent = response.candidates?.[0]?.content;
-
-      // Because the AFC input contains the entire curated chat history in
-      // addition to the new user input, we need to truncate the AFC history
-      // to deduplicate the existing chat history.
-      const fullAutomaticFunctionCallingHistory =
-        response.automaticFunctionCallingHistory;
-      const index = this.getHistory(true).length;
-
-      let automaticFunctionCallingHistory: Content[] = [];
-      if (fullAutomaticFunctionCallingHistory != null) {
-        automaticFunctionCallingHistory =
-          fullAutomaticFunctionCallingHistory.slice(index) ?? [];
-      }
-
-      const modelOutput = outputContent ? [outputContent] : [];
-      this.recordHistory(
-        userContent,
-        modelOutput,
-        automaticFunctionCallingHistory,
-      );
-      return;
-    })();
-    await this.sendPromise.catch(() => {
-      // Resets sendPromise to avoid subsequent calls failing
+      return response;
+    } catch (error) {
+      const durationMs = Date.now() - startTime;
+      this._logApiError(durationMs, error);
       this.sendPromise = Promise.resolve();
-    });
-    return responsePromise;
+      throw error;
+    }
   }
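The rewritten `sendMessage` logs the request up front, measures duration across the retried call, and on failure emits `api_error`, resets `sendPromise`, and rethrows. From a caller's perspective (a usage sketch, not code from this change):

```ts
// A failed send emits gemini_cli.api_error with the measured duration and
// rethrows; sendPromise is reset so the chat instance stays usable.
async function example(chat: GeminiChat) {
  try {
    await chat.sendMessage({ message: 'first attempt' });
  } catch {
    // api_error has already been logged inside sendMessage.
  }
  // Still safe: the failure above did not leave sendPromise rejected.
  await chat.sendMessage({ message: 'second attempt' });
}
```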
 
   /**
@@ -223,38 +307,53 @@ export class GeminiChat {
   ): Promise<AsyncGenerator<GenerateContentResponse>> {
     await this.sendPromise;
     const userContent = createUserContent(params.message);
+    const requestContents = this.getHistory(true).concat(userContent);
+    this._logApiRequest(requestContents, this.model);
 
-    const apiCall = () =>
-      this.contentGenerator.generateContentStream({
-        model: this.model,
-        contents: this.getHistory(true).concat(userContent),
-        config: { ...this.generationConfig, ...params.config },
+    const startTime = Date.now();
+
+    try {
+      const apiCall = () =>
+        this.contentGenerator.generateContentStream({
+          model: this.model,
+          contents: requestContents,
+          config: { ...this.generationConfig, ...params.config },
+        });
+
+      // Note: Retrying streams can be complex. If generateContentStream itself doesn't handle retries
+      // for transient issues internally before yielding the async generator, this retry will re-initiate
+      // the stream. For simple 429/500 errors on initial call, this is fine.
+      // If errors occur mid-stream, this setup won't resume the stream; it will restart it.
+      const streamResponse = await retryWithBackoff(apiCall, {
+        shouldRetry: (error: Error) => {
+          // Check error messages for status codes, or specific error names if known
+          if (error && error.message) {
+            if (error.message.includes('429')) return true;
+            if (error.message.match(/5\d{2}/)) return true;
+          }
+          return false; // Don't retry other errors by default
+        },
       });
 
-    // Note: Retrying streams can be complex. If generateContentStream itself doesn't handle retries
-    // for transient issues internally before yielding the async generator, this retry will re-initiate
-    // the stream. For simple 429/500 errors on initial call, this is fine.
-    // If errors occur mid-stream, this setup won't resume the stream; it will restart it.
-    const streamResponse = await retryWithBackoff(apiCall, {
-      shouldRetry: (error: Error) => {
-        // Check error messages for status codes, or specific error names if known
-        if (error && error.message) {
-          if (error.message.includes('429')) return true;
-          if (error.message.match(/5\d{2}/)) return true;
-        }
-        return false; // Don't retry other errors by default
-      },
-    });
+      // Resolve the internal tracking of send completion promise - `sendPromise`
+      // for both success and failure response. The actual failure is still
+      // propagated by the `await streamResponse`.
+      this.sendPromise = Promise.resolve(streamResponse)
+        .then(() => undefined)
+        .catch(() => undefined);
 
-    // Resolve the internal tracking of send completion promise - `sendPromise`
-    // for both success and failure response. The actual failure is still
-    // propagated by the `await streamResponse`.
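The `shouldRetry` predicate above is message-based: it retries when the error text mentions 429 or any three-digit number starting with 5. Pulled out as a standalone sketch:

```ts
const shouldRetry = (error: Error): boolean => {
  if (error && error.message) {
    if (error.message.includes('429')) return true; // rate limiting
    if (error.message.match(/5\d{2}/)) return true; // 5xx server errors
  }
  return false;
};

shouldRetry(new Error('429 Too Many Requests')); // true
shouldRetry(new Error('Internal error (500)')); // true
shouldRetry(new Error('400 Bad Request')); // false
```

Because `/5\d{2}/` matches anywhere in the string, an incidental number such as `1503` in an unrelated message would also trigger a retry; this is a heuristic over message text, not a real status-code check.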
-    this.sendPromise = Promise.resolve(streamResponse)
-      .then(() => undefined)
-      .catch(() => undefined);
-
-    const result = this.processStreamResponse(streamResponse, userContent);
-    return result;
+      const result = this.processStreamResponse(
+        streamResponse,
+        userContent,
+        startTime,
+      );
+      return result;
+    } catch (error) {
+      const durationMs = Date.now() - startTime;
+      this._logApiError(durationMs, error);
+      this.sendPromise = Promise.resolve();
+      throw error;
+    }
   }
 
   /**
@@ -311,16 +410,40 @@ export class GeminiChat {
   private async *processStreamResponse(
     streamResponse: AsyncGenerator<GenerateContentResponse>,
     inputContent: Content,
+    startTime: number,
   ) {
     const outputContent: Content[] = [];
-    for await (const chunk of streamResponse) {
-      if (isValidResponse(chunk)) {
-        const content = chunk.candidates?.[0]?.content;
-        if (content !== undefined) {
-          outputContent.push(content);
+    let lastChunk: GenerateContentResponse | undefined;
+    let errorOccurred = false;
+
+    try {
+      for await (const chunk of streamResponse) {
+        if (isValidResponse(chunk)) {
+          const content = chunk.candidates?.[0]?.content;
+          if (content !== undefined) {
+            outputContent.push(content);
+          }
+        }
+        lastChunk = chunk;
+        yield chunk;
+      }
+    } catch (error) {
+      errorOccurred = true;
+      const durationMs = Date.now() - startTime;
+      this._logApiError(durationMs, error);
+      throw error;
+    }
+
+    if (!errorOccurred && lastChunk) {
+      const durationMs = Date.now() - startTime;
+      const allParts: Part[] = [];
+      for (const content of outputContent) {
+        if (content.parts) {
+          allParts.push(...content.parts);
         }
       }
-      yield chunk;
+      const fullText = getResponseTextFromParts(allParts);
+      await this._logApiResponse(durationMs, lastChunk, fullText);
     }
     this.recordHistory(inputContent, outputContent);
   }
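For streams, `processStreamResponse` defers the `api_response` event until the generator is fully drained: it collects the parts of every valid chunk and joins their text so that `response_text` covers the whole streamed reply, not just the last chunk. The accumulation in isolation (a sketch; `getResponseTextFromParts` is assumed to perform an equivalent join):

```ts
import { Part } from '@google/genai';

// Flatten the parts collected across chunks and join their text fields,
// skipping parts that carry no text.
function joinStreamedText(collected: Part[][]): string {
  return collected
    .flat()
    .map((part) => part.text)
    .filter(Boolean)
    .join('');
}
```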
diff --git a/packages/core/src/telemetry/loggers.test.ts b/packages/core/src/telemetry/loggers.test.ts
index a09f3eaf..092d70f0 100644
--- a/packages/core/src/telemetry/loggers.test.ts
+++ b/packages/core/src/telemetry/loggers.test.ts
@@ -8,8 +8,13 @@ import { ToolConfirmationOutcome } from '../index.js';
 import { logs } from '@opentelemetry/api-logs';
 import { SemanticAttributes } from '@opentelemetry/semantic-conventions';
 import { Config } from '../config/config.js';
-import { EVENT_API_RESPONSE, EVENT_USER_PROMPT } from './constants.js';
 import {
+  EVENT_API_REQUEST,
+  EVENT_API_RESPONSE,
+  EVENT_USER_PROMPT,
+} from './constants.js';
+import {
+  logApiRequest,
   logApiResponse,
   logCliConfiguration,
   logUserPrompt,
@@ -167,7 +172,7 @@ describe('loggers', () => {
       model: 'test-model',
       status_code: 200,
       duration_ms: 100,
-      attempt: 1,
+      input_token_count: 17,
       output_token_count: 50,
       cached_content_token_count: 10,
       thoughts_token_count: 5,
@@ -187,7 +192,7 @@ describe('loggers', () => {
         model: 'test-model',
         status_code: 200,
         duration_ms: 100,
-        attempt: 1,
+        input_token_count: 17,
         output_token_count: 50,
         cached_content_token_count: 10,
         thoughts_token_count: 5,
@@ -216,8 +221,8 @@ describe('loggers', () => {
     const event = {
       model: 'test-model',
       duration_ms: 100,
-      attempt: 1,
       error: 'test-error',
+      input_token_count: 17,
       output_token_count: 50,
       cached_content_token_count: 10,
       thoughts_token_count: 5,
@@ -240,6 +245,78 @@ describe('loggers', () => {
     });
   });
 
+  describe('logApiRequest', () => {
+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+    } as Config;
+
+    const mockMetrics = {
+      recordTokenUsageMetrics: vi.fn(),
+    };
+
+    beforeEach(() => {
+      vi.spyOn(metrics, 'recordTokenUsageMetrics').mockImplementation(
+        mockMetrics.recordTokenUsageMetrics,
+      );
+    });
+
+    it('should log an API request with request_text', () => {
+      const event = {
+        model: 'test-model',
+        input_token_count: 123,
+        request_text: 'This is a test request',
+      };
+
+      logApiRequest(mockConfig, event);
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({
+        body: 'API request to test-model. Tokens: 123.',
+        attributes: {
+          'session.id': 'test-session-id',
+          'event.name': EVENT_API_REQUEST,
+          'event.timestamp': '2025-01-01T00:00:00.000Z',
+          model: 'test-model',
+          input_token_count: 123,
+          request_text: 'This is a test request',
+        },
+      });
+
+      expect(mockMetrics.recordTokenUsageMetrics).toHaveBeenCalledWith(
+        mockConfig,
+        'test-model',
+        123,
+        'input',
+      );
+    });
+
+    it('should log an API request without request_text', () => {
+      const event = {
+        model: 'test-model',
+        input_token_count: 456,
+      };
+
+      logApiRequest(mockConfig, event);
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({
+        body: 'API request to test-model. Tokens: 456.',
+        attributes: {
+          'session.id': 'test-session-id',
+          'event.name': EVENT_API_REQUEST,
+          'event.timestamp': '2025-01-01T00:00:00.000Z',
+          model: 'test-model',
+          input_token_count: 456,
+        },
+      });
+
+      expect(mockMetrics.recordTokenUsageMetrics).toHaveBeenCalledWith(
+        mockConfig,
+        'test-model',
+        456,
+        'input',
+      );
+    });
+  });
+
   describe('logToolCall', () => {
     const mockConfig = {
       getSessionId: () => 'test-session-id',
diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts
index 926f1384..e9d397a8 100644
--- a/packages/core/src/telemetry/types.ts
+++ b/packages/core/src/telemetry/types.ts
@@ -29,8 +29,8 @@ export interface ApiRequestEvent {
   'event.name': 'api_request';
   'event.timestamp': string; // ISO 8601
   model: string;
-  duration_ms: number;
   input_token_count: number;
+  request_text?: string;
 }
 
 export interface ApiErrorEvent {
@@ -41,7 +41,6 @@ export interface ApiErrorEvent {
   error_type?: string;
   status_code?: number | string;
   duration_ms: number;
-  attempt: number;
 }
 
 export interface ApiResponseEvent {
@@ -51,7 +50,7 @@ export interface ApiResponseEvent {
   status_code?: number | string;
   duration_ms: number;
   error?: string;
-  attempt: number;
+  input_token_count: number;
   output_token_count: number;
   cached_content_token_count: number;
   thoughts_token_count: number;
diff --git a/packages/core/src/utils/requestUtils.ts b/packages/core/src/utils/requestUtils.ts
new file mode 100644
index 00000000..4978e968
--- /dev/null
+++ b/packages/core/src/utils/requestUtils.ts
@@ -0,0 +1,15 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { Content } from '@google/genai';
+
+export function getRequestTextFromContents(contents: Content[]): string {
+  return contents
+    .flatMap((content) => content.parts ?? [])
+    .map((part) => part.text)
+    .filter(Boolean)
+    .join('');
+}
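A usage sketch for the new helper (hypothetical contents): it concatenates the `text` of every part across all `Content` entries and silently drops non-text parts, which is what makes it suitable for the optional `request_text` attribute.

```ts
import { Content } from '@google/genai';
import { getRequestTextFromContents } from './requestUtils.js';

const contents: Content[] = [
  { role: 'user', parts: [{ text: 'Hello, ' }, { text: 'world' }] },
  // A part without a `text` field (e.g. inline image data) is filtered out.
  {
    role: 'user',
    parts: [{ inlineData: { mimeType: 'image/png', data: '...' } }],
  },
];

console.log(getRequestTextFromContents(contents)); // "Hello, world"
```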