diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx index 0848e330..66d521fc 100644 --- a/packages/cli/src/ui/App.tsx +++ b/packages/cli/src/ui/App.tsx @@ -823,11 +823,7 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { showMemoryUsage={ config.getDebugMode() || config.getShowMemoryUsage() } - promptTokenCount={sessionStats.currentResponse.promptTokenCount} - candidatesTokenCount={ - sessionStats.currentResponse.candidatesTokenCount - } - totalTokenCount={sessionStats.currentResponse.totalTokenCount} + promptTokenCount={sessionStats.lastPromptTokenCount} /> diff --git a/packages/cli/src/ui/components/Footer.tsx b/packages/cli/src/ui/components/Footer.tsx index 4ece6c92..48f37ee8 100644 --- a/packages/cli/src/ui/components/Footer.tsx +++ b/packages/cli/src/ui/components/Footer.tsx @@ -23,8 +23,6 @@ interface FooterProps { showErrorDetails: boolean; showMemoryUsage?: boolean; promptTokenCount: number; - candidatesTokenCount: number; - totalTokenCount: number; } export const Footer: React.FC = ({ @@ -37,10 +35,10 @@ export const Footer: React.FC = ({ errorCount, showErrorDetails, showMemoryUsage, - totalTokenCount, + promptTokenCount, }) => { const limit = tokenLimit(model); - const percentage = totalTokenCount / limit; + const percentage = promptTokenCount / limit; return ( diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx index 5816f7b4..b40b20bc 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx +++ b/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx @@ -8,7 +8,7 @@ import { render } from 'ink-testing-library'; import { describe, it, expect, vi } from 'vitest'; import { HistoryItemDisplay } from './HistoryItemDisplay.js'; import { HistoryItem, MessageType } from '../types.js'; -import { CumulativeStats } from '../contexts/SessionContext.js'; +import { SessionStatsProvider } from '../contexts/SessionContext.js'; 
// Mock child components vi.mock('./messages/ToolGroupMessage.js', () => ({ @@ -36,25 +36,15 @@ describe('', () => { }); it('renders StatsDisplay for "stats" type', () => { - const stats: CumulativeStats = { - turnCount: 1, - promptTokenCount: 10, - candidatesTokenCount: 20, - totalTokenCount: 30, - cachedContentTokenCount: 5, - toolUsePromptTokenCount: 2, - thoughtsTokenCount: 3, - apiTimeMs: 123, - }; const item: HistoryItem = { ...baseItem, type: MessageType.STATS, - stats, - lastTurnStats: stats, duration: '1s', }; const { lastFrame } = render( - , + + + , ); expect(lastFrame()).toContain('Stats'); }); @@ -76,25 +66,46 @@ describe('', () => { expect(lastFrame()).toContain('About Gemini CLI'); }); - it('renders SessionSummaryDisplay for "quit" type', () => { - const stats: CumulativeStats = { - turnCount: 1, - promptTokenCount: 10, - candidatesTokenCount: 20, - totalTokenCount: 30, - cachedContentTokenCount: 5, - toolUsePromptTokenCount: 2, - thoughtsTokenCount: 3, - apiTimeMs: 123, + it('renders ModelStatsDisplay for "model_stats" type', () => { + const item: HistoryItem = { + ...baseItem, + type: 'model_stats', }; + const { lastFrame } = render( + + + , + ); + expect(lastFrame()).toContain( + 'No API calls have been made in this session.', + ); + }); + + it('renders ToolStatsDisplay for "tool_stats" type', () => { + const item: HistoryItem = { + ...baseItem, + type: 'tool_stats', + }; + const { lastFrame } = render( + + + , + ); + expect(lastFrame()).toContain( + 'No tool calls have been made in this session.', + ); + }); + + it('renders SessionSummaryDisplay for "quit" type', () => { const item: HistoryItem = { ...baseItem, type: 'quit', - stats, duration: '1s', }; const { lastFrame } = render( - , + + + , ); expect(lastFrame()).toContain('Agent powering down. 
Goodbye!'); }); diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx index 76b6ba6e..eba4ea47 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx +++ b/packages/cli/src/ui/components/HistoryItemDisplay.tsx @@ -17,6 +17,8 @@ import { CompressionMessage } from './messages/CompressionMessage.js'; import { Box } from 'ink'; import { AboutBox } from './AboutBox.js'; import { StatsDisplay } from './StatsDisplay.js'; +import { ModelStatsDisplay } from './ModelStatsDisplay.js'; +import { ToolStatsDisplay } from './ToolStatsDisplay.js'; import { SessionSummaryDisplay } from './SessionSummaryDisplay.js'; import { Config } from '@google/gemini-cli-core'; @@ -69,16 +71,10 @@ export const HistoryItemDisplay: React.FC = ({ gcpProject={item.gcpProject} /> )} - {item.type === 'stats' && ( - - )} - {item.type === 'quit' && ( - - )} + {item.type === 'stats' && } + {item.type === 'model_stats' && } + {item.type === 'tool_stats' && } + {item.type === 'quit' && } {item.type === 'tool_group' && ( { + const actual = await importOriginal(); + return { + ...actual, + useSessionStats: vi.fn(), + }; +}); + +const useSessionStatsMock = vi.mocked(SessionContext.useSessionStats); + +const renderWithMockedStats = (metrics: SessionMetrics) => { + useSessionStatsMock.mockReturnValue({ + stats: { + sessionStartTime: new Date(), + metrics, + lastPromptTokenCount: 0, + }, + }); + + return render(); +}; + +describe('', () => { + it('should render "no API calls" message when there are no active models', () => { + const { lastFrame } = renderWithMockedStats({ + models: {}, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }); + + expect(lastFrame()).toContain( + 'No API calls have been made in this session.', + ); + expect(lastFrame()).toMatchSnapshot(); + }); + + it('should not display conditional rows if no 
model has data for them', () => { + const { lastFrame } = renderWithMockedStats({ + models: { + 'gemini-2.5-pro': { + api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, + tokens: { + prompt: 10, + candidates: 20, + total: 30, + cached: 0, + thoughts: 0, + tool: 0, + }, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }); + + const output = lastFrame(); + expect(output).not.toContain('Cached'); + expect(output).not.toContain('Thoughts'); + expect(output).not.toContain('Tool'); + expect(output).toMatchSnapshot(); + }); + + it('should display conditional rows if at least one model has data', () => { + const { lastFrame } = renderWithMockedStats({ + models: { + 'gemini-2.5-pro': { + api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, + tokens: { + prompt: 10, + candidates: 20, + total: 30, + cached: 5, + thoughts: 2, + tool: 0, + }, + }, + 'gemini-2.5-flash': { + api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 50 }, + tokens: { + prompt: 5, + candidates: 10, + total: 15, + cached: 0, + thoughts: 0, + tool: 3, + }, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }); + + const output = lastFrame(); + expect(output).toContain('Cached'); + expect(output).toContain('Thoughts'); + expect(output).toContain('Tool'); + expect(output).toMatchSnapshot(); + }); + + it('should display stats for multiple models correctly', () => { + const { lastFrame } = renderWithMockedStats({ + models: { + 'gemini-2.5-pro': { + api: { totalRequests: 10, totalErrors: 1, totalLatencyMs: 1000 }, + tokens: { + prompt: 100, + candidates: 200, + total: 300, + cached: 50, + thoughts: 10, + tool: 5, + }, + }, + 'gemini-2.5-flash': { + api: { totalRequests: 20, totalErrors: 2, totalLatencyMs: 500 }, + tokens: { + prompt: 200, + 
candidates: 400, + total: 600, + cached: 100, + thoughts: 20, + tool: 10, + }, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }); + + const output = lastFrame(); + expect(output).toContain('gemini-2.5-pro'); + expect(output).toContain('gemini-2.5-flash'); + expect(output).toMatchSnapshot(); + }); + + it('should handle large values without wrapping or overlapping', () => { + const { lastFrame } = renderWithMockedStats({ + models: { + 'gemini-2.5-pro': { + api: { + totalRequests: 999999999, + totalErrors: 123456789, + totalLatencyMs: 9876, + }, + tokens: { + prompt: 987654321, + candidates: 123456789, + total: 999999999, + cached: 123456789, + thoughts: 111111111, + tool: 222222222, + }, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }); + + expect(lastFrame()).toMatchSnapshot(); + }); + + it('should display a single model correctly', () => { + const { lastFrame } = renderWithMockedStats({ + models: { + 'gemini-2.5-pro': { + api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, + tokens: { + prompt: 10, + candidates: 20, + total: 30, + cached: 5, + thoughts: 2, + tool: 1, + }, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }); + + const output = lastFrame(); + expect(output).toContain('gemini-2.5-pro'); + expect(output).not.toContain('gemini-2.5-flash'); + expect(output).toMatchSnapshot(); + }); +}); diff --git a/packages/cli/src/ui/components/ModelStatsDisplay.tsx b/packages/cli/src/ui/components/ModelStatsDisplay.tsx new file mode 100644 index 00000000..1911e757 --- /dev/null +++ b/packages/cli/src/ui/components/ModelStatsDisplay.tsx @@ -0,0 +1,197 @@ +/** + * @license + * Copyright 2025 
Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import React from 'react'; +import { Box, Text } from 'ink'; +import { Colors } from '../colors.js'; +import { formatDuration } from '../utils/formatters.js'; +import { + calculateAverageLatency, + calculateCacheHitRate, + calculateErrorRate, +} from '../utils/computeStats.js'; +import { useSessionStats, ModelMetrics } from '../contexts/SessionContext.js'; + +const METRIC_COL_WIDTH = 28; +const MODEL_COL_WIDTH = 22; + +interface StatRowProps { + title: string; + values: Array; + isSubtle?: boolean; + isSection?: boolean; +} + +const StatRow: React.FC = ({ + title, + values, + isSubtle = false, + isSection = false, +}) => ( + + + + {isSubtle ? ` ↳ ${title}` : title} + + + {values.map((value, index) => ( + + {value} + + ))} + +); + +export const ModelStatsDisplay: React.FC = () => { + const { stats } = useSessionStats(); + const { models } = stats.metrics; + const activeModels = Object.entries(models).filter( + ([, metrics]) => metrics.api.totalRequests > 0, + ); + + if (activeModels.length === 0) { + return ( + + No API calls have been made in this session. + + ); + } + + const modelNames = activeModels.map(([name]) => name); + + const getModelValues = ( + getter: (metrics: ModelMetrics) => string | React.ReactElement, + ) => activeModels.map(([, metrics]) => getter(metrics)); + + const hasThoughts = activeModels.some( + ([, metrics]) => metrics.tokens.thoughts > 0, + ); + const hasTool = activeModels.some(([, metrics]) => metrics.tokens.tool > 0); + const hasCached = activeModels.some( + ([, metrics]) => metrics.tokens.cached > 0, + ); + + return ( + + + Model Stats For Nerds + + + + {/* Header */} + + + Metric + + {modelNames.map((name) => ( + + {name} + + ))} + + + {/* Divider */} + + + {/* API Section */} + + m.api.totalRequests.toLocaleString())} + /> + { + const errorRate = calculateErrorRate(m); + return ( + 0 ? 
Colors.AccentRed : Colors.Foreground + } + > + {m.api.totalErrors.toLocaleString()} ({errorRate.toFixed(1)}%) + + ); + })} + /> + { + const avgLatency = calculateAverageLatency(m); + return formatDuration(avgLatency); + })} + /> + + + + {/* Tokens Section */} + + ( + + {m.tokens.total.toLocaleString()} + + ))} + /> + m.tokens.prompt.toLocaleString())} + /> + {hasCached && ( + { + const cacheHitRate = calculateCacheHitRate(m); + return ( + + {m.tokens.cached.toLocaleString()} ({cacheHitRate.toFixed(1)}%) + + ); + })} + /> + )} + {hasThoughts && ( + m.tokens.thoughts.toLocaleString())} + /> + )} + {hasTool && ( + m.tokens.tool.toLocaleString())} + /> + )} + m.tokens.candidates.toLocaleString())} + /> + + ); +}; diff --git a/packages/cli/src/ui/components/SessionSummaryDisplay.test.tsx b/packages/cli/src/ui/components/SessionSummaryDisplay.test.tsx index 14d8a277..afb822e5 100644 --- a/packages/cli/src/ui/components/SessionSummaryDisplay.test.tsx +++ b/packages/cli/src/ui/components/SessionSummaryDisplay.test.tsx @@ -5,48 +5,92 @@ */ import { render } from 'ink-testing-library'; -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; import { SessionSummaryDisplay } from './SessionSummaryDisplay.js'; -import { type CumulativeStats } from '../contexts/SessionContext.js'; +import * as SessionContext from '../contexts/SessionContext.js'; +import { SessionMetrics } from '../contexts/SessionContext.js'; + +vi.mock('../contexts/SessionContext.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + useSessionStats: vi.fn(), + }; +}); + +const useSessionStatsMock = vi.mocked(SessionContext.useSessionStats); + +const renderWithMockedStats = (metrics: SessionMetrics) => { + useSessionStatsMock.mockReturnValue({ + stats: { + sessionStartTime: new Date(), + metrics, + lastPromptTokenCount: 0, + }, + }); + + return render(); +}; describe('', () => { - const mockStats: CumulativeStats = { - 
turnCount: 10, - promptTokenCount: 1000, - candidatesTokenCount: 2000, - totalTokenCount: 3500, - cachedContentTokenCount: 500, - toolUsePromptTokenCount: 200, - thoughtsTokenCount: 300, - apiTimeMs: 50234, - }; + it('correctly sums and displays stats from multiple models', () => { + const metrics: SessionMetrics = { + models: { + 'gemini-2.5-pro': { + api: { totalRequests: 10, totalErrors: 1, totalLatencyMs: 50234 }, + tokens: { + prompt: 1000, + candidates: 2000, + total: 3500, + cached: 500, + thoughts: 300, + tool: 200, + }, + }, + 'gemini-2.5-flash': { + api: { totalRequests: 5, totalErrors: 0, totalLatencyMs: 12345 }, + tokens: { + prompt: 500, + candidates: 1000, + total: 1500, + cached: 100, + thoughts: 50, + tool: 20, + }, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }; - const mockDuration = '1h 23m 45s'; + const { lastFrame } = renderWithMockedStats(metrics); + const output = lastFrame(); - it('renders correctly with given stats and duration', () => { - const { lastFrame } = render( - , - ); - - expect(lastFrame()).toMatchSnapshot(); + // Verify totals are summed correctly + expect(output).toContain('Cumulative Stats (15 API calls)'); + expect(output).toMatchSnapshot(); }); it('renders zero state correctly', () => { - const zeroStats: CumulativeStats = { - turnCount: 0, - promptTokenCount: 0, - candidatesTokenCount: 0, - totalTokenCount: 0, - cachedContentTokenCount: 0, - toolUsePromptTokenCount: 0, - thoughtsTokenCount: 0, - apiTimeMs: 0, + const zeroMetrics: SessionMetrics = { + models: {}, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, }; - const { lastFrame } = render( - , - ); - + const { lastFrame } = renderWithMockedStats(zeroMetrics); expect(lastFrame()).toMatchSnapshot(); }); }); diff --git 
a/packages/cli/src/ui/components/SessionSummaryDisplay.tsx b/packages/cli/src/ui/components/SessionSummaryDisplay.tsx index d3ee0f5f..a009f3d8 100644 --- a/packages/cli/src/ui/components/SessionSummaryDisplay.tsx +++ b/packages/cli/src/ui/components/SessionSummaryDisplay.tsx @@ -9,31 +9,57 @@ import { Box, Text } from 'ink'; import Gradient from 'ink-gradient'; import { Colors } from '../colors.js'; import { formatDuration } from '../utils/formatters.js'; -import { CumulativeStats } from '../contexts/SessionContext.js'; +import { useSessionStats } from '../contexts/SessionContext.js'; +import { computeSessionStats } from '../utils/computeStats.js'; import { FormattedStats, StatRow, StatsColumn } from './Stats.js'; // --- Prop and Data Structures --- interface SessionSummaryDisplayProps { - stats: CumulativeStats; duration: string; } // --- Main Component --- export const SessionSummaryDisplay: React.FC = ({ - stats, duration, }) => { + const { stats } = useSessionStats(); + const { metrics } = stats; + const computed = computeSessionStats(metrics); + const cumulativeFormatted: FormattedStats = { - inputTokens: stats.promptTokenCount, - outputTokens: stats.candidatesTokenCount, - toolUseTokens: stats.toolUsePromptTokenCount, - thoughtsTokens: stats.thoughtsTokenCount, - cachedTokens: stats.cachedContentTokenCount, - totalTokens: stats.totalTokenCount, + inputTokens: Object.values(metrics.models).reduce( + (acc, model) => acc + model.tokens.prompt, + 0, + ), + outputTokens: Object.values(metrics.models).reduce( + (acc, model) => acc + model.tokens.candidates, + 0, + ), + toolUseTokens: Object.values(metrics.models).reduce( + (acc, model) => acc + model.tokens.tool, + 0, + ), + thoughtsTokens: Object.values(metrics.models).reduce( + (acc, model) => acc + model.tokens.thoughts, + 0, + ), + cachedTokens: Object.values(metrics.models).reduce( + (acc, model) => acc + model.tokens.cached, + 0, + ), + totalTokens: Object.values(metrics.models).reduce( + (acc, model) => acc 
+ model.tokens.total, + 0, + ), }; + const totalRequests = Object.values(metrics.models).reduce( + (acc, model) => acc + model.api.totalRequests, + 0, + ); + const title = 'Agent powering down. Goodbye!'; return ( @@ -57,14 +83,18 @@ export const SessionSummaryDisplay: React.FC = ({ + diff --git a/packages/cli/src/ui/components/StatsDisplay.test.tsx b/packages/cli/src/ui/components/StatsDisplay.test.tsx index c7b574a5..29f322f4 100644 --- a/packages/cli/src/ui/components/StatsDisplay.test.tsx +++ b/packages/cli/src/ui/components/StatsDisplay.test.tsx @@ -5,67 +5,259 @@ */ import { render } from 'ink-testing-library'; -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; import { StatsDisplay } from './StatsDisplay.js'; -import { type CumulativeStats } from '../contexts/SessionContext.js'; +import * as SessionContext from '../contexts/SessionContext.js'; +import { SessionMetrics } from '../contexts/SessionContext.js'; + +// Mock the context to provide controlled data for testing +vi.mock('../contexts/SessionContext.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + useSessionStats: vi.fn(), + }; +}); + +const useSessionStatsMock = vi.mocked(SessionContext.useSessionStats); + +const renderWithMockedStats = (metrics: SessionMetrics) => { + useSessionStatsMock.mockReturnValue({ + stats: { + sessionStartTime: new Date(), + metrics, + lastPromptTokenCount: 0, + }, + }); + + return render(); +}; describe('', () => { - const mockStats: CumulativeStats = { - turnCount: 10, - promptTokenCount: 1000, - candidatesTokenCount: 2000, - totalTokenCount: 3500, - cachedContentTokenCount: 500, - toolUsePromptTokenCount: 200, - thoughtsTokenCount: 300, - apiTimeMs: 50234, - }; - - const mockLastTurnStats: CumulativeStats = { - turnCount: 1, - promptTokenCount: 100, - candidatesTokenCount: 200, - totalTokenCount: 350, - cachedContentTokenCount: 50, - toolUsePromptTokenCount: 20, - 
thoughtsTokenCount: 30, - apiTimeMs: 1234, - }; - - const mockDuration = '1h 23m 45s'; - - it('renders correctly with given stats and duration', () => { - const { lastFrame } = render( - , - ); - - expect(lastFrame()).toMatchSnapshot(); - }); - - it('renders zero state correctly', () => { - const zeroStats: CumulativeStats = { - turnCount: 0, - promptTokenCount: 0, - candidatesTokenCount: 0, - totalTokenCount: 0, - cachedContentTokenCount: 0, - toolUsePromptTokenCount: 0, - thoughtsTokenCount: 0, - apiTimeMs: 0, + it('renders only the Performance section in its zero state', () => { + const zeroMetrics: SessionMetrics = { + models: {}, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, }; - const { lastFrame } = render( - , - ); + const { lastFrame } = renderWithMockedStats(zeroMetrics); + const output = lastFrame(); - expect(lastFrame()).toMatchSnapshot(); + expect(output).toContain('Performance'); + expect(output).not.toContain('Interaction Summary'); + expect(output).not.toContain('Efficiency & Optimizations'); + expect(output).not.toContain('Model'); // The table header + expect(output).toMatchSnapshot(); + }); + + it('renders a table with two models correctly', () => { + const metrics: SessionMetrics = { + models: { + 'gemini-2.5-pro': { + api: { totalRequests: 3, totalErrors: 0, totalLatencyMs: 15000 }, + tokens: { + prompt: 1000, + candidates: 2000, + total: 43234, + cached: 500, + thoughts: 100, + tool: 50, + }, + }, + 'gemini-2.5-flash': { + api: { totalRequests: 5, totalErrors: 1, totalLatencyMs: 4500 }, + tokens: { + prompt: 25000, + candidates: 15000, + total: 150000000, + cached: 10000, + thoughts: 2000, + tool: 1000, + }, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }; + + const { lastFrame } = 
renderWithMockedStats(metrics); + const output = lastFrame(); + + expect(output).toContain('gemini-2.5-pro'); + expect(output).toContain('gemini-2.5-flash'); + expect(output).toContain('1,000'); + expect(output).toContain('25,000'); + expect(output).toMatchSnapshot(); + }); + + it('renders all sections when all data is present', () => { + const metrics: SessionMetrics = { + models: { + 'gemini-2.5-pro': { + api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, + tokens: { + prompt: 100, + candidates: 100, + total: 250, + cached: 50, + thoughts: 0, + tool: 0, + }, + }, + }, + tools: { + totalCalls: 2, + totalSuccess: 1, + totalFail: 1, + totalDurationMs: 123, + totalDecisions: { accept: 1, reject: 0, modify: 0 }, + byName: { + 'test-tool': { + count: 2, + success: 1, + fail: 1, + durationMs: 123, + decisions: { accept: 1, reject: 0, modify: 0 }, + }, + }, + }, + }; + + const { lastFrame } = renderWithMockedStats(metrics); + const output = lastFrame(); + + expect(output).toContain('Performance'); + expect(output).toContain('Interaction Summary'); + expect(output).toContain('User Agreement'); + expect(output).toContain('Savings Highlight'); + expect(output).toContain('gemini-2.5-pro'); + expect(output).toMatchSnapshot(); + }); + + describe('Conditional Rendering Tests', () => { + it('hides User Agreement when no decisions are made', () => { + const metrics: SessionMetrics = { + models: {}, + tools: { + totalCalls: 2, + totalSuccess: 1, + totalFail: 1, + totalDurationMs: 123, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, // No decisions + byName: { + 'test-tool': { + count: 2, + success: 1, + fail: 1, + durationMs: 123, + decisions: { accept: 0, reject: 0, modify: 0 }, + }, + }, + }, + }; + + const { lastFrame } = renderWithMockedStats(metrics); + const output = lastFrame(); + + expect(output).toContain('Interaction Summary'); + expect(output).toContain('Success Rate'); + expect(output).not.toContain('User Agreement'); + 
expect(output).toMatchSnapshot(); + }); + + it('hides Efficiency section when cache is not used', () => { + const metrics: SessionMetrics = { + models: { + 'gemini-2.5-pro': { + api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, + tokens: { + prompt: 100, + candidates: 100, + total: 200, + cached: 0, + thoughts: 0, + tool: 0, + }, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }; + + const { lastFrame } = renderWithMockedStats(metrics); + const output = lastFrame(); + + expect(output).not.toContain('Efficiency & Optimizations'); + expect(output).toMatchSnapshot(); + }); + }); + + describe('Conditional Color Tests', () => { + it('renders success rate in green for high values', () => { + const metrics: SessionMetrics = { + models: {}, + tools: { + totalCalls: 10, + totalSuccess: 10, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }; + const { lastFrame } = renderWithMockedStats(metrics); + expect(lastFrame()).toMatchSnapshot(); + }); + + it('renders success rate in yellow for medium values', () => { + const metrics: SessionMetrics = { + models: {}, + tools: { + totalCalls: 10, + totalSuccess: 9, + totalFail: 1, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }; + const { lastFrame } = renderWithMockedStats(metrics); + expect(lastFrame()).toMatchSnapshot(); + }); + + it('renders success rate in red for low values', () => { + const metrics: SessionMetrics = { + models: {}, + tools: { + totalCalls: 10, + totalSuccess: 5, + totalFail: 5, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }; + const { lastFrame } = renderWithMockedStats(metrics); + expect(lastFrame()).toMatchSnapshot(); + }); }); }); diff --git a/packages/cli/src/ui/components/StatsDisplay.tsx 
b/packages/cli/src/ui/components/StatsDisplay.tsx index 76d48821..249fc106 100644 --- a/packages/cli/src/ui/components/StatsDisplay.tsx +++ b/packages/cli/src/ui/components/StatsDisplay.tsx @@ -8,90 +8,230 @@ import React from 'react'; import { Box, Text } from 'ink'; import { Colors } from '../colors.js'; import { formatDuration } from '../utils/formatters.js'; -import { CumulativeStats } from '../contexts/SessionContext.js'; -import { FormattedStats, StatRow, StatsColumn } from './Stats.js'; +import { useSessionStats, ModelMetrics } from '../contexts/SessionContext.js'; +import { + getStatusColor, + TOOL_SUCCESS_RATE_HIGH, + TOOL_SUCCESS_RATE_MEDIUM, + USER_AGREEMENT_RATE_HIGH, + USER_AGREEMENT_RATE_MEDIUM, +} from '../utils/displayUtils.js'; +import { computeSessionStats } from '../utils/computeStats.js'; -// --- Constants --- +// A more flexible and powerful StatRow component +interface StatRowProps { + title: string; + children: React.ReactNode; // Use children to allow for complex, colored values +} -const COLUMN_WIDTH = '48%'; +const StatRow: React.FC = ({ title, children }) => ( + + {/* Fixed width for the label creates a clean "gutter" for alignment */} + + {title} + + {children} + +); -// --- Prop and Data Structures --- +// A SubStatRow for indented, secondary information +interface SubStatRowProps { + title: string; + children: React.ReactNode; +} + +const SubStatRow: React.FC = ({ title, children }) => ( + + {/* Adjust width for the "» " prefix */} + + » {title} + + {children} + +); + +// A Section component to group related stats +interface SectionProps { + title: string; + children: React.ReactNode; +} + +const Section: React.FC = ({ title, children }) => ( + + {title} + {children} + +); + +const ModelUsageTable: React.FC<{ + models: Record; + totalCachedTokens: number; + cacheEfficiency: number; +}> = ({ models, totalCachedTokens, cacheEfficiency }) => { + const nameWidth = 25; + const requestsWidth = 8; + const inputTokensWidth = 15; + const 
outputTokensWidth = 15; + + return ( + + {/* Header */} + + + Model Usage + + + Reqs + + + Input Tokens + + + Output Tokens + + + {/* Divider */} + + + {/* Rows */} + {Object.entries(models).map(([name, modelMetrics]) => ( + + + {name.replace('-001', '')} + + + {modelMetrics.api.totalRequests} + + + + {modelMetrics.tokens.prompt.toLocaleString()} + + + + + {modelMetrics.tokens.candidates.toLocaleString()} + + + + ))} + {cacheEfficiency > 0 && ( + + + Savings Highlight:{' '} + {totalCachedTokens.toLocaleString()} ({cacheEfficiency.toFixed(1)} + %) of input tokens were served from the cache, reducing costs. + + + + » Tip: For a full token breakdown, run `/stats model`. + + + )} + + ); +}; interface StatsDisplayProps { - stats: CumulativeStats; - lastTurnStats: CumulativeStats; duration: string; } -// --- Main Component --- +export const StatsDisplay: React.FC = ({ duration }) => { + const { stats } = useSessionStats(); + const { metrics } = stats; + const { models, tools } = metrics; + const computed = computeSessionStats(metrics); -export const StatsDisplay: React.FC = ({ - stats, - lastTurnStats, - duration, -}) => { - const lastTurnFormatted: FormattedStats = { - inputTokens: lastTurnStats.promptTokenCount, - outputTokens: lastTurnStats.candidatesTokenCount, - toolUseTokens: lastTurnStats.toolUsePromptTokenCount, - thoughtsTokens: lastTurnStats.thoughtsTokenCount, - cachedTokens: lastTurnStats.cachedContentTokenCount, - totalTokens: lastTurnStats.totalTokenCount, + const successThresholds = { + green: TOOL_SUCCESS_RATE_HIGH, + yellow: TOOL_SUCCESS_RATE_MEDIUM, }; - - const cumulativeFormatted: FormattedStats = { - inputTokens: stats.promptTokenCount, - outputTokens: stats.candidatesTokenCount, - toolUseTokens: stats.toolUsePromptTokenCount, - thoughtsTokens: stats.thoughtsTokenCount, - cachedTokens: stats.cachedContentTokenCount, - totalTokens: stats.totalTokenCount, + const agreementThresholds = { + green: USER_AGREEMENT_RATE_HIGH, + yellow: 
USER_AGREEMENT_RATE_MEDIUM, }; + const successColor = getStatusColor(computed.successRate, successThresholds); + const agreementColor = getStatusColor( + computed.agreementRate, + agreementThresholds, + ); return ( - Stats + Session Stats + - - 0 && ( +
+ + + {tools.totalCalls} ({' '} + ✔ {tools.totalSuccess}{' '} + ✖ {tools.totalFail} ) + + + + {computed.successRate.toFixed(1)}% + + {computed.totalDecisions > 0 && ( + + + {computed.agreementRate.toFixed(1)}%{' '} + + ({computed.totalDecisions} reviewed) + + + + )} +
+ )} + +
+ + {duration} + + + {formatDuration(computed.agentActiveTime)} + + + + {formatDuration(computed.totalApiTime)}{' '} + + ({computed.apiTimePercent.toFixed(1)}%) + + + + + + {formatDuration(computed.totalToolTime)}{' '} + + ({computed.toolTimePercent.toFixed(1)}%) + + + +
+ + {Object.keys(models).length > 0 && ( + - -
- - - {/* Left column for "Last Turn" duration */} - - - - - {/* Right column for "Cumulative" durations */} - - - - - + )}
); }; diff --git a/packages/cli/src/ui/components/ToolStatsDisplay.test.tsx b/packages/cli/src/ui/components/ToolStatsDisplay.test.tsx new file mode 100644 index 00000000..54902788 --- /dev/null +++ b/packages/cli/src/ui/components/ToolStatsDisplay.test.tsx @@ -0,0 +1,176 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { render } from 'ink-testing-library'; +import { describe, it, expect, vi } from 'vitest'; +import { ToolStatsDisplay } from './ToolStatsDisplay.js'; +import * as SessionContext from '../contexts/SessionContext.js'; +import { SessionMetrics } from '../contexts/SessionContext.js'; + +// Mock the context to provide controlled data for testing +vi.mock('../contexts/SessionContext.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + useSessionStats: vi.fn(), + }; +}); + +const useSessionStatsMock = vi.mocked(SessionContext.useSessionStats); + +const renderWithMockedStats = (metrics: SessionMetrics) => { + useSessionStatsMock.mockReturnValue({ + stats: { + sessionStartTime: new Date(), + metrics, + lastPromptTokenCount: 0, + }, + }); + + return render(); +}; + +describe('', () => { + it('should render "no tool calls" message when there are no active tools', () => { + const { lastFrame } = renderWithMockedStats({ + models: {}, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }); + + expect(lastFrame()).toContain( + 'No tool calls have been made in this session.', + ); + expect(lastFrame()).toMatchSnapshot(); + }); + + it('should display stats for a single tool correctly', () => { + const { lastFrame } = renderWithMockedStats({ + models: {}, + tools: { + totalCalls: 1, + totalSuccess: 1, + totalFail: 0, + totalDurationMs: 100, + totalDecisions: { accept: 1, reject: 0, modify: 0 }, + byName: { + 'test-tool': { + count: 1, + success: 1, + fail: 
0, + durationMs: 100, + decisions: { accept: 1, reject: 0, modify: 0 }, + }, + }, + }, + }); + + const output = lastFrame(); + expect(output).toContain('test-tool'); + expect(output).toMatchSnapshot(); + }); + + it('should display stats for multiple tools correctly', () => { + const { lastFrame } = renderWithMockedStats({ + models: {}, + tools: { + totalCalls: 3, + totalSuccess: 2, + totalFail: 1, + totalDurationMs: 300, + totalDecisions: { accept: 1, reject: 1, modify: 1 }, + byName: { + 'tool-a': { + count: 2, + success: 1, + fail: 1, + durationMs: 200, + decisions: { accept: 1, reject: 1, modify: 0 }, + }, + 'tool-b': { + count: 1, + success: 1, + fail: 0, + durationMs: 100, + decisions: { accept: 0, reject: 0, modify: 1 }, + }, + }, + }, + }); + + const output = lastFrame(); + expect(output).toContain('tool-a'); + expect(output).toContain('tool-b'); + expect(output).toMatchSnapshot(); + }); + + it('should handle large values without wrapping or overlapping', () => { + const { lastFrame } = renderWithMockedStats({ + models: {}, + tools: { + totalCalls: 999999999, + totalSuccess: 888888888, + totalFail: 111111111, + totalDurationMs: 987654321, + totalDecisions: { + accept: 123456789, + reject: 98765432, + modify: 12345, + }, + byName: { + 'long-named-tool-for-testing-wrapping-and-such': { + count: 999999999, + success: 888888888, + fail: 111111111, + durationMs: 987654321, + decisions: { + accept: 123456789, + reject: 98765432, + modify: 12345, + }, + }, + }, + }, + }); + + expect(lastFrame()).toMatchSnapshot(); + }); + + it('should handle zero decisions gracefully', () => { + const { lastFrame } = renderWithMockedStats({ + models: {}, + tools: { + totalCalls: 1, + totalSuccess: 1, + totalFail: 0, + totalDurationMs: 100, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: { + 'test-tool': { + count: 1, + success: 1, + fail: 0, + durationMs: 100, + decisions: { accept: 0, reject: 0, modify: 0 }, + }, + }, + }, + }); + + const output = lastFrame(); + 
expect(output).toContain('Total Reviewed Suggestions:'); + expect(output).toContain('0'); + expect(output).toContain('Overall Agreement Rate:'); + expect(output).toContain('--'); + expect(output).toMatchSnapshot(); + }); +}); diff --git a/packages/cli/src/ui/components/ToolStatsDisplay.tsx b/packages/cli/src/ui/components/ToolStatsDisplay.tsx new file mode 100644 index 00000000..f2335d9e --- /dev/null +++ b/packages/cli/src/ui/components/ToolStatsDisplay.tsx @@ -0,0 +1,208 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import React from 'react'; +import { Box, Text } from 'ink'; +import { Colors } from '../colors.js'; +import { formatDuration } from '../utils/formatters.js'; +import { + getStatusColor, + TOOL_SUCCESS_RATE_HIGH, + TOOL_SUCCESS_RATE_MEDIUM, + USER_AGREEMENT_RATE_HIGH, + USER_AGREEMENT_RATE_MEDIUM, +} from '../utils/displayUtils.js'; +import { useSessionStats } from '../contexts/SessionContext.js'; +import { ToolCallStats } from '@google/gemini-cli-core'; + +const TOOL_NAME_COL_WIDTH = 25; +const CALLS_COL_WIDTH = 8; +const SUCCESS_RATE_COL_WIDTH = 15; +const AVG_DURATION_COL_WIDTH = 15; + +const StatRow: React.FC<{ + name: string; + stats: ToolCallStats; +}> = ({ name, stats }) => { + const successRate = stats.count > 0 ? (stats.success / stats.count) * 100 : 0; + const avgDuration = stats.count > 0 ? 
stats.durationMs / stats.count : 0; + const successColor = getStatusColor(successRate, { + green: TOOL_SUCCESS_RATE_HIGH, + yellow: TOOL_SUCCESS_RATE_MEDIUM, + }); + + return ( + + + {name} + + + {stats.count} + + + {successRate.toFixed(1)}% + + + {formatDuration(avgDuration)} + + + ); +}; + +export const ToolStatsDisplay: React.FC = () => { + const { stats } = useSessionStats(); + const { tools } = stats.metrics; + const activeTools = Object.entries(tools.byName).filter( + ([, metrics]) => metrics.count > 0, + ); + + if (activeTools.length === 0) { + return ( + + No tool calls have been made in this session. + + ); + } + + const totalDecisions = Object.values(tools.byName).reduce( + (acc, tool) => { + acc.accept += tool.decisions.accept; + acc.reject += tool.decisions.reject; + acc.modify += tool.decisions.modify; + return acc; + }, + { accept: 0, reject: 0, modify: 0 }, + ); + + const totalReviewed = + totalDecisions.accept + totalDecisions.reject + totalDecisions.modify; + const agreementRate = + totalReviewed > 0 ? (totalDecisions.accept / totalReviewed) * 100 : 0; + const agreementColor = getStatusColor(agreementRate, { + green: USER_AGREEMENT_RATE_HIGH, + yellow: USER_AGREEMENT_RATE_MEDIUM, + }); + + return ( + + + Tool Stats For Nerds + + + + {/* Header */} + + + Tool Name + + + Calls + + + Success Rate + + + Avg Duration + + + + {/* Divider */} + + + {/* Tool Rows */} + {activeTools.map(([name, stats]) => ( + + ))} + + + + {/* User Decision Summary */} + User Decision Summary + + + Total Reviewed Suggestions: + + + {totalReviewed} + + + + + » Accepted: + + + {totalDecisions.accept} + + + + + » Rejected: + + + {totalDecisions.reject} + + + + + » Modified: + + + {totalDecisions.modify} + + + + {/* Divider */} + + + + + Overall Agreement Rate: + + + 0 ? agreementColor : undefined}> + {totalReviewed > 0 ? 
`${agreementRate.toFixed(1)}%` : '--'} + + + + + ); +}; diff --git a/packages/cli/src/ui/components/__snapshots__/ModelStatsDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ModelStatsDisplay.test.tsx.snap new file mode 100644 index 00000000..efc0862b --- /dev/null +++ b/packages/cli/src/ui/components/__snapshots__/ModelStatsDisplay.test.tsx.snap @@ -0,0 +1,121 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[` > should display a single model correctly 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Model Stats For Nerds │ +│ │ +│ Metric gemini-2.5-pro │ +│ ────────────────────────────────────────────────────────────────────────────────────────────── │ +│ API │ +│ Requests 1 │ +│ Errors 0 (0.0%) │ +│ Avg Latency 100ms │ +│ │ +│ Tokens │ +│ Total 30 │ +│ ↳ Prompt 10 │ +│ ↳ Cached 5 (50.0%) │ +│ ↳ Thoughts 2 │ +│ ↳ Tool 1 │ +│ ↳ Output 20 │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > should display conditional rows if at least one model has data 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Model Stats For Nerds │ +│ │ +│ Metric gemini-2.5-pro gemini-2.5-flash │ +│ ────────────────────────────────────────────────────────────────────────────────────────────── │ +│ API │ +│ Requests 1 1 │ +│ Errors 0 (0.0%) 0 (0.0%) │ +│ Avg Latency 100ms 50ms │ +│ │ +│ Tokens │ +│ Total 30 15 │ +│ ↳ Prompt 10 5 │ +│ ↳ Cached 5 (50.0%) 0 (0.0%) │ +│ ↳ Thoughts 2 0 │ +│ ↳ Tool 0 3 │ +│ ↳ Output 20 10 │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > should display stats for multiple models correctly 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Model Stats For Nerds │ 
+│ │ +│ Metric gemini-2.5-pro gemini-2.5-flash │ +│ ────────────────────────────────────────────────────────────────────────────────────────────── │ +│ API │ +│ Requests 10 20 │ +│ Errors 1 (10.0%) 2 (10.0%) │ +│ Avg Latency 100ms 25ms │ +│ │ +│ Tokens │ +│ Total 300 600 │ +│ ↳ Prompt 100 200 │ +│ ↳ Cached 50 (50.0%) 100 (50.0%) │ +│ ↳ Thoughts 10 20 │ +│ ↳ Tool 5 10 │ +│ ↳ Output 200 400 │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > should handle large values without wrapping or overlapping 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Model Stats For Nerds │ +│ │ +│ Metric gemini-2.5-pro │ +│ ────────────────────────────────────────────────────────────────────────────────────────────── │ +│ API │ +│ Requests 999,999,999 │ +│ Errors 123,456,789 (12.3%) │ +│ Avg Latency 0ms │ +│ │ +│ Tokens │ +│ Total 999,999,999 │ +│ ↳ Prompt 987,654,321 │ +│ ↳ Cached 123,456,789 (12.5%) │ +│ ↳ Thoughts 111,111,111 │ +│ ↳ Tool 222,222,222 │ +│ ↳ Output 123,456,789 │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > should not display conditional rows if no model has data for them 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Model Stats For Nerds │ +│ │ +│ Metric gemini-2.5-pro │ +│ ────────────────────────────────────────────────────────────────────────────────────────────── │ +│ API │ +│ Requests 1 │ +│ Errors 0 (0.0%) │ +│ Avg Latency 100ms │ +│ │ +│ Tokens │ +│ Total 30 │ +│ ↳ Prompt 10 │ +│ ↳ Output 20 │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > should render "no API calls" message when there are no active models 1`] = ` 
+"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ No API calls have been made in this session. │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; diff --git a/packages/cli/src/ui/components/__snapshots__/SessionSummaryDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/SessionSummaryDisplay.test.tsx.snap index 3d2c373c..06dc2116 100644 --- a/packages/cli/src/ui/components/__snapshots__/SessionSummaryDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/SessionSummaryDisplay.test.tsx.snap @@ -1,43 +1,45 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[` > renders correctly with given stats and duration 1`] = ` +exports[` > correctly sums and displays stats from multiple models 1`] = ` "╭─────────────────────────────────────╮ │ │ │ Agent powering down. Goodbye! │ │ │ │ │ -│ Cumulative Stats (10 Turns) │ +│ Cumulative Stats (15 API calls) │ │ │ -│ Input Tokens 1,000 │ -│ Output Tokens 2,000 │ -│ Tool Use Tokens 200 │ -│ Thoughts Tokens 300 │ -│ Cached Tokens 500 (14.3%) │ +│ Input Tokens 1,500 │ +│ Output Tokens 3,000 │ +│ Tool Use Tokens 220 │ +│ Thoughts Tokens 350 │ +│ Cached Tokens 600 (12.0%) │ │ ───────────────────────────────── │ -│ Total Tokens 3,500 │ +│ Total Tokens 5,000 │ │ │ -│ Total duration (API) 50.2s │ +│ Total duration (API) 1m 2s │ +│ Total duration (Tools) 0s │ │ Total duration (wall) 1h 23m 45s │ │ │ ╰─────────────────────────────────────╯" `; exports[` > renders zero state correctly 1`] = ` -"╭─────────────────────────────────╮ -│ │ -│ Agent powering down. Goodbye! 
│ -│ │ -│ │ -│ Cumulative Stats (0 Turns) │ -│ │ -│ Input Tokens 0 │ -│ Output Tokens 0 │ -│ Thoughts Tokens 0 │ -│ ────────────────────────── │ -│ Total Tokens 0 │ -│ │ -│ Total duration (API) 0s │ -│ Total duration (wall) 0s │ -│ │ -╰─────────────────────────────────╯" +"╭─────────────────────────────────────╮ +│ │ +│ Agent powering down. Goodbye! │ +│ │ +│ │ +│ Cumulative Stats (0 API calls) │ +│ │ +│ Input Tokens 0 │ +│ Output Tokens 0 │ +│ Thoughts Tokens 0 │ +│ ───────────────────────────────── │ +│ Total Tokens 0 │ +│ │ +│ Total duration (API) 0s │ +│ Total duration (Tools) 0s │ +│ Total duration (wall) 1h 23m 45s │ +│ │ +╰─────────────────────────────────────╯" `; diff --git a/packages/cli/src/ui/components/__snapshots__/StatsDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/StatsDisplay.test.tsx.snap index b8a070a3..6fc2565e 100644 --- a/packages/cli/src/ui/components/__snapshots__/StatsDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/StatsDisplay.test.tsx.snap @@ -1,41 +1,163 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[` > renders correctly with given stats and duration 1`] = ` +exports[` > Conditional Color Tests > renders success rate in green for high values 1`] = ` "╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ │ │ -│ Stats │ +│ Session Stats │ │ │ -│ Last Turn Cumulative (10 Turns) │ +│ Interaction Summary │ +│ Tool Calls: 10 ( ✔ 10 ✖ 0 ) │ +│ Success Rate: 100.0% │ │ │ -│ Input Tokens 100 Input Tokens 1,000 │ -│ Output Tokens 200 Output Tokens 2,000 │ -│ Tool Use Tokens 20 Tool Use Tokens 200 │ -│ Thoughts Tokens 30 Thoughts Tokens 300 │ -│ Cached Tokens 50 Cached Tokens 500 (14.3%) │ -│ ───────────────────────────────────────────── ───────────────────────────────────────────── │ -│ Total Tokens 350 Total Tokens 3,500 │ +│ Performance │ +│ Wall Time: 1s │ +│ Agent Active: 0s │ +│ » API Time: 0s (0.0%) │ +│ » Tool Time: 0s 
(0.0%) │ │ │ -│ Turn Duration (API) 1.2s Total duration (API) 50.2s │ -│ Total duration (wall) 1h 23m 45s │ │ │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" `; -exports[` > renders zero state correctly 1`] = ` +exports[` > Conditional Color Tests > renders success rate in red for low values 1`] = ` "╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ │ │ -│ Stats │ +│ Session Stats │ │ │ -│ Last Turn Cumulative (0 Turns) │ +│ Interaction Summary │ +│ Tool Calls: 10 ( ✔ 5 ✖ 5 ) │ +│ Success Rate: 50.0% │ │ │ -│ Input Tokens 0 Input Tokens 0 │ -│ Output Tokens 0 Output Tokens 0 │ -│ Thoughts Tokens 0 Thoughts Tokens 0 │ -│ ───────────────────────────────────────────── ───────────────────────────────────────────── │ -│ Total Tokens 0 Total Tokens 0 │ +│ Performance │ +│ Wall Time: 1s │ +│ Agent Active: 0s │ +│ » API Time: 0s (0.0%) │ +│ » Tool Time: 0s (0.0%) │ +│ │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > Conditional Color Tests > renders success rate in yellow for medium values 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Session Stats │ +│ │ +│ Interaction Summary │ +│ Tool Calls: 10 ( ✔ 9 ✖ 1 ) │ +│ Success Rate: 90.0% │ +│ │ +│ Performance │ +│ Wall Time: 1s │ +│ Agent Active: 0s │ +│ » API Time: 0s (0.0%) │ +│ » Tool Time: 0s (0.0%) │ +│ │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > Conditional Rendering Tests > hides Efficiency section when cache is not used 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Session Stats │ +│ │ +│ Performance │ +│ Wall Time: 1s │ +│ Agent Active: 100ms │ +│ » API Time: 100ms (100.0%) │ +│ » Tool Time: 0s (0.0%) │ +│ │ 
+│ │ +│ Model Usage Reqs Input Tokens Output Tokens │ +│ ─────────────────────────────────────────────────────────────── │ +│ gemini-2.5-pro 1 100 100 │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > Conditional Rendering Tests > hides User Agreement when no decisions are made 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Session Stats │ +│ │ +│ Interaction Summary │ +│ Tool Calls: 2 ( ✔ 1 ✖ 1 ) │ +│ Success Rate: 50.0% │ +│ │ +│ Performance │ +│ Wall Time: 1s │ +│ Agent Active: 123ms │ +│ » API Time: 0s (0.0%) │ +│ » Tool Time: 123ms (100.0%) │ +│ │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > renders a table with two models correctly 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Session Stats │ +│ │ +│ Performance │ +│ Wall Time: 1s │ +│ Agent Active: 19.5s │ +│ » API Time: 19.5s (100.0%) │ +│ » Tool Time: 0s (0.0%) │ +│ │ +│ │ +│ Model Usage Reqs Input Tokens Output Tokens │ +│ ─────────────────────────────────────────────────────────────── │ +│ gemini-2.5-pro 3 1,000 2,000 │ +│ gemini-2.5-flash 5 25,000 15,000 │ +│ │ +│ Savings Highlight: 10,500 (40.4%) of input tokens were served from the cache, reducing costs. │ +│ │ +│ » Tip: For a full token breakdown, run \`/stats model\`. 
│ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > renders all sections when all data is present 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Session Stats │ +│ │ +│ Interaction Summary │ +│ Tool Calls: 2 ( ✔ 1 ✖ 1 ) │ +│ Success Rate: 50.0% │ +│ User Agreement: 100.0% (1 reviewed) │ +│ │ +│ Performance │ +│ Wall Time: 1s │ +│ Agent Active: 223ms │ +│ » API Time: 100ms (44.8%) │ +│ » Tool Time: 123ms (55.2%) │ +│ │ +│ │ +│ Model Usage Reqs Input Tokens Output Tokens │ +│ ─────────────────────────────────────────────────────────────── │ +│ gemini-2.5-pro 1 100 100 │ +│ │ +│ Savings Highlight: 50 (50.0%) of input tokens were served from the cache, reducing costs. │ +│ │ +│ » Tip: For a full token breakdown, run \`/stats model\`. │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > renders only the Performance section in its zero state 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ Session Stats │ +│ │ +│ Performance │ +│ Wall Time: 1s │ +│ Agent Active: 0s │ +│ » API Time: 0s (0.0%) │ +│ » Tool Time: 0s (0.0%) │ │ │ -│ Turn Duration (API) 0s Total duration (API) 0s │ -│ Total duration (wall) 0s │ │ │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" `; diff --git a/packages/cli/src/ui/components/__snapshots__/ToolStatsDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ToolStatsDisplay.test.tsx.snap new file mode 100644 index 00000000..61fb3efc --- /dev/null +++ b/packages/cli/src/ui/components/__snapshots__/ToolStatsDisplay.test.tsx.snap @@ -0,0 +1,91 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[` > should display stats for a single tool correctly 1`] = ` 
+"╭────────────────────────────────────────────────────────────────────╮ +│ │ +│ Tool Stats For Nerds │ +│ │ +│ Tool Name Calls Success Rate Avg Duration │ +│ ──────────────────────────────────────────────────────────────── │ +│ test-tool 1 100.0% 100ms │ +│ │ +│ User Decision Summary │ +│ Total Reviewed Suggestions: 1 │ +│ » Accepted: 1 │ +│ » Rejected: 0 │ +│ » Modified: 0 │ +│ ──────────────────────────────────────────────────────────────── │ +│ Overall Agreement Rate: 100.0% │ +│ │ +╰────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > should display stats for multiple tools correctly 1`] = ` +"╭────────────────────────────────────────────────────────────────────╮ +│ │ +│ Tool Stats For Nerds │ +│ │ +│ Tool Name Calls Success Rate Avg Duration │ +│ ──────────────────────────────────────────────────────────────── │ +│ tool-a 2 50.0% 100ms │ +│ tool-b 1 100.0% 100ms │ +│ │ +│ User Decision Summary │ +│ Total Reviewed Suggestions: 3 │ +│ » Accepted: 1 │ +│ » Rejected: 1 │ +│ » Modified: 1 │ +│ ──────────────────────────────────────────────────────────────── │ +│ Overall Agreement Rate: 33.3% │ +│ │ +╰────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > should handle large values without wrapping or overlapping 1`] = ` +"╭────────────────────────────────────────────────────────────────────╮ +│ │ +│ Tool Stats For Nerds │ +│ │ +│ Tool Name Calls Success Rate Avg Duration │ +│ ──────────────────────────────────────────────────────────────── │ +│ long-named-tool-for-testi99999999 88.9% 1ms │ +│ ng-wrapping-and-such 9 │ +│ │ +│ User Decision Summary │ +│ Total Reviewed Suggestions: 222234566 │ +│ » Accepted: 123456789 │ +│ » Rejected: 98765432 │ +│ » Modified: 12345 │ +│ ──────────────────────────────────────────────────────────────── │ +│ Overall Agreement Rate: 55.6% │ +│ │ +╰────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > should handle zero decisions 
gracefully 1`] = ` +"╭────────────────────────────────────────────────────────────────────╮ +│ │ +│ Tool Stats For Nerds │ +│ │ +│ Tool Name Calls Success Rate Avg Duration │ +│ ──────────────────────────────────────────────────────────────── │ +│ test-tool 1 100.0% 100ms │ +│ │ +│ User Decision Summary │ +│ Total Reviewed Suggestions: 0 │ +│ » Accepted: 0 │ +│ » Rejected: 0 │ +│ » Modified: 0 │ +│ ──────────────────────────────────────────────────────────────── │ +│ Overall Agreement Rate: -- │ +│ │ +╰────────────────────────────────────────────────────────────────────╯" +`; + +exports[` > should render "no tool calls" message when there are no active tools 1`] = ` +"╭──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ No tool calls have been made in this session. │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" +`; diff --git a/packages/cli/src/ui/contexts/SessionContext.test.tsx b/packages/cli/src/ui/contexts/SessionContext.test.tsx index fedb5341..5b05c284 100644 --- a/packages/cli/src/ui/contexts/SessionContext.test.tsx +++ b/packages/cli/src/ui/contexts/SessionContext.test.tsx @@ -8,28 +8,13 @@ import { type MutableRefObject } from 'react'; import { render } from 'ink-testing-library'; import { renderHook } from '@testing-library/react'; import { act } from 'react-dom/test-utils'; -import { SessionStatsProvider, useSessionStats } from './SessionContext.js'; +import { + SessionStatsProvider, + useSessionStats, + SessionMetrics, +} from './SessionContext.js'; import { describe, it, expect, vi } from 'vitest'; -import { GenerateContentResponseUsageMetadata } from '@google/genai'; - -// Mock data that simulates what the Gemini API would return. 
-const mockMetadata1: GenerateContentResponseUsageMetadata = { - promptTokenCount: 100, - candidatesTokenCount: 200, - totalTokenCount: 300, - cachedContentTokenCount: 50, - toolUsePromptTokenCount: 10, - thoughtsTokenCount: 20, -}; - -const mockMetadata2: GenerateContentResponseUsageMetadata = { - promptTokenCount: 10, - candidatesTokenCount: 20, - totalTokenCount: 30, - cachedContentTokenCount: 5, - toolUsePromptTokenCount: 1, - thoughtsTokenCount: 2, -}; +import { uiTelemetryService } from '@google/gemini-cli-core'; /** * A test harness component that uses the hook and exposes the context value @@ -60,13 +45,11 @@ describe('SessionStatsContext', () => { const stats = contextRef.current?.stats; expect(stats?.sessionStartTime).toBeInstanceOf(Date); - expect(stats?.currentTurn).toBeDefined(); - expect(stats?.cumulative.turnCount).toBe(0); - expect(stats?.cumulative.totalTokenCount).toBe(0); - expect(stats?.cumulative.promptTokenCount).toBe(0); + expect(stats?.metrics).toBeDefined(); + expect(stats?.metrics.models).toEqual({}); }); - it('should increment turnCount when startNewTurn is called', () => { + it('should update metrics when the uiTelemetryService emits an update', () => { const contextRef: MutableRefObject< ReturnType | undefined > = { current: undefined }; @@ -77,150 +60,60 @@ describe('SessionStatsContext', () => { , ); + const newMetrics: SessionMetrics = { + models: { + 'gemini-pro': { + api: { + totalRequests: 1, + totalErrors: 0, + totalLatencyMs: 123, + }, + tokens: { + prompt: 100, + candidates: 200, + total: 300, + cached: 50, + thoughts: 20, + tool: 10, + }, + }, + }, + tools: { + totalCalls: 1, + totalSuccess: 1, + totalFail: 0, + totalDurationMs: 456, + totalDecisions: { + accept: 1, + reject: 0, + modify: 0, + }, + byName: { + 'test-tool': { + count: 1, + success: 1, + fail: 0, + durationMs: 456, + decisions: { + accept: 1, + reject: 0, + modify: 0, + }, + }, + }, + }, + }; + act(() => { - contextRef.current?.startNewTurn(); + 
uiTelemetryService.emit('update', { + metrics: newMetrics, + lastPromptTokenCount: 100, + }); }); const stats = contextRef.current?.stats; - expect(stats?.currentTurn.totalTokenCount).toBe(0); - expect(stats?.cumulative.turnCount).toBe(1); - // Ensure token counts are unaffected - expect(stats?.cumulative.totalTokenCount).toBe(0); - }); - - it('should aggregate token usage correctly when addUsage is called', () => { - const contextRef: MutableRefObject< - ReturnType | undefined - > = { current: undefined }; - - render( - - - , - ); - - act(() => { - contextRef.current?.addUsage({ ...mockMetadata1, apiTimeMs: 123 }); - }); - - const stats = contextRef.current?.stats; - - // Check that token counts are updated - expect(stats?.cumulative.totalTokenCount).toBe( - mockMetadata1.totalTokenCount ?? 0, - ); - expect(stats?.cumulative.promptTokenCount).toBe( - mockMetadata1.promptTokenCount ?? 0, - ); - expect(stats?.cumulative.apiTimeMs).toBe(123); - - // Check that turn count is NOT incremented - expect(stats?.cumulative.turnCount).toBe(0); - - // Check that currentTurn is updated - expect(stats?.currentTurn?.totalTokenCount).toEqual( - mockMetadata1.totalTokenCount, - ); - expect(stats?.currentTurn?.apiTimeMs).toBe(123); - }); - - it('should correctly track a full logical turn with multiple API calls', () => { - const contextRef: MutableRefObject< - ReturnType | undefined - > = { current: undefined }; - - render( - - - , - ); - - // 1. User starts a new turn - act(() => { - contextRef.current?.startNewTurn(); - }); - - // 2. First API call (e.g., prompt with a tool request) - act(() => { - contextRef.current?.addUsage({ ...mockMetadata1, apiTimeMs: 100 }); - }); - - // 3. 
Second API call (e.g., sending tool response back) - act(() => { - contextRef.current?.addUsage({ ...mockMetadata2, apiTimeMs: 50 }); - }); - - const stats = contextRef.current?.stats; - - // Turn count should only be 1 - expect(stats?.cumulative.turnCount).toBe(1); - - // --- Check Cumulative Stats --- - // These fields should be the SUM of both calls - expect(stats?.cumulative.totalTokenCount).toBe(300 + 30); - expect(stats?.cumulative.candidatesTokenCount).toBe(200 + 20); - expect(stats?.cumulative.thoughtsTokenCount).toBe(20 + 2); - expect(stats?.cumulative.apiTimeMs).toBe(100 + 50); - - // These fields should be the SUM of both calls - expect(stats?.cumulative.promptTokenCount).toBe(100 + 10); - expect(stats?.cumulative.cachedContentTokenCount).toBe(50 + 5); - expect(stats?.cumulative.toolUsePromptTokenCount).toBe(10 + 1); - - // --- Check Current Turn Stats --- - // All fields should be the SUM of both calls for the turn - expect(stats?.currentTurn.totalTokenCount).toBe(300 + 30); - expect(stats?.currentTurn.candidatesTokenCount).toBe(200 + 20); - expect(stats?.currentTurn.thoughtsTokenCount).toBe(20 + 2); - expect(stats?.currentTurn.promptTokenCount).toBe(100 + 10); - expect(stats?.currentTurn.cachedContentTokenCount).toBe(50 + 5); - expect(stats?.currentTurn.toolUsePromptTokenCount).toBe(10 + 1); - expect(stats?.currentTurn.apiTimeMs).toBe(100 + 50); - }); - - it('should overwrite currentResponse with each API call', () => { - const contextRef: MutableRefObject< - ReturnType | undefined - > = { current: undefined }; - - render( - - - , - ); - - // 1. First API call - act(() => { - contextRef.current?.addUsage({ ...mockMetadata1, apiTimeMs: 100 }); - }); - - let stats = contextRef.current?.stats; - - // currentResponse should match the first call - expect(stats?.currentResponse.totalTokenCount).toBe(300); - expect(stats?.currentResponse.apiTimeMs).toBe(100); - - // 2. 
Second API call - act(() => { - contextRef.current?.addUsage({ ...mockMetadata2, apiTimeMs: 50 }); - }); - - stats = contextRef.current?.stats; - - // currentResponse should now match the second call - expect(stats?.currentResponse.totalTokenCount).toBe(30); - expect(stats?.currentResponse.apiTimeMs).toBe(50); - - // 3. Start a new turn - act(() => { - contextRef.current?.startNewTurn(); - }); - - stats = contextRef.current?.stats; - - // currentResponse should be reset - expect(stats?.currentResponse.totalTokenCount).toBe(0); - expect(stats?.currentResponse.apiTimeMs).toBe(0); + expect(stats?.metrics).toEqual(newMetrics); + expect(stats?.lastPromptTokenCount).toBe(100); }); it('should throw an error when useSessionStats is used outside of a provider', () => { diff --git a/packages/cli/src/ui/contexts/SessionContext.tsx b/packages/cli/src/ui/contexts/SessionContext.tsx index f59e17e1..b89d19e7 100644 --- a/packages/cli/src/ui/contexts/SessionContext.tsx +++ b/packages/cli/src/ui/contexts/SessionContext.tsx @@ -9,39 +9,43 @@ import React, { useContext, useState, useMemo, - useCallback, + useEffect, } from 'react'; -import { type GenerateContentResponseUsageMetadata } from '@google/genai'; +import { + uiTelemetryService, + SessionMetrics, + ModelMetrics, +} from '@google/gemini-cli-core'; // --- Interface Definitions --- -export interface CumulativeStats { - turnCount: number; - promptTokenCount: number; - candidatesTokenCount: number; - totalTokenCount: number; - cachedContentTokenCount: number; - toolUsePromptTokenCount: number; - thoughtsTokenCount: number; - apiTimeMs: number; -} +export type { SessionMetrics, ModelMetrics }; interface SessionStatsState { sessionStartTime: Date; - cumulative: CumulativeStats; - currentTurn: CumulativeStats; - currentResponse: CumulativeStats; + metrics: SessionMetrics; + lastPromptTokenCount: number; +} + +export interface ComputedSessionStats { + totalApiTime: number; + totalToolTime: number; + agentActiveTime: number; + 
apiTimePercent: number; + toolTimePercent: number; + cacheEfficiency: number; + totalDecisions: number; + successRate: number; + agreementRate: number; + totalCachedTokens: number; + totalPromptTokens: number; } // Defines the final "value" of our context, including the state // and the functions to update it. interface SessionStatsContextValue { stats: SessionStatsState; - startNewTurn: () => void; - addUsage: ( - metadata: GenerateContentResponseUsageMetadata & { apiTimeMs?: number }, - ) => void; } // --- Context Definition --- @@ -50,27 +54,6 @@ const SessionStatsContext = createContext( undefined, ); -// --- Helper Functions --- - -/** - * A small, reusable helper function to sum token counts. - * It unconditionally adds all token values from the source to the target. - * @param target The object to add the tokens to (e.g., cumulative, currentTurn). - * @param source The metadata object from the API response. - */ -const addTokens = ( - target: CumulativeStats, - source: GenerateContentResponseUsageMetadata & { apiTimeMs?: number }, -) => { - target.candidatesTokenCount += source.candidatesTokenCount ?? 0; - target.thoughtsTokenCount += source.thoughtsTokenCount ?? 0; - target.totalTokenCount += source.totalTokenCount ?? 0; - target.apiTimeMs += source.apiTimeMs ?? 0; - target.promptTokenCount += source.promptTokenCount ?? 0; - target.cachedContentTokenCount += source.cachedContentTokenCount ?? 0; - target.toolUsePromptTokenCount += source.toolUsePromptTokenCount ?? 
0; -}; - // --- Provider Component --- export const SessionStatsProvider: React.FC<{ children: React.ReactNode }> = ({ @@ -78,110 +61,42 @@ export const SessionStatsProvider: React.FC<{ children: React.ReactNode }> = ({ }) => { const [stats, setStats] = useState({ sessionStartTime: new Date(), - cumulative: { - turnCount: 0, - promptTokenCount: 0, - candidatesTokenCount: 0, - totalTokenCount: 0, - cachedContentTokenCount: 0, - toolUsePromptTokenCount: 0, - thoughtsTokenCount: 0, - apiTimeMs: 0, - }, - currentTurn: { - turnCount: 0, - promptTokenCount: 0, - candidatesTokenCount: 0, - totalTokenCount: 0, - cachedContentTokenCount: 0, - toolUsePromptTokenCount: 0, - thoughtsTokenCount: 0, - apiTimeMs: 0, - }, - currentResponse: { - turnCount: 0, - promptTokenCount: 0, - candidatesTokenCount: 0, - totalTokenCount: 0, - cachedContentTokenCount: 0, - toolUsePromptTokenCount: 0, - thoughtsTokenCount: 0, - apiTimeMs: 0, - }, + metrics: uiTelemetryService.getMetrics(), + lastPromptTokenCount: 0, }); - // A single, internal worker function to handle all metadata aggregation. - const aggregateTokens = useCallback( - ( - metadata: GenerateContentResponseUsageMetadata & { apiTimeMs?: number }, - ) => { - setStats((prevState) => { - const newCumulative = { ...prevState.cumulative }; - const newCurrentTurn = { ...prevState.currentTurn }; - const newCurrentResponse = { - turnCount: 0, - promptTokenCount: 0, - candidatesTokenCount: 0, - totalTokenCount: 0, - cachedContentTokenCount: 0, - toolUsePromptTokenCount: 0, - thoughtsTokenCount: 0, - apiTimeMs: 0, - }; + useEffect(() => { + const handleUpdate = ({ + metrics, + lastPromptTokenCount, + }: { + metrics: SessionMetrics; + lastPromptTokenCount: number; + }) => { + setStats((prevState) => ({ + ...prevState, + metrics, + lastPromptTokenCount, + })); + }; - // Add all tokens to the current turn's stats as well as cumulative stats. 
- addTokens(newCurrentTurn, metadata); - addTokens(newCumulative, metadata); - addTokens(newCurrentResponse, metadata); + uiTelemetryService.on('update', handleUpdate); + // Set initial state + handleUpdate({ + metrics: uiTelemetryService.getMetrics(), + lastPromptTokenCount: uiTelemetryService.getLastPromptTokenCount(), + }); - return { - ...prevState, - cumulative: newCumulative, - currentTurn: newCurrentTurn, - currentResponse: newCurrentResponse, - }; - }); - }, - [], - ); - - const startNewTurn = useCallback(() => { - setStats((prevState) => ({ - ...prevState, - cumulative: { - ...prevState.cumulative, - turnCount: prevState.cumulative.turnCount + 1, - }, - currentTurn: { - turnCount: 0, // Reset for the new turn's accumulation. - promptTokenCount: 0, - candidatesTokenCount: 0, - totalTokenCount: 0, - cachedContentTokenCount: 0, - toolUsePromptTokenCount: 0, - thoughtsTokenCount: 0, - apiTimeMs: 0, - }, - currentResponse: { - turnCount: 0, - promptTokenCount: 0, - candidatesTokenCount: 0, - totalTokenCount: 0, - cachedContentTokenCount: 0, - toolUsePromptTokenCount: 0, - thoughtsTokenCount: 0, - apiTimeMs: 0, - }, - })); + return () => { + uiTelemetryService.off('update', handleUpdate); + }; }, []); const value = useMemo( () => ({ stats, - startNewTurn, - addUsage: aggregateTokens, }), - [stats, startNewTurn, aggregateTokens], + [stats], ); return ( diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts b/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts index 01954670..d10ae22b 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts @@ -296,19 +296,9 @@ describe('useSlashCommandProcessor', () => { describe('/stats command', () => { it('should show detailed session statistics', async () => { // Arrange - const cumulativeStats = { - totalTokenCount: 900, - promptTokenCount: 200, - candidatesTokenCount: 400, - cachedContentTokenCount: 100, - turnCount: 1, - 
toolUsePromptTokenCount: 50, - thoughtsTokenCount: 150, - }; mockUseSessionStats.mockReturnValue({ stats: { sessionStartTime: new Date('2025-01-01T00:00:00.000Z'), - cumulative: cumulativeStats, }, }); @@ -326,7 +316,6 @@ describe('useSlashCommandProcessor', () => { 2, // Called after the user message expect.objectContaining({ type: MessageType.STATS, - stats: cumulativeStats, duration: '1h 2m 3s', }), expect.any(Number), @@ -334,6 +323,44 @@ describe('useSlashCommandProcessor', () => { vi.useRealTimers(); }); + + it('should show model-specific statistics when using /stats model', async () => { + // Arrange + const { handleSlashCommand } = getProcessor(); + + // Act + await act(async () => { + handleSlashCommand('/stats model'); + }); + + // Assert + expect(mockAddItem).toHaveBeenNthCalledWith( + 2, // Called after the user message + expect.objectContaining({ + type: MessageType.MODEL_STATS, + }), + expect.any(Number), + ); + }); + + it('should show tool-specific statistics when using /stats tools', async () => { + // Arrange + const { handleSlashCommand } = getProcessor(); + + // Act + await act(async () => { + handleSlashCommand('/stats tools'); + }); + + // Assert + expect(mockAddItem).toHaveBeenNthCalledWith( + 2, // Called after the user message + expect.objectContaining({ + type: MessageType.TOOL_STATS, + }), + expect.any(Number), + ); + }); }); describe('/about command', () => { @@ -598,7 +625,6 @@ describe('useSlashCommandProcessor', () => { }, { type: 'quit', - stats: expect.any(Object), duration: '1h 2m 3s', id: expect.any(Number), }, diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.ts b/packages/cli/src/ui/hooks/slashCommandProcessor.ts index b7dcbdcb..ffc3d7d1 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.ts +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.ts @@ -110,14 +110,19 @@ export const useSlashCommandProcessor = ( } else if (message.type === MessageType.STATS) { historyItemContent = { type: 'stats', - stats: 
message.stats, - lastTurnStats: message.lastTurnStats, duration: message.duration, }; + } else if (message.type === MessageType.MODEL_STATS) { + historyItemContent = { + type: 'model_stats', + }; + } else if (message.type === MessageType.TOOL_STATS) { + historyItemContent = { + type: 'tool_stats', + }; } else if (message.type === MessageType.QUIT) { historyItemContent = { type: 'quit', - stats: message.stats, duration: message.duration, }; } else if (message.type === MessageType.COMPRESSION) { @@ -262,16 +267,28 @@ export const useSlashCommandProcessor = ( { name: 'stats', altName: 'usage', - description: 'check session stats', - action: (_mainCommand, _subCommand, _args) => { + description: 'check session stats. Usage: /stats [model|tools]', + action: (_mainCommand, subCommand, _args) => { + if (subCommand === 'model') { + addMessage({ + type: MessageType.MODEL_STATS, + timestamp: new Date(), + }); + return; + } else if (subCommand === 'tools') { + addMessage({ + type: MessageType.TOOL_STATS, + timestamp: new Date(), + }); + return; + } + const now = new Date(); - const { sessionStartTime, cumulative, currentTurn } = session.stats; + const { sessionStartTime } = session.stats; const wallDuration = now.getTime() - sessionStartTime.getTime(); addMessage({ type: MessageType.STATS, - stats: cumulative, - lastTurnStats: currentTurn, duration: formatDuration(wallDuration), timestamp: new Date(), }); @@ -805,7 +822,7 @@ export const useSlashCommandProcessor = ( description: 'exit the cli', action: async (mainCommand, _subCommand, _args) => { const now = new Date(); - const { sessionStartTime, cumulative } = session.stats; + const { sessionStartTime } = session.stats; const wallDuration = now.getTime() - sessionStartTime.getTime(); setQuittingMessages([ @@ -816,7 +833,6 @@ export const useSlashCommandProcessor = ( }, { type: 'quit', - stats: cumulative, duration: formatDuration(wallDuration), id: now.getTime(), }, diff --git 
a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 0c8b261e..9751f470 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -604,78 +604,6 @@ describe('useGeminiStream', () => { }); }); - describe('Session Stats Integration', () => { - it('should call startNewTurn and addUsage for a simple prompt', async () => { - const mockMetadata = { totalTokenCount: 123 }; - const mockStream = (async function* () { - yield { type: 'content', value: 'Response' }; - yield { type: 'usage_metadata', value: mockMetadata }; - })(); - mockSendMessageStream.mockReturnValue(mockStream); - - const { result } = renderTestHook(); - - await act(async () => { - await result.current.submitQuery('Hello, world!'); - }); - - expect(mockStartNewTurn).toHaveBeenCalledTimes(1); - expect(mockAddUsage).toHaveBeenCalledTimes(1); - expect(mockAddUsage).toHaveBeenCalledWith(mockMetadata); - }); - - it('should only call addUsage for a tool continuation prompt', async () => { - const mockMetadata = { totalTokenCount: 456 }; - const mockStream = (async function* () { - yield { type: 'content', value: 'Final Answer' }; - yield { type: 'usage_metadata', value: mockMetadata }; - })(); - mockSendMessageStream.mockReturnValue(mockStream); - - const { result } = renderTestHook(); - - await act(async () => { - await result.current.submitQuery([{ text: 'tool response' }], { - isContinuation: true, - }); - }); - - expect(mockStartNewTurn).not.toHaveBeenCalled(); - expect(mockAddUsage).toHaveBeenCalledTimes(1); - expect(mockAddUsage).toHaveBeenCalledWith(mockMetadata); - }); - - it('should not call addUsage if the stream contains no usage metadata', async () => { - // Arrange: A stream that yields content but never a usage_metadata event - const mockStream = (async function* () { - yield { type: 'content', value: 'Some response text' }; - })(); - 
mockSendMessageStream.mockReturnValue(mockStream); - - const { result } = renderTestHook(); - - await act(async () => { - await result.current.submitQuery('Query with no usage data'); - }); - - expect(mockStartNewTurn).toHaveBeenCalledTimes(1); - expect(mockAddUsage).not.toHaveBeenCalled(); - }); - - it('should not call startNewTurn for a slash command', async () => { - mockHandleSlashCommand.mockReturnValue(true); - - const { result } = renderTestHook(); - - await act(async () => { - await result.current.submitQuery('/stats'); - }); - - expect(mockStartNewTurn).not.toHaveBeenCalled(); - expect(mockSendMessageStream).not.toHaveBeenCalled(); - }); - }); - it('should not flicker streaming state to Idle between tool completion and submission', async () => { const toolCallResponseParts: PartListUnion = [ { text: 'tool 1 final response' }, diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 3d24ede7..e2226761 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -51,7 +51,6 @@ import { TrackedCompletedToolCall, TrackedCancelledToolCall, } from './useReactToolScheduler.js'; -import { useSessionStats } from '../contexts/SessionContext.js'; export function mergePartListUnions(list: PartListUnion[]): PartListUnion { const resultParts: PartListUnion = []; @@ -101,7 +100,6 @@ export const useGeminiStream = ( useStateAndRef(null); const processedMemoryToolsRef = useRef>(new Set()); const logger = useLogger(); - const { startNewTurn, addUsage } = useSessionStats(); const gitService = useMemo(() => { if (!config.getProjectRoot()) { return; @@ -461,9 +459,6 @@ export const useGeminiStream = ( case ServerGeminiEventType.ChatCompressed: handleChatCompressionEvent(event.value); break; - case ServerGeminiEventType.UsageMetadata: - addUsage(event.value); - break; case ServerGeminiEventType.ToolCallConfirmation: case ServerGeminiEventType.ToolCallResponse: // do nothing 
@@ -486,7 +481,6 @@ export const useGeminiStream = ( handleErrorEvent, scheduleToolCalls, handleChatCompressionEvent, - addUsage, ], ); @@ -516,10 +510,6 @@ export const useGeminiStream = ( return; } - if (!options?.isContinuation) { - startNewTurn(); - } - setIsResponding(true); setInitError(null); @@ -568,7 +558,6 @@ export const useGeminiStream = ( setPendingHistoryItem, setInitError, geminiClient, - startNewTurn, onAuthError, config, ], diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index 66a465ba..dd78c0c9 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -8,7 +8,6 @@ import { ToolCallConfirmationDetails, ToolResultDisplay, } from '@google/gemini-cli-core'; -import { CumulativeStats } from './contexts/SessionContext.js'; // Only defining the state enum needed by the UI export enum StreamingState { @@ -100,14 +99,19 @@ export type HistoryItemAbout = HistoryItemBase & { export type HistoryItemStats = HistoryItemBase & { type: 'stats'; - stats: CumulativeStats; - lastTurnStats: CumulativeStats; duration: string; }; +export type HistoryItemModelStats = HistoryItemBase & { + type: 'model_stats'; +}; + +export type HistoryItemToolStats = HistoryItemBase & { + type: 'tool_stats'; +}; + export type HistoryItemQuit = HistoryItemBase & { type: 'quit'; - stats: CumulativeStats; duration: string; }; @@ -140,6 +144,8 @@ export type HistoryItemWithoutId = | HistoryItemAbout | HistoryItemToolGroup | HistoryItemStats + | HistoryItemModelStats + | HistoryItemToolStats | HistoryItemQuit | HistoryItemCompression; @@ -152,6 +158,8 @@ export enum MessageType { USER = 'user', ABOUT = 'about', STATS = 'stats', + MODEL_STATS = 'model_stats', + TOOL_STATS = 'tool_stats', QUIT = 'quit', GEMINI = 'gemini', COMPRESSION = 'compression', @@ -178,15 +186,22 @@ export type Message = | { type: MessageType.STATS; timestamp: Date; - stats: CumulativeStats; - lastTurnStats: CumulativeStats; duration: string; content?: string; } + | { + 
type: MessageType.MODEL_STATS; + timestamp: Date; + content?: string; + } + | { + type: MessageType.TOOL_STATS; + timestamp: Date; + content?: string; + } | { type: MessageType.QUIT; timestamp: Date; - stats: CumulativeStats; duration: string; content?: string; } diff --git a/packages/cli/src/ui/utils/computeStats.test.ts b/packages/cli/src/ui/utils/computeStats.test.ts new file mode 100644 index 00000000..0e32ffe2 --- /dev/null +++ b/packages/cli/src/ui/utils/computeStats.test.ts @@ -0,0 +1,247 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { + calculateAverageLatency, + calculateCacheHitRate, + calculateErrorRate, + computeSessionStats, +} from './computeStats.js'; +import { ModelMetrics, SessionMetrics } from '../contexts/SessionContext.js'; + +describe('calculateErrorRate', () => { + it('should return 0 if totalRequests is 0', () => { + const metrics: ModelMetrics = { + api: { totalRequests: 0, totalErrors: 0, totalLatencyMs: 0 }, + tokens: { + prompt: 0, + candidates: 0, + total: 0, + cached: 0, + thoughts: 0, + tool: 0, + }, + }; + expect(calculateErrorRate(metrics)).toBe(0); + }); + + it('should calculate the error rate correctly', () => { + const metrics: ModelMetrics = { + api: { totalRequests: 10, totalErrors: 2, totalLatencyMs: 0 }, + tokens: { + prompt: 0, + candidates: 0, + total: 0, + cached: 0, + thoughts: 0, + tool: 0, + }, + }; + expect(calculateErrorRate(metrics)).toBe(20); + }); +}); + +describe('calculateAverageLatency', () => { + it('should return 0 if totalRequests is 0', () => { + const metrics: ModelMetrics = { + api: { totalRequests: 0, totalErrors: 0, totalLatencyMs: 1000 }, + tokens: { + prompt: 0, + candidates: 0, + total: 0, + cached: 0, + thoughts: 0, + tool: 0, + }, + }; + expect(calculateAverageLatency(metrics)).toBe(0); + }); + + it('should calculate the average latency correctly', () => { + const metrics: ModelMetrics = { + 
api: { totalRequests: 10, totalErrors: 0, totalLatencyMs: 1500 }, + tokens: { + prompt: 0, + candidates: 0, + total: 0, + cached: 0, + thoughts: 0, + tool: 0, + }, + }; + expect(calculateAverageLatency(metrics)).toBe(150); + }); +}); + +describe('calculateCacheHitRate', () => { + it('should return 0 if prompt tokens is 0', () => { + const metrics: ModelMetrics = { + api: { totalRequests: 0, totalErrors: 0, totalLatencyMs: 0 }, + tokens: { + prompt: 0, + candidates: 0, + total: 0, + cached: 100, + thoughts: 0, + tool: 0, + }, + }; + expect(calculateCacheHitRate(metrics)).toBe(0); + }); + + it('should calculate the cache hit rate correctly', () => { + const metrics: ModelMetrics = { + api: { totalRequests: 0, totalErrors: 0, totalLatencyMs: 0 }, + tokens: { + prompt: 200, + candidates: 0, + total: 0, + cached: 50, + thoughts: 0, + tool: 0, + }, + }; + expect(calculateCacheHitRate(metrics)).toBe(25); + }); +}); + +describe('computeSessionStats', () => { + it('should return all zeros for initial empty metrics', () => { + const metrics: SessionMetrics = { + models: {}, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }; + + const result = computeSessionStats(metrics); + + expect(result).toEqual({ + totalApiTime: 0, + totalToolTime: 0, + agentActiveTime: 0, + apiTimePercent: 0, + toolTimePercent: 0, + cacheEfficiency: 0, + totalDecisions: 0, + successRate: 0, + agreementRate: 0, + totalPromptTokens: 0, + totalCachedTokens: 0, + }); + }); + + it('should correctly calculate API and tool time percentages', () => { + const metrics: SessionMetrics = { + models: { + 'gemini-pro': { + api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 750 }, + tokens: { + prompt: 10, + candidates: 10, + total: 20, + cached: 0, + thoughts: 0, + tool: 0, + }, + }, + }, + tools: { + totalCalls: 1, + totalSuccess: 1, + totalFail: 0, + totalDurationMs: 250, + totalDecisions: { 
accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }; + + const result = computeSessionStats(metrics); + + expect(result.totalApiTime).toBe(750); + expect(result.totalToolTime).toBe(250); + expect(result.agentActiveTime).toBe(1000); + expect(result.apiTimePercent).toBe(75); + expect(result.toolTimePercent).toBe(25); + }); + + it('should correctly calculate cache efficiency', () => { + const metrics: SessionMetrics = { + models: { + 'gemini-pro': { + api: { totalRequests: 2, totalErrors: 0, totalLatencyMs: 1000 }, + tokens: { + prompt: 150, + candidates: 10, + total: 160, + cached: 50, + thoughts: 0, + tool: 0, + }, + }, + }, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }; + + const result = computeSessionStats(metrics); + + expect(result.cacheEfficiency).toBeCloseTo(33.33); // 50 / 150 + }); + + it('should correctly calculate success and agreement rates', () => { + const metrics: SessionMetrics = { + models: {}, + tools: { + totalCalls: 10, + totalSuccess: 8, + totalFail: 2, + totalDurationMs: 1000, + totalDecisions: { accept: 6, reject: 2, modify: 2 }, + byName: {}, + }, + }; + + const result = computeSessionStats(metrics); + + expect(result.successRate).toBe(80); // 8 / 10 + expect(result.agreementRate).toBe(60); // 6 / 10 + }); + + it('should handle division by zero gracefully', () => { + const metrics: SessionMetrics = { + models: {}, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }, + }; + + const result = computeSessionStats(metrics); + + expect(result.apiTimePercent).toBe(0); + expect(result.toolTimePercent).toBe(0); + expect(result.cacheEfficiency).toBe(0); + expect(result.successRate).toBe(0); + expect(result.agreementRate).toBe(0); + }); +}); diff --git a/packages/cli/src/ui/utils/computeStats.ts 
b/packages/cli/src/ui/utils/computeStats.ts new file mode 100644 index 00000000..e0483c3b --- /dev/null +++ b/packages/cli/src/ui/utils/computeStats.ts @@ -0,0 +1,84 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + SessionMetrics, + ComputedSessionStats, + ModelMetrics, +} from '../contexts/SessionContext.js'; + +export function calculateErrorRate(metrics: ModelMetrics): number { + if (metrics.api.totalRequests === 0) { + return 0; + } + return (metrics.api.totalErrors / metrics.api.totalRequests) * 100; +} + +export function calculateAverageLatency(metrics: ModelMetrics): number { + if (metrics.api.totalRequests === 0) { + return 0; + } + return metrics.api.totalLatencyMs / metrics.api.totalRequests; +} + +export function calculateCacheHitRate(metrics: ModelMetrics): number { + if (metrics.tokens.prompt === 0) { + return 0; + } + return (metrics.tokens.cached / metrics.tokens.prompt) * 100; +} + +export const computeSessionStats = ( + metrics: SessionMetrics, +): ComputedSessionStats => { + const { models, tools } = metrics; + const totalApiTime = Object.values(models).reduce( + (acc, model) => acc + model.api.totalLatencyMs, + 0, + ); + const totalToolTime = tools.totalDurationMs; + const agentActiveTime = totalApiTime + totalToolTime; + const apiTimePercent = + agentActiveTime > 0 ? (totalApiTime / agentActiveTime) * 100 : 0; + const toolTimePercent = + agentActiveTime > 0 ? (totalToolTime / agentActiveTime) * 100 : 0; + + const totalCachedTokens = Object.values(models).reduce( + (acc, model) => acc + model.tokens.cached, + 0, + ); + const totalPromptTokens = Object.values(models).reduce( + (acc, model) => acc + model.tokens.prompt, + 0, + ); + const cacheEfficiency = + totalPromptTokens > 0 ? 
(totalCachedTokens / totalPromptTokens) * 100 : 0; + + const totalDecisions = + tools.totalDecisions.accept + + tools.totalDecisions.reject + + tools.totalDecisions.modify; + const successRate = + tools.totalCalls > 0 ? (tools.totalSuccess / tools.totalCalls) * 100 : 0; + const agreementRate = + totalDecisions > 0 + ? (tools.totalDecisions.accept / totalDecisions) * 100 + : 0; + + return { + totalApiTime, + totalToolTime, + agentActiveTime, + apiTimePercent, + toolTimePercent, + cacheEfficiency, + totalDecisions, + successRate, + agreementRate, + totalCachedTokens, + totalPromptTokens, + }; +}; diff --git a/packages/cli/src/ui/utils/displayUtils.test.ts b/packages/cli/src/ui/utils/displayUtils.test.ts new file mode 100644 index 00000000..7dd9f0e8 --- /dev/null +++ b/packages/cli/src/ui/utils/displayUtils.test.ts @@ -0,0 +1,58 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { + getStatusColor, + TOOL_SUCCESS_RATE_HIGH, + TOOL_SUCCESS_RATE_MEDIUM, + USER_AGREEMENT_RATE_HIGH, + USER_AGREEMENT_RATE_MEDIUM, + CACHE_EFFICIENCY_HIGH, + CACHE_EFFICIENCY_MEDIUM, +} from './displayUtils.js'; +import { Colors } from '../colors.js'; + +describe('displayUtils', () => { + describe('getStatusColor', () => { + const thresholds = { + green: 80, + yellow: 50, + }; + + it('should return green for values >= green threshold', () => { + expect(getStatusColor(90, thresholds)).toBe(Colors.AccentGreen); + expect(getStatusColor(80, thresholds)).toBe(Colors.AccentGreen); + }); + + it('should return yellow for values < green and >= yellow threshold', () => { + expect(getStatusColor(79, thresholds)).toBe(Colors.AccentYellow); + expect(getStatusColor(50, thresholds)).toBe(Colors.AccentYellow); + }); + + it('should return red for values < yellow threshold', () => { + expect(getStatusColor(49, thresholds)).toBe(Colors.AccentRed); + expect(getStatusColor(0, thresholds)).toBe(Colors.AccentRed); 
+ }); + + it('should return defaultColor for values < yellow threshold when provided', () => { + expect( + getStatusColor(49, thresholds, { defaultColor: Colors.Foreground }), + ).toBe(Colors.Foreground); + }); + }); + + describe('Threshold Constants', () => { + it('should have the correct values', () => { + expect(TOOL_SUCCESS_RATE_HIGH).toBe(95); + expect(TOOL_SUCCESS_RATE_MEDIUM).toBe(85); + expect(USER_AGREEMENT_RATE_HIGH).toBe(75); + expect(USER_AGREEMENT_RATE_MEDIUM).toBe(45); + expect(CACHE_EFFICIENCY_HIGH).toBe(40); + expect(CACHE_EFFICIENCY_MEDIUM).toBe(15); + }); + }); +}); diff --git a/packages/cli/src/ui/utils/displayUtils.ts b/packages/cli/src/ui/utils/displayUtils.ts new file mode 100644 index 00000000..a52c6ff0 --- /dev/null +++ b/packages/cli/src/ui/utils/displayUtils.ts @@ -0,0 +1,32 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Colors } from '../colors.js'; + +// --- Thresholds --- +export const TOOL_SUCCESS_RATE_HIGH = 95; +export const TOOL_SUCCESS_RATE_MEDIUM = 85; + +export const USER_AGREEMENT_RATE_HIGH = 75; +export const USER_AGREEMENT_RATE_MEDIUM = 45; + +export const CACHE_EFFICIENCY_HIGH = 40; +export const CACHE_EFFICIENCY_MEDIUM = 15; + +// --- Color Logic --- +export const getStatusColor = ( + value: number, + thresholds: { green: number; yellow: number }, + options: { defaultColor?: string } = {}, +) => { + if (value >= thresholds.green) { + return Colors.AccentGreen; + } + if (value >= thresholds.yellow) { + return Colors.AccentYellow; + } + return options.defaultColor || Colors.AccentRed; +}; diff --git a/packages/cli/src/ui/utils/formatters.ts b/packages/cli/src/ui/utils/formatters.ts index 82a78109..2b6af545 100644 --- a/packages/cli/src/ui/utils/formatters.ts +++ b/packages/cli/src/ui/utils/formatters.ts @@ -27,7 +27,7 @@ export const formatDuration = (milliseconds: number): string => { } if (milliseconds < 1000) { - return `${milliseconds}ms`; + return 
`${Math.round(milliseconds)}ms`; } const totalSeconds = milliseconds / 1000; diff --git a/packages/core/src/core/turn.test.ts b/packages/core/src/core/turn.test.ts index 602a0b74..bfbd6e17 100644 --- a/packages/core/src/core/turn.test.ts +++ b/packages/core/src/core/turn.test.ts @@ -10,14 +10,8 @@ import { GeminiEventType, ServerGeminiToolCallRequestEvent, ServerGeminiErrorEvent, - ServerGeminiUsageMetadataEvent, } from './turn.js'; -import { - GenerateContentResponse, - Part, - Content, - GenerateContentResponseUsageMetadata, -} from '@google/genai'; +import { GenerateContentResponse, Part, Content } from '@google/genai'; import { reportError } from '../utils/errorReporting.js'; import { GeminiChat } from './geminiChat.js'; @@ -55,24 +49,6 @@ describe('Turn', () => { }; let mockChatInstance: MockedChatInstance; - const mockMetadata1: GenerateContentResponseUsageMetadata = { - promptTokenCount: 10, - candidatesTokenCount: 20, - totalTokenCount: 30, - cachedContentTokenCount: 5, - toolUsePromptTokenCount: 2, - thoughtsTokenCount: 3, - }; - - const mockMetadata2: GenerateContentResponseUsageMetadata = { - promptTokenCount: 100, - candidatesTokenCount: 200, - totalTokenCount: 300, - cachedContentTokenCount: 50, - toolUsePromptTokenCount: 20, - thoughtsTokenCount: 30, - }; - beforeEach(() => { vi.resetAllMocks(); mockChatInstance = { @@ -245,46 +221,6 @@ describe('Turn', () => { ); }); - it('should yield the last UsageMetadata event from the stream', async () => { - const mockResponseStream = (async function* () { - yield { - candidates: [{ content: { parts: [{ text: 'First response' }] } }], - usageMetadata: mockMetadata1, - } as unknown as GenerateContentResponse; - // Add a small delay to ensure apiTimeMs is > 0 - await new Promise((resolve) => setTimeout(resolve, 10)); - yield { - functionCalls: [{ name: 'aTool' }], - usageMetadata: mockMetadata2, - } as unknown as GenerateContentResponse; - })(); - mockSendMessageStream.mockResolvedValue(mockResponseStream); - - 
const events = []; - const reqParts: Part[] = [{ text: 'Test metadata' }]; - for await (const event of turn.run( - reqParts, - new AbortController().signal, - )) { - events.push(event); - } - - // There should be a content event, a tool call, and our metadata event - expect(events.length).toBe(3); - - const metadataEvent = events[2] as ServerGeminiUsageMetadataEvent; - expect(metadataEvent.type).toBe(GeminiEventType.UsageMetadata); - - // The value should be the *last* metadata object received. - expect(metadataEvent.value).toEqual( - expect.objectContaining(mockMetadata2), - ); - expect(metadataEvent.value.apiTimeMs).toBeGreaterThan(0); - - // Also check the public getter - expect(turn.getUsageMetadata()).toEqual(mockMetadata2); - }); - it('should handle function calls with undefined name or args', async () => { const mockResponseStream = (async function* () { yield { diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts index 72a1180b..4f93247b 100644 --- a/packages/core/src/core/turn.ts +++ b/packages/core/src/core/turn.ts @@ -9,7 +9,6 @@ import { GenerateContentResponse, FunctionCall, FunctionDeclaration, - GenerateContentResponseUsageMetadata, } from '@google/genai'; import { ToolCallConfirmationDetails, @@ -48,7 +47,6 @@ export enum GeminiEventType { UserCancelled = 'user_cancelled', Error = 'error', ChatCompressed = 'chat_compressed', - UsageMetadata = 'usage_metadata', Thought = 'thought', } @@ -129,11 +127,6 @@ export type ServerGeminiChatCompressedEvent = { value: ChatCompressionInfo | null; }; -export type ServerGeminiUsageMetadataEvent = { - type: GeminiEventType.UsageMetadata; - value: GenerateContentResponseUsageMetadata & { apiTimeMs?: number }; -}; - // The original union type, now composed of the individual types export type ServerGeminiStreamEvent = | ServerGeminiContentEvent @@ -143,14 +136,12 @@ export type ServerGeminiStreamEvent = | ServerGeminiUserCancelledEvent | ServerGeminiErrorEvent | ServerGeminiChatCompressedEvent 
- | ServerGeminiUsageMetadataEvent | ServerGeminiThoughtEvent; // A turn manages the agentic loop turn within the server context. export class Turn { readonly pendingToolCalls: ToolCallRequestInfo[]; private debugResponses: GenerateContentResponse[]; - private lastUsageMetadata: GenerateContentResponseUsageMetadata | null = null; constructor(private readonly chat: GeminiChat) { this.pendingToolCalls = []; @@ -161,7 +152,6 @@ export class Turn { req: PartListUnion, signal: AbortSignal, ): AsyncGenerator { - const startTime = Date.now(); try { const responseStream = await this.chat.sendMessageStream({ message: req, @@ -213,19 +203,6 @@ export class Turn { yield event; } } - - if (resp.usageMetadata) { - this.lastUsageMetadata = - resp.usageMetadata as GenerateContentResponseUsageMetadata; - } - } - - if (this.lastUsageMetadata) { - const durationMs = Date.now() - startTime; - yield { - type: GeminiEventType.UsageMetadata, - value: { ...this.lastUsageMetadata, apiTimeMs: durationMs }, - }; } } catch (e) { const error = toFriendlyError(e); @@ -286,8 +263,4 @@ export class Turn { getDebugResponses(): GenerateContentResponse[] { return this.debugResponses; } - - getUsageMetadata(): GenerateContentResponseUsageMetadata | null { - return this.lastUsageMetadata; - } } diff --git a/packages/core/src/telemetry/index.ts b/packages/core/src/telemetry/index.ts index 138c8486..a17c8af3 100644 --- a/packages/core/src/telemetry/index.ts +++ b/packages/core/src/telemetry/index.ts @@ -38,3 +38,4 @@ export { } from './types.js'; export { SpanStatusCode, ValueType } from '@opentelemetry/api'; export { SemanticAttributes } from '@opentelemetry/semantic-conventions'; +export * from './uiTelemetry.js'; diff --git a/packages/core/src/telemetry/loggers.test.ts b/packages/core/src/telemetry/loggers.test.ts index 2d7835bf..5b922333 100644 --- a/packages/core/src/telemetry/loggers.test.ts +++ b/packages/core/src/telemetry/loggers.test.ts @@ -43,15 +43,22 @@ import * as metrics from 
'./metrics.js'; import * as sdk from './sdk.js'; import { vi, describe, beforeEach, it, expect } from 'vitest'; import { GenerateContentResponseUsageMetadata } from '@google/genai'; +import * as uiTelemetry from './uiTelemetry.js'; describe('loggers', () => { const mockLogger = { emit: vi.fn(), }; + const mockUiEvent = { + addEvent: vi.fn(), + }; beforeEach(() => { vi.spyOn(sdk, 'isTelemetrySdkInitialized').mockReturnValue(true); vi.spyOn(logs, 'getLogger').mockReturnValue(mockLogger); + vi.spyOn(uiTelemetry.uiTelemetryService, 'addEvent').mockImplementation( + mockUiEvent.addEvent, + ); vi.useFakeTimers(); vi.setSystemTime(new Date('2025-01-01T00:00:00.000Z')); }); @@ -215,6 +222,7 @@ describe('loggers', () => { cached_content_token_count: 10, thoughts_token_count: 5, tool_token_count: 2, + total_token_count: 0, response_text: 'test-response', }, }); @@ -233,6 +241,12 @@ describe('loggers', () => { 50, 'output', ); + + expect(mockUiEvent.addEvent).toHaveBeenCalledWith({ + ...event, + 'event.name': EVENT_API_RESPONSE, + 'event.timestamp': '2025-01-01T00:00:00.000Z', + }); }); it('should log an API response with an error', () => { @@ -263,6 +277,12 @@ describe('loggers', () => { 'error.message': 'test-error', }, }); + + expect(mockUiEvent.addEvent).toHaveBeenCalledWith({ + ...event, + 'event.name': EVENT_API_RESPONSE, + 'event.timestamp': '2025-01-01T00:00:00.000Z', + }); }); }); @@ -417,6 +437,12 @@ describe('loggers', () => { true, ToolCallDecision.ACCEPT, ); + + expect(mockUiEvent.addEvent).toHaveBeenCalledWith({ + ...event, + 'event.name': EVENT_TOOL_CALL, + 'event.timestamp': '2025-01-01T00:00:00.000Z', + }); }); it('should log a tool call with a reject decision', () => { const call: ErroredToolCall = { @@ -471,6 +497,12 @@ describe('loggers', () => { false, ToolCallDecision.REJECT, ); + + expect(mockUiEvent.addEvent).toHaveBeenCalledWith({ + ...event, + 'event.name': EVENT_TOOL_CALL, + 'event.timestamp': '2025-01-01T00:00:00.000Z', + }); }); it('should log a 
tool call with a modify decision', () => { @@ -527,6 +559,12 @@ describe('loggers', () => { true, ToolCallDecision.MODIFY, ); + + expect(mockUiEvent.addEvent).toHaveBeenCalledWith({ + ...event, + 'event.name': EVENT_TOOL_CALL, + 'event.timestamp': '2025-01-01T00:00:00.000Z', + }); }); it('should log a tool call without a decision', () => { @@ -581,6 +619,12 @@ describe('loggers', () => { true, undefined, ); + + expect(mockUiEvent.addEvent).toHaveBeenCalledWith({ + ...event, + 'event.name': EVENT_TOOL_CALL, + 'event.timestamp': '2025-01-01T00:00:00.000Z', + }); }); it('should log a failed tool call with an error', () => { @@ -641,6 +685,12 @@ describe('loggers', () => { false, undefined, ); + + expect(mockUiEvent.addEvent).toHaveBeenCalledWith({ + ...event, + 'event.name': EVENT_TOOL_CALL, + 'event.timestamp': '2025-01-01T00:00:00.000Z', + }); }); }); }); diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts index 054386b8..a7231e2f 100644 --- a/packages/core/src/telemetry/loggers.ts +++ b/packages/core/src/telemetry/loggers.ts @@ -31,6 +31,7 @@ import { recordToolCallMetrics, } from './metrics.js'; import { isTelemetrySdkInitialized } from './sdk.js'; +import { uiTelemetryService, UiEvent } from './uiTelemetry.js'; import { ClearcutLogger } from './clearcut-logger/clearcut-logger.js'; const shouldLogUserPrompts = (config: Config): boolean => @@ -98,6 +99,12 @@ export function logUserPrompt(config: Config, event: UserPromptEvent): void { } export function logToolCall(config: Config, event: ToolCallEvent): void { + const uiEvent = { + ...event, + 'event.name': EVENT_TOOL_CALL, + 'event.timestamp': new Date().toISOString(), + } as UiEvent; + uiTelemetryService.addEvent(uiEvent); ClearcutLogger.getInstance(config)?.logToolCallEvent(event); if (!isTelemetrySdkInitialized()) return; @@ -150,6 +157,12 @@ export function logApiRequest(config: Config, event: ApiRequestEvent): void { } export function logApiError(config: Config, event: 
ApiErrorEvent): void { + const uiEvent = { + ...event, + 'event.name': EVENT_API_ERROR, + 'event.timestamp': new Date().toISOString(), + } as UiEvent; + uiTelemetryService.addEvent(uiEvent); ClearcutLogger.getInstance(config)?.logApiErrorEvent(event); if (!isTelemetrySdkInitialized()) return; @@ -186,6 +199,12 @@ export function logApiError(config: Config, event: ApiErrorEvent): void { } export function logApiResponse(config: Config, event: ApiResponseEvent): void { + const uiEvent = { + ...event, + 'event.name': EVENT_API_RESPONSE, + 'event.timestamp': new Date().toISOString(), + } as UiEvent; + uiTelemetryService.addEvent(uiEvent); ClearcutLogger.getInstance(config)?.logApiResponseEvent(event); if (!isTelemetrySdkInitialized()) return; const attributes: LogAttributes = { diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts index f70daa78..9883111a 100644 --- a/packages/core/src/telemetry/types.ts +++ b/packages/core/src/telemetry/types.ts @@ -183,6 +183,7 @@ export class ApiResponseEvent { cached_content_token_count: number; thoughts_token_count: number; tool_token_count: number; + total_token_count: number; response_text?: string; constructor( @@ -202,6 +203,7 @@ export class ApiResponseEvent { this.cached_content_token_count = usage_data?.cachedContentTokenCount ?? 0; this.thoughts_token_count = usage_data?.thoughtsTokenCount ?? 0; this.tool_token_count = usage_data?.toolUsePromptTokenCount ?? 0; + this.total_token_count = usage_data?.totalTokenCount ?? 
0; this.response_text = response_text; this.error = error; } diff --git a/packages/core/src/telemetry/uiTelemetry.test.ts b/packages/core/src/telemetry/uiTelemetry.test.ts new file mode 100644 index 00000000..9643ed97 --- /dev/null +++ b/packages/core/src/telemetry/uiTelemetry.test.ts @@ -0,0 +1,510 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { UiTelemetryService } from './uiTelemetry.js'; +import { + ApiErrorEvent, + ApiResponseEvent, + ToolCallEvent, + ToolCallDecision, +} from './types.js'; +import { + EVENT_API_ERROR, + EVENT_API_RESPONSE, + EVENT_TOOL_CALL, +} from './constants.js'; +import { + CompletedToolCall, + ErroredToolCall, + SuccessfulToolCall, +} from '../core/coreToolScheduler.js'; +import { Tool, ToolConfirmationOutcome } from '../tools/tools.js'; + +const createFakeCompletedToolCall = ( + name: string, + success: boolean, + duration = 100, + outcome?: ToolConfirmationOutcome, + error?: Error, +): CompletedToolCall => { + const request = { + callId: `call_${name}_${Date.now()}`, + name, + args: { foo: 'bar' }, + isClientInitiated: false, + }; + + if (success) { + return { + status: 'success', + request, + tool: { name } as Tool, // Mock tool + response: { + callId: request.callId, + responseParts: { + functionResponse: { + id: request.callId, + name, + response: { output: 'Success!' 
}, + }, + }, + error: undefined, + resultDisplay: 'Success!', + }, + durationMs: duration, + outcome, + } as SuccessfulToolCall; + } else { + return { + status: 'error', + request, + response: { + callId: request.callId, + responseParts: { + functionResponse: { + id: request.callId, + name, + response: { error: 'Tool failed' }, + }, + }, + error: error || new Error('Tool failed'), + resultDisplay: 'Failure!', + }, + durationMs: duration, + outcome, + } as ErroredToolCall; + } +}; + +describe('UiTelemetryService', () => { + let service: UiTelemetryService; + + beforeEach(() => { + service = new UiTelemetryService(); + }); + + it('should have correct initial metrics', () => { + const metrics = service.getMetrics(); + expect(metrics).toEqual({ + models: {}, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { + [ToolCallDecision.ACCEPT]: 0, + [ToolCallDecision.REJECT]: 0, + [ToolCallDecision.MODIFY]: 0, + }, + byName: {}, + }, + }); + expect(service.getLastPromptTokenCount()).toBe(0); + }); + + it('should emit an update event when an event is added', () => { + const spy = vi.fn(); + service.on('update', spy); + + const event = { + 'event.name': EVENT_API_RESPONSE, + model: 'gemini-2.5-pro', + duration_ms: 500, + input_token_count: 10, + output_token_count: 20, + total_token_count: 30, + cached_content_token_count: 5, + thoughts_token_count: 2, + tool_token_count: 3, + } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE }; + + service.addEvent(event); + + expect(spy).toHaveBeenCalledOnce(); + const { metrics, lastPromptTokenCount } = spy.mock.calls[0][0]; + expect(metrics).toBeDefined(); + expect(lastPromptTokenCount).toBe(10); + }); + + describe('API Response Event Processing', () => { + it('should process a single ApiResponseEvent', () => { + const event = { + 'event.name': EVENT_API_RESPONSE, + model: 'gemini-2.5-pro', + duration_ms: 500, + input_token_count: 10, + output_token_count: 20, + 
total_token_count: 30, + cached_content_token_count: 5, + thoughts_token_count: 2, + tool_token_count: 3, + } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE }; + + service.addEvent(event); + + const metrics = service.getMetrics(); + expect(metrics.models['gemini-2.5-pro']).toEqual({ + api: { + totalRequests: 1, + totalErrors: 0, + totalLatencyMs: 500, + }, + tokens: { + prompt: 10, + candidates: 20, + total: 30, + cached: 5, + thoughts: 2, + tool: 3, + }, + }); + expect(service.getLastPromptTokenCount()).toBe(10); + }); + + it('should aggregate multiple ApiResponseEvents for the same model', () => { + const event1 = { + 'event.name': EVENT_API_RESPONSE, + model: 'gemini-2.5-pro', + duration_ms: 500, + input_token_count: 10, + output_token_count: 20, + total_token_count: 30, + cached_content_token_count: 5, + thoughts_token_count: 2, + tool_token_count: 3, + } as ApiResponseEvent & { + 'event.name': typeof EVENT_API_RESPONSE; + }; + const event2 = { + 'event.name': EVENT_API_RESPONSE, + model: 'gemini-2.5-pro', + duration_ms: 600, + input_token_count: 15, + output_token_count: 25, + total_token_count: 40, + cached_content_token_count: 10, + thoughts_token_count: 4, + tool_token_count: 6, + } as ApiResponseEvent & { + 'event.name': typeof EVENT_API_RESPONSE; + }; + + service.addEvent(event1); + service.addEvent(event2); + + const metrics = service.getMetrics(); + expect(metrics.models['gemini-2.5-pro']).toEqual({ + api: { + totalRequests: 2, + totalErrors: 0, + totalLatencyMs: 1100, + }, + tokens: { + prompt: 25, + candidates: 45, + total: 70, + cached: 15, + thoughts: 6, + tool: 9, + }, + }); + expect(service.getLastPromptTokenCount()).toBe(15); + }); + + it('should handle ApiResponseEvents for different models', () => { + const event1 = { + 'event.name': EVENT_API_RESPONSE, + model: 'gemini-2.5-pro', + duration_ms: 500, + input_token_count: 10, + output_token_count: 20, + total_token_count: 30, + cached_content_token_count: 5, + 
thoughts_token_count: 2, + tool_token_count: 3, + } as ApiResponseEvent & { + 'event.name': typeof EVENT_API_RESPONSE; + }; + const event2 = { + 'event.name': EVENT_API_RESPONSE, + model: 'gemini-2.5-flash', + duration_ms: 1000, + input_token_count: 100, + output_token_count: 200, + total_token_count: 300, + cached_content_token_count: 50, + thoughts_token_count: 20, + tool_token_count: 30, + } as ApiResponseEvent & { + 'event.name': typeof EVENT_API_RESPONSE; + }; + + service.addEvent(event1); + service.addEvent(event2); + + const metrics = service.getMetrics(); + expect(metrics.models['gemini-2.5-pro']).toBeDefined(); + expect(metrics.models['gemini-2.5-flash']).toBeDefined(); + expect(metrics.models['gemini-2.5-pro'].api.totalRequests).toBe(1); + expect(metrics.models['gemini-2.5-flash'].api.totalRequests).toBe(1); + expect(service.getLastPromptTokenCount()).toBe(100); + }); + }); + + describe('API Error Event Processing', () => { + it('should process a single ApiErrorEvent', () => { + const event = { + 'event.name': EVENT_API_ERROR, + model: 'gemini-2.5-pro', + duration_ms: 300, + error: 'Something went wrong', + } as ApiErrorEvent & { 'event.name': typeof EVENT_API_ERROR }; + + service.addEvent(event); + + const metrics = service.getMetrics(); + expect(metrics.models['gemini-2.5-pro']).toEqual({ + api: { + totalRequests: 1, + totalErrors: 1, + totalLatencyMs: 300, + }, + tokens: { + prompt: 0, + candidates: 0, + total: 0, + cached: 0, + thoughts: 0, + tool: 0, + }, + }); + }); + + it('should aggregate ApiErrorEvents and ApiResponseEvents', () => { + const responseEvent = { + 'event.name': EVENT_API_RESPONSE, + model: 'gemini-2.5-pro', + duration_ms: 500, + input_token_count: 10, + output_token_count: 20, + total_token_count: 30, + cached_content_token_count: 5, + thoughts_token_count: 2, + tool_token_count: 3, + } as ApiResponseEvent & { + 'event.name': typeof EVENT_API_RESPONSE; + }; + const errorEvent = { + 'event.name': EVENT_API_ERROR, + model: 
'gemini-2.5-pro', + duration_ms: 300, + error: 'Something went wrong', + } as ApiErrorEvent & { 'event.name': typeof EVENT_API_ERROR }; + + service.addEvent(responseEvent); + service.addEvent(errorEvent); + + const metrics = service.getMetrics(); + expect(metrics.models['gemini-2.5-pro']).toEqual({ + api: { + totalRequests: 2, + totalErrors: 1, + totalLatencyMs: 800, + }, + tokens: { + prompt: 10, + candidates: 20, + total: 30, + cached: 5, + thoughts: 2, + tool: 3, + }, + }); + }); + }); + + describe('Tool Call Event Processing', () => { + it('should process a single successful ToolCallEvent', () => { + const toolCall = createFakeCompletedToolCall( + 'test_tool', + true, + 150, + ToolConfirmationOutcome.ProceedOnce, + ); + service.addEvent({ + ...JSON.parse(JSON.stringify(new ToolCallEvent(toolCall))), + 'event.name': EVENT_TOOL_CALL, + }); + + const metrics = service.getMetrics(); + const { tools } = metrics; + + expect(tools.totalCalls).toBe(1); + expect(tools.totalSuccess).toBe(1); + expect(tools.totalFail).toBe(0); + expect(tools.totalDurationMs).toBe(150); + expect(tools.totalDecisions[ToolCallDecision.ACCEPT]).toBe(1); + expect(tools.byName['test_tool']).toEqual({ + count: 1, + success: 1, + fail: 0, + durationMs: 150, + decisions: { + [ToolCallDecision.ACCEPT]: 1, + [ToolCallDecision.REJECT]: 0, + [ToolCallDecision.MODIFY]: 0, + }, + }); + }); + + it('should process a single failed ToolCallEvent', () => { + const toolCall = createFakeCompletedToolCall( + 'test_tool', + false, + 200, + ToolConfirmationOutcome.Cancel, + ); + service.addEvent({ + ...JSON.parse(JSON.stringify(new ToolCallEvent(toolCall))), + 'event.name': EVENT_TOOL_CALL, + }); + + const metrics = service.getMetrics(); + const { tools } = metrics; + + expect(tools.totalCalls).toBe(1); + expect(tools.totalSuccess).toBe(0); + expect(tools.totalFail).toBe(1); + expect(tools.totalDurationMs).toBe(200); + expect(tools.totalDecisions[ToolCallDecision.REJECT]).toBe(1); + 
expect(tools.byName['test_tool']).toEqual({ + count: 1, + success: 0, + fail: 1, + durationMs: 200, + decisions: { + [ToolCallDecision.ACCEPT]: 0, + [ToolCallDecision.REJECT]: 1, + [ToolCallDecision.MODIFY]: 0, + }, + }); + }); + + it('should process a ToolCallEvent with modify decision', () => { + const toolCall = createFakeCompletedToolCall( + 'test_tool', + true, + 250, + ToolConfirmationOutcome.ModifyWithEditor, + ); + service.addEvent({ + ...JSON.parse(JSON.stringify(new ToolCallEvent(toolCall))), + 'event.name': EVENT_TOOL_CALL, + }); + + const metrics = service.getMetrics(); + const { tools } = metrics; + + expect(tools.totalDecisions[ToolCallDecision.MODIFY]).toBe(1); + expect(tools.byName['test_tool'].decisions[ToolCallDecision.MODIFY]).toBe( + 1, + ); + }); + + it('should process a ToolCallEvent without a decision', () => { + const toolCall = createFakeCompletedToolCall('test_tool', true, 100); + service.addEvent({ + ...JSON.parse(JSON.stringify(new ToolCallEvent(toolCall))), + 'event.name': EVENT_TOOL_CALL, + }); + + const metrics = service.getMetrics(); + const { tools } = metrics; + + expect(tools.totalDecisions).toEqual({ + [ToolCallDecision.ACCEPT]: 0, + [ToolCallDecision.REJECT]: 0, + [ToolCallDecision.MODIFY]: 0, + }); + expect(tools.byName['test_tool'].decisions).toEqual({ + [ToolCallDecision.ACCEPT]: 0, + [ToolCallDecision.REJECT]: 0, + [ToolCallDecision.MODIFY]: 0, + }); + }); + + it('should aggregate multiple ToolCallEvents for the same tool', () => { + const toolCall1 = createFakeCompletedToolCall( + 'test_tool', + true, + 100, + ToolConfirmationOutcome.ProceedOnce, + ); + const toolCall2 = createFakeCompletedToolCall( + 'test_tool', + false, + 150, + ToolConfirmationOutcome.Cancel, + ); + + service.addEvent({ + ...JSON.parse(JSON.stringify(new ToolCallEvent(toolCall1))), + 'event.name': EVENT_TOOL_CALL, + }); + service.addEvent({ + ...JSON.parse(JSON.stringify(new ToolCallEvent(toolCall2))), + 'event.name': EVENT_TOOL_CALL, + }); + + const 
metrics = service.getMetrics(); + const { tools } = metrics; + + expect(tools.totalCalls).toBe(2); + expect(tools.totalSuccess).toBe(1); + expect(tools.totalFail).toBe(1); + expect(tools.totalDurationMs).toBe(250); + expect(tools.totalDecisions[ToolCallDecision.ACCEPT]).toBe(1); + expect(tools.totalDecisions[ToolCallDecision.REJECT]).toBe(1); + expect(tools.byName['test_tool']).toEqual({ + count: 2, + success: 1, + fail: 1, + durationMs: 250, + decisions: { + [ToolCallDecision.ACCEPT]: 1, + [ToolCallDecision.REJECT]: 1, + [ToolCallDecision.MODIFY]: 0, + }, + }); + }); + + it('should handle ToolCallEvents for different tools', () => { + const toolCall1 = createFakeCompletedToolCall('tool_A', true, 100); + const toolCall2 = createFakeCompletedToolCall('tool_B', false, 200); + service.addEvent({ + ...JSON.parse(JSON.stringify(new ToolCallEvent(toolCall1))), + 'event.name': EVENT_TOOL_CALL, + }); + service.addEvent({ + ...JSON.parse(JSON.stringify(new ToolCallEvent(toolCall2))), + 'event.name': EVENT_TOOL_CALL, + }); + + const metrics = service.getMetrics(); + const { tools } = metrics; + + expect(tools.totalCalls).toBe(2); + expect(tools.totalSuccess).toBe(1); + expect(tools.totalFail).toBe(1); + expect(tools.byName['tool_A']).toBeDefined(); + expect(tools.byName['tool_B']).toBeDefined(); + expect(tools.byName['tool_A'].count).toBe(1); + expect(tools.byName['tool_B'].count).toBe(1); + }); + }); +}); diff --git a/packages/core/src/telemetry/uiTelemetry.ts b/packages/core/src/telemetry/uiTelemetry.ts new file mode 100644 index 00000000..71409696 --- /dev/null +++ b/packages/core/src/telemetry/uiTelemetry.ts @@ -0,0 +1,207 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { EventEmitter } from 'events'; +import { + EVENT_API_ERROR, + EVENT_API_RESPONSE, + EVENT_TOOL_CALL, +} from './constants.js'; + +import { + ApiErrorEvent, + ApiResponseEvent, + ToolCallEvent, + ToolCallDecision, +} from './types.js'; + +export 
type UiEvent =
  // Telemetry events the UI aggregates. Every member carries an 'event.name'
  // tag so that UiTelemetryService.addEvent() can switch on it.
  | (ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE })
  | (ApiErrorEvent & { 'event.name': typeof EVENT_API_ERROR })
  | (ToolCallEvent & { 'event.name': typeof EVENT_TOOL_CALL });

/** Counters kept for a single tool, keyed by tool name in SessionMetrics. */
export interface ToolCallStats {
  count: number;
  success: number;
  fail: number;
  durationMs: number;
  decisions: {
    [ToolCallDecision.ACCEPT]: number;
    [ToolCallDecision.REJECT]: number;
    [ToolCallDecision.MODIFY]: number;
  };
}

/** Request and token counters kept for a single model. */
export interface ModelMetrics {
  api: {
    totalRequests: number;
    totalErrors: number;
    totalLatencyMs: number;
  };
  tokens: {
    prompt: number;
    candidates: number;
    total: number;
    cached: number;
    thoughts: number;
    tool: number;
  };
}

/** Everything the service accumulates: per-model and per-tool counters. */
export interface SessionMetrics {
  /** Model counters, keyed by the `model` field of API events. */
  models: Record<string, ModelMetrics>;
  tools: {
    totalCalls: number;
    totalSuccess: number;
    totalFail: number;
    totalDurationMs: number;
    totalDecisions: {
      [ToolCallDecision.ACCEPT]: number;
      [ToolCallDecision.REJECT]: number;
      [ToolCallDecision.MODIFY]: number;
    };
    /** Tool counters, keyed by the `function_name` field of tool-call events. */
    byName: Record<string, ToolCallStats>;
  };
}

/** Returns a fresh, zeroed decision-counter object (never shared). */
const createInitialDecisions = (): ToolCallStats['decisions'] => ({
  [ToolCallDecision.ACCEPT]: 0,
  [ToolCallDecision.REJECT]: 0,
  [ToolCallDecision.MODIFY]: 0,
});

/** Returns zeroed counters for a tool that has not been seen before. */
const createInitialToolCallStats = (): ToolCallStats => ({
  count: 0,
  success: 0,
  fail: 0,
  durationMs: 0,
  decisions: createInitialDecisions(),
});

/** Returns zeroed counters for a model that has not been seen before. */
const createInitialModelMetrics = (): ModelMetrics => ({
  api: {
    totalRequests: 0,
    totalErrors: 0,
    totalLatencyMs: 0,
  },
  tokens: {
    prompt: 0,
    candidates: 0,
    total: 0,
    cached: 0,
    thoughts: 0,
    tool: 0,
  },
});

/** Returns the empty metrics a new UiTelemetryService starts with. */
const createInitialMetrics = (): SessionMetrics => ({
  models: {},
  tools: {
    totalCalls: 0,
    totalSuccess: 0,
    totalFail: 0,
    totalDurationMs: 0,
    totalDecisions: createInitialDecisions(),
    byName: {},
  },
});

/**
 * Aggregates API-response, API-error and tool-call events into running
 * metrics and emits an 'update' event after each processed event.
 *
 * The 'update' payload is `{ metrics, lastPromptTokenCount }`. `metrics` is
 * the service's own object, mutated in place on every event, not a copy.
 */
export class UiTelemetryService extends EventEmitter {
  #metrics: SessionMetrics = createInitialMetrics();
  #lastPromptTokenCount = 0;

  /**
   * Folds one event into the metrics and notifies 'update' listeners.
   *
   * @param event Event tagged with one of the supported 'event.name' values.
   */
  addEvent(event: UiEvent) {
    switch (event['event.name']) {
      case EVENT_API_RESPONSE:
        this.processApiResponse(event);
        break;
      case EVENT_API_ERROR:
        this.processApiError(event);
        break;
      case EVENT_TOOL_CALL:
        this.processToolCall(event);
        break;
      default:
        // We should not emit update for any other event metric.
        return;
    }

    this.emit('update', {
      metrics: this.#metrics,
      lastPromptTokenCount: this.#lastPromptTokenCount,
    });
  }

  /** @returns The live metrics object (the same reference on every call). */
  getMetrics(): SessionMetrics {
    return this.#metrics;
  }

  /**
   * @returns `input_token_count` of the most recently processed API response
   *   event, or 0 if none has been processed yet.
   */
  getLastPromptTokenCount(): number {
    return this.#lastPromptTokenCount;
  }

  /** Returns the counters for `modelName`, creating zeroed ones on first use. */
  private getOrCreateModelMetrics(modelName: string): ModelMetrics {
    if (!this.#metrics.models[modelName]) {
      this.#metrics.models[modelName] = createInitialModelMetrics();
    }
    return this.#metrics.models[modelName];
  }

  /** Adds a response's latency and token counts to its model's totals. */
  private processApiResponse(event: ApiResponseEvent) {
    const modelMetrics = this.getOrCreateModelMetrics(event.model);

    modelMetrics.api.totalRequests++;
    modelMetrics.api.totalLatencyMs += event.duration_ms;

    modelMetrics.tokens.prompt += event.input_token_count;
    modelMetrics.tokens.candidates += event.output_token_count;
    modelMetrics.tokens.total += event.total_token_count;
    modelMetrics.tokens.cached += event.cached_content_token_count;
    modelMetrics.tokens.thoughts += event.thoughts_token_count;
    modelMetrics.tokens.tool += event.tool_token_count;

    // Overwritten rather than summed: only the latest response's prompt size
    // is kept here.
    this.#lastPromptTokenCount = event.input_token_count;
  }

  /**
   * Records a failed request. Errors count towards totalRequests and
   * totalLatencyMs as well as totalErrors; token counters are left untouched.
   */
  private processApiError(event: ApiErrorEvent) {
    const modelMetrics = this.getOrCreateModelMetrics(event.model);
    modelMetrics.api.totalRequests++;
    modelMetrics.api.totalErrors++;
    modelMetrics.api.totalLatencyMs += event.duration_ms;
  }

  /** Updates both the session-wide and the per-tool counters for one call. */
  private processToolCall(event: ToolCallEvent) {
    const { tools } = this.#metrics;

    if (!tools.byName[event.function_name]) {
      tools.byName[event.function_name] = createInitialToolCallStats();
    }
    const toolStats = tools.byName[event.function_name];

    tools.totalCalls++;
    toolStats.count++;

    tools.totalDurationMs += event.duration_ms;
    toolStats.durationMs += event.duration_ms;

    if (event.success) {
      tools.totalSuccess++;
      toolStats.success++;
    } else {
      tools.totalFail++;
      toolStats.fail++;
    }

    // A call without a decision leaves the decision counters unchanged.
    if (event.decision) {
      tools.totalDecisions[event.decision]++;
      toolStats.decisions[event.decision]++;
    }
  }
}

/** Shared module-level instance. */
export const uiTelemetryService = new UiTelemetryService();