diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx index feb132ae..833cc2b5 100644 --- a/packages/cli/src/ui/App.tsx +++ b/packages/cli/src/ui/App.tsx @@ -67,6 +67,10 @@ import { useBracketedPaste } from './hooks/useBracketedPaste.js'; import { useTextBuffer } from './components/shared/text-buffer.js'; import * as fs from 'fs'; import { UpdateNotification } from './components/UpdateNotification.js'; +import { + isProQuotaExceededError, + isGenericQuotaExceededError, +} from '@google/gemini-cli-core'; import { checkForUpdates } from './utils/updateCheck.js'; import ansiEscapes from 'ansi-escapes'; import { OverflowProvider } from './contexts/OverflowContext.js'; @@ -243,15 +247,34 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { const flashFallbackHandler = async ( currentModel: string, fallbackModel: string, + error?: unknown, ): Promise => { + let message: string; + + // Check if this is a Pro quota exceeded error + if (error && isProQuotaExceededError(error)) { + message = `⚡ You have reached your daily ${currentModel} quota limit. +⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session. +⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist +⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key +⚡ You can switch authentication methods by typing /auth`; + } else if (error && isGenericQuotaExceededError(error)) { + message = `⚡ You have reached your daily quota limit. +⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session. +⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist +⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key +⚡ You can switch authentication methods by typing /auth`; + } else { + // Default fallback message for other cases (like consecutive 429s) + message = `⚡ Slow response times detected. +⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`; + } + // Add message to UI history addItem( { type: MessageType.INFO, - text: `⚡ Slow response times detected. Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session. -⚡ To avoid this you can either upgrade to Standard tier. See: https://goo.gle/set-up-gemini-code-assist -⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key -⚡ You can switch authentication methods by typing /auth`, + text: message, }, Date.now(), ); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 3a002919..fc6f93c5 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -1097,6 +1097,7 @@ describe('useGeminiStream', () => { getContentGeneratorConfig: vi.fn(() => ({ authType: mockAuthType, })), + getModel: vi.fn(() => 'gemini-2.5-pro'), } as unknown as Config; const { result } = renderHook(() => @@ -1125,6 +1126,9 @@ describe('useGeminiStream', () => { expect(mockParseAndFormatApiError).toHaveBeenCalledWith( 'Rate limit exceeded', mockAuthType, + undefined, + 'gemini-2.5-pro', + 'gemini-2.5-flash', ); }); }); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index b4acdb9a..550cab86 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -24,6 +24,7 @@ import { ThoughtSummary, UnauthorizedError, UserPromptEvent, + DEFAULT_GEMINI_FLASH_MODEL, } from '@google/gemini-cli-core'; import { type Part, type PartListUnion } from '@google/genai'; import { @@ -397,6 +398,9 @@ export const useGeminiStream = ( text: parseAndFormatApiError( eventValue.error, config.getContentGeneratorConfig().authType, + undefined, + config.getModel(), + DEFAULT_GEMINI_FLASH_MODEL, ), }, userMessageTimestamp, @@ -533,6 +537,9 @@ export const useGeminiStream = ( text: parseAndFormatApiError( getErrorMessage(error) || 'Unknown error', config.getContentGeneratorConfig().authType, + undefined, + config.getModel(), + DEFAULT_GEMINI_FLASH_MODEL, ), }, userMessageTimestamp, diff --git a/packages/cli/src/ui/utils/errorParsing.test.ts b/packages/cli/src/ui/utils/errorParsing.test.ts index 4bbaabf1..3d228efb 100644 --- a/packages/cli/src/ui/utils/errorParsing.test.ts +++ b/packages/cli/src/ui/utils/errorParsing.test.ts @@ -6,10 +6,16 @@ import { describe, it, expect } from 'vitest'; import { parseAndFormatApiError } from './errorParsing.js'; -import { AuthType, StructuredError } from '@google/gemini-cli-core'; +import { + AuthType, + UserTierId, + DEFAULT_GEMINI_FLASH_MODEL, + isProQuotaExceededError, +} from '@google/gemini-cli-core'; describe('parseAndFormatApiError', () => { - const enterpriseMessage = 'upgrade to a plan with higher limits'; + const _enterpriseMessage = + 'upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits'; const vertexMessage = 'request a quota increase through Vertex'; const geminiMessage = 'request a quota increase through AI Studio'; @@ -24,9 +30,17 @@ describe('parseAndFormatApiError', () => { it('should format a 429 API error with the default message', () => { const errorMessage = 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; - const result = parseAndFormatApiError(errorMessage); + const result = parseAndFormatApiError( + errorMessage, + undefined, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); expect(result).toContain('[API Error: Rate limit exceeded'); - expect(result).toContain('Your request has been rate limited'); + expect(result).toContain( + 'Slow response times detected. Switching to the gemini-2.5-flash model', + ); }); it('should format a 429 API error with the personal message', () => { @@ -35,9 +49,14 @@ describe('parseAndFormatApiError', () => { const result = parseAndFormatApiError( errorMessage, AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, ); expect(result).toContain('[API Error: Rate limit exceeded'); - expect(result).toContain(enterpriseMessage); + expect(result).toContain( + 'Slow response times detected. Switching to the gemini-2.5-flash model', + ); }); it('should format a 429 API error with the vertex message', () => { @@ -116,4 +135,284 @@ describe('parseAndFormatApiError', () => { const expected = '[API Error: An unknown error occurred.]'; expect(parseAndFormatApiError(error)).toBe(expected); }); + + it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", + ); + expect(result).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + expect(result).toContain( + 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + ); + }); + + it('should format a regular 429 API error with standard message for Google auth', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain('[API Error: Rate limit exceeded'); + expect(result).toContain( + 'Slow response times detected. Switching to the gemini-2.5-flash model', + ); + expect(result).not.toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + }); + + it('should format a 429 API error with generic quota exceeded message for Google auth', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'GenerationRequests'", + ); + expect(result).toContain('You have reached your daily quota limit'); + expect(result).not.toContain( + 'You have reached your daily Gemini 2.5 Pro quota limit', + ); + }); + + it('should prioritize Pro quota message over generic quota message for Google auth', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", + ); + expect(result).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + expect(result).not.toContain('You have reached your daily quota limit'); + }); + + it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + UserTierId.STANDARD, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", + ); + expect(result).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + expect(result).toContain( + 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', + ); + expect(result).not.toContain( + 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + ); + }); + + it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + UserTierId.LEGACY, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'", + ); + expect(result).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', + ); + expect(result).toContain( + 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', + ); + expect(result).not.toContain( + 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + ); + }); + + it('should handle different Gemini version strings in Pro quota exceeded errors', () => { + const errorMessage15 = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const errorMessagePreview = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const errorMessageBeta = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const errorMessageExperimental = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + + const result15 = parseAndFormatApiError( + errorMessage15, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-1.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + const resultPreview = parseAndFormatApiError( + errorMessagePreview, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-2.5-preview-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + const resultBeta = parseAndFormatApiError( + errorMessageBeta, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-beta-3.0-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + const resultExperimental = parseAndFormatApiError( + errorMessageExperimental, + AuthType.LOGIN_WITH_GOOGLE, + undefined, + 'gemini-experimental-v2-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + + expect(result15).toContain( + 'You have reached your daily gemini-1.5-pro quota limit', + ); + expect(resultPreview).toContain( + 'You have reached your daily gemini-2.5-preview-pro quota limit', + ); + expect(resultBeta).toContain( + 'You have reached your daily gemini-beta-3.0-pro quota limit', + ); + expect(resultExperimental).toContain( + 'You have reached your daily gemini-experimental-v2-pro quota limit', + ); + expect(result15).toContain( + 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + ); + expect(resultPreview).toContain( + 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + ); + expect(resultBeta).toContain( + 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + ); + expect(resultExperimental).toContain( + 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + ); + }); + + it('should not match non-Pro models with similar version strings', () => { + // Test that Flash models with similar version strings don't match + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit", + ), + ).toBe(false); + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit", + ), + ).toBe(false); + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit", + ), + ).toBe(false); + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit", + ), + ).toBe(false); + + // Test other model types + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit", + ), + ).toBe(false); + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit", + ), + ).toBe(false); + + // Test generic quota messages + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'GenerationRequests' and limit", + ), + ).toBe(false); + expect( + isProQuotaExceededError( + "Quota exceeded for quota metric 'EmbeddingRequests' and limit", + ), + ).toBe(false); + }); + + it('should format a generic quota exceeded message for Google auth (Standard tier)', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + UserTierId.STANDARD, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain( + "[API Error: Quota exceeded for quota metric 'GenerationRequests'", + ); + expect(result).toContain('You have reached your daily quota limit'); + expect(result).toContain( + 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', + ); + expect(result).not.toContain( + 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + ); + }); + + it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => { + const errorMessage = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}'; + const result = parseAndFormatApiError( + errorMessage, + AuthType.LOGIN_WITH_GOOGLE, + UserTierId.STANDARD, + 'gemini-2.5-pro', + DEFAULT_GEMINI_FLASH_MODEL, + ); + expect(result).toContain('[API Error: Rate limit exceeded'); + expect(result).toContain( + 'We appreciate you for choosing Gemini Code Assist and the Gemini CLI', + ); + expect(result).not.toContain( + 'upgrade to a Gemini Code Assist Standard or Enterprise plan', + ); + }); }); diff --git a/packages/cli/src/ui/utils/errorParsing.ts b/packages/cli/src/ui/utils/errorParsing.ts index 33014812..555d5e4e 100644 --- a/packages/cli/src/ui/utils/errorParsing.ts +++ b/packages/cli/src/ui/utils/errorParsing.ts @@ -4,66 +4,118 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { AuthType, StructuredError } from '@google/gemini-cli-core'; +import { + AuthType, + UserTierId, + DEFAULT_GEMINI_FLASH_MODEL, + DEFAULT_GEMINI_MODEL, + isProQuotaExceededError, + isGenericQuotaExceededError, + isApiError, + isStructuredError, +} from '@google/gemini-cli-core'; -const RATE_LIMIT_ERROR_MESSAGE_GOOGLE = - '\nPlease wait and try again later. To increase your limits, upgrade to a plan with higher limits, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey'; +// Free Tier message functions +const getRateLimitErrorMessageGoogleFree = ( + fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, +) => + `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; + +const getRateLimitErrorMessageGoogleProQuotaFree = ( + currentModel: string = DEFAULT_GEMINI_MODEL, + fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, +) => + `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + +const getRateLimitErrorMessageGoogleGenericQuotaFree = () => + `\nYou have reached your daily quota limit. To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + +// Legacy/Standard Tier message functions +const getRateLimitErrorMessageGooglePaid = ( + fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, +) => + `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`; + +const getRateLimitErrorMessageGoogleProQuotaPaid = ( + currentModel: string = DEFAULT_GEMINI_MODEL, + fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, +) => + `\nYou have reached your daily ${currentModel} quota limit. You will be switched to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + +const getRateLimitErrorMessageGoogleGenericQuotaPaid = ( + currentModel: string = DEFAULT_GEMINI_MODEL, +) => + `\nYou have reached your daily quota limit. We appreciate you for choosing Gemini Code Assist and the Gemini CLI. To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; const RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI = '\nPlease wait and try again later. To increase your limits, request a quota increase through AI Studio, or switch to another /auth method'; const RATE_LIMIT_ERROR_MESSAGE_VERTEX = '\nPlease wait and try again later. To increase your limits, request a quota increase through Vertex, or switch to another /auth method'; -const RATE_LIMIT_ERROR_MESSAGE_DEFAULT = - 'Your request has been rate limited. Please wait and try again later.'; +const getRateLimitErrorMessageDefault = ( + fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, +) => + `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; -export interface ApiError { - error: { - code: number; - message: string; - status: string; - details: unknown[]; - }; -} - -function isApiError(error: unknown): error is ApiError { - return ( - typeof error === 'object' && - error !== null && - 'error' in error && - typeof (error as ApiError).error === 'object' && - 'message' in (error as ApiError).error - ); -} - -function isStructuredError(error: unknown): error is StructuredError { - return ( - typeof error === 'object' && - error !== null && - 'message' in error && - typeof (error as StructuredError).message === 'string' - ); -} - -function getRateLimitMessage(authType?: AuthType): string { +function getRateLimitMessage( + authType?: AuthType, + error?: unknown, + userTier?: UserTierId, + currentModel?: string, + fallbackModel?: string, +): string { switch (authType) { - case AuthType.LOGIN_WITH_GOOGLE: - return RATE_LIMIT_ERROR_MESSAGE_GOOGLE; + case AuthType.LOGIN_WITH_GOOGLE: { + // Determine if user is on a paid tier (Legacy or Standard) - default to FREE if not specified + const isPaidTier = + userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD; + + if (isProQuotaExceededError(error)) { + return isPaidTier + ? getRateLimitErrorMessageGoogleProQuotaPaid( + currentModel || DEFAULT_GEMINI_MODEL, + fallbackModel, + ) + : getRateLimitErrorMessageGoogleProQuotaFree( + currentModel || DEFAULT_GEMINI_MODEL, + fallbackModel, + ); + } else if (isGenericQuotaExceededError(error)) { + return isPaidTier + ? getRateLimitErrorMessageGoogleGenericQuotaPaid( + currentModel || DEFAULT_GEMINI_MODEL, + ) + : getRateLimitErrorMessageGoogleGenericQuotaFree(); + } else { + return isPaidTier + ? getRateLimitErrorMessageGooglePaid(fallbackModel) + : getRateLimitErrorMessageGoogleFree(fallbackModel); + } + } case AuthType.USE_GEMINI: return RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI; case AuthType.USE_VERTEX_AI: return RATE_LIMIT_ERROR_MESSAGE_VERTEX; default: - return RATE_LIMIT_ERROR_MESSAGE_DEFAULT; + return getRateLimitErrorMessageDefault(fallbackModel); } } export function parseAndFormatApiError( error: unknown, authType?: AuthType, + userTier?: UserTierId, + currentModel?: string, + fallbackModel?: string, ): string { if (isStructuredError(error)) { let text = `[API Error: ${error.message}]`; if (error.status === 429) { - text += getRateLimitMessage(authType); + text += getRateLimitMessage( + authType, + error, + userTier, + currentModel, + fallbackModel, + ); } return text; } @@ -92,7 +144,13 @@ export function parseAndFormatApiError( } let text = `[API Error: ${finalMessage} (Status: ${parsedError.error.status})]`; if (parsedError.error.code === 429) { - text += getRateLimitMessage(authType); + text += getRateLimitMessage( + authType, + parsedError, + userTier, + currentModel, + fallbackModel, + ); } return text; } diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 2cea70ca..b0659a9d 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -103,6 +103,7 @@ export interface SandboxConfig { export type FlashFallbackHandler = ( currentModel: string, fallbackModel: string, + error?: unknown, ) => Promise; export interface ConfigParameters { diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 9d3791fd..80680aca 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -845,6 +845,7 @@ describe('Gemini Client (client.ts)', () => { expect(mockFallbackHandler).toHaveBeenCalledWith( currentModel, fallbackModel, + undefined, ); }); }); diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 6cfcd407..b8996cbf 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -323,8 +323,8 @@ export class GeminiClient { }); const result = await retryWithBackoff(apiCall, { - onPersistent429: async (authType?: string) => - await this.handleFlashFallback(authType), + onPersistent429: async (authType?: string, error?: unknown) => + await this.handleFlashFallback(authType, error), authType: this.config.getContentGeneratorConfig()?.authType, }); @@ -411,8 +411,8 @@ export class GeminiClient { }); const result = await retryWithBackoff(apiCall, { - onPersistent429: async (authType?: string) => - await this.handleFlashFallback(authType), + onPersistent429: async (authType?: string, error?: unknown) => + await this.handleFlashFallback(authType, error), authType: this.config.getContentGeneratorConfig()?.authType, }); return result; @@ -559,7 +559,10 @@ export class GeminiClient { * Handles fallback to Flash model when persistent 429 errors occur for OAuth users. * Uses a fallback handler if provided by the config, otherwise returns null. */ - private async handleFlashFallback(authType?: string): Promise { + private async handleFlashFallback( + authType?: string, + error?: unknown, + ): Promise { // Only handle fallback for OAuth users if (authType !== AuthType.LOGIN_WITH_GOOGLE) { return null; @@ -577,7 +580,11 @@ export class GeminiClient { const fallbackHandler = this.config.flashFallbackHandler; if (typeof fallbackHandler === 'function') { try { - const accepted = await fallbackHandler(currentModel, fallbackModel); + const accepted = await fallbackHandler( + currentModel, + fallbackModel, + error, + ); if (accepted) { this.config.setModel(fallbackModel); return fallbackModel; diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 537d55a0..1be84f2e 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -191,7 +191,10 @@ export class GeminiChat { * Handles fallback to Flash model when persistent 429 errors occur for OAuth users. * Uses a fallback handler if provided by the config, otherwise returns null. */ - private async handleFlashFallback(authType?: string): Promise { + private async handleFlashFallback( + authType?: string, + error?: unknown, + ): Promise { // Only handle fallback for OAuth users if (authType !== AuthType.LOGIN_WITH_GOOGLE) { return null; @@ -209,7 +212,11 @@ export class GeminiChat { const fallbackHandler = this.config.flashFallbackHandler; if (typeof fallbackHandler === 'function') { try { - const accepted = await fallbackHandler(currentModel, fallbackModel); + const accepted = await fallbackHandler( + currentModel, + fallbackModel, + error, + ); if (accepted) { this.config.setModel(fallbackModel); return fallbackModel; @@ -270,8 +277,8 @@ export class GeminiChat { } return false; }, - onPersistent429: async (authType?: string) => - await this.handleFlashFallback(authType), + onPersistent429: async (authType?: string, error?: unknown) => + await this.handleFlashFallback(authType, error), authType: this.config.getContentGeneratorConfig()?.authType, }); const durationMs = Date.now() - startTime; @@ -367,8 +374,8 @@ export class GeminiChat { } return false; // Don't retry other errors by default }, - onPersistent429: async (authType?: string) => - await this.handleFlashFallback(authType), + onPersistent429: async (authType?: string, error?: unknown) => + await this.handleFlashFallback(authType, error), authType: this.config.getContentGeneratorConfig()?.authType, }); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index aff37f50..df7db12c 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -32,6 +32,7 @@ export * from './utils/getFolderStructure.js'; export * from './utils/memoryDiscovery.js'; export * from './utils/gitIgnoreParser.js'; export * from './utils/editor.js'; +export * from './utils/quotaErrorDetection.js'; // Export services export * from './services/fileDiscoveryService.js'; diff --git a/packages/core/src/utils/flashFallback.integration.test.ts b/packages/core/src/utils/flashFallback.integration.test.ts index 6554425f..f5e354a0 100644 --- a/packages/core/src/utils/flashFallback.integration.test.ts +++ b/packages/core/src/utils/flashFallback.integration.test.ts @@ -86,6 +86,7 @@ describe('Flash Fallback Integration', () => { expect(fallbackModel).toBe(DEFAULT_GEMINI_FLASH_MODEL); expect(mockFallbackHandler).toHaveBeenCalledWith( AuthType.LOGIN_WITH_GOOGLE, + expect.any(Error), ); expect(result).toBe('success after fallback'); // Should have: 2 failures, then fallback triggered, then 1 success after retry reset diff --git a/packages/core/src/utils/quotaErrorDetection.ts b/packages/core/src/utils/quotaErrorDetection.ts new file mode 100644 index 00000000..ec77f5ee --- /dev/null +++ b/packages/core/src/utils/quotaErrorDetection.ts @@ -0,0 +1,82 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +export interface ApiError { + error: { + code: number; + message: string; + status: string; + details: unknown[]; + }; +} + +interface StructuredError { + message: string; + status?: number; +} + +export function isApiError(error: unknown): error is ApiError { + return ( + typeof error === 'object' && + error !== null && + 'error' in error && + typeof (error as ApiError).error === 'object' && + 'message' in (error as ApiError).error + ); +} + +export function isStructuredError(error: unknown): error is StructuredError { + return ( + typeof error === 'object' && + error !== null && + 'message' in error && + typeof (error as StructuredError).message === 'string' + ); +} + +export function isProQuotaExceededError(error: unknown): boolean { + // Check for Pro quota exceeded errors by looking for the specific pattern + // This will match patterns like: + // - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'" + // - "Quota exceeded for quota metric 'Gemini 1.5-preview Pro Requests'" + // - "Quota exceeded for quota metric 'Gemini beta-3.0 Pro Requests'" + // - "Quota exceeded for quota metric 'Gemini experimental-v2 Pro Requests'" + // We use string methods instead of regex to avoid ReDoS vulnerabilities + + const checkMessage = (message: string): boolean => + message.includes("Quota exceeded for quota metric 'Gemini") && + message.includes("Pro Requests'"); + + if (typeof error === 'string') { + return checkMessage(error); + } + + if (isStructuredError(error)) { + return checkMessage(error.message); + } + + if (isApiError(error)) { + return checkMessage(error.error.message); + } + + return false; +} + +export function isGenericQuotaExceededError(error: unknown): boolean { + if (typeof error === 'string') { + return error.includes('Quota exceeded for quota metric'); + } + + if (isStructuredError(error)) { + return error.message.includes('Quota exceeded for quota metric'); + } + + if (isApiError(error)) { + return error.error.message.includes('Quota exceeded for quota metric'); + } + + return false; +} diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index a0294c31..f84d2004 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -357,7 +357,10 @@ describe('retryWithBackoff', () => { // Should fail with original error when fallback is rejected expect(result).toBeInstanceOf(Error); expect(result.message).toBe('Rate limit exceeded'); - expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal'); + expect(fallbackCallback).toHaveBeenCalledWith( + 'oauth-personal', + expect.any(Error), + ); }); it('should handle mixed error types (only count consecutive 429s)', async () => { diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index f3f5f2d2..01651950 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -5,13 +5,20 @@ */ import { AuthType } from '../core/contentGenerator.js'; +import { + isProQuotaExceededError, + isGenericQuotaExceededError, +} from './quotaErrorDetection.js'; export interface RetryOptions { maxAttempts: number; initialDelayMs: number; maxDelayMs: number; shouldRetry: (error: Error) => boolean; - onPersistent429?: (authType?: string) => Promise; + onPersistent429?: ( + authType?: string, + error?: unknown, + ) => Promise; authType?: string; } @@ -86,6 +93,53 @@ export async function retryWithBackoff( } catch (error) { const errorStatus = getErrorStatus(error); + // Check for Pro quota exceeded error first - immediate fallback for OAuth users + if ( + errorStatus === 429 && + authType === AuthType.LOGIN_WITH_GOOGLE && + isProQuotaExceededError(error) && + onPersistent429 + ) { + try { + const fallbackModel = await onPersistent429(authType, error); + if (fallbackModel) { + // Reset attempt counter and try with new model + attempt = 0; + consecutive429Count = 0; + currentDelay = initialDelayMs; + // With the model updated, we continue to the next attempt + continue; + } + } catch (fallbackError) { + // If fallback fails, continue with original error + console.warn('Fallback to Flash model failed:', fallbackError); + } + } + + // Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users + if ( + errorStatus === 429 && + authType === AuthType.LOGIN_WITH_GOOGLE && + !isProQuotaExceededError(error) && + isGenericQuotaExceededError(error) && + onPersistent429 + ) { + try { + const fallbackModel = await onPersistent429(authType, error); + if (fallbackModel) { + // Reset attempt counter and try with new model + attempt = 0; + consecutive429Count = 0; + currentDelay = initialDelayMs; + // With the model updated, we continue to the next attempt + continue; + } + } catch (fallbackError) { + // If fallback fails, continue with original error + console.warn('Fallback to Flash model failed:', fallbackError); + } + } + // Track consecutive 429 errors if (errorStatus === 429) { consecutive429Count++; @@ -100,7 +154,7 @@ export async function retryWithBackoff( authType === AuthType.LOGIN_WITH_GOOGLE ) { try { - const fallbackModel = await onPersistent429(authType); + const fallbackModel = await onPersistent429(authType, error); if (fallbackModel) { // Reset attempt counter and try with new model attempt = 0;