Improve quota- and resource-related 429 error handling, also taking Code Assist customer tiers into consideration (#3609)

This commit is contained in:
Bryan Morgan 2025-07-09 10:18:15 -04:00 committed by GitHub
parent 8f2da86aa5
commit b0cce95286
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 611 additions and 63 deletions

View File

@ -67,6 +67,10 @@ import { useBracketedPaste } from './hooks/useBracketedPaste.js';
import { useTextBuffer } from './components/shared/text-buffer.js';
import * as fs from 'fs';
import { UpdateNotification } from './components/UpdateNotification.js';
import {
isProQuotaExceededError,
isGenericQuotaExceededError,
} from '@google/gemini-cli-core';
import { checkForUpdates } from './utils/updateCheck.js';
import ansiEscapes from 'ansi-escapes';
import { OverflowProvider } from './contexts/OverflowContext.js';
@ -243,15 +247,34 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
const flashFallbackHandler = async (
currentModel: string,
fallbackModel: string,
error?: unknown,
): Promise<boolean> => {
let message: string;
// Check if this is a Pro quota exceeded error
if (error && isProQuotaExceededError(error)) {
message = `⚡ You have reached your daily ${currentModel} quota limit.
Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
You can switch authentication methods by typing /auth`;
} else if (error && isGenericQuotaExceededError(error)) {
message = `⚡ You have reached your daily quota limit.
Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
You can switch authentication methods by typing /auth`;
} else {
// Default fallback message for other cases (like consecutive 429s)
message = `⚡ Slow response times detected.
Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
}
// Add message to UI history
addItem(
{
type: MessageType.INFO,
text: `⚡ Slow response times detected. Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
To avoid this you can either upgrade to Standard tier. See: https://goo.gle/set-up-gemini-code-assist
Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
You can switch authentication methods by typing /auth`,
text: message,
},
Date.now(),
);

View File

@ -1097,6 +1097,7 @@ describe('useGeminiStream', () => {
getContentGeneratorConfig: vi.fn(() => ({
authType: mockAuthType,
})),
getModel: vi.fn(() => 'gemini-2.5-pro'),
} as unknown as Config;
const { result } = renderHook(() =>
@ -1125,6 +1126,9 @@ describe('useGeminiStream', () => {
expect(mockParseAndFormatApiError).toHaveBeenCalledWith(
'Rate limit exceeded',
mockAuthType,
undefined,
'gemini-2.5-pro',
'gemini-2.5-flash',
);
});
});

View File

@ -24,6 +24,7 @@ import {
ThoughtSummary,
UnauthorizedError,
UserPromptEvent,
DEFAULT_GEMINI_FLASH_MODEL,
} from '@google/gemini-cli-core';
import { type Part, type PartListUnion } from '@google/genai';
import {
@ -397,6 +398,9 @@ export const useGeminiStream = (
text: parseAndFormatApiError(
eventValue.error,
config.getContentGeneratorConfig().authType,
undefined,
config.getModel(),
DEFAULT_GEMINI_FLASH_MODEL,
),
},
userMessageTimestamp,
@ -533,6 +537,9 @@ export const useGeminiStream = (
text: parseAndFormatApiError(
getErrorMessage(error) || 'Unknown error',
config.getContentGeneratorConfig().authType,
undefined,
config.getModel(),
DEFAULT_GEMINI_FLASH_MODEL,
),
},
userMessageTimestamp,

View File

@ -6,10 +6,16 @@
import { describe, it, expect } from 'vitest';
import { parseAndFormatApiError } from './errorParsing.js';
import { AuthType, StructuredError } from '@google/gemini-cli-core';
import {
AuthType,
UserTierId,
DEFAULT_GEMINI_FLASH_MODEL,
isProQuotaExceededError,
} from '@google/gemini-cli-core';
describe('parseAndFormatApiError', () => {
const enterpriseMessage = 'upgrade to a plan with higher limits';
const _enterpriseMessage =
'upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits';
const vertexMessage = 'request a quota increase through Vertex';
const geminiMessage = 'request a quota increase through AI Studio';
@ -24,9 +30,17 @@ describe('parseAndFormatApiError', () => {
it('should format a 429 API error with the default message', () => {
const errorMessage =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
const result = parseAndFormatApiError(errorMessage);
const result = parseAndFormatApiError(
errorMessage,
undefined,
undefined,
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result).toContain('[API Error: Rate limit exceeded');
expect(result).toContain('Your request has been rate limited');
expect(result).toContain(
'Slow response times detected. Switching to the gemini-2.5-flash model',
);
});
it('should format a 429 API error with the personal message', () => {
@ -35,9 +49,14 @@ describe('parseAndFormatApiError', () => {
const result = parseAndFormatApiError(
errorMessage,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result).toContain('[API Error: Rate limit exceeded');
expect(result).toContain(enterpriseMessage);
expect(result).toContain(
'Slow response times detected. Switching to the gemini-2.5-flash model',
);
});
it('should format a 429 API error with the vertex message', () => {
@ -116,4 +135,284 @@ describe('parseAndFormatApiError', () => {
const expected = '[API Error: An unknown error occurred.]';
expect(parseAndFormatApiError(error)).toBe(expected);
});
it('should format a 429 API error with Pro quota exceeded message for Google auth (Free tier)', () => {
const errorMessage =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const result = parseAndFormatApiError(
errorMessage,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result).toContain(
"[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
);
expect(result).toContain(
'You have reached your daily gemini-2.5-pro quota limit',
);
expect(result).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
});
it('should format a regular 429 API error with standard message for Google auth', () => {
const errorMessage =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
const result = parseAndFormatApiError(
errorMessage,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result).toContain('[API Error: Rate limit exceeded');
expect(result).toContain(
'Slow response times detected. Switching to the gemini-2.5-flash model',
);
expect(result).not.toContain(
'You have reached your daily gemini-2.5-pro quota limit',
);
});
// A generic (non model-specific) daily quota error must produce the generic
// quota message and must NOT be reported as a Pro-model quota error.
it('should format a 429 API error with generic quota exceeded message for Google auth', () => {
  const errorMessage =
    'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
  const result = parseAndFormatApiError(
    errorMessage,
    AuthType.LOGIN_WITH_GOOGLE,
    undefined,
    'gemini-2.5-pro',
    DEFAULT_GEMINI_FLASH_MODEL,
  );
  expect(result).toContain(
    "[API Error: Quota exceeded for quota metric 'GenerationRequests'",
  );
  expect(result).toContain('You have reached your daily quota limit');
  // The Pro-quota message interpolates the model id passed as currentModel
  // ('gemini-2.5-pro'), not the display name 'Gemini 2.5 Pro'. Asserting on
  // the display name could never fail, so check the string that the Pro
  // branch would actually emit.
  expect(result).not.toContain(
    'You have reached your daily gemini-2.5-pro quota limit',
  );
});
it('should prioritize Pro quota message over generic quota message for Google auth', () => {
const errorMessage =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const result = parseAndFormatApiError(
errorMessage,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result).toContain(
"[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
);
expect(result).toContain(
'You have reached your daily gemini-2.5-pro quota limit',
);
expect(result).not.toContain('You have reached your daily quota limit');
});
it('should format a 429 API error with Pro quota exceeded message for Google auth (Standard tier)', () => {
const errorMessage =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const result = parseAndFormatApiError(
errorMessage,
AuthType.LOGIN_WITH_GOOGLE,
UserTierId.STANDARD,
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result).toContain(
"[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
);
expect(result).toContain(
'You have reached your daily gemini-2.5-pro quota limit',
);
expect(result).toContain(
'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
);
expect(result).not.toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
});
it('should format a 429 API error with Pro quota exceeded message for Google auth (Legacy tier)', () => {
const errorMessage =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const result = parseAndFormatApiError(
errorMessage,
AuthType.LOGIN_WITH_GOOGLE,
UserTierId.LEGACY,
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result).toContain(
"[API Error: Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'",
);
expect(result).toContain(
'You have reached your daily gemini-2.5-pro quota limit',
);
expect(result).toContain(
'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
);
expect(result).not.toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
});
it('should handle different Gemini version strings in Pro quota exceeded errors', () => {
const errorMessage15 =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const errorMessagePreview =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const errorMessageBeta =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const errorMessageExperimental =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const result15 = parseAndFormatApiError(
errorMessage15,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-1.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
const resultPreview = parseAndFormatApiError(
errorMessagePreview,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-2.5-preview-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
const resultBeta = parseAndFormatApiError(
errorMessageBeta,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-beta-3.0-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
const resultExperimental = parseAndFormatApiError(
errorMessageExperimental,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-experimental-v2-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result15).toContain(
'You have reached your daily gemini-1.5-pro quota limit',
);
expect(resultPreview).toContain(
'You have reached your daily gemini-2.5-preview-pro quota limit',
);
expect(resultBeta).toContain(
'You have reached your daily gemini-beta-3.0-pro quota limit',
);
expect(resultExperimental).toContain(
'You have reached your daily gemini-experimental-v2-pro quota limit',
);
expect(result15).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
expect(resultPreview).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
expect(resultBeta).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
expect(resultExperimental).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
});
it('should not match non-Pro models with similar version strings', () => {
// Test that Flash models with similar version strings don't match
expect(
isProQuotaExceededError(
"Quota exceeded for quota metric 'Gemini 2.5 Flash Requests' and limit",
),
).toBe(false);
expect(
isProQuotaExceededError(
"Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
),
).toBe(false);
expect(
isProQuotaExceededError(
"Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit",
),
).toBe(false);
expect(
isProQuotaExceededError(
"Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit",
),
).toBe(false);
// Test other model types
expect(
isProQuotaExceededError(
"Quota exceeded for quota metric 'Gemini 2.5 Ultra Requests' and limit",
),
).toBe(false);
expect(
isProQuotaExceededError(
"Quota exceeded for quota metric 'Gemini 2.5 Standard Requests' and limit",
),
).toBe(false);
// Test generic quota messages
expect(
isProQuotaExceededError(
"Quota exceeded for quota metric 'GenerationRequests' and limit",
),
).toBe(false);
expect(
isProQuotaExceededError(
"Quota exceeded for quota metric 'EmbeddingRequests' and limit",
),
).toBe(false);
});
it('should format a generic quota exceeded message for Google auth (Standard tier)', () => {
const errorMessage =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'GenerationRequests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const result = parseAndFormatApiError(
errorMessage,
AuthType.LOGIN_WITH_GOOGLE,
UserTierId.STANDARD,
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result).toContain(
"[API Error: Quota exceeded for quota metric 'GenerationRequests'",
);
expect(result).toContain('You have reached your daily quota limit');
expect(result).toContain(
'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
);
expect(result).not.toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
});
it('should format a regular 429 API error with standard message for Google auth (Standard tier)', () => {
const errorMessage =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Rate limit exceeded","status":"RESOURCE_EXHAUSTED"}}';
const result = parseAndFormatApiError(
errorMessage,
AuthType.LOGIN_WITH_GOOGLE,
UserTierId.STANDARD,
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result).toContain('[API Error: Rate limit exceeded');
expect(result).toContain(
'We appreciate you for choosing Gemini Code Assist and the Gemini CLI',
);
expect(result).not.toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
});
});

View File

@ -4,66 +4,118 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { AuthType, StructuredError } from '@google/gemini-cli-core';
import {
AuthType,
UserTierId,
DEFAULT_GEMINI_FLASH_MODEL,
DEFAULT_GEMINI_MODEL,
isProQuotaExceededError,
isGenericQuotaExceededError,
isApiError,
isStructuredError,
} from '@google/gemini-cli-core';
const RATE_LIMIT_ERROR_MESSAGE_GOOGLE =
'\nPlease wait and try again later. To increase your limits, upgrade to a plan with higher limits, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey';
// Free Tier message functions
/**
 * Text appended to a 429 error for Google-login users who are not on a paid
 * tier, when the error is not a quota-exceeded error.
 *
 * @param fallbackModel Model named in the message as the one being switched to.
 * @returns The message, starting with a newline.
 */
const getRateLimitErrorMessageGoogleFree = (
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) => {
  const notice = 'Slow response times detected.';
  const action = `Switching to the ${fallbackModel} model for the rest of this session.`;
  return `\n${notice} ${action}`;
};
/**
 * Text appended to a 429 error for Google-login users who are not on a paid
 * tier, when the daily quota of the current (Pro) model is exhausted.
 *
 * @param currentModel Model whose daily quota was reached.
 * @param fallbackModel Model named in the message as the one being switched to.
 * @returns The message, starting with a newline.
 */
const getRateLimitErrorMessageGoogleProQuotaFree = (
  currentModel: string = DEFAULT_GEMINI_MODEL,
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) => {
  const sentences = [
    `You have reached your daily ${currentModel} quota limit.`,
    `You will be switched to the ${fallbackModel} model for the rest of this session.`,
    'To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey',
  ];
  return `\n${sentences.join(' ')}`;
};
/**
 * Text appended to a 429 error for Google-login users who are not on a paid
 * tier, when a daily quota that is not Pro-specific is exhausted.
 *
 * @returns The message, starting with a newline.
 */
const getRateLimitErrorMessageGoogleGenericQuotaFree = () => {
  const sentences = [
    'You have reached your daily quota limit.',
    'To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist, or use /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey',
  ];
  return `\n${sentences.join(' ')}`;
};
// Legacy/Standard Tier message functions
/**
 * Text appended to a 429 error for Google-login users on the Legacy or
 * Standard tier, when the error is not a quota-exceeded error.
 *
 * @param fallbackModel Model named in the message as the one being switched to.
 * @returns The message, starting with a newline.
 */
const getRateLimitErrorMessageGooglePaid = (
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) => {
  const sentences = [
    'Slow response times detected.',
    `Switching to the ${fallbackModel} model for the rest of this session.`,
    'We appreciate you for choosing Gemini Code Assist and the Gemini CLI.',
  ];
  return `\n${sentences.join(' ')}`;
};
/**
 * Text appended to a 429 error for Google-login users on the Legacy or
 * Standard tier, when the daily quota of the current (Pro) model is exhausted.
 *
 * @param currentModel Model whose daily quota was reached.
 * @param fallbackModel Model named in the message as the one being switched to.
 * @returns The message, starting with a newline.
 */
const getRateLimitErrorMessageGoogleProQuotaPaid = (
  currentModel: string = DEFAULT_GEMINI_MODEL,
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) => {
  const sentences = [
    `You have reached your daily ${currentModel} quota limit.`,
    `You will be switched to the ${fallbackModel} model for the rest of this session.`,
    'We appreciate you for choosing Gemini Code Assist and the Gemini CLI.',
    `To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`,
  ];
  return `\n${sentences.join(' ')}`;
};
/**
 * Text appended to a 429 error for Google-login users on the Legacy or
 * Standard tier, when a daily quota that is not Pro-specific is exhausted.
 *
 * @param currentModel Model named in the "continue accessing" hint.
 * @returns The message, starting with a newline.
 */
const getRateLimitErrorMessageGoogleGenericQuotaPaid = (
  currentModel: string = DEFAULT_GEMINI_MODEL,
) => {
  const sentences = [
    'You have reached your daily quota limit.',
    'We appreciate you for choosing Gemini Code Assist and the Gemini CLI.',
    `To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`,
  ];
  return `\n${sentences.join(' ')}`;
};
const RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI =
'\nPlease wait and try again later. To increase your limits, request a quota increase through AI Studio, or switch to another /auth method';
const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
'\nPlease wait and try again later. To increase your limits, request a quota increase through Vertex, or switch to another /auth method';
const RATE_LIMIT_ERROR_MESSAGE_DEFAULT =
'Your request has been rate limited. Please wait and try again later.';
/**
 * Text appended to a 429 error when the auth type matches none of the
 * specifically handled cases.
 *
 * @param fallbackModel Model named in the message as the one being switched to.
 * @returns The message, starting with a newline.
 */
const getRateLimitErrorMessageDefault = (
  fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) => {
  const notice = 'Slow response times detected.';
  const action = `Switching to the ${fallbackModel} model for the rest of this session.`;
  return `\n${notice} ${action}`;
};
export interface ApiError {
error: {
code: number;
message: string;
status: string;
details: unknown[];
};
}
function isApiError(error: unknown): error is ApiError {
return (
typeof error === 'object' &&
error !== null &&
'error' in error &&
typeof (error as ApiError).error === 'object' &&
'message' in (error as ApiError).error
);
}
function isStructuredError(error: unknown): error is StructuredError {
return (
typeof error === 'object' &&
error !== null &&
'message' in error &&
typeof (error as StructuredError).message === 'string'
);
}
function getRateLimitMessage(authType?: AuthType): string {
function getRateLimitMessage(
authType?: AuthType,
error?: unknown,
userTier?: UserTierId,
currentModel?: string,
fallbackModel?: string,
): string {
switch (authType) {
case AuthType.LOGIN_WITH_GOOGLE:
return RATE_LIMIT_ERROR_MESSAGE_GOOGLE;
case AuthType.LOGIN_WITH_GOOGLE: {
// Determine if user is on a paid tier (Legacy or Standard) - default to FREE if not specified
const isPaidTier =
userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
if (isProQuotaExceededError(error)) {
return isPaidTier
? getRateLimitErrorMessageGoogleProQuotaPaid(
currentModel || DEFAULT_GEMINI_MODEL,
fallbackModel,
)
: getRateLimitErrorMessageGoogleProQuotaFree(
currentModel || DEFAULT_GEMINI_MODEL,
fallbackModel,
);
} else if (isGenericQuotaExceededError(error)) {
return isPaidTier
? getRateLimitErrorMessageGoogleGenericQuotaPaid(
currentModel || DEFAULT_GEMINI_MODEL,
)
: getRateLimitErrorMessageGoogleGenericQuotaFree();
} else {
return isPaidTier
? getRateLimitErrorMessageGooglePaid(fallbackModel)
: getRateLimitErrorMessageGoogleFree(fallbackModel);
}
}
case AuthType.USE_GEMINI:
return RATE_LIMIT_ERROR_MESSAGE_USE_GEMINI;
case AuthType.USE_VERTEX_AI:
return RATE_LIMIT_ERROR_MESSAGE_VERTEX;
default:
return RATE_LIMIT_ERROR_MESSAGE_DEFAULT;
return getRateLimitErrorMessageDefault(fallbackModel);
}
}
export function parseAndFormatApiError(
error: unknown,
authType?: AuthType,
userTier?: UserTierId,
currentModel?: string,
fallbackModel?: string,
): string {
if (isStructuredError(error)) {
let text = `[API Error: ${error.message}]`;
if (error.status === 429) {
text += getRateLimitMessage(authType);
text += getRateLimitMessage(
authType,
error,
userTier,
currentModel,
fallbackModel,
);
}
return text;
}
@ -92,7 +144,13 @@ export function parseAndFormatApiError(
}
let text = `[API Error: ${finalMessage} (Status: ${parsedError.error.status})]`;
if (parsedError.error.code === 429) {
text += getRateLimitMessage(authType);
text += getRateLimitMessage(
authType,
parsedError,
userTier,
currentModel,
fallbackModel,
);
}
return text;
}

View File

@ -103,6 +103,7 @@ export interface SandboxConfig {
/**
 * Callback invoked before the client falls back from the current model to a
 * fallback model.
 *
 * @param currentModel Model in use when the fallback was triggered.
 * @param fallbackModel Model the client proposes to switch to.
 * @param error Optional error that triggered the fallback, so the handler can
 *   tailor its behavior (e.g. quota exceeded vs. other 429s).
 * @returns A promise resolving to `true` to accept the switch; the visible
 *   callers then set the config's model to `fallbackModel`.
 */
export type FlashFallbackHandler = (
currentModel: string,
fallbackModel: string,
error?: unknown,
) => Promise<boolean>;
export interface ConfigParameters {

View File

@ -845,6 +845,7 @@ describe('Gemini Client (client.ts)', () => {
expect(mockFallbackHandler).toHaveBeenCalledWith(
currentModel,
fallbackModel,
undefined,
);
});
});

View File

@ -323,8 +323,8 @@ export class GeminiClient {
});
const result = await retryWithBackoff(apiCall, {
onPersistent429: async (authType?: string) =>
await this.handleFlashFallback(authType),
onPersistent429: async (authType?: string, error?: unknown) =>
await this.handleFlashFallback(authType, error),
authType: this.config.getContentGeneratorConfig()?.authType,
});
@ -411,8 +411,8 @@ export class GeminiClient {
});
const result = await retryWithBackoff(apiCall, {
onPersistent429: async (authType?: string) =>
await this.handleFlashFallback(authType),
onPersistent429: async (authType?: string, error?: unknown) =>
await this.handleFlashFallback(authType, error),
authType: this.config.getContentGeneratorConfig()?.authType,
});
return result;
@ -559,7 +559,10 @@ export class GeminiClient {
* Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
* Uses a fallback handler if provided by the config, otherwise returns null.
*/
private async handleFlashFallback(authType?: string): Promise<string | null> {
private async handleFlashFallback(
authType?: string,
error?: unknown,
): Promise<string | null> {
// Only handle fallback for OAuth users
if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
return null;
@ -577,7 +580,11 @@ export class GeminiClient {
const fallbackHandler = this.config.flashFallbackHandler;
if (typeof fallbackHandler === 'function') {
try {
const accepted = await fallbackHandler(currentModel, fallbackModel);
const accepted = await fallbackHandler(
currentModel,
fallbackModel,
error,
);
if (accepted) {
this.config.setModel(fallbackModel);
return fallbackModel;

View File

@ -191,7 +191,10 @@ export class GeminiChat {
* Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
* Uses a fallback handler if provided by the config, otherwise returns null.
*/
private async handleFlashFallback(authType?: string): Promise<string | null> {
private async handleFlashFallback(
authType?: string,
error?: unknown,
): Promise<string | null> {
// Only handle fallback for OAuth users
if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
return null;
@ -209,7 +212,11 @@ export class GeminiChat {
const fallbackHandler = this.config.flashFallbackHandler;
if (typeof fallbackHandler === 'function') {
try {
const accepted = await fallbackHandler(currentModel, fallbackModel);
const accepted = await fallbackHandler(
currentModel,
fallbackModel,
error,
);
if (accepted) {
this.config.setModel(fallbackModel);
return fallbackModel;
@ -270,8 +277,8 @@ export class GeminiChat {
}
return false;
},
onPersistent429: async (authType?: string) =>
await this.handleFlashFallback(authType),
onPersistent429: async (authType?: string, error?: unknown) =>
await this.handleFlashFallback(authType, error),
authType: this.config.getContentGeneratorConfig()?.authType,
});
const durationMs = Date.now() - startTime;
@ -367,8 +374,8 @@ export class GeminiChat {
}
return false; // Don't retry other errors by default
},
onPersistent429: async (authType?: string) =>
await this.handleFlashFallback(authType),
onPersistent429: async (authType?: string, error?: unknown) =>
await this.handleFlashFallback(authType, error),
authType: this.config.getContentGeneratorConfig()?.authType,
});

View File

@ -32,6 +32,7 @@ export * from './utils/getFolderStructure.js';
export * from './utils/memoryDiscovery.js';
export * from './utils/gitIgnoreParser.js';
export * from './utils/editor.js';
export * from './utils/quotaErrorDetection.js';
// Export services
export * from './services/fileDiscoveryService.js';

View File

@ -86,6 +86,7 @@ describe('Flash Fallback Integration', () => {
expect(fallbackModel).toBe(DEFAULT_GEMINI_FLASH_MODEL);
expect(mockFallbackHandler).toHaveBeenCalledWith(
AuthType.LOGIN_WITH_GOOGLE,
expect.any(Error),
);
expect(result).toBe('success after fallback');
// Should have: 2 failures, then fallback triggered, then 1 success after retry reset

View File

@ -0,0 +1,82 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Shape of an API error payload whose details are nested under a top-level
 * `error` key, e.g. `{"error":{"code":429,"message":"...","status":"RESOURCE_EXHAUSTED"}}`.
 *
 * NOTE(review): `isApiError` only verifies that `error.message` is present,
 * so the other fields are not guaranteed to exist at runtime.
 */
export interface ApiError {
error: {
// Numeric error code (e.g. 429).
code: number;
// Human-readable error description; this is what the quota checks inspect.
message: string;
// Symbolic status string (e.g. "RESOURCE_EXHAUSTED").
status: string;
// Additional detail entries; their structure is not constrained here.
details: unknown[];
};
}
/**
 * Minimal error shape used by this module: an object carrying a string
 * `message` and, optionally, a numeric `status`. Not exported from this file.
 */
interface StructuredError {
// Human-readable error description; this is what the quota checks inspect.
message: string;
// Optional numeric status — presumably an HTTP status code; confirm against callers.
status?: number;
}
/**
 * Type guard for {@link ApiError}-shaped values.
 *
 * A value qualifies when it is a non-null object whose `error` property is
 * itself a non-null object containing a `message` key. Only the presence of
 * `message` is checked; its type and the remaining fields are not validated.
 *
 * @param error Arbitrary value to inspect.
 * @returns `true` when `error` has the nested `error.message` structure.
 */
export function isApiError(error: unknown): error is ApiError {
  if (typeof error !== 'object' || error === null || !('error' in error)) {
    return false;
  }
  const inner = (error as { error: unknown }).error;
  // `typeof null` is 'object', and `'message' in null` throws a TypeError,
  // so null must be ruled out explicitly before using the `in` operator.
  return typeof inner === 'object' && inner !== null && 'message' in inner;
}
/**
 * Type guard for values that look like a {@link StructuredError}: a non-null
 * object with a `message` property of type string.
 *
 * @param error Arbitrary value to inspect.
 * @returns `true` when `error` is an object carrying a string `message`.
 */
export function isStructuredError(error: unknown): error is StructuredError {
  if (typeof error !== 'object') {
    return false;
  }
  if (error === null) {
    return false;
  }
  if (!('message' in error)) {
    return false;
  }
  const candidate = error as StructuredError;
  return typeof candidate.message === 'string';
}
/**
 * Reports whether `error` describes an exhausted quota for a Gemini Pro model.
 *
 * The message text is taken from the value itself (string), from `.message`
 * (structured error), or from `.error.message` (API error payload), in that
 * order. It matches when the text contains both
 * `Quota exceeded for quota metric 'Gemini` and `Pro Requests'`, e.g.:
 *   - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
 *   - "Quota exceeded for quota metric 'Gemini 1.5-preview Pro Requests'"
 *   - "Quota exceeded for quota metric 'Gemini beta-3.0 Pro Requests'"
 *   - "Quota exceeded for quota metric 'Gemini experimental-v2 Pro Requests'"
 *
 * Plain substring checks are used instead of a regex to avoid ReDoS
 * vulnerabilities.
 *
 * @param error Arbitrary error value.
 * @returns `true` for a Pro quota-exceeded message, otherwise `false`.
 */
export function isProQuotaExceededError(error: unknown): boolean {
  const requiredFragments = [
    "Quota exceeded for quota metric 'Gemini",
    "Pro Requests'",
  ];
  const mentionsProQuota = (text: string): boolean =>
    requiredFragments.every((fragment) => text.includes(fragment));

  if (typeof error === 'string') {
    return mentionsProQuota(error);
  }
  if (isStructuredError(error)) {
    return mentionsProQuota(error.message);
  }
  return isApiError(error) ? mentionsProQuota(error.error.message) : false;
}
/**
 * Reports whether `error` describes any exhausted quota, i.e. its message
 * contains `Quota exceeded for quota metric`.
 *
 * The message text is taken from the value itself (string), from `.message`
 * (structured error), or from `.error.message` (API error payload), in that
 * order. Pro-specific quota messages also satisfy this check.
 *
 * @param error Arbitrary error value.
 * @returns `true` for a quota-exceeded message, otherwise `false`.
 */
export function isGenericQuotaExceededError(error: unknown): boolean {
  const quotaMarker = 'Quota exceeded for quota metric';
  const mentionsQuota = (text: string): boolean => text.includes(quotaMarker);

  if (typeof error === 'string') {
    return mentionsQuota(error);
  }
  if (isStructuredError(error)) {
    return mentionsQuota(error.message);
  }
  return isApiError(error) ? mentionsQuota(error.error.message) : false;
}

View File

@ -357,7 +357,10 @@ describe('retryWithBackoff', () => {
// Should fail with original error when fallback is rejected
expect(result).toBeInstanceOf(Error);
expect(result.message).toBe('Rate limit exceeded');
expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
expect(fallbackCallback).toHaveBeenCalledWith(
'oauth-personal',
expect.any(Error),
);
});
it('should handle mixed error types (only count consecutive 429s)', async () => {

View File

@ -5,13 +5,20 @@
*/
import { AuthType } from '../core/contentGenerator.js';
import {
isProQuotaExceededError,
isGenericQuotaExceededError,
} from './quotaErrorDetection.js';
export interface RetryOptions {
maxAttempts: number;
initialDelayMs: number;
maxDelayMs: number;
shouldRetry: (error: Error) => boolean;
onPersistent429?: (authType?: string) => Promise<string | null>;
onPersistent429?: (
authType?: string,
error?: unknown,
) => Promise<string | null>;
authType?: string;
}
@ -86,6 +93,53 @@ export async function retryWithBackoff<T>(
} catch (error) {
const errorStatus = getErrorStatus(error);
// Quota exceeded (Pro-specific or generic) - immediate fallback for OAuth
// users, before any consecutive-429 counting happens. Both quota kinds
// take the same recovery path, so they share a single branch; the handler
// receives the error and can tell the two apart itself.
if (
  errorStatus === 429 &&
  authType === AuthType.LOGIN_WITH_GOOGLE &&
  (isProQuotaExceededError(error) || isGenericQuotaExceededError(error)) &&
  onPersistent429
) {
  try {
    const fallbackModel = await onPersistent429(authType, error);
    if (fallbackModel) {
      // Reset attempt counter and try with new model
      attempt = 0;
      consecutive429Count = 0;
      currentDelay = initialDelayMs;
      // With the model updated, we continue to the next attempt
      continue;
    }
  } catch (fallbackError) {
    // If fallback fails, continue with original error
    console.warn('Fallback to Flash model failed:', fallbackError);
  }
}
// Track consecutive 429 errors
if (errorStatus === 429) {
consecutive429Count++;
@ -100,7 +154,7 @@ export async function retryWithBackoff<T>(
authType === AuthType.LOGIN_WITH_GOOGLE
) {
try {
const fallbackModel = await onPersistent429(authType);
const fallbackModel = await onPersistent429(authType, error);
if (fallbackModel) {
// Reset attempt counter and try with new model
attempt = 0;