diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx index 833cc2b5..e3a5eb55 100644 --- a/packages/cli/src/ui/App.tsx +++ b/packages/cli/src/ui/App.tsx @@ -70,6 +70,7 @@ import { UpdateNotification } from './components/UpdateNotification.js'; import { isProQuotaExceededError, isGenericQuotaExceededError, + UserTierId, } from '@google/gemini-cli-core'; import { checkForUpdates } from './utils/updateCheck.js'; import ansiEscapes from 'ansi-escapes'; @@ -136,6 +137,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { const ctrlDTimerRef = useRef(null); const [constrainHeight, setConstrainHeight] = useState(true); const [showPrivacyNotice, setShowPrivacyNotice] = useState(false); + const [modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] = + useState(false); const openPrivacyNotice = useCallback(() => { setShowPrivacyNotice(true); @@ -251,23 +254,51 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { ): Promise => { let message: string; + // For quota errors, assume FREE tier (safe default) - only show upgrade messaging to free tier users + // TODO: Get actual user tier from config when available + const userTier = undefined; // Defaults to FREE tier behavior + const isPaidTier = + userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD; + // Check if this is a Pro quota exceeded error if (error && isProQuotaExceededError(error)) { - message = `⚡ You have reached your daily ${currentModel} quota limit. + if (isPaidTier) { + message = `⚡ You have reached your daily ${currentModel} quota limit. +⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session. +⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + } else { + message = `⚡ You have reached your daily ${currentModel} quota limit. ⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session. ⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key ⚡ You can switch authentication methods by typing /auth`; + } } else if (error && isGenericQuotaExceededError(error)) { - message = `⚡ You have reached your daily quota limit. + if (isPaidTier) { + message = `⚡ You have reached your daily quota limit. +⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session. +⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + } else { + message = `⚡ You have reached your daily quota limit. ⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session. ⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key ⚡ You can switch authentication methods by typing /auth`; + } } else { - // Default fallback message for other cases (like consecutive 429s) - message = `⚡ Slow response times detected. -⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`; + if (isPaidTier) { + // Default fallback message for other cases (like consecutive 429s) + message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session. +⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit +⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`; + } else { + // Default fallback message for other cases (like consecutive 429s) + message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session. +⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit +⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist +⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key +⚡ You can switch authentication methods by typing /auth`; + } } // Add message to UI history @@ -278,7 +309,14 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { }, Date.now(), ); - return true; // Always accept the fallback + + // Set the flag to prevent tool continuation + setModelSwitchedFromQuotaError(true); + // Set global quota error flag to prevent Flash model calls + config.setQuotaErrorOccurred(true); + // Switch model for future use but return false to stop current retry + config.setModel(fallbackModel); + return false; // Don't continue with current prompt }; config.setFlashFallbackHandler(flashFallbackHandler); @@ -445,6 +483,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => { getPreferredEditor, onAuthError, performMemoryRefresh, + modelSwitchedFromQuotaError, + setModelSwitchedFromQuotaError, ); pendingHistoryItems.push(...pendingGeminiHistoryItems); const { elapsedTime, currentLoadingPhrase } = diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index fc6f93c5..62ade50f 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -301,6 +301,8 @@ describe('useGeminiStream', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, addHistory: vi.fn(), + setQuotaErrorOccurred: vi.fn(), + getQuotaErrorOccurred: vi.fn(() => false), } as unknown as Config; mockOnDebugMessage = vi.fn(); mockHandleSlashCommand = vi.fn().mockResolvedValue(false); @@ -386,6 +388,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ); }, { @@ -518,6 +522,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ), ); @@ -582,6 +588,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ), ); @@ -675,6 +683,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ), ); @@ -775,6 +785,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ), ); @@ -1063,6 +1075,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, mockPerformMemoryRefresh, + false, + () => {}, ), ); @@ -1113,6 +1127,8 @@ describe('useGeminiStream', () => { () => 'vscode' as EditorType, () => {}, () => Promise.resolve(), + false, + () => {}, ), ); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 550cab86..d32c9ffa 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -90,6 +90,8 @@ export const useGeminiStream = ( getPreferredEditor: () => EditorType | undefined, onAuthError: () => void, performMemoryRefresh: () => Promise, + modelSwitchedFromQuotaError: boolean, + setModelSwitchedFromQuotaError: React.Dispatch>, ) => { const [initError, setInitError] = useState(null); const abortControllerRef = useRef(null); @@ -494,6 +496,12 @@ export const useGeminiStream = ( const userMessageTimestamp = Date.now(); setShowHelp(false); + // Reset quota error flag when starting a new query (not a continuation) + if (!options?.isContinuation) { + setModelSwitchedFromQuotaError(false); + config.setQuotaErrorOccurred(false); + } + abortControllerRef.current = new AbortController(); const abortSignal = abortControllerRef.current.signal; turnCancelledRef.current = false; @@ -552,6 +560,7 @@ export const useGeminiStream = ( [ streamingState, setShowHelp, + setModelSwitchedFromQuotaError, prepareQueryForGemini, processGeminiStreamEvents, pendingHistoryItemRef, @@ -668,6 +677,12 @@ export const useGeminiStream = ( ); markToolsAsSubmitted(callIdsToMarkAsSubmitted); + + // Don't continue if model was switched due to quota error + if (modelSwitchedFromQuotaError) { + return; + } + submitQuery(mergePartListUnions(responsesToSend), { isContinuation: true, }); @@ -678,6 +693,7 @@ export const useGeminiStream = ( markToolsAsSubmitted, geminiClient, performMemoryRefresh, + modelSwitchedFromQuotaError, ], ); diff --git a/packages/cli/src/ui/utils/errorParsing.test.ts b/packages/cli/src/ui/utils/errorParsing.test.ts index 3d228efb..770dffad 100644 --- a/packages/cli/src/ui/utils/errorParsing.test.ts +++ b/packages/cli/src/ui/utils/errorParsing.test.ts @@ -39,7 +39,7 @@ describe('parseAndFormatApiError', () => { ); expect(result).toContain('[API Error: Rate limit exceeded'); expect(result).toContain( - 'Slow response times detected. Switching to the gemini-2.5-flash model', + 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model', ); }); @@ -55,7 +55,7 @@ describe('parseAndFormatApiError', () => { ); expect(result).toContain('[API Error: Rate limit exceeded'); expect(result).toContain( - 'Slow response times detected. Switching to the gemini-2.5-flash model', + 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model', ); }); @@ -169,7 +169,7 @@ describe('parseAndFormatApiError', () => { ); expect(result).toContain('[API Error: Rate limit exceeded'); expect(result).toContain( - 'Slow response times detected. Switching to the gemini-2.5-flash model', + 'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model', ); expect(result).not.toContain( 'You have reached your daily gemini-2.5-pro quota limit', @@ -262,21 +262,17 @@ describe('parseAndFormatApiError', () => { ); }); - it('should handle different Gemini version strings in Pro quota exceeded errors', () => { - const errorMessage15 = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; + it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => { + const errorMessage25 = + 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; const errorMessagePreview = 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const errorMessageBeta = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const errorMessageExperimental = - 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}'; - const result15 = parseAndFormatApiError( - errorMessage15, + const result25 = parseAndFormatApiError( + errorMessage25, AuthType.LOGIN_WITH_GOOGLE, undefined, - 'gemini-1.5-pro', + 'gemini-2.5-pro', DEFAULT_GEMINI_FLASH_MODEL, ); const resultPreview = parseAndFormatApiError( @@ -286,45 +282,19 @@ describe('parseAndFormatApiError', () => { 'gemini-2.5-preview-pro', DEFAULT_GEMINI_FLASH_MODEL, ); - const resultBeta = parseAndFormatApiError( - errorMessageBeta, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-beta-3.0-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - const resultExperimental = parseAndFormatApiError( - errorMessageExperimental, - AuthType.LOGIN_WITH_GOOGLE, - undefined, - 'gemini-experimental-v2-pro', - DEFAULT_GEMINI_FLASH_MODEL, - ); - expect(result15).toContain( - 'You have reached your daily gemini-1.5-pro quota limit', + expect(result25).toContain( + 'You have reached your daily gemini-2.5-pro quota limit', ); expect(resultPreview).toContain( 'You have reached your daily gemini-2.5-preview-pro quota limit', ); - expect(resultBeta).toContain( - 'You have reached your daily gemini-beta-3.0-pro quota limit', - ); - expect(resultExperimental).toContain( - 'You have reached your daily gemini-experimental-v2-pro quota limit', - ); - expect(result15).toContain( + expect(result25).toContain( 'upgrade to a Gemini Code Assist Standard or Enterprise plan', ); expect(resultPreview).toContain( 'upgrade to a Gemini Code Assist Standard or Enterprise plan', ); - expect(resultBeta).toContain( - 'upgrade to a Gemini Code Assist Standard or Enterprise plan', - ); - expect(resultExperimental).toContain( - 'upgrade to a Gemini Code Assist Standard or Enterprise plan', - ); }); it('should not match non-Pro models with similar version strings', () => { @@ -339,16 +309,6 @@ describe('parseAndFormatApiError', () => { "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit", ), ).toBe(false); - expect( - isProQuotaExceededError( - "Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit", - ), - ).toBe(false); - expect( - isProQuotaExceededError( - "Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit", - ), - ).toBe(false); // Test other model types expect( diff --git a/packages/cli/src/ui/utils/errorParsing.ts b/packages/cli/src/ui/utils/errorParsing.ts index 555d5e4e..5031bc0a 100644 --- a/packages/cli/src/ui/utils/errorParsing.ts +++ b/packages/cli/src/ui/utils/errorParsing.ts @@ -19,7 +19,7 @@ import { const getRateLimitErrorMessageGoogleFree = ( fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, ) => - `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; + `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; const getRateLimitErrorMessageGoogleProQuotaFree = ( currentModel: string = DEFAULT_GEMINI_MODEL, @@ -34,7 +34,7 @@ const getRateLimitErrorMessageGoogleGenericQuotaFree = () => const getRateLimitErrorMessageGooglePaid = ( fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, ) => - `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`; + `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`; const getRateLimitErrorMessageGoogleProQuotaPaid = ( currentModel: string = DEFAULT_GEMINI_MODEL, @@ -53,7 +53,7 @@ const RATE_LIMIT_ERROR_MESSAGE_VERTEX = const getRateLimitErrorMessageDefault = ( fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL, ) => - `\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; + `\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`; function getRateLimitMessage( authType?: AuthType, diff --git a/packages/core/src/code_assist/server.ts b/packages/core/src/code_assist/server.ts index 06ce0341..01fd2462 100644 --- a/packages/core/src/code_assist/server.ts +++ b/packages/core/src/code_assist/server.ts @@ -31,7 +31,23 @@ import { toCountTokenRequest, toGenerateContentRequest, } from './converter.js'; -import { PassThrough } from 'node:stream'; +import { Readable } from 'node:stream'; + +interface ErrorData { + error?: { + message?: string; + }; +} + +interface GaxiosResponse { + status: number; + data: unknown; +} + +interface StreamError extends Error { + status?: number; + response?: GaxiosResponse; +} /** HTTP options to be used in each of the requests. */ export interface HttpOptions { @@ -177,8 +193,45 @@ export class CodeAssistServer implements ContentGenerator { }); return (async function* (): AsyncGenerator { + // Convert ReadableStream to Node.js stream if needed + let nodeStream: NodeJS.ReadableStream; + + if (res.data instanceof ReadableStream) { + // Convert Web ReadableStream to Node.js Readable stream + // eslint-disable-next-line @typescript-eslint/no-explicit-any + nodeStream = Readable.fromWeb(res.data as any); + } else if ( + res.data && + typeof (res.data as NodeJS.ReadableStream).on === 'function' + ) { + // Already a Node.js stream + nodeStream = res.data as NodeJS.ReadableStream; + } else { + // If res.data is not a stream, it might be an error response + // Try to extract error information from the response + let errorMessage = + 'Response data is not a readable stream. This may indicate a server error or quota issue.'; + + if (res.data && typeof res.data === 'object') { + // Check if this is an error response with error details + const errorData = res.data as ErrorData; + if (errorData.error?.message) { + errorMessage = errorData.error.message; + } else if (typeof errorData === 'string') { + errorMessage = errorData; + } + } + + // Create an error that looks like a quota error if it contains quota information + const error: StreamError = new Error(errorMessage); + // Add status and response properties so it can be properly handled by retry logic + error.status = res.status; + error.response = res; + throw error; + } + const rl = readline.createInterface({ - input: res.data as PassThrough, + input: nodeStream, crlfDelay: Infinity, // Recognizes '\r\n' and '\n' as line breaks }); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index b0659a9d..51915fc8 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -104,7 +104,7 @@ export type FlashFallbackHandler = ( currentModel: string, fallbackModel: string, error?: unknown, -) => Promise; +) => Promise; export interface ConfigParameters { sessionId: string; @@ -183,6 +183,7 @@ export class Config { private readonly listExtensions: boolean; private readonly _activeExtensions: ActiveExtension[]; flashFallbackHandler?: FlashFallbackHandler; + private quotaErrorOccurred: boolean = false; constructor(params: ConfigParameters) { this.sessionId = params.sessionId; @@ -304,6 +305,14 @@ export class Config { this.flashFallbackHandler = handler; } + setQuotaErrorOccurred(value: boolean): void { + this.quotaErrorOccurred = value; + } + + getQuotaErrorOccurred(): boolean { + return this.quotaErrorOccurred; + } + getEmbeddingModel(): string { return this.embeddingModel; } diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 80680aca..cd77a3f7 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -178,6 +178,8 @@ describe('Gemini Client (client.ts)', () => { getProxy: vi.fn().mockReturnValue(undefined), getWorkingDir: vi.fn().mockReturnValue('/test/dir'), getFileService: vi.fn().mockReturnValue(fileService), + getQuotaErrorOccurred: vi.fn().mockReturnValue(false), + setQuotaErrorOccurred: vi.fn(), }; return mock as unknown as Config; }); @@ -351,7 +353,7 @@ describe('Gemini Client (client.ts)', () => { await client.generateJson(contents, schema, abortSignal); expect(mockGenerateContentFn).toHaveBeenCalledWith({ - model: DEFAULT_GEMINI_FLASH_MODEL, + model: 'test-model', // Should use current model from config config: { abortSignal, systemInstruction: getCoreSystemPrompt(''), diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index b8996cbf..51aab961 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -262,6 +262,7 @@ export class GeminiClient { request: PartListUnion, signal: AbortSignal, turns: number = this.MAX_TURNS, + originalModel?: string, ): AsyncGenerator { // Ensure turns never exceeds MAX_TURNS to prevent infinite loops const boundedTurns = Math.min(turns, this.MAX_TURNS); @@ -269,6 +270,9 @@ export class GeminiClient { return new Turn(this.getChat()); } + // Track the original model from the first call to detect model switching + const initialModel = originalModel || this.config.getModel(); + const compressed = await this.tryCompressChat(); if (compressed) { yield { type: GeminiEventType.ChatCompressed, value: compressed }; @@ -279,6 +283,14 @@ export class GeminiClient { yield event; } if (!turn.pendingToolCalls.length && signal && !signal.aborted) { + // Check if model was switched during the call (likely due to quota error) + const currentModel = this.config.getModel(); + if (currentModel !== initialModel) { + // Model was switched (likely due to quota error fallback) + // Don't continue with recursive call to prevent unwanted Flash execution + return turn; + } + const nextSpeakerCheck = await checkNextSpeaker( this.getChat(), this, @@ -288,7 +300,12 @@ export class GeminiClient { const nextRequest = [{ text: 'Please continue.' }]; // This recursive call's events will be yielded out, but the final // turn object will be from the top-level call. - yield* this.sendMessageStream(nextRequest, signal, boundedTurns - 1); + yield* this.sendMessageStream( + nextRequest, + signal, + boundedTurns - 1, + initialModel, + ); } } return turn; @@ -298,9 +315,12 @@ export class GeminiClient { contents: Content[], schema: SchemaUnion, abortSignal: AbortSignal, - model: string = DEFAULT_GEMINI_FLASH_MODEL, + model?: string, config: GenerateContentConfig = {}, ): Promise> { + // Use current model from config instead of hardcoded Flash model + const modelToUse = + model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL; try { const userMemory = this.config.getUserMemory(); const systemInstruction = getCoreSystemPrompt(userMemory); @@ -312,7 +332,7 @@ export class GeminiClient { const apiCall = () => this.getContentGenerator().generateContent({ - model, + model: modelToUse, config: { ...requestConfig, systemInstruction, @@ -585,10 +605,14 @@ export class GeminiClient { fallbackModel, error, ); - if (accepted) { + if (accepted !== false && accepted !== null) { this.config.setModel(fallbackModel); return fallbackModel; } + // Check if the model was switched manually in the handler + if (this.config.getModel() === fallbackModel) { + return null; // Model was switched but don't continue with current prompt + } } catch (error) { console.warn('Flash fallback handler failed:', error); } diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index bfaeb8f6..35e6bf6c 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -43,6 +43,8 @@ describe('GeminiChat', () => { }), getModel: vi.fn().mockReturnValue('gemini-pro'), setModel: vi.fn(), + getQuotaErrorOccurred: vi.fn().mockReturnValue(false), + setQuotaErrorOccurred: vi.fn(), flashFallbackHandler: undefined, } as unknown as Config; diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 1be84f2e..2c149e93 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -217,10 +217,14 @@ export class GeminiChat { fallbackModel, error, ); - if (accepted) { + if (accepted !== false && accepted !== null) { this.config.setModel(fallbackModel); return fallbackModel; } + // Check if the model was switched manually in the handler + if (this.config.getModel() === fallbackModel) { + return null; // Model was switched but don't continue with current prompt + } } catch (error) { console.warn('Flash fallback handler failed:', error); } @@ -262,12 +266,25 @@ export class GeminiChat { let response: GenerateContentResponse; try { - const apiCall = () => - this.contentGenerator.generateContent({ - model: this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL, + const apiCall = () => { + const modelToUse = this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL; + + // Prevent Flash model calls immediately after quota error + if ( + this.config.getQuotaErrorOccurred() && + modelToUse === DEFAULT_GEMINI_FLASH_MODEL + ) { + throw new Error( + 'Please submit a new query to continue with the Flash model.', + ); + } + + return this.contentGenerator.generateContent({ + model: modelToUse, contents: requestContents, config: { ...this.generationConfig, ...params.config }, }); + }; response = await retryWithBackoff(apiCall, { shouldRetry: (error: Error) => { @@ -354,12 +371,25 @@ export class GeminiChat { const startTime = Date.now(); try { - const apiCall = () => - this.contentGenerator.generateContentStream({ - model: this.config.getModel(), + const apiCall = () => { + const modelToUse = this.config.getModel(); + + // Prevent Flash model calls immediately after quota error + if ( + this.config.getQuotaErrorOccurred() && + modelToUse === DEFAULT_GEMINI_FLASH_MODEL + ) { + throw new Error( + 'Please submit a new query to continue with the Flash model.', + ); + } + + return this.contentGenerator.generateContentStream({ + model: modelToUse, contents: requestContents, config: { ...this.generationConfig, ...params.config }, }); + }; // Note: Retrying streams can be complex. If generateContentStream itself doesn't handle retries // for transient issues internally before yielding the async generator, this retry will re-initiate diff --git a/packages/core/src/utils/editCorrector.test.ts b/packages/core/src/utils/editCorrector.test.ts index bcf75dfe..cf9008ef 100644 --- a/packages/core/src/utils/editCorrector.test.ts +++ b/packages/core/src/utils/editCorrector.test.ts @@ -214,6 +214,8 @@ describe('editCorrector', () => { setAlwaysSkipModificationConfirmation: vi.fn((skip: boolean) => { configParams.alwaysSkipModificationConfirmation = skip; }), + getQuotaErrorOccurred: vi.fn().mockReturnValue(false), + setQuotaErrorOccurred: vi.fn(), } as unknown as Config; callCount = 0; @@ -654,6 +656,8 @@ describe('editCorrector', () => { setAlwaysSkipModificationConfirmation: vi.fn((skip: boolean) => { configParams.alwaysSkipModificationConfirmation = skip; }), + getQuotaErrorOccurred: vi.fn().mockReturnValue(false), + setQuotaErrorOccurred: vi.fn(), } as unknown as Config; callCount = 0; diff --git a/packages/core/src/utils/quotaErrorDetection.ts b/packages/core/src/utils/quotaErrorDetection.ts index ec77f5ee..a8e87a5d 100644 --- a/packages/core/src/utils/quotaErrorDetection.ts +++ b/packages/core/src/utils/quotaErrorDetection.ts @@ -41,14 +41,23 @@ export function isProQuotaExceededError(error: unknown): boolean { // Check for Pro quota exceeded errors by looking for the specific pattern // This will match patterns like: // - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'" - // - "Quota exceeded for quota metric 'Gemini 1.5-preview Pro Requests'" - // - "Quota exceeded for quota metric 'Gemini beta-3.0 Pro Requests'" - // - "Quota exceeded for quota metric 'Gemini experimental-v2 Pro Requests'" + // - "Quota exceeded for quota metric 'Gemini 2.5-preview Pro Requests'" // We use string methods instead of regex to avoid ReDoS vulnerabilities - const checkMessage = (message: string): boolean => - message.includes("Quota exceeded for quota metric 'Gemini") && - message.includes("Pro Requests'"); + const checkMessage = (message: string): boolean => { + console.log('[DEBUG] isProQuotaExceededError checking message:', message); + const result = + message.includes("Quota exceeded for quota metric 'Gemini") && + message.includes("Pro Requests'"); + console.log('[DEBUG] isProQuotaExceededError result:', result); + return result; + }; + + // Log the full error object to understand its structure + console.log( + '[DEBUG] isProQuotaExceededError - full error object:', + JSON.stringify(error, null, 2), + ); if (typeof error === 'string') { return checkMessage(error); @@ -62,6 +71,38 @@ export function isProQuotaExceededError(error: unknown): boolean { return checkMessage(error.error.message); } + // Check if it's a Gaxios error with response data + if (error && typeof error === 'object' && 'response' in error) { + const gaxiosError = error as { + response?: { + data?: unknown; + }; + }; + if (gaxiosError.response && gaxiosError.response.data) { + console.log( + '[DEBUG] isProQuotaExceededError - checking response data:', + gaxiosError.response.data, + ); + if (typeof gaxiosError.response.data === 'string') { + return checkMessage(gaxiosError.response.data); + } + if ( + typeof gaxiosError.response.data === 'object' && + gaxiosError.response.data !== null && + 'error' in gaxiosError.response.data + ) { + const errorData = gaxiosError.response.data as { + error?: { message?: string }; + }; + return checkMessage(errorData.error?.message || ''); + } + } + } + + console.log( + '[DEBUG] isProQuotaExceededError - no matching error format for:', + error, + ); return false; } diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index 01651950..e5d65751 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -18,7 +18,7 @@ export interface RetryOptions { onPersistent429?: ( authType?: string, error?: unknown, - ) => Promise; + ) => Promise; authType?: string; } @@ -102,13 +102,16 @@ export async function retryWithBackoff( ) { try { const fallbackModel = await onPersistent429(authType, error); - if (fallbackModel) { + if (fallbackModel !== false && fallbackModel !== null) { // Reset attempt counter and try with new model attempt = 0; consecutive429Count = 0; currentDelay = initialDelayMs; // With the model updated, we continue to the next attempt continue; + } else { + // Fallback handler returned null/false, meaning don't continue - stop retry process + throw error; } } catch (fallbackError) { // If fallback fails, continue with original error @@ -126,13 +129,16 @@ export async function retryWithBackoff( ) { try { const fallbackModel = await onPersistent429(authType, error); - if (fallbackModel) { + if (fallbackModel !== false && fallbackModel !== null) { // Reset attempt counter and try with new model attempt = 0; consecutive429Count = 0; currentDelay = initialDelayMs; // With the model updated, we continue to the next attempt continue; + } else { + // Fallback handler returned null/false, meaning don't continue - stop retry process + throw error; } } catch (fallbackError) { // If fallback fails, continue with original error @@ -155,13 +161,16 @@ export async function retryWithBackoff( ) { try { const fallbackModel = await onPersistent429(authType, error); - if (fallbackModel) { + if (fallbackModel !== false && fallbackModel !== null) { // Reset attempt counter and try with new model attempt = 0; consecutive429Count = 0; currentDelay = initialDelayMs; // With the model updated, we continue to the next attempt continue; + } else { + // Fallback handler returned null/false, meaning don't continue - stop retry process + throw error; } } catch (fallbackError) { // If fallback fails, continue with original error