Remove auto-execution on Flash in the event of a 429/Quota failover (#3662)
Co-authored-by: Jenna Inouye <jinouye@google.com>
This commit is contained in:
parent
01e756481f
commit
8a6509ffeb
|
@ -70,6 +70,7 @@ import { UpdateNotification } from './components/UpdateNotification.js';
|
||||||
import {
|
import {
|
||||||
isProQuotaExceededError,
|
isProQuotaExceededError,
|
||||||
isGenericQuotaExceededError,
|
isGenericQuotaExceededError,
|
||||||
|
UserTierId,
|
||||||
} from '@google/gemini-cli-core';
|
} from '@google/gemini-cli-core';
|
||||||
import { checkForUpdates } from './utils/updateCheck.js';
|
import { checkForUpdates } from './utils/updateCheck.js';
|
||||||
import ansiEscapes from 'ansi-escapes';
|
import ansiEscapes from 'ansi-escapes';
|
||||||
|
@ -136,6 +137,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
|
||||||
const ctrlDTimerRef = useRef<NodeJS.Timeout | null>(null);
|
const ctrlDTimerRef = useRef<NodeJS.Timeout | null>(null);
|
||||||
const [constrainHeight, setConstrainHeight] = useState<boolean>(true);
|
const [constrainHeight, setConstrainHeight] = useState<boolean>(true);
|
||||||
const [showPrivacyNotice, setShowPrivacyNotice] = useState<boolean>(false);
|
const [showPrivacyNotice, setShowPrivacyNotice] = useState<boolean>(false);
|
||||||
|
const [modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] =
|
||||||
|
useState<boolean>(false);
|
||||||
|
|
||||||
const openPrivacyNotice = useCallback(() => {
|
const openPrivacyNotice = useCallback(() => {
|
||||||
setShowPrivacyNotice(true);
|
setShowPrivacyNotice(true);
|
||||||
|
@ -251,23 +254,51 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
|
||||||
): Promise<boolean> => {
|
): Promise<boolean> => {
|
||||||
let message: string;
|
let message: string;
|
||||||
|
|
||||||
|
// For quota errors, assume FREE tier (safe default) - only show upgrade messaging to free tier users
|
||||||
|
// TODO: Get actual user tier from config when available
|
||||||
|
const userTier = undefined; // Defaults to FREE tier behavior
|
||||||
|
const isPaidTier =
|
||||||
|
userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
|
||||||
|
|
||||||
// Check if this is a Pro quota exceeded error
|
// Check if this is a Pro quota exceeded error
|
||||||
if (error && isProQuotaExceededError(error)) {
|
if (error && isProQuotaExceededError(error)) {
|
||||||
|
if (isPaidTier) {
|
||||||
|
message = `⚡ You have reached your daily ${currentModel} quota limit.
|
||||||
|
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
|
||||||
|
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
|
||||||
|
} else {
|
||||||
message = `⚡ You have reached your daily ${currentModel} quota limit.
|
message = `⚡ You have reached your daily ${currentModel} quota limit.
|
||||||
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
|
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
|
||||||
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
||||||
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
||||||
⚡ You can switch authentication methods by typing /auth`;
|
⚡ You can switch authentication methods by typing /auth`;
|
||||||
|
}
|
||||||
} else if (error && isGenericQuotaExceededError(error)) {
|
} else if (error && isGenericQuotaExceededError(error)) {
|
||||||
|
if (isPaidTier) {
|
||||||
|
message = `⚡ You have reached your daily quota limit.
|
||||||
|
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
|
||||||
|
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
|
||||||
|
} else {
|
||||||
message = `⚡ You have reached your daily quota limit.
|
message = `⚡ You have reached your daily quota limit.
|
||||||
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
|
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
|
||||||
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
||||||
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
||||||
⚡ You can switch authentication methods by typing /auth`;
|
⚡ You can switch authentication methods by typing /auth`;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (isPaidTier) {
|
||||||
|
// Default fallback message for other cases (like consecutive 429s)
|
||||||
|
message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
|
||||||
|
⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
|
||||||
|
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
|
||||||
} else {
|
} else {
|
||||||
// Default fallback message for other cases (like consecutive 429s)
|
// Default fallback message for other cases (like consecutive 429s)
|
||||||
message = `⚡ Slow response times detected.
|
message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
|
||||||
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
|
⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
|
||||||
|
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
||||||
|
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
||||||
|
⚡ You can switch authentication methods by typing /auth`;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add message to UI history
|
// Add message to UI history
|
||||||
|
@ -278,7 +309,14 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
|
||||||
},
|
},
|
||||||
Date.now(),
|
Date.now(),
|
||||||
);
|
);
|
||||||
return true; // Always accept the fallback
|
|
||||||
|
// Set the flag to prevent tool continuation
|
||||||
|
setModelSwitchedFromQuotaError(true);
|
||||||
|
// Set global quota error flag to prevent Flash model calls
|
||||||
|
config.setQuotaErrorOccurred(true);
|
||||||
|
// Switch model for future use but return false to stop current retry
|
||||||
|
config.setModel(fallbackModel);
|
||||||
|
return false; // Don't continue with current prompt
|
||||||
};
|
};
|
||||||
|
|
||||||
config.setFlashFallbackHandler(flashFallbackHandler);
|
config.setFlashFallbackHandler(flashFallbackHandler);
|
||||||
|
@ -445,6 +483,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
|
||||||
getPreferredEditor,
|
getPreferredEditor,
|
||||||
onAuthError,
|
onAuthError,
|
||||||
performMemoryRefresh,
|
performMemoryRefresh,
|
||||||
|
modelSwitchedFromQuotaError,
|
||||||
|
setModelSwitchedFromQuotaError,
|
||||||
);
|
);
|
||||||
pendingHistoryItems.push(...pendingGeminiHistoryItems);
|
pendingHistoryItems.push(...pendingGeminiHistoryItems);
|
||||||
const { elapsedTime, currentLoadingPhrase } =
|
const { elapsedTime, currentLoadingPhrase } =
|
||||||
|
|
|
@ -301,6 +301,8 @@ describe('useGeminiStream', () => {
|
||||||
getUsageStatisticsEnabled: () => true,
|
getUsageStatisticsEnabled: () => true,
|
||||||
getDebugMode: () => false,
|
getDebugMode: () => false,
|
||||||
addHistory: vi.fn(),
|
addHistory: vi.fn(),
|
||||||
|
setQuotaErrorOccurred: vi.fn(),
|
||||||
|
getQuotaErrorOccurred: vi.fn(() => false),
|
||||||
} as unknown as Config;
|
} as unknown as Config;
|
||||||
mockOnDebugMessage = vi.fn();
|
mockOnDebugMessage = vi.fn();
|
||||||
mockHandleSlashCommand = vi.fn().mockResolvedValue(false);
|
mockHandleSlashCommand = vi.fn().mockResolvedValue(false);
|
||||||
|
@ -386,6 +388,8 @@ describe('useGeminiStream', () => {
|
||||||
() => 'vscode' as EditorType,
|
() => 'vscode' as EditorType,
|
||||||
() => {},
|
() => {},
|
||||||
() => Promise.resolve(),
|
() => Promise.resolve(),
|
||||||
|
false,
|
||||||
|
() => {},
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -518,6 +522,8 @@ describe('useGeminiStream', () => {
|
||||||
() => 'vscode' as EditorType,
|
() => 'vscode' as EditorType,
|
||||||
() => {},
|
() => {},
|
||||||
() => Promise.resolve(),
|
() => Promise.resolve(),
|
||||||
|
false,
|
||||||
|
() => {},
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -582,6 +588,8 @@ describe('useGeminiStream', () => {
|
||||||
() => 'vscode' as EditorType,
|
() => 'vscode' as EditorType,
|
||||||
() => {},
|
() => {},
|
||||||
() => Promise.resolve(),
|
() => Promise.resolve(),
|
||||||
|
false,
|
||||||
|
() => {},
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -675,6 +683,8 @@ describe('useGeminiStream', () => {
|
||||||
() => 'vscode' as EditorType,
|
() => 'vscode' as EditorType,
|
||||||
() => {},
|
() => {},
|
||||||
() => Promise.resolve(),
|
() => Promise.resolve(),
|
||||||
|
false,
|
||||||
|
() => {},
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -775,6 +785,8 @@ describe('useGeminiStream', () => {
|
||||||
() => 'vscode' as EditorType,
|
() => 'vscode' as EditorType,
|
||||||
() => {},
|
() => {},
|
||||||
() => Promise.resolve(),
|
() => Promise.resolve(),
|
||||||
|
false,
|
||||||
|
() => {},
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1063,6 +1075,8 @@ describe('useGeminiStream', () => {
|
||||||
() => 'vscode' as EditorType,
|
() => 'vscode' as EditorType,
|
||||||
() => {},
|
() => {},
|
||||||
mockPerformMemoryRefresh,
|
mockPerformMemoryRefresh,
|
||||||
|
false,
|
||||||
|
() => {},
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1113,6 +1127,8 @@ describe('useGeminiStream', () => {
|
||||||
() => 'vscode' as EditorType,
|
() => 'vscode' as EditorType,
|
||||||
() => {},
|
() => {},
|
||||||
() => Promise.resolve(),
|
() => Promise.resolve(),
|
||||||
|
false,
|
||||||
|
() => {},
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
@ -90,6 +90,8 @@ export const useGeminiStream = (
|
||||||
getPreferredEditor: () => EditorType | undefined,
|
getPreferredEditor: () => EditorType | undefined,
|
||||||
onAuthError: () => void,
|
onAuthError: () => void,
|
||||||
performMemoryRefresh: () => Promise<void>,
|
performMemoryRefresh: () => Promise<void>,
|
||||||
|
modelSwitchedFromQuotaError: boolean,
|
||||||
|
setModelSwitchedFromQuotaError: React.Dispatch<React.SetStateAction<boolean>>,
|
||||||
) => {
|
) => {
|
||||||
const [initError, setInitError] = useState<string | null>(null);
|
const [initError, setInitError] = useState<string | null>(null);
|
||||||
const abortControllerRef = useRef<AbortController | null>(null);
|
const abortControllerRef = useRef<AbortController | null>(null);
|
||||||
|
@ -494,6 +496,12 @@ export const useGeminiStream = (
|
||||||
const userMessageTimestamp = Date.now();
|
const userMessageTimestamp = Date.now();
|
||||||
setShowHelp(false);
|
setShowHelp(false);
|
||||||
|
|
||||||
|
// Reset quota error flag when starting a new query (not a continuation)
|
||||||
|
if (!options?.isContinuation) {
|
||||||
|
setModelSwitchedFromQuotaError(false);
|
||||||
|
config.setQuotaErrorOccurred(false);
|
||||||
|
}
|
||||||
|
|
||||||
abortControllerRef.current = new AbortController();
|
abortControllerRef.current = new AbortController();
|
||||||
const abortSignal = abortControllerRef.current.signal;
|
const abortSignal = abortControllerRef.current.signal;
|
||||||
turnCancelledRef.current = false;
|
turnCancelledRef.current = false;
|
||||||
|
@ -552,6 +560,7 @@ export const useGeminiStream = (
|
||||||
[
|
[
|
||||||
streamingState,
|
streamingState,
|
||||||
setShowHelp,
|
setShowHelp,
|
||||||
|
setModelSwitchedFromQuotaError,
|
||||||
prepareQueryForGemini,
|
prepareQueryForGemini,
|
||||||
processGeminiStreamEvents,
|
processGeminiStreamEvents,
|
||||||
pendingHistoryItemRef,
|
pendingHistoryItemRef,
|
||||||
|
@ -668,6 +677,12 @@ export const useGeminiStream = (
|
||||||
);
|
);
|
||||||
|
|
||||||
markToolsAsSubmitted(callIdsToMarkAsSubmitted);
|
markToolsAsSubmitted(callIdsToMarkAsSubmitted);
|
||||||
|
|
||||||
|
// Don't continue if model was switched due to quota error
|
||||||
|
if (modelSwitchedFromQuotaError) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
submitQuery(mergePartListUnions(responsesToSend), {
|
submitQuery(mergePartListUnions(responsesToSend), {
|
||||||
isContinuation: true,
|
isContinuation: true,
|
||||||
});
|
});
|
||||||
|
@ -678,6 +693,7 @@ export const useGeminiStream = (
|
||||||
markToolsAsSubmitted,
|
markToolsAsSubmitted,
|
||||||
geminiClient,
|
geminiClient,
|
||||||
performMemoryRefresh,
|
performMemoryRefresh,
|
||||||
|
modelSwitchedFromQuotaError,
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ describe('parseAndFormatApiError', () => {
|
||||||
);
|
);
|
||||||
expect(result).toContain('[API Error: Rate limit exceeded');
|
expect(result).toContain('[API Error: Rate limit exceeded');
|
||||||
expect(result).toContain(
|
expect(result).toContain(
|
||||||
'Slow response times detected. Switching to the gemini-2.5-flash model',
|
'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -55,7 +55,7 @@ describe('parseAndFormatApiError', () => {
|
||||||
);
|
);
|
||||||
expect(result).toContain('[API Error: Rate limit exceeded');
|
expect(result).toContain('[API Error: Rate limit exceeded');
|
||||||
expect(result).toContain(
|
expect(result).toContain(
|
||||||
'Slow response times detected. Switching to the gemini-2.5-flash model',
|
'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -169,7 +169,7 @@ describe('parseAndFormatApiError', () => {
|
||||||
);
|
);
|
||||||
expect(result).toContain('[API Error: Rate limit exceeded');
|
expect(result).toContain('[API Error: Rate limit exceeded');
|
||||||
expect(result).toContain(
|
expect(result).toContain(
|
||||||
'Slow response times detected. Switching to the gemini-2.5-flash model',
|
'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
|
||||||
);
|
);
|
||||||
expect(result).not.toContain(
|
expect(result).not.toContain(
|
||||||
'You have reached your daily gemini-2.5-pro quota limit',
|
'You have reached your daily gemini-2.5-pro quota limit',
|
||||||
|
@ -262,21 +262,17 @@ describe('parseAndFormatApiError', () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle different Gemini version strings in Pro quota exceeded errors', () => {
|
it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => {
|
||||||
const errorMessage15 =
|
const errorMessage25 =
|
||||||
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
||||||
const errorMessagePreview =
|
const errorMessagePreview =
|
||||||
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
||||||
const errorMessageBeta =
|
|
||||||
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
|
||||||
const errorMessageExperimental =
|
|
||||||
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
|
||||||
|
|
||||||
const result15 = parseAndFormatApiError(
|
const result25 = parseAndFormatApiError(
|
||||||
errorMessage15,
|
errorMessage25,
|
||||||
AuthType.LOGIN_WITH_GOOGLE,
|
AuthType.LOGIN_WITH_GOOGLE,
|
||||||
undefined,
|
undefined,
|
||||||
'gemini-1.5-pro',
|
'gemini-2.5-pro',
|
||||||
DEFAULT_GEMINI_FLASH_MODEL,
|
DEFAULT_GEMINI_FLASH_MODEL,
|
||||||
);
|
);
|
||||||
const resultPreview = parseAndFormatApiError(
|
const resultPreview = parseAndFormatApiError(
|
||||||
|
@ -286,45 +282,19 @@ describe('parseAndFormatApiError', () => {
|
||||||
'gemini-2.5-preview-pro',
|
'gemini-2.5-preview-pro',
|
||||||
DEFAULT_GEMINI_FLASH_MODEL,
|
DEFAULT_GEMINI_FLASH_MODEL,
|
||||||
);
|
);
|
||||||
const resultBeta = parseAndFormatApiError(
|
|
||||||
errorMessageBeta,
|
|
||||||
AuthType.LOGIN_WITH_GOOGLE,
|
|
||||||
undefined,
|
|
||||||
'gemini-beta-3.0-pro',
|
|
||||||
DEFAULT_GEMINI_FLASH_MODEL,
|
|
||||||
);
|
|
||||||
const resultExperimental = parseAndFormatApiError(
|
|
||||||
errorMessageExperimental,
|
|
||||||
AuthType.LOGIN_WITH_GOOGLE,
|
|
||||||
undefined,
|
|
||||||
'gemini-experimental-v2-pro',
|
|
||||||
DEFAULT_GEMINI_FLASH_MODEL,
|
|
||||||
);
|
|
||||||
|
|
||||||
expect(result15).toContain(
|
expect(result25).toContain(
|
||||||
'You have reached your daily gemini-1.5-pro quota limit',
|
'You have reached your daily gemini-2.5-pro quota limit',
|
||||||
);
|
);
|
||||||
expect(resultPreview).toContain(
|
expect(resultPreview).toContain(
|
||||||
'You have reached your daily gemini-2.5-preview-pro quota limit',
|
'You have reached your daily gemini-2.5-preview-pro quota limit',
|
||||||
);
|
);
|
||||||
expect(resultBeta).toContain(
|
expect(result25).toContain(
|
||||||
'You have reached your daily gemini-beta-3.0-pro quota limit',
|
|
||||||
);
|
|
||||||
expect(resultExperimental).toContain(
|
|
||||||
'You have reached your daily gemini-experimental-v2-pro quota limit',
|
|
||||||
);
|
|
||||||
expect(result15).toContain(
|
|
||||||
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
||||||
);
|
);
|
||||||
expect(resultPreview).toContain(
|
expect(resultPreview).toContain(
|
||||||
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
||||||
);
|
);
|
||||||
expect(resultBeta).toContain(
|
|
||||||
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
|
||||||
);
|
|
||||||
expect(resultExperimental).toContain(
|
|
||||||
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should not match non-Pro models with similar version strings', () => {
|
it('should not match non-Pro models with similar version strings', () => {
|
||||||
|
@ -339,16 +309,6 @@ describe('parseAndFormatApiError', () => {
|
||||||
"Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
|
"Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
|
||||||
),
|
),
|
||||||
).toBe(false);
|
).toBe(false);
|
||||||
expect(
|
|
||||||
isProQuotaExceededError(
|
|
||||||
"Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit",
|
|
||||||
),
|
|
||||||
).toBe(false);
|
|
||||||
expect(
|
|
||||||
isProQuotaExceededError(
|
|
||||||
"Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit",
|
|
||||||
),
|
|
||||||
).toBe(false);
|
|
||||||
|
|
||||||
// Test other model types
|
// Test other model types
|
||||||
expect(
|
expect(
|
||||||
|
|
|
@ -19,7 +19,7 @@ import {
|
||||||
const getRateLimitErrorMessageGoogleFree = (
|
const getRateLimitErrorMessageGoogleFree = (
|
||||||
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
|
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
|
||||||
) =>
|
) =>
|
||||||
`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
|
`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
|
||||||
|
|
||||||
const getRateLimitErrorMessageGoogleProQuotaFree = (
|
const getRateLimitErrorMessageGoogleProQuotaFree = (
|
||||||
currentModel: string = DEFAULT_GEMINI_MODEL,
|
currentModel: string = DEFAULT_GEMINI_MODEL,
|
||||||
|
@ -34,7 +34,7 @@ const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
|
||||||
const getRateLimitErrorMessageGooglePaid = (
|
const getRateLimitErrorMessageGooglePaid = (
|
||||||
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
|
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
|
||||||
) =>
|
) =>
|
||||||
`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
|
`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
|
||||||
|
|
||||||
const getRateLimitErrorMessageGoogleProQuotaPaid = (
|
const getRateLimitErrorMessageGoogleProQuotaPaid = (
|
||||||
currentModel: string = DEFAULT_GEMINI_MODEL,
|
currentModel: string = DEFAULT_GEMINI_MODEL,
|
||||||
|
@ -53,7 +53,7 @@ const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
|
||||||
const getRateLimitErrorMessageDefault = (
|
const getRateLimitErrorMessageDefault = (
|
||||||
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
|
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
|
||||||
) =>
|
) =>
|
||||||
`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
|
`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
|
||||||
|
|
||||||
function getRateLimitMessage(
|
function getRateLimitMessage(
|
||||||
authType?: AuthType,
|
authType?: AuthType,
|
||||||
|
|
|
@ -31,7 +31,23 @@ import {
|
||||||
toCountTokenRequest,
|
toCountTokenRequest,
|
||||||
toGenerateContentRequest,
|
toGenerateContentRequest,
|
||||||
} from './converter.js';
|
} from './converter.js';
|
||||||
import { PassThrough } from 'node:stream';
|
import { Readable } from 'node:stream';
|
||||||
|
|
||||||
|
/**
 * Minimal shape of an error payload: an optional `error` object that may
 * carry a human-readable `message` string. Both levels are optional, so
 * either one may be absent on a given payload.
 */
interface ErrorData {
|
||||||
|
error?: {
|
||||||
|
message?: string;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Locally declared, minimal view of an HTTP response: a numeric `status`
 * and an untyped `data` body that must be narrowed before use.
 * NOTE(review): the name suggests this mirrors the gaxios library's
 * response type — confirm it stays structurally compatible with the real one.
 */
interface GaxiosResponse {
|
||||||
|
status: number;
|
||||||
|
data: unknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * An `Error` extended with two optional fields: a numeric `status` and the
 * `response` object (typed as `GaxiosResponse`) the error was derived from.
 */
interface StreamError extends Error {
|
||||||
|
status?: number;
|
||||||
|
response?: GaxiosResponse;
|
||||||
|
}
|
||||||
|
|
||||||
/** HTTP options to be used in each of the requests. */
|
/** HTTP options to be used in each of the requests. */
|
||||||
export interface HttpOptions {
|
export interface HttpOptions {
|
||||||
|
@ -177,8 +193,45 @@ export class CodeAssistServer implements ContentGenerator {
|
||||||
});
|
});
|
||||||
|
|
||||||
return (async function* (): AsyncGenerator<T> {
|
return (async function* (): AsyncGenerator<T> {
|
||||||
|
// Convert ReadableStream to Node.js stream if needed
|
||||||
|
let nodeStream: NodeJS.ReadableStream;
|
||||||
|
|
||||||
|
if (res.data instanceof ReadableStream) {
|
||||||
|
// Convert Web ReadableStream to Node.js Readable stream
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
nodeStream = Readable.fromWeb(res.data as any);
|
||||||
|
} else if (
|
||||||
|
res.data &&
|
||||||
|
typeof (res.data as NodeJS.ReadableStream).on === 'function'
|
||||||
|
) {
|
||||||
|
// Already a Node.js stream
|
||||||
|
nodeStream = res.data as NodeJS.ReadableStream;
|
||||||
|
} else {
|
||||||
|
// If res.data is not a stream, it might be an error response
|
||||||
|
// Try to extract error information from the response
|
||||||
|
let errorMessage =
|
||||||
|
'Response data is not a readable stream. This may indicate a server error or quota issue.';
|
||||||
|
|
||||||
|
if (res.data && typeof res.data === 'object') {
|
||||||
|
// Check if this is an error response with error details
|
||||||
|
const errorData = res.data as ErrorData;
|
||||||
|
if (errorData.error?.message) {
|
||||||
|
errorMessage = errorData.error.message;
|
||||||
|
} else if (typeof errorData === 'string') {
|
||||||
|
errorMessage = errorData;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create an error that looks like a quota error if it contains quota information
|
||||||
|
const error: StreamError = new Error(errorMessage);
|
||||||
|
// Add status and response properties so it can be properly handled by retry logic
|
||||||
|
error.status = res.status;
|
||||||
|
error.response = res;
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
|
||||||
const rl = readline.createInterface({
|
const rl = readline.createInterface({
|
||||||
input: res.data as PassThrough,
|
input: nodeStream,
|
||||||
crlfDelay: Infinity, // Recognizes '\r\n' and '\n' as line breaks
|
crlfDelay: Infinity, // Recognizes '\r\n' and '\n' as line breaks
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -104,7 +104,7 @@ export type FlashFallbackHandler = (
|
||||||
currentModel: string,
|
currentModel: string,
|
||||||
fallbackModel: string,
|
fallbackModel: string,
|
||||||
error?: unknown,
|
error?: unknown,
|
||||||
) => Promise<boolean>;
|
) => Promise<boolean | string | null>;
|
||||||
|
|
||||||
export interface ConfigParameters {
|
export interface ConfigParameters {
|
||||||
sessionId: string;
|
sessionId: string;
|
||||||
|
@ -183,6 +183,7 @@ export class Config {
|
||||||
private readonly listExtensions: boolean;
|
private readonly listExtensions: boolean;
|
||||||
private readonly _activeExtensions: ActiveExtension[];
|
private readonly _activeExtensions: ActiveExtension[];
|
||||||
flashFallbackHandler?: FlashFallbackHandler;
|
flashFallbackHandler?: FlashFallbackHandler;
|
||||||
|
private quotaErrorOccurred: boolean = false;
|
||||||
|
|
||||||
constructor(params: ConfigParameters) {
|
constructor(params: ConfigParameters) {
|
||||||
this.sessionId = params.sessionId;
|
this.sessionId = params.sessionId;
|
||||||
|
@ -304,6 +305,14 @@ export class Config {
|
||||||
this.flashFallbackHandler = handler;
|
this.flashFallbackHandler = handler;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Stores `value` in this instance's `quotaErrorOccurred` flag.
 *
 * @param value - New state of the flag.
 */
setQuotaErrorOccurred(value: boolean): void {
|
||||||
|
this.quotaErrorOccurred = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reads this instance's `quotaErrorOccurred` flag.
 *
 * @returns The current value of the flag.
 */
getQuotaErrorOccurred(): boolean {
|
||||||
|
return this.quotaErrorOccurred;
|
||||||
|
}
|
||||||
|
|
||||||
getEmbeddingModel(): string {
|
getEmbeddingModel(): string {
|
||||||
return this.embeddingModel;
|
return this.embeddingModel;
|
||||||
}
|
}
|
||||||
|
|
|
@ -178,6 +178,8 @@ describe('Gemini Client (client.ts)', () => {
|
||||||
getProxy: vi.fn().mockReturnValue(undefined),
|
getProxy: vi.fn().mockReturnValue(undefined),
|
||||||
getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
|
getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
|
||||||
getFileService: vi.fn().mockReturnValue(fileService),
|
getFileService: vi.fn().mockReturnValue(fileService),
|
||||||
|
getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
|
||||||
|
setQuotaErrorOccurred: vi.fn(),
|
||||||
};
|
};
|
||||||
return mock as unknown as Config;
|
return mock as unknown as Config;
|
||||||
});
|
});
|
||||||
|
@ -351,7 +353,7 @@ describe('Gemini Client (client.ts)', () => {
|
||||||
await client.generateJson(contents, schema, abortSignal);
|
await client.generateJson(contents, schema, abortSignal);
|
||||||
|
|
||||||
expect(mockGenerateContentFn).toHaveBeenCalledWith({
|
expect(mockGenerateContentFn).toHaveBeenCalledWith({
|
||||||
model: DEFAULT_GEMINI_FLASH_MODEL,
|
model: 'test-model', // Should use current model from config
|
||||||
config: {
|
config: {
|
||||||
abortSignal,
|
abortSignal,
|
||||||
systemInstruction: getCoreSystemPrompt(''),
|
systemInstruction: getCoreSystemPrompt(''),
|
||||||
|
|
|
@ -262,6 +262,7 @@ export class GeminiClient {
|
||||||
request: PartListUnion,
|
request: PartListUnion,
|
||||||
signal: AbortSignal,
|
signal: AbortSignal,
|
||||||
turns: number = this.MAX_TURNS,
|
turns: number = this.MAX_TURNS,
|
||||||
|
originalModel?: string,
|
||||||
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
|
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
|
||||||
// Ensure turns never exceeds MAX_TURNS to prevent infinite loops
|
// Ensure turns never exceeds MAX_TURNS to prevent infinite loops
|
||||||
const boundedTurns = Math.min(turns, this.MAX_TURNS);
|
const boundedTurns = Math.min(turns, this.MAX_TURNS);
|
||||||
|
@ -269,6 +270,9 @@ export class GeminiClient {
|
||||||
return new Turn(this.getChat());
|
return new Turn(this.getChat());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Track the original model from the first call to detect model switching
|
||||||
|
const initialModel = originalModel || this.config.getModel();
|
||||||
|
|
||||||
const compressed = await this.tryCompressChat();
|
const compressed = await this.tryCompressChat();
|
||||||
if (compressed) {
|
if (compressed) {
|
||||||
yield { type: GeminiEventType.ChatCompressed, value: compressed };
|
yield { type: GeminiEventType.ChatCompressed, value: compressed };
|
||||||
|
@ -279,6 +283,14 @@ export class GeminiClient {
|
||||||
yield event;
|
yield event;
|
||||||
}
|
}
|
||||||
if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
|
if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
|
||||||
|
// Check if model was switched during the call (likely due to quota error)
|
||||||
|
const currentModel = this.config.getModel();
|
||||||
|
if (currentModel !== initialModel) {
|
||||||
|
// Model was switched (likely due to quota error fallback)
|
||||||
|
// Don't continue with recursive call to prevent unwanted Flash execution
|
||||||
|
return turn;
|
||||||
|
}
|
||||||
|
|
||||||
const nextSpeakerCheck = await checkNextSpeaker(
|
const nextSpeakerCheck = await checkNextSpeaker(
|
||||||
this.getChat(),
|
this.getChat(),
|
||||||
this,
|
this,
|
||||||
|
@ -288,7 +300,12 @@ export class GeminiClient {
|
||||||
const nextRequest = [{ text: 'Please continue.' }];
|
const nextRequest = [{ text: 'Please continue.' }];
|
||||||
// This recursive call's events will be yielded out, but the final
|
// This recursive call's events will be yielded out, but the final
|
||||||
// turn object will be from the top-level call.
|
// turn object will be from the top-level call.
|
||||||
yield* this.sendMessageStream(nextRequest, signal, boundedTurns - 1);
|
yield* this.sendMessageStream(
|
||||||
|
nextRequest,
|
||||||
|
signal,
|
||||||
|
boundedTurns - 1,
|
||||||
|
initialModel,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return turn;
|
return turn;
|
||||||
|
@ -298,9 +315,12 @@ export class GeminiClient {
|
||||||
contents: Content[],
|
contents: Content[],
|
||||||
schema: SchemaUnion,
|
schema: SchemaUnion,
|
||||||
abortSignal: AbortSignal,
|
abortSignal: AbortSignal,
|
||||||
model: string = DEFAULT_GEMINI_FLASH_MODEL,
|
model?: string,
|
||||||
config: GenerateContentConfig = {},
|
config: GenerateContentConfig = {},
|
||||||
): Promise<Record<string, unknown>> {
|
): Promise<Record<string, unknown>> {
|
||||||
|
// Use current model from config instead of hardcoded Flash model
|
||||||
|
const modelToUse =
|
||||||
|
model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
|
||||||
try {
|
try {
|
||||||
const userMemory = this.config.getUserMemory();
|
const userMemory = this.config.getUserMemory();
|
||||||
const systemInstruction = getCoreSystemPrompt(userMemory);
|
const systemInstruction = getCoreSystemPrompt(userMemory);
|
||||||
|
@ -312,7 +332,7 @@ export class GeminiClient {
|
||||||
|
|
||||||
const apiCall = () =>
|
const apiCall = () =>
|
||||||
this.getContentGenerator().generateContent({
|
this.getContentGenerator().generateContent({
|
||||||
model,
|
model: modelToUse,
|
||||||
config: {
|
config: {
|
||||||
...requestConfig,
|
...requestConfig,
|
||||||
systemInstruction,
|
systemInstruction,
|
||||||
|
@ -585,10 +605,14 @@ export class GeminiClient {
|
||||||
fallbackModel,
|
fallbackModel,
|
||||||
error,
|
error,
|
||||||
);
|
);
|
||||||
if (accepted) {
|
if (accepted !== false && accepted !== null) {
|
||||||
this.config.setModel(fallbackModel);
|
this.config.setModel(fallbackModel);
|
||||||
return fallbackModel;
|
return fallbackModel;
|
||||||
}
|
}
|
||||||
|
// Check if the model was switched manually in the handler
|
||||||
|
if (this.config.getModel() === fallbackModel) {
|
||||||
|
return null; // Model was switched but don't continue with current prompt
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.warn('Flash fallback handler failed:', error);
|
console.warn('Flash fallback handler failed:', error);
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,6 +43,8 @@ describe('GeminiChat', () => {
|
||||||
}),
|
}),
|
||||||
getModel: vi.fn().mockReturnValue('gemini-pro'),
|
getModel: vi.fn().mockReturnValue('gemini-pro'),
|
||||||
setModel: vi.fn(),
|
setModel: vi.fn(),
|
||||||
|
getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
|
||||||
|
setQuotaErrorOccurred: vi.fn(),
|
||||||
flashFallbackHandler: undefined,
|
flashFallbackHandler: undefined,
|
||||||
} as unknown as Config;
|
} as unknown as Config;
|
||||||
|
|
||||||
|
|
|
@ -217,10 +217,14 @@ export class GeminiChat {
|
||||||
fallbackModel,
|
fallbackModel,
|
||||||
error,
|
error,
|
||||||
);
|
);
|
||||||
if (accepted) {
|
if (accepted !== false && accepted !== null) {
|
||||||
this.config.setModel(fallbackModel);
|
this.config.setModel(fallbackModel);
|
||||||
return fallbackModel;
|
return fallbackModel;
|
||||||
}
|
}
|
||||||
|
// Check if the model was switched manually in the handler
|
||||||
|
if (this.config.getModel() === fallbackModel) {
|
||||||
|
return null; // Model was switched but don't continue with current prompt
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.warn('Flash fallback handler failed:', error);
|
console.warn('Flash fallback handler failed:', error);
|
||||||
}
|
}
|
||||||
|
@ -262,12 +266,25 @@ export class GeminiChat {
|
||||||
let response: GenerateContentResponse;
|
let response: GenerateContentResponse;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const apiCall = () =>
|
const apiCall = () => {
|
||||||
this.contentGenerator.generateContent({
|
const modelToUse = this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
|
||||||
model: this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL,
|
|
||||||
|
// Prevent Flash model calls immediately after quota error
|
||||||
|
if (
|
||||||
|
this.config.getQuotaErrorOccurred() &&
|
||||||
|
modelToUse === DEFAULT_GEMINI_FLASH_MODEL
|
||||||
|
) {
|
||||||
|
throw new Error(
|
||||||
|
'Please submit a new query to continue with the Flash model.',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.contentGenerator.generateContent({
|
||||||
|
model: modelToUse,
|
||||||
contents: requestContents,
|
contents: requestContents,
|
||||||
config: { ...this.generationConfig, ...params.config },
|
config: { ...this.generationConfig, ...params.config },
|
||||||
});
|
});
|
||||||
|
};
|
||||||
|
|
||||||
response = await retryWithBackoff(apiCall, {
|
response = await retryWithBackoff(apiCall, {
|
||||||
shouldRetry: (error: Error) => {
|
shouldRetry: (error: Error) => {
|
||||||
|
@ -354,12 +371,25 @@ export class GeminiChat {
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const apiCall = () =>
|
const apiCall = () => {
|
||||||
this.contentGenerator.generateContentStream({
|
const modelToUse = this.config.getModel();
|
||||||
model: this.config.getModel(),
|
|
||||||
|
// Prevent Flash model calls immediately after quota error
|
||||||
|
if (
|
||||||
|
this.config.getQuotaErrorOccurred() &&
|
||||||
|
modelToUse === DEFAULT_GEMINI_FLASH_MODEL
|
||||||
|
) {
|
||||||
|
throw new Error(
|
||||||
|
'Please submit a new query to continue with the Flash model.',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.contentGenerator.generateContentStream({
|
||||||
|
model: modelToUse,
|
||||||
contents: requestContents,
|
contents: requestContents,
|
||||||
config: { ...this.generationConfig, ...params.config },
|
config: { ...this.generationConfig, ...params.config },
|
||||||
});
|
});
|
||||||
|
};
|
||||||
|
|
||||||
// Note: Retrying streams can be complex. If generateContentStream itself doesn't handle retries
|
// Note: Retrying streams can be complex. If generateContentStream itself doesn't handle retries
|
||||||
// for transient issues internally before yielding the async generator, this retry will re-initiate
|
// for transient issues internally before yielding the async generator, this retry will re-initiate
|
||||||
|
|
|
@ -214,6 +214,8 @@ describe('editCorrector', () => {
|
||||||
setAlwaysSkipModificationConfirmation: vi.fn((skip: boolean) => {
|
setAlwaysSkipModificationConfirmation: vi.fn((skip: boolean) => {
|
||||||
configParams.alwaysSkipModificationConfirmation = skip;
|
configParams.alwaysSkipModificationConfirmation = skip;
|
||||||
}),
|
}),
|
||||||
|
getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
|
||||||
|
setQuotaErrorOccurred: vi.fn(),
|
||||||
} as unknown as Config;
|
} as unknown as Config;
|
||||||
|
|
||||||
callCount = 0;
|
callCount = 0;
|
||||||
|
@ -654,6 +656,8 @@ describe('editCorrector', () => {
|
||||||
setAlwaysSkipModificationConfirmation: vi.fn((skip: boolean) => {
|
setAlwaysSkipModificationConfirmation: vi.fn((skip: boolean) => {
|
||||||
configParams.alwaysSkipModificationConfirmation = skip;
|
configParams.alwaysSkipModificationConfirmation = skip;
|
||||||
}),
|
}),
|
||||||
|
getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
|
||||||
|
setQuotaErrorOccurred: vi.fn(),
|
||||||
} as unknown as Config;
|
} as unknown as Config;
|
||||||
|
|
||||||
callCount = 0;
|
callCount = 0;
|
||||||
|
|
|
@ -41,14 +41,23 @@ export function isProQuotaExceededError(error: unknown): boolean {
|
||||||
// Check for Pro quota exceeded errors by looking for the specific pattern
|
// Check for Pro quota exceeded errors by looking for the specific pattern
|
||||||
// This will match patterns like:
|
// This will match patterns like:
|
||||||
// - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
|
// - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
|
||||||
// - "Quota exceeded for quota metric 'Gemini 1.5-preview Pro Requests'"
|
// - "Quota exceeded for quota metric 'Gemini 2.5-preview Pro Requests'"
|
||||||
// - "Quota exceeded for quota metric 'Gemini beta-3.0 Pro Requests'"
|
|
||||||
// - "Quota exceeded for quota metric 'Gemini experimental-v2 Pro Requests'"
|
|
||||||
// We use string methods instead of regex to avoid ReDoS vulnerabilities
|
// We use string methods instead of regex to avoid ReDoS vulnerabilities
|
||||||
|
|
||||||
const checkMessage = (message: string): boolean =>
|
const checkMessage = (message: string): boolean => {
|
||||||
|
console.log('[DEBUG] isProQuotaExceededError checking message:', message);
|
||||||
|
const result =
|
||||||
message.includes("Quota exceeded for quota metric 'Gemini") &&
|
message.includes("Quota exceeded for quota metric 'Gemini") &&
|
||||||
message.includes("Pro Requests'");
|
message.includes("Pro Requests'");
|
||||||
|
console.log('[DEBUG] isProQuotaExceededError result:', result);
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Log the full error object to understand its structure
|
||||||
|
console.log(
|
||||||
|
'[DEBUG] isProQuotaExceededError - full error object:',
|
||||||
|
JSON.stringify(error, null, 2),
|
||||||
|
);
|
||||||
|
|
||||||
if (typeof error === 'string') {
|
if (typeof error === 'string') {
|
||||||
return checkMessage(error);
|
return checkMessage(error);
|
||||||
|
@ -62,6 +71,38 @@ export function isProQuotaExceededError(error: unknown): boolean {
|
||||||
return checkMessage(error.error.message);
|
return checkMessage(error.error.message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if it's a Gaxios error with response data
|
||||||
|
if (error && typeof error === 'object' && 'response' in error) {
|
||||||
|
const gaxiosError = error as {
|
||||||
|
response?: {
|
||||||
|
data?: unknown;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
if (gaxiosError.response && gaxiosError.response.data) {
|
||||||
|
console.log(
|
||||||
|
'[DEBUG] isProQuotaExceededError - checking response data:',
|
||||||
|
gaxiosError.response.data,
|
||||||
|
);
|
||||||
|
if (typeof gaxiosError.response.data === 'string') {
|
||||||
|
return checkMessage(gaxiosError.response.data);
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
typeof gaxiosError.response.data === 'object' &&
|
||||||
|
gaxiosError.response.data !== null &&
|
||||||
|
'error' in gaxiosError.response.data
|
||||||
|
) {
|
||||||
|
const errorData = gaxiosError.response.data as {
|
||||||
|
error?: { message?: string };
|
||||||
|
};
|
||||||
|
return checkMessage(errorData.error?.message || '');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
'[DEBUG] isProQuotaExceededError - no matching error format for:',
|
||||||
|
error,
|
||||||
|
);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ export interface RetryOptions {
|
||||||
onPersistent429?: (
|
onPersistent429?: (
|
||||||
authType?: string,
|
authType?: string,
|
||||||
error?: unknown,
|
error?: unknown,
|
||||||
) => Promise<string | null>;
|
) => Promise<string | boolean | null>;
|
||||||
authType?: string;
|
authType?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -102,13 +102,16 @@ export async function retryWithBackoff<T>(
|
||||||
) {
|
) {
|
||||||
try {
|
try {
|
||||||
const fallbackModel = await onPersistent429(authType, error);
|
const fallbackModel = await onPersistent429(authType, error);
|
||||||
if (fallbackModel) {
|
if (fallbackModel !== false && fallbackModel !== null) {
|
||||||
// Reset attempt counter and try with new model
|
// Reset attempt counter and try with new model
|
||||||
attempt = 0;
|
attempt = 0;
|
||||||
consecutive429Count = 0;
|
consecutive429Count = 0;
|
||||||
currentDelay = initialDelayMs;
|
currentDelay = initialDelayMs;
|
||||||
// With the model updated, we continue to the next attempt
|
// With the model updated, we continue to the next attempt
|
||||||
continue;
|
continue;
|
||||||
|
} else {
|
||||||
|
// Fallback handler returned null/false, meaning don't continue - stop retry process
|
||||||
|
throw error;
|
||||||
}
|
}
|
||||||
} catch (fallbackError) {
|
} catch (fallbackError) {
|
||||||
// If fallback fails, continue with original error
|
// If fallback fails, continue with original error
|
||||||
|
@ -126,13 +129,16 @@ export async function retryWithBackoff<T>(
|
||||||
) {
|
) {
|
||||||
try {
|
try {
|
||||||
const fallbackModel = await onPersistent429(authType, error);
|
const fallbackModel = await onPersistent429(authType, error);
|
||||||
if (fallbackModel) {
|
if (fallbackModel !== false && fallbackModel !== null) {
|
||||||
// Reset attempt counter and try with new model
|
// Reset attempt counter and try with new model
|
||||||
attempt = 0;
|
attempt = 0;
|
||||||
consecutive429Count = 0;
|
consecutive429Count = 0;
|
||||||
currentDelay = initialDelayMs;
|
currentDelay = initialDelayMs;
|
||||||
// With the model updated, we continue to the next attempt
|
// With the model updated, we continue to the next attempt
|
||||||
continue;
|
continue;
|
||||||
|
} else {
|
||||||
|
// Fallback handler returned null/false, meaning don't continue - stop retry process
|
||||||
|
throw error;
|
||||||
}
|
}
|
||||||
} catch (fallbackError) {
|
} catch (fallbackError) {
|
||||||
// If fallback fails, continue with original error
|
// If fallback fails, continue with original error
|
||||||
|
@ -155,13 +161,16 @@ export async function retryWithBackoff<T>(
|
||||||
) {
|
) {
|
||||||
try {
|
try {
|
||||||
const fallbackModel = await onPersistent429(authType, error);
|
const fallbackModel = await onPersistent429(authType, error);
|
||||||
if (fallbackModel) {
|
if (fallbackModel !== false && fallbackModel !== null) {
|
||||||
// Reset attempt counter and try with new model
|
// Reset attempt counter and try with new model
|
||||||
attempt = 0;
|
attempt = 0;
|
||||||
consecutive429Count = 0;
|
consecutive429Count = 0;
|
||||||
currentDelay = initialDelayMs;
|
currentDelay = initialDelayMs;
|
||||||
// With the model updated, we continue to the next attempt
|
// With the model updated, we continue to the next attempt
|
||||||
continue;
|
continue;
|
||||||
|
} else {
|
||||||
|
// Fallback handler returned null/false, meaning don't continue - stop retry process
|
||||||
|
throw error;
|
||||||
}
|
}
|
||||||
} catch (fallbackError) {
|
} catch (fallbackError) {
|
||||||
// If fallback fails, continue with original error
|
// If fallback fails, continue with original error
|
||||||
|
|
Loading…
Reference in New Issue