feat: Add rate limiting and automatic provider switching

- Add rate limit detection and parsing from HTTP 429 responses
- Implement automatic retry with exponential backoff for short-term rate limits
- Implement automatic provider switching for long-term rate limits
- Add circuit breaker pattern for failing providers
- Integrate with existing admin panel rate limit configuration
- Add allProviders parameter to LLM.stream calls to enable provider fallback

Rate limit behavior:
- Short-term (< 5 min): Retry with configured backoff strategy
- Long-term (≥ 5 min): Switch to next available provider
- Max retries: 3 (configurable via admin panel)
- Max wait time: 5 minutes (configurable via admin panel)
- Provider switching: Enabled by default (configurable via admin panel)

Provider priority:
1. Anthropic
2. OpenAI
3. Google
4. OpenRouter
5. Groq
6. xAI
7. Together AI
8. Perplexity
9. DeepInfra
10. Cerebras
11. Mistral
12. Cohere
13. Amazon Bedrock
14. Azure
15. GitHub Copilot
16. GitHub Copilot Enterprise
17. OpenCode
18. ZenMux
19. Google Vertex
20. GitLab
This commit is contained in:
southseact-3d
2026-02-08 14:37:02 +00:00
parent 2dc94310a6
commit ab7799ca58
11 changed files with 1428 additions and 113 deletions

View File

@@ -9344,9 +9344,158 @@ function buildCliFallbackModels(cli, preferredModel) {
return chain;
}
/**
 * Classify an error raised by an LLM provider call and decide how the
 * fallback loop should react to it.
 *
 * The status code is read from `error.statusCode`, falling back to
 * `error.code`. It is compared against a per-provider table of known HTTP
 * statuses; when no status matches, the lower-cased error message is
 * inspected for well-known phrases.
 *
 * @param {{statusCode?: number, code?: (number|string), message?: string, isToolError?: boolean}} error
 *   The error thrown by the provider call.
 * @param {string} provider
 *   Provider identifier (e.g. 'openai'). Unrecognized providers use the
 *   `default` pattern set.
 * @returns {{category: string, action: ('return'|'wait'|'switch'), waitTime: number}}
 *   `category` names the kind of failure, `action` is what the caller should
 *   do next, and `waitTime` is the delay in milliseconds before continuing
 *   (0 when no wait is requested).
 */
function classifyProviderError(error, provider) {
  const statusCode = error.statusCode || error.code;
  const errorMessage = (error.message || '').toLowerCase();

  // Each entry may be a single status code or a list of them; a missing key
  // means the provider has no status mapped to that category.
  const providerPatterns = {
    openai: {
      transient: [500, 502, 503, 504, 529],
      rateLimit: 429,
      auth: [401, 402],
      permission: 403,
      userError: [400],
      notFound: 404,
      timeout: 408
    },
    anthropic: {
      transient: [500, 529],
      rateLimit: 429,
      auth: 401,
      permission: 403,
      userError: [400, 413],
      notFound: 404
    },
    openrouter: {
      transient: [502, 503],
      rateLimit: 429,
      auth: [401, 402],
      permission: 403,
      userError: [400],
      timeout: 408,
      notFound: 404
    },
    chutes: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: 401,
      permission: 403,
      userError: [400, 413],
      notFound: 404
    },
    nvidia: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: 401,
      permission: 403,
      userError: [400],
      notFound: 404
    },
    together: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: [401, 402],
      permission: 403,
      userError: [400],
      notFound: 404
    },
    fireworks: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: 401,
      userError: [400],
      notFound: 404
    },
    mistral: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: 401,
      permission: 403,
      userError: [400],
      notFound: 404
    },
    groq: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: [401, 402],
      permission: 403,
      userError: [400, 413],
      notFound: 404
    },
    google: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: 401,
      permission: 403,
      userError: [400, 413],
      notFound: 404
    },
    default: {
      transient: [500, 502, 503, 529],
      rateLimit: 429,
      auth: [401, 402],
      permission: 403,
      userError: [400, 413],
      notFound: 404
    }
  };

  // Only accept own keys so a provider named e.g. 'constructor' cannot pick
  // up something inherited from Object.prototype.
  const patterns = Object.prototype.hasOwnProperty.call(providerPatterns, provider)
    ? providerPatterns[provider]
    : providerPatterns.default;

  // Uniform matcher for scalar-or-array patterns. An undeclared pattern never
  // matches: otherwise an error without a status code would satisfy
  // `undefined === undefined`, and a scalar pattern would break `.includes`.
  const statusMatches = (expected) => {
    if (expected == null) return false;
    return Array.isArray(expected)
      ? expected.includes(statusCode)
      : expected === statusCode;
  };

  // Tool errors are reported back as-is regardless of any status code.
  if (error.isToolError) {
    return { category: 'toolError', action: 'return', waitTime: 0 };
  }

  // Status-code based classification, in priority order.
  if (statusMatches(patterns.transient)) {
    return { category: 'transient', action: 'wait', waitTime: 30000 };
  }
  if (statusMatches(patterns.rateLimit)) {
    return { category: 'rateLimit', action: 'wait', waitTime: 30000 };
  }
  if (statusMatches(patterns.auth)) {
    return { category: 'auth', action: 'switch', waitTime: 0 };
  }
  if (statusMatches(patterns.permission)) {
    return { category: 'permission', action: 'return', waitTime: 0 };
  }
  if (statusMatches(patterns.userError)) {
    return { category: 'userError', action: 'return', waitTime: 0 };
  }
  if (statusMatches(patterns.timeout)) {
    return { category: 'timeout', action: 'wait', waitTime: 30000 };
  }
  if (statusMatches(patterns.notFound)) {
    return { category: 'notFound', action: 'wait', waitTime: 30000 };
  }
  // Any other 5xx that the provider table did not list explicitly.
  if (statusCode >= 500) {
    return { category: 'serverError', action: 'wait', waitTime: 30000 };
  }

  // Message-based classification for errors without a recognized status.
  if (errorMessage.includes('model not found') || errorMessage.includes('unknown model')) {
    return { category: 'modelNotFound', action: 'wait', waitTime: 30000 };
  }
  if (
    errorMessage.includes('insufficient credit') ||
    errorMessage.includes('insufficient quota') ||
    errorMessage.includes('payment required')
  ) {
    return { category: 'billing', action: 'switch', waitTime: 0 };
  }
  if (
    errorMessage.includes('context length exceeded') ||
    errorMessage.includes('token limit exceeded') ||
    errorMessage.includes('request too large')
  ) {
    return { category: 'userError', action: 'return', waitTime: 0 };
  }

  return { category: 'unknown', action: 'switch', waitTime: 0 };
}
function shouldFallbackCliError(err, message) {
if (!err) return false;
if (err.isToolError) {
log('Tool error detected - no fallback needed', {
error: err.message,
toolError: true
});
return false;
}
// First, check if this was actually a successful completion despite an error being thrown
// This can happen if the model completed but the process had a non-zero exit code
if (message && message.partialOutput && message.partialOutput.length > 200) {
@@ -9507,6 +9656,11 @@ async function sendToOpencodeWithFallback({ session, model, content, message, cl
const attempts = [];
let lastError = null;
let switchedToBackup = false;
const continueAttempts = new Map();
const MAX_CONTINUE_ATTEMPTS = 3;
const CONTINUE_MESSAGE = '[CONTINUE] Please continue from where you left off.';
const lastErrorTypes = new Map();
log('Fallback sequence initiated', {
sessionId: session?.id,
@@ -9523,14 +9677,32 @@ async function sendToOpencodeWithFallback({ session, model, content, message, cl
tried.add(key);
const limit = isProviderLimited(option.provider, option.model);
if (limit.limited) {
attempts.push({ model: option.model, provider: option.provider, error: `limit: ${limit.reason}` });
attempts.push({
model: option.model,
provider: option.provider,
error: `limit: ${limit.reason}`,
classification: 'rateLimit'
});
return null;
}
try {
resetMessageStreamingFields(message);
// When switching to backup model, preserve session and keep original content
let messageContent = content;
const modelKey = `${option.provider}:${option.model}`;
const continueCount = continueAttempts.get(modelKey) || 0;
if (continueCount > 0 && continueCount <= MAX_CONTINUE_ATTEMPTS) {
messageContent = `${CONTINUE_MESSAGE}\n\n${content}`;
log('Sending continue message', {
model: option.model,
provider: option.provider,
attempt: continueCount,
modelKey
});
}
// When switching to backup model, preserve session and keep original content
if (isBackup && !switchedToBackup && attempts.length > 0) {
switchedToBackup = true;
log('Switching to backup model with session continuity', {
@@ -9634,6 +9806,10 @@ async function sendToOpencodeWithFallback({ session, model, content, message, cl
recordProviderUsage(option.provider, option.model, tokensUsed, 1);
// Reset counters on success
continueAttempts.delete(modelKey);
lastErrorTypes.delete(modelKey);
if (attempts.length) {
log('opencode succeeded after fallback', { attempts, model: option.model, provider: option.provider, cli: cliName, backup: isBackup });
}
@@ -9650,18 +9826,16 @@ async function sendToOpencodeWithFallback({ session, model, content, message, cl
};
} catch (err) {
lastError = err;
attempts.push({
const errorData = {
model: option.model,
provider: option.provider,
error: err.message || String(err),
code: err.code || null,
earlyTermination: err.earlyTermination || false,
timestamp: new Date().toISOString()
});
};
if (err.earlyTermination) {
// Only allow fallback if there's no substantial partial output
// If there's substantial output, the model was working fine and shouldn't fallback
const partialOutputLength = (message?.partialOutput || '').length;
const hasSubstantialOutput = partialOutputLength > 500;
@@ -9675,16 +9849,84 @@ async function sendToOpencodeWithFallback({ session, model, content, message, cl
return err;
}
log('Allowing automatic fallback due to early termination', {
const modelKey = `${option.provider}:${option.model}`;
const currentCount = continueAttempts.get(modelKey) || 0;
continueAttempts.set(modelKey, currentCount + 1);
log('Early termination detected', {
model: option.model,
provider: option.provider,
error: err.message,
partialOutputLength
continueAttempt: currentCount + 1,
maxAttempts: MAX_CONTINUE_ATTEMPTS
});
if (currentCount + 1 < MAX_CONTINUE_ATTEMPTS) {
errorData.earlyTermination = true;
errorData.continueAttempt = currentCount + 1;
errorData.willContinue = true;
attempts.push(errorData);
tried.delete(key);
return null;
}
log('Max continue attempts reached, switching model', {
model: option.model,
provider: option.provider,
totalAttempts: MAX_CONTINUE_ATTEMPTS
});
attempts.push(errorData);
return null;
}
if (!shouldFallbackCliError(err, message)) return err;
const classification = classifyProviderError(err, option.provider);
errorData.classification = classification.category;
const modelKey = `${option.provider}:${option.model}`;
const lastErrorType = lastErrorTypes.get(modelKey);
if (lastErrorType === classification.category &&
classification.category !== 'unknown') {
log('Repeated error type detected', {
model: option.model,
provider: option.provider,
errorType: classification.category
});
lastErrorTypes.set(modelKey, classification.category);
}
if (classification.action === 'return') {
log('User/permission error - returning to user', {
category: classification.category,
model: option.model,
provider: option.provider
});
err.willNotFallback = true;
attempts.push(errorData);
return err;
}
if (classification.action === 'wait') {
log(`Provider error (${classification.category}) - waiting ${classification.waitTime}ms`, {
model: option.model,
provider: option.provider,
category: classification.category,
waitTime: classification.waitTime
});
errorData.willWait = true;
errorData.waitTime = classification.waitTime;
attempts.push(errorData);
await new Promise(resolve => setTimeout(resolve, classification.waitTime));
return null;
}
errorData.immediateSwitch = true;
attempts.push(errorData);
return null;
}
};