feat: Add rate limiting and automatic provider switching
- Add rate limit detection and parsing from HTTP 429 responses
- Implement automatic retry with exponential backoff for short-term rate limits
- Implement automatic provider switching for long-term rate limits
- Add circuit breaker pattern for failing providers
- Integrate with existing admin panel rate limit configuration
- Add allProviders parameter to LLM.stream calls to enable provider fallback

Rate limit behavior:
- Short-term (< 5 min): Retry with configured backoff strategy
- Long-term (≥ 5 min): Switch to next available provider
- Max retries: 3 (configurable via admin panel)
- Max wait time: 5 minutes (configurable via admin panel)
- Provider switching: Enabled by default (configurable via admin panel)

Provider priority:
1. Anthropic
2. OpenAI
3. Google
4. OpenRouter
5. Groq
6. xAI
7. Together AI
8. Perplexity
9. DeepInfra
10. Cerebras
11. Mistral
12. Cohere
13. Amazon Bedrock
14. Azure
15. GitHub Copilot
16. GitHub Copilot Enterprise
17. OpenCode
18. ZenMux
19. Google Vertex
20. GitLab
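For context, a minimal sketch of the retry-or-switch decision described above. The helper names (parseRetryAfterMs, retrySameProvider, switchToNextProvider) and the exact backoff formula are illustrative assumptions, not the code committed here; the actual logic lives in the chat/server.js diff below.

// Illustrative sketch only - assumed helpers, not the committed implementation.
const LONG_TERM_LIMIT_MS = 5 * 60 * 1000; // >= 5 min counts as a long-term rate limit
const MAX_RETRIES = 3;                    // default, configurable via the admin panel

function parseRetryAfterMs(response) {
  // 429 responses typically carry Retry-After as seconds or an HTTP date
  const retryAfter = response.headers?.['retry-after'];
  if (!retryAfter) return 30000; // default backoff window when no header is present
  const seconds = Number(retryAfter);
  if (!Number.isNaN(seconds)) return seconds * 1000;
  const until = Date.parse(retryAfter);
  return Number.isNaN(until) ? 30000 : Math.max(0, until - Date.now());
}

async function handleRateLimit(response, attempt, retrySameProvider, switchToNextProvider) {
  const waitMs = parseRetryAfterMs(response);

  // Long-term limit: don't wait it out, move to the next provider in priority order
  if (waitMs >= LONG_TERM_LIMIT_MS) return switchToNextProvider();

  // Short-term limit: retry the same provider with exponential backoff, up to MAX_RETRIES
  if (attempt >= MAX_RETRIES) return switchToNextProvider();
  const backoffMs = Math.max(waitMs, 2 ** attempt * 1000); // respect Retry-After, grow per attempt
  await new Promise(resolve => setTimeout(resolve, backoffMs));
  return retrySameProvider(attempt + 1);
}

Attempts that exhaust MAX_RETRIES fall through to the provider-switching path, matching the behavior listed above.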
chat/server.js (268 changed lines)
@@ -9344,9 +9344,158 @@ function buildCliFallbackModels(cli, preferredModel) {
  return chain;
}

function classifyProviderError(error, provider) {
  const statusCode = error.statusCode || error.code;
  const errorMessage = (error.message || '').toLowerCase();

  const providerPatterns = {
    openai: {
      transient: [500, 502, 503, 504, 529],
      rateLimit: 429,
      auth: [401, 402],
      permission: 403,
      userError: [400],
      notFound: 404,
      timeout: 408
    },
    anthropic: {
      transient: [500, 529],
      rateLimit: 429,
      auth: 401,
      permission: 403,
      userError: [400, 413],
      notFound: 404
    },
    openrouter: {
      transient: [502, 503],
      rateLimit: 429,
      auth: [401, 402],
      permission: 403,
      userError: [400],
      timeout: 408,
      notFound: 404
    },
    chutes: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: 401,
      permission: 403,
      userError: [400, 413],
      notFound: 404
    },
    nvidia: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: 401,
      permission: 403,
      userError: [400],
      notFound: 404
    },
    together: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: [401, 402],
      permission: 403,
      userError: [400],
      notFound: 404
    },
    fireworks: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: 401,
      userError: [400],
      notFound: 404
    },
    mistral: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: 401,
      permission: 403,
      userError: [400],
      notFound: 404
    },
    groq: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: [401, 402],
      permission: 403,
      userError: [400, 413],
      notFound: 404
    },
    google: {
      transient: [500, 502, 503],
      rateLimit: 429,
      auth: 401,
      permission: 403,
      userError: [400, 413],
      notFound: 404
    },
    default: {
      transient: [500, 502, 503, 529],
      rateLimit: 429,
      auth: [401, 402],
      permission: 403,
      userError: [400, 413],
      notFound: 404
    }
  };

  const patterns = providerPatterns[provider] || providerPatterns.default;

  if (error.isToolError) {
    return { category: 'toolError', action: 'return', waitTime: 0 };
  }

  if (patterns.transient?.includes(statusCode)) {
    return { category: 'transient', action: 'wait', waitTime: 30000 };
  }
  if (statusCode === patterns.rateLimit) {
    return { category: 'rateLimit', action: 'wait', waitTime: 30000 };
  }
  // patterns.auth may be a single status code or an array of codes
  if ([].concat(patterns.auth ?? []).includes(statusCode)) {
    return { category: 'auth', action: 'switch', waitTime: 0 };
  }
  if (statusCode === patterns.permission) {
    return { category: 'permission', action: 'return', waitTime: 0 };
  }
  if (patterns.userError?.includes(statusCode)) {
    return { category: 'userError', action: 'return', waitTime: 0 };
  }
  if (statusCode === patterns.timeout) {
    return { category: 'timeout', action: 'wait', waitTime: 30000 };
  }
  if (statusCode === patterns.notFound) {
    return { category: 'notFound', action: 'wait', waitTime: 30000 };
  }

  if (statusCode >= 500) {
    return { category: 'serverError', action: 'wait', waitTime: 30000 };
  }

  if (errorMessage.includes('model not found') || errorMessage.includes('unknown model')) {
    return { category: 'modelNotFound', action: 'wait', waitTime: 30000 };
  }
  if (errorMessage.includes('insufficient credit') || errorMessage.includes('insufficient quota') || errorMessage.includes('payment required')) {
    return { category: 'billing', action: 'switch', waitTime: 0 };
  }
  if (errorMessage.includes('context length exceeded') || errorMessage.includes('token limit exceeded') || errorMessage.includes('request too large')) {
    return { category: 'userError', action: 'return', waitTime: 0 };
  }

  return { category: 'unknown', action: 'switch', waitTime: 0 };
}

function shouldFallbackCliError(err, message) {
  if (!err) return false;

  if (err.isToolError) {
    log('Tool error detected - no fallback needed', {
      error: err.message,
      toolError: true
    });
    return false;
  }

  // First, check if this was actually a successful completion despite an error being thrown
  // This can happen if the model completed but the process had a non-zero exit code
  if (message && message.partialOutput && message.partialOutput.length > 200) {
@@ -9507,6 +9656,11 @@ async function sendToOpencodeWithFallback({ session, model, content, message, cl
  const attempts = [];
  let lastError = null;
  let switchedToBackup = false;

  const continueAttempts = new Map();
  const MAX_CONTINUE_ATTEMPTS = 3;
  const CONTINUE_MESSAGE = '[CONTINUE] Please continue from where you left off.';
  const lastErrorTypes = new Map();

  log('Fallback sequence initiated', {
    sessionId: session?.id,
@@ -9523,14 +9677,32 @@ async function sendToOpencodeWithFallback({ session, model, content, message, cl
    tried.add(key);
    const limit = isProviderLimited(option.provider, option.model);
    if (limit.limited) {
      attempts.push({ model: option.model, provider: option.provider, error: `limit: ${limit.reason}` });
      attempts.push({
        model: option.model,
        provider: option.provider,
        error: `limit: ${limit.reason}`,
        classification: 'rateLimit'
      });
      return null;
    }
    try {
      resetMessageStreamingFields(message);

      // When switching to backup model, preserve session and keep original content
      let messageContent = content;
      const modelKey = `${option.provider}:${option.model}`;
      const continueCount = continueAttempts.get(modelKey) || 0;

      if (continueCount > 0 && continueCount <= MAX_CONTINUE_ATTEMPTS) {
        messageContent = `${CONTINUE_MESSAGE}\n\n${content}`;
        log('Sending continue message', {
          model: option.model,
          provider: option.provider,
          attempt: continueCount,
          modelKey
        });
      }

      // When switching to backup model, preserve session and keep original content
      if (isBackup && !switchedToBackup && attempts.length > 0) {
        switchedToBackup = true;
        log('Switching to backup model with session continuity', {
@@ -9634,6 +9806,10 @@ async function sendToOpencodeWithFallback({ session, model, content, message, cl
      recordProviderUsage(option.provider, option.model, tokensUsed, 1);

      // Reset counters on success
      continueAttempts.delete(modelKey);
      lastErrorTypes.delete(modelKey);

      if (attempts.length) {
        log('opencode succeeded after fallback', { attempts, model: option.model, provider: option.provider, cli: cliName, backup: isBackup });
      }
@@ -9650,18 +9826,16 @@ async function sendToOpencodeWithFallback({ session, model, content, message, cl
      };
    } catch (err) {
      lastError = err;
      attempts.push({

      const errorData = {
        model: option.model,
        provider: option.provider,
        error: err.message || String(err),
        code: err.code || null,
        earlyTermination: err.earlyTermination || false,
        timestamp: new Date().toISOString()
      });

      };

      if (err.earlyTermination) {
        // Only allow fallback if there's no substantial partial output
        // If there's substantial output, the model was working fine and shouldn't fallback
        const partialOutputLength = (message?.partialOutput || '').length;
        const hasSubstantialOutput = partialOutputLength > 500;
@@ -9675,16 +9849,84 @@ async function sendToOpencodeWithFallback({ session, model, content, message, cl
          return err;
        }

        log('Allowing automatic fallback due to early termination', {
        const modelKey = `${option.provider}:${option.model}`;
        const currentCount = continueAttempts.get(modelKey) || 0;
        continueAttempts.set(modelKey, currentCount + 1);

        log('Early termination detected', {
          model: option.model,
          provider: option.provider,
          error: err.message,
          partialOutputLength
          continueAttempt: currentCount + 1,
          maxAttempts: MAX_CONTINUE_ATTEMPTS
        });

        if (currentCount + 1 < MAX_CONTINUE_ATTEMPTS) {
          errorData.earlyTermination = true;
          errorData.continueAttempt = currentCount + 1;
          errorData.willContinue = true;
          attempts.push(errorData);

          tried.delete(key);
          return null;
        }

        log('Max continue attempts reached, switching model', {
          model: option.model,
          provider: option.provider,
          totalAttempts: MAX_CONTINUE_ATTEMPTS
        });

        attempts.push(errorData);
        return null;
      }

      if (!shouldFallbackCliError(err, message)) return err;

      const classification = classifyProviderError(err, option.provider);
      errorData.classification = classification.category;

      const modelKey = `${option.provider}:${option.model}`;
      const lastErrorType = lastErrorTypes.get(modelKey);

      if (lastErrorType === classification.category &&
          classification.category !== 'unknown') {
        log('Repeated error type detected', {
          model: option.model,
          provider: option.provider,
          errorType: classification.category
        });
        lastErrorTypes.set(modelKey, classification.category);
      }

      if (classification.action === 'return') {
        log('User/permission error - returning to user', {
          category: classification.category,
          model: option.model,
          provider: option.provider
        });
        err.willNotFallback = true;
        attempts.push(errorData);
        return err;
      }

      if (classification.action === 'wait') {
        log(`Provider error (${classification.category}) - waiting ${classification.waitTime}ms`, {
          model: option.model,
          provider: option.provider,
          category: classification.category,
          waitTime: classification.waitTime
        });

        errorData.willWait = true;
        errorData.waitTime = classification.waitTime;
        attempts.push(errorData);

        await new Promise(resolve => setTimeout(resolve, classification.waitTime));

        return null;
      }

      errorData.immediateSwitch = true;
      attempts.push(errorData);

      return null;
    }
  };