---
# Router configuration: routing strategy, optional Redis connection,
# pre-call checks, cooldown behaviour, and per-exception retry /
# allowed-failure policies.
router_settings:
  # Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"]
  # default="simple-shuffle" - RECOMMENDED for best performance
  routing_strategy: simple-shuffle

  # Redis connection placeholders (left commented out here).
  # redis_host: <your-redis-host>  # string
  # redis_password: <your-redis-password>  # string
  # redis_port: <your-redis-port>  # string

  # bool - Before call is made check if a call is within model context window
  enable_pre_call_checks: true

  # Cooldown model if it fails > allowed_fails calls in a minute.
  allowed_fails: 3
  # (in seconds) how long to cooldown model if fails/min > allowed_fails
  cooldown_time: 30
  # bool - Disable cooldowns for all models
  # NOTE(review): with this set to true, the cooldown-related settings in this
  # section (allowed_fails, cooldown_time, allowed_fails_policy) presumably have
  # no effect - confirm this is intended.
  disable_cooldowns: true

  # bool - Use tag based routing for requests
  enable_tag_filtering: true

  # Dict[str, int]: retry policy for different types of exceptions
  retry_policy:
    AuthenticationErrorRetries: 3
    TimeoutErrorRetries: 3
    RateLimitErrorRetries: 3
    ContentPolicyViolationErrorRetries: 4
    InternalServerErrorRetries: 4

  # Per-exception failure counts allowed before cooling down a deployment.
  allowed_fails_policy:
    BadRequestErrorAllowedFails: 1000  # Allow 1000 BadRequestErrors before cooling down a deployment
    AuthenticationErrorAllowedFails: 10  # int
    TimeoutErrorAllowedFails: 12  # int
    RateLimitErrorAllowedFails: 10000  # int
    ContentPolicyViolationErrorAllowedFails: 15  # int
    InternalServerErrorAllowedFails: 20  # int