Skip to content

Commit

Permalink
feat: Make list of retriable error codes configurable #358 (#359)
Browse files Browse the repository at this point in the history
Co-authored-by: Aliaksandr Stsiapanay <[email protected]>
  • Loading branch information
astsiapanay and astsiapanay authored Jun 10, 2024
1 parent b72dead commit ee435a7
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 3 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ Dynamic settings can include the following parameters:
| roles | API key or user roles. Each role may have limits to be associated with applications, models, assistants or addons. Refer to [API Keys Roles and Limits](https://github.com/epam/ai-dial/blob/main/docs/tutorials/roles-management.md) to learn more. |
| roles.<role_name> | `limits`: Limits for models, applications, or assistants. |
| roles.<role_name>.limits | `minute`: Total tokens per minute limit sent to the model, managed via floating window approach for well-distributed rate limiting. If it's not set, the default value is unlimited.<br />`day`: Total tokens per day limit sent to the model, managed via floating window approach for balanced rate limiting. If it's not set, the default value is unlimited. |
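| retriableErrorCodes | List of additional HTTP status codes treated as retriable when handling outages at LLM providers. These codes are checked in addition to the built-in defaults (429, 502, 503, 504); a request is retried only if another upstream is available. |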

## License

Expand Down
3 changes: 2 additions & 1 deletion sample/aidial.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -211,5 +211,6 @@
}
}
}
}
},
"retriableErrorCodes": [401, 403]
}
2 changes: 2 additions & 0 deletions src/main/java/com/epam/aidial/core/config/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

@Data
@JsonIgnoreProperties(ignoreUnknown = true)
Expand All @@ -20,6 +21,7 @@ public class Config {
private Assistants assistant = new Assistants();
private Map<String, Key> keys = new HashMap<>();
private Map<String, Role> roles = new HashMap<>();
private Set<Integer> retriableErrorCodes = Set.of();


public Deployment selectDeployment(String deploymentId) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
@Slf4j
public class DeploymentPostController {

private static final Set<Integer> RETRIABLE_HTTP_CODES = Set.of(HttpStatus.TOO_MANY_REQUESTS.getCode(),
private static final Set<Integer> DEFAULT_RETRIABLE_HTTP_CODES = Set.of(HttpStatus.TOO_MANY_REQUESTS.getCode(),
HttpStatus.BAD_GATEWAY.getCode(), HttpStatus.GATEWAY_TIMEOUT.getCode(),
HttpStatus.SERVICE_UNAVAILABLE.getCode());

Expand Down Expand Up @@ -269,7 +269,7 @@ private void handleProxyResponse(HttpClientResponse proxyResponse) {
context.getDeployment().getEndpoint(), context.getUpstreamRoute().get().getEndpoint(),
proxyResponse.statusCode(), proxyResponse.headers().size());

if (context.getUpstreamRoute().hasNext() && RETRIABLE_HTTP_CODES.contains(proxyResponse.statusCode())) {
if (context.getUpstreamRoute().hasNext() && isRetriableError(proxyResponse.statusCode())) {
sendRequest(); // try next
return;
}
Expand All @@ -296,6 +296,11 @@ private void handleProxyResponse(HttpClientResponse proxyResponse) {
.onFailure(this::handleResponseError);
}

/**
 * Decides whether a proxied response with the given status code should be retried.
 *
 * <p>A retry is only possible when the upstream route still has another entry to try.
 * The status code is then looked up first in the built-in default codes and, if not
 * found there, in the retriable codes taken from the current configuration.
 *
 * @param statusCode HTTP status code of the upstream response
 * @return {@code true} if another upstream is available and the code is retriable
 */
private boolean isRetriableError(int statusCode) {
    // Without a further upstream there is nothing to retry against.
    if (!context.getUpstreamRoute().hasNext()) {
        return false;
    }
    // Built-in codes are always retriable, regardless of configuration.
    if (DEFAULT_RETRIABLE_HTTP_CODES.contains(statusCode)) {
        return true;
    }
    // Otherwise fall back to the codes supplied via configuration.
    return context.getConfig().getRetriableErrorCodes().contains(statusCode);
}

/**
* Called when the proxy has sent the response from the origin to the client.
*/
Expand Down

0 comments on commit ee435a7

Please sign in to comment.