From 8594dc7c82ccf4c15423fab92b9da3cd74b3413a Mon Sep 17 00:00:00 2001 From: Artsiom Korzun <72259616+artsiomkorzun@users.noreply.github.com> Date: Thu, 26 Dec 2024 17:00:30 +0100 Subject: [PATCH] feat: add cache max size setting to ResourceService (#628) --- README.md | 3 +- .../com/epam/aidial/core/server/Proxy.java | 5 +-- .../src/main/resources/aidial.settings.json | 3 +- .../epam/aidial/core/server/ProxyTest.java | 7 ++++ .../aidial/core/server/ResourceApiTest.java | 42 ++++++++++++++++++- .../core/server/limiter/RateLimiterTest.java | 4 +- .../core/server/security/ApiKeyStoreTest.java | 2 +- .../server/token/TokenStatsTrackerTest.java | 2 +- .../src/test/resources/aidial.settings.json | 3 +- server/src/test/resources/response.txt | 7 +--- .../core/storage/service/ResourceService.java | 8 +++- 11 files changed, 65 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index e9d8efcd6..1ff095569 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,8 @@ Priority order: | storage.maxUploadedFileSize | 536870912 | No |Maximum size in bytes of uploaded file. If a size of uploaded file exceeds the limit the server returns HTTP code 413 | encryption.secret | - | No |Secret is used for AES encryption of a prefix to the bucket blob storage. The value should be random generated string. | encryption.key | - | No |Key is used for AES encryption of a prefix to the bucket blob storage. The value should be random generated string. -| resources.maxSize | 1048576 | No |Max allowed size in bytes for a resource. +| resources.maxSize | 67108864 | No |Max allowed size in bytes for a resource. +| resources.maxSizeToCache | 1048576 | No |Max size in bytes for a resource to cache in Redis. | resources.syncPeriod | 60000 | No |Period in milliseconds, how frequently check for resources to sync. | resources.syncDelay | 120000 | No |Delay in milliseconds for a resource to be written back in object storage after last modification. | resources.syncBatch | 4096 | No |How many resources to sync in one go. diff --git a/server/src/main/java/com/epam/aidial/core/server/Proxy.java b/server/src/main/java/com/epam/aidial/core/server/Proxy.java index 4390cc794..7449864fd 100644 --- a/server/src/main/java/com/epam/aidial/core/server/Proxy.java +++ b/server/src/main/java/com/epam/aidial/core/server/Proxy.java @@ -65,9 +65,6 @@ public class Proxy implements Handler { public static final String HEADER_UPSTREAM_EXTRA_DATA = "X-UPSTREAM-EXTRA-DATA"; public static final String HEADER_UPSTREAM_ATTEMPTS = "X-UPSTREAM-ATTEMPTS"; public static final String HEADER_CONTENT_TYPE_APPLICATION_JSON = "application/json"; - - public static final int REQUEST_BODY_MAX_SIZE_BYTES = 16 * 1024 * 1024; - private static final Set ALLOWED_HTTP_METHODS = Set.of(HttpMethod.GET, HttpMethod.POST, HttpMethod.PUT, HttpMethod.DELETE, HttpMethod.HEAD); private final Vertx vertx; @@ -152,7 +149,7 @@ private void handleRequest(HttpServerRequest request) { } } else { // not only the case, Content-Length can be missing when Transfer-Encoding: chunked - if (contentLength > REQUEST_BODY_MAX_SIZE_BYTES) { + if (contentLength > resourceService.getMaxSize()) { respond(request, HttpStatus.REQUEST_ENTITY_TOO_LARGE, "Request body is too large"); return; } diff --git a/server/src/main/resources/aidial.settings.json b/server/src/main/resources/aidial.settings.json index 8967b9535..8aa0433b2 100644 --- a/server/src/main/resources/aidial.settings.json +++ b/server/src/main/resources/aidial.settings.json @@ -52,7 +52,8 @@ } }, "resources": { - "maxSize" : 1048576, + "maxSize" : 67108864, + "maxSizeToCache": 1048576, "syncPeriod": 60000, "syncDelay": 120000, "syncBatch": 4096, diff --git a/server/src/test/java/com/epam/aidial/core/server/ProxyTest.java b/server/src/test/java/com/epam/aidial/core/server/ProxyTest.java index c88600937..73c48190c 100644 --- a/server/src/test/java/com/epam/aidial/core/server/ProxyTest.java +++ b/server/src/test/java/com/epam/aidial/core/server/ProxyTest.java @@ -12,6 +12,7 @@ import com.epam.aidial.core.server.security.ExtractedClaims; import com.epam.aidial.core.storage.blobstore.BlobStorage; import com.epam.aidial.core.storage.http.HttpException; +import com.epam.aidial.core.storage.service.ResourceService; import io.vertx.core.Future; import io.vertx.core.MultiMap; import io.vertx.core.Vertx; @@ -29,6 +30,8 @@ import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; import java.util.LinkedHashMap; import java.util.List; @@ -54,6 +57,7 @@ import static org.mockito.Mockito.when; @ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.LENIENT) public class ProxyTest { @Mock @@ -72,6 +76,8 @@ public class ProxyTest { private AccessTokenValidator accessTokenValidator; @Mock private BlobStorage storage; + @Mock + private ResourceService resourceService; @Mock(answer = Answers.RETURNS_DEEP_STUBS) private HttpServerRequest request; @@ -84,6 +90,7 @@ public class ProxyTest { @BeforeEach public void beforeEach() { + when(resourceService.getMaxSize()).thenReturn(67108864); when(request.response()).thenReturn(response); when(request.getHeader(HttpHeaders.ACCESS_CONTROL_REQUEST_METHOD)).thenReturn(null); when(request.getHeader(HttpHeaders.ACCESS_CONTROL_REQUEST_HEADERS)).thenReturn(null); diff --git a/server/src/test/java/com/epam/aidial/core/server/ResourceApiTest.java b/server/src/test/java/com/epam/aidial/core/server/ResourceApiTest.java index eac7f1e15..5eb455793 100644 --- a/server/src/test/java/com/epam/aidial/core/server/ResourceApiTest.java +++ b/server/src/test/java/com/epam/aidial/core/server/ResourceApiTest.java @@ -161,8 +161,46 @@ void testMaxKeySize() { @Test void testMaxContentSize() { - Response response = resourceRequest(HttpMethod.PUT, "/folder/big", "1".repeat(1024 * 1024 + 1)); - verify(response, 413, "Resource size: 1048577 exceeds max limit: 1048576"); + Response response = resourceRequest(HttpMethod.PUT, "/folder/big", "1".repeat(64 * 1024 * 1024 + 1)); + verify(response, 413, "Request body is too large"); + } + + @Test + void testBigContentSize() { + String template = """ + { + "id": "conversation_id", + "name": "display_name", + "model": {"id": "model_id"}, + "prompt": "%s", + "temperature": 1, + "folderId": "folder1", + "messages": [], + "selectedAddons": ["R", "T", "G"], + "assistantModelId": "assistantId", + "lastActivityDate": 4848683153 + } + """; + String big = template.formatted("0".repeat(4 * 1024 * 1024)); + String small = template.formatted("12345"); + + Response response = resourceRequest(HttpMethod.PUT, "/folder/big", big); + verify(response, 200); + + response = resourceRequest(HttpMethod.GET, "/folder/big"); + verifyJson(response, 200, big); + + response = resourceRequest(HttpMethod.PUT, "/folder/big", small); + verify(response, 200); + + response = resourceRequest(HttpMethod.GET, "/folder/big"); + verifyJson(response, 200, small); + + response = resourceRequest(HttpMethod.DELETE, "/folder/big"); + verify(response, 200); + + response = resourceRequest(HttpMethod.GET, "/folder/big"); + verify(response, 404); } @Test diff --git a/server/src/test/java/com/epam/aidial/core/server/limiter/RateLimiterTest.java b/server/src/test/java/com/epam/aidial/core/server/limiter/RateLimiterTest.java index 85ce887c1..29965fb06 100644 --- a/server/src/test/java/com/epam/aidial/core/server/limiter/RateLimiterTest.java +++ b/server/src/test/java/com/epam/aidial/core/server/limiter/RateLimiterTest.java @@ -98,13 +98,13 @@ public static void afterAll() throws IOException { } @BeforeEach - public void beforeEach() throws Exception { + public void beforeEach() { RKeys keys = redissonClient.getKeys(); for (String key : keys.getKeys()) { keys.delete(key); } LockService lockService = new LockService(redissonClient, null); - ResourceService.Settings settings = new ResourceService.Settings(1048576, 60000, 120000, 4096, 300000, 256); + ResourceService.Settings settings = new ResourceService.Settings(64 * 1048576, 1048576, 60000, 120000, 4096, 300000, 256); ResourceService resourceService = new ResourceService(mock(TimerService.class), redissonClient, blobStorage, lockService, settings, null); rateLimiter = new RateLimiter(vertx, resourceService); diff --git a/server/src/test/java/com/epam/aidial/core/server/security/ApiKeyStoreTest.java b/server/src/test/java/com/epam/aidial/core/server/security/ApiKeyStoreTest.java index e954245bc..822669461 100644 --- a/server/src/test/java/com/epam/aidial/core/server/security/ApiKeyStoreTest.java +++ b/server/src/test/java/com/epam/aidial/core/server/security/ApiKeyStoreTest.java @@ -94,7 +94,7 @@ public void beforeEach() { keys.delete(key); } LockService lockService = new LockService(redissonClient, null); - ResourceService.Settings settings = new ResourceService.Settings(1048576, 60000, 120000, 4096, 300000, 256); + ResourceService.Settings settings = new ResourceService.Settings(64 * 1048576, 1048576, 60000, 120000, 4096, 300000, 256); ResourceService resourceService = new ResourceService(mock(TimerService.class), redissonClient, blobStorage, lockService, settings, null); store = new ApiKeyStore(resourceService, vertx); diff --git a/server/src/test/java/com/epam/aidial/core/server/token/TokenStatsTrackerTest.java b/server/src/test/java/com/epam/aidial/core/server/token/TokenStatsTrackerTest.java index 46ad4b4fa..a237f99bb 100644 --- a/server/src/test/java/com/epam/aidial/core/server/token/TokenStatsTrackerTest.java +++ b/server/src/test/java/com/epam/aidial/core/server/token/TokenStatsTrackerTest.java @@ -92,7 +92,7 @@ public void beforeEach() { keys.delete(key); } LockService lockService = new LockService(redissonClient, null); - ResourceService.Settings settings = new ResourceService.Settings(1048576, 60000, 120000, 4096, 300000, 256); + ResourceService.Settings settings = new ResourceService.Settings(64 * 1048576, 1048576, 60000, 120000, 4096, 300000, 256); ResourceService resourceService = new ResourceService(mock(TimerService.class), redissonClient, blobStorage, lockService, settings, null); tracker = new TokenStatsTracker(vertx, resourceService); diff --git a/server/src/test/resources/aidial.settings.json b/server/src/test/resources/aidial.settings.json index 8673f7e34..2b3027b9f 100644 --- a/server/src/test/resources/aidial.settings.json +++ b/server/src/test/resources/aidial.settings.json @@ -51,7 +51,8 @@ "key": "salt" }, "resources": { - "maxSize" : 1048576, + "maxSize" : 67108864, + "maxSizeToCache": 1048576, "syncPeriod": 60000, "syncDelay": 120000, "syncBatch": 4096, diff --git a/server/src/test/resources/response.txt b/server/src/test/resources/response.txt index 4d34bf271..5a42bf5dc 100644 --- a/server/src/test/resources/response.txt +++ b/server/src/test/resources/response.txt @@ -12,10 +12,5 @@ "content": "As an AI language model, I do not have emotions like humans. However, I am functioning well and ready to assist you. How can I help you today?" } } - ], - "usage": { - "completion_tokens": 33, - "prompt_tokens": 19, - "total_tokens": 52 - } + ] } diff --git a/storage/src/main/java/com/epam/aidial/core/storage/service/ResourceService.java b/storage/src/main/java/com/epam/aidial/core/storage/service/ResourceService.java index c57446b26..8712cec4e 100644 --- a/storage/src/main/java/com/epam/aidial/core/storage/service/ResourceService.java +++ b/storage/src/main/java/com/epam/aidial/core/storage/service/ResourceService.java @@ -90,6 +90,7 @@ public class ResourceService implements AutoCloseable { private final ResourceTopic topic; @Getter private final int maxSize; + private final int maxSizeToCache; private final TimerService.Timer syncTimer; private final long syncDelay; private final int syncBatch; @@ -109,6 +110,7 @@ public ResourceService(TimerService timerService, this.lockService = lockService; this.topic = new ResourceTopic(redis, "resource:" + BlobStorageUtil.toStoragePath(prefix, "topic")); this.maxSize = settings.maxSize; + this.maxSizeToCache = settings.maxSizeToCache(); this.syncDelay = settings.syncDelay; this.syncBatch = settings.syncBatch; this.cacheExpiration = Duration.ofMillis(settings.cacheExpiration); @@ -349,7 +351,7 @@ public ResourceStream getResourceStream(ResourceDescriptor resource, EtagHeader String contentType = metadata.getContentMetadata().getContentType(); Long length = metadata.getContentMetadata().getContentLength(); - if (length <= maxSize) { + if (length <= maxSizeToCache) { result = blobToResult(blob, metadata); redisPut(key, result); return ResourceStream.fromResult(result, etagHeader); @@ -391,7 +393,7 @@ private ResourceItemMetadata putResource( String newEtag = EtagBuilder.generateEtag(body); Result result = new Result(body, newEtag, createdAt, updatedAt, contentType, descriptor.getType().requireCompression(), (long) body.length, descriptor.getType().name(), false); - if (body.length <= maxSize) { + if (body.length <= maxSizeToCache) { redisPut(redisKey, result); if (metadata == null) { String blobKey = blobKey(descriptor); @@ -866,6 +868,7 @@ public record MultipartData( /** * @param maxSize - max allowed size in bytes for a resource. + * @param maxSizeToCache - max size in bytes to cache resource in Redis. * @param syncPeriod - period in milliseconds, how frequently check for resources to sync. * @param syncDelay - delay in milliseconds for a resource to be written back in object storage after last modification. * @param syncBatch - how many resources to sync in one go. @@ -875,6 +878,7 @@ public record MultipartData( @JsonIgnoreProperties(ignoreUnknown = true) public record Settings( int maxSize, + int maxSizeToCache, long syncPeriod, long syncDelay, int syncBatch,