Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Tiramisu Splitting Up Work] Tiramisu disk tier stats and disk tier integration #22

Open
wants to merge 18 commits into
base: tiramisu-serializer-impl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/

package org.opensearch.indices;

import org.opensearch.action.search.SearchResponse;
import org.opensearch.client.Client;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.cache.tier.TierType;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.common.unit.ByteSizeValue;
import org.opensearch.index.cache.request.RequestCacheStats;
import org.opensearch.index.cache.request.ShardRequestCache;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.test.OpenSearchIntegTestCase;

import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse;

// This is a separate file from IndicesRequestCacheIT because we only want to run our test
// on a node with a maximum request cache size that we set.

/**
 * Integration test for the disk-tier statistics of the indices request cache.
 * <p>
 * The test starts a single node with a deliberately tiny on-heap request cache, issues enough
 * distinct cacheable searches to overflow it, and then checks the per-tier hit/miss counters
 * and the disk-specific stats (disk reaches and cumulative get time).
 */
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
public class IndicesRequestCacheDiskTierIT extends OpenSearchIntegTestCase {

    /**
     * Fills the on-heap tier past its capacity, then re-requests early queries and verifies that
     * they are counted as disk-tier hits and that the disk-specific stats increase accordingly.
     */
    public void testDiskTierStats() throws Exception {
        int heapSizeBytes = 4729;
        String node = internalCluster().startNode(
            Settings.builder().put(IndicesRequestCache.INDICES_CACHE_QUERY_SIZE.getKey(), new ByteSizeValue(heapSizeBytes))
        );
        Client client = client(node);

        Settings.Builder indicesSettingBuilder = Settings.builder()
            .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true)
            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0);

        assertAcked(
            client.admin().indices().prepareCreate("index").setMapping("k", "type=keyword").setSettings(indicesSettingBuilder).get()
        );
        indexRandom(true, client.prepareIndex("index").setSource("k", "hello"));
        ensureSearchable("index");

        // The first request tells us how many bytes a single cached response occupies on heap.
        cachedTermSearch(client, "hello" + 0);
        long requestSizeBytes = getCacheSizeBytes(client, "index", TierType.ON_HEAP);
        // Guard the division below: a zero size would otherwise surface as an ArithmeticException.
        assertTrue("Expected the first response to be cached on heap, but on-heap size was " + requestSizeBytes, requestSizeBytes > 0);
        // If this fails, increase heapSizeBytes! We can't adjust it after getting the size of one query
        // as the cache size setting is not dynamic
        assertTrue(
            "heapSizeBytes (" + heapSizeBytes + ") must exceed the size of one cached request (" + requestSizeBytes + ")",
            heapSizeBytes > requestSizeBytes
        );
        int requestSize = (int) requestSizeBytes; // safe: strictly between 0 and heapSizeBytes

        int numOnDisk = 5;
        int numRequests = heapSizeBytes / requestSize + numOnDisk;
        for (int i = 1; i < numRequests; i++) {
            cachedTermSearch(client, "hello" + i);
            IndicesRequestCacheIT.assertCacheState(client, "index", 0, i + 1, TierType.ON_HEAP, false);
            IndicesRequestCacheIT.assertCacheState(client, "index", 0, i + 1, TierType.DISK, false);
        }

        // Read both baseline values from one stats snapshot so they describe the same instant.
        ShardRequestCache.DiskStatsAccumulator baseline = getDiskStatsAccumulator(client, "index");
        long diskReachesSoFar = baseline.getTotalDiskReaches();
        long tookTimeSoFar = baseline.getTotalGetTime();
        // So far, disk-specific stats should be 0, as keystore has prevented any actual disk reaches
        // assertEquals(0, diskReachesSoFar); // TODO: Once keystore is integrated, this value should be 0
        // assertEquals(0, tookTimeSoFar); // TODO: Once keystore is integrated, this value should be 0

        // long tookTimeSoFar = assertDiskTierSpecificStats(client, "index", 0, -1, 0); // TODO: Uncomment once keystore is integrated

        // the first request, for "hello0", should have been evicted to the disk tier
        cachedTermSearch(client, "hello0");
        IndicesRequestCacheIT.assertCacheState(client, "index", 0, numRequests + 1, TierType.ON_HEAP, false);
        IndicesRequestCacheIT.assertCacheState(client, "index", 1, numRequests, TierType.DISK, false);
        tookTimeSoFar = assertDiskTierSpecificStats(client, "index", 1 + diskReachesSoFar, tookTimeSoFar, -1);

        // We make another actual request that should be in the disk tier. Disk specific stats should keep incrementing
        cachedTermSearch(client, "hello1");
        IndicesRequestCacheIT.assertCacheState(client, "index", 0, numRequests + 2, TierType.ON_HEAP, false);
        IndicesRequestCacheIT.assertCacheState(client, "index", 2, numRequests, TierType.DISK, false);
        tookTimeSoFar = assertDiskTierSpecificStats(client, "index", 2 + diskReachesSoFar, tookTimeSoFar, -1);

        // A final request for something in neither tier shouldn't increment disk specific stats (once keystore is on)
        cachedTermSearch(client, "hello" + numRequests);
        IndicesRequestCacheIT.assertCacheState(client, "index", 0, numRequests + 3, TierType.ON_HEAP, false);
        IndicesRequestCacheIT.assertCacheState(client, "index", 2, numRequests + 1, TierType.DISK, false);
        // assertDiskTierSpecificStats(client, "index", 2 + diskReachesSoFar, tookTimeSoFar, tookTimeSoFar);
        // TODO: Uncomment once keystore is integrated
    }

    /**
     * Runs a request-cache-enabled term query on field {@code k} of {@code "index"} and checks
     * the response with {@code assertSearchResponse} before returning it.
     *
     * @param client client used to issue the search
     * @param value  term to search for
     * @return the validated search response
     */
    private SearchResponse cachedTermSearch(Client client, String value) {
        SearchResponse resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", value)).get();
        assertSearchResponse(resp);
        return resp;
    }

    /**
     * Fetches the aggregated request cache stats for the given index.
     *
     * @param client client used to issue the stats request
     * @param index  name of the index to query
     * @return the request cache stats from the index's total stats
     */
    private RequestCacheStats getRequestCacheStats(Client client, String index) {
        return client.admin().indices().prepareStats(index).setRequestCache(true).get().getTotal().getRequestCache();
    }

    /**
     * Returns the memory size in bytes that the request cache reports for one tier.
     *
     * @param client   client used to issue the stats request
     * @param index    name of the index to query
     * @param tierType tier whose size is requested
     * @return reported size in bytes for {@code tierType}
     */
    private long getCacheSizeBytes(Client client, String index, TierType tierType) {
        return getRequestCacheStats(client, index).getMemorySizeInBytes(tierType);
    }

    /**
     * Asserts the disk-specific stats of the request cache and returns the current total get time.
     * Pass a negative bound to skip the corresponding check.
     *
     * @param client                 client used to issue the stats request
     * @param index                  name of the index to query
     * @param totalDiskReaches       exact number of disk reaches expected
     * @param totalGetTimeLowerBound inclusive lower bound on the total get time, or negative to ignore
     * @param totalGetTimeUpperBound inclusive upper bound on the total get time, or negative to ignore
     * @return the total get time observed, for use as a bound in the next check
     */
    private long assertDiskTierSpecificStats(
        Client client,
        String index,
        long totalDiskReaches,
        long totalGetTimeLowerBound,
        long totalGetTimeUpperBound
    ) {
        ShardRequestCache.DiskStatsAccumulator specStats = getDiskStatsAccumulator(client, index);
        assertEquals(totalDiskReaches, specStats.getTotalDiskReaches());
        long tookTime = specStats.getTotalGetTime();
        assertTrue(
            "Total get time " + tookTime + " is below the lower bound " + totalGetTimeLowerBound,
            totalGetTimeLowerBound < 0 || tookTime >= totalGetTimeLowerBound
        );
        assertTrue(
            "Total get time " + tookTime + " is above the upper bound " + totalGetTimeUpperBound,
            totalGetTimeUpperBound < 0 || tookTime <= totalGetTimeUpperBound
        );
        return tookTime; // Return for use in next check
    }

    /**
     * Fetches the disk-specific stats accumulator from the request cache stats of the given index.
     *
     * @param client client used to issue the stats request
     * @param index  name of the index to query
     * @return the disk-specific stats reported for the index
     */
    private ShardRequestCache.DiskStatsAccumulator getDiskStatsAccumulator(Client client, String index) {
        return getRequestCacheStats(client, index).getDiskSpecificStats();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -663,18 +663,30 @@ public void testCacheWithInvalidation() throws Exception {
resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get();
assertSearchResponse(resp);
// Should expect hit as here as refresh didn't happen
assertCacheState(client, "index", 1, 1);
assertCacheState(client, "index", 1, 1, TierType.ON_HEAP, false);
assertCacheState(client, "index", 0, 1, TierType.DISK, false);
assertNumCacheEntries(client, "index", 1, TierType.ON_HEAP);

// Explicit refresh would invalidate cache
refresh();
// Hit same query again
resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get();
assertSearchResponse(resp);
// Should expect miss as key has changed due to change in IndexReader.CacheKey (due to refresh)
assertCacheState(client, "index", 1, 2);
assertCacheState(client, "index", 1, 2, TierType.ON_HEAP, false);
assertCacheState(client, "index", 0, 2, TierType.DISK, false);

//assertNumCacheEntries(client, "index", 1, TierType.ON_HEAP); // Evictions won't be 1 until the cache cleaner runs every minute
}

private static void assertCacheState(Client client, String index, long expectedHits, long expectedMisses) {
protected static void assertCacheState(
Client client,
String index,
long expectedHits,
long expectedMisses,
TierType tierType,
boolean enforceZeroEvictions
) {
RequestCacheStats requestCacheStats = client.admin()
.indices()
.prepareStats(index)
Expand All @@ -684,11 +696,35 @@ private static void assertCacheState(Client client, String index, long expectedH
.getRequestCache();
// Check the hit count and miss count together so if they are not
// correct we can see both values
assertEquals(
Arrays.asList(expectedHits, expectedMisses, 0L),
Arrays.asList(requestCacheStats.getHitCount(), requestCacheStats.getMissCount(), requestCacheStats.getEvictions())
);
if (enforceZeroEvictions) {
assertEquals(
Arrays.asList(expectedHits, expectedMisses, 0L),
Arrays.asList(
requestCacheStats.getHitCount(tierType),
requestCacheStats.getMissCount(tierType),
requestCacheStats.getEvictions(tierType)
)
);
} else {
assertEquals(
Arrays.asList(expectedHits, expectedMisses),
Arrays.asList(requestCacheStats.getHitCount(tierType), requestCacheStats.getMissCount(tierType))
);
}
}

/**
 * Convenience overload that checks the on-heap tier and also requires zero evictions.
 * Delegates to the six-argument variant with {@code TierType.ON_HEAP} and
 * {@code enforceZeroEvictions = true}.
 *
 * @param client         client used to fetch the request cache stats
 * @param index          name of the index whose stats are checked
 * @param expectedHits   expected hit count
 * @param expectedMisses expected miss count
 */
protected static void assertCacheState(Client client, String index, long expectedHits, long expectedMisses) {
assertCacheState(client, index, expectedHits, expectedMisses, TierType.ON_HEAP, true);
}

/**
 * Asserts that the request cache reports the expected number of entries for one tier.
 *
 * @param client          client used to fetch the request cache stats
 * @param index           name of the index whose stats are checked
 * @param expectedEntries entry count the tier is expected to report
 * @param tierType        tier whose entry count is checked
 */
protected static void assertNumCacheEntries(Client client, String index, long expectedEntries, TierType tierType) {
    // Pull the index's aggregated request cache stats, then compare the entry count of the requested tier.
    assertEquals(
        expectedEntries,
        client.admin().indices().prepareStats(index).setRequestCache(true).get().getTotal().getRequestCache().getEntries(tierType)
    );
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.cache.tier;

import org.opensearch.common.cache.tier.TierType;

import java.util.Map;

/**
 * Represents a value returned by a cache tier, together with the tier it was served from
 * and the tier-specific stats gathered for that individual request.
 * <p>
 * Instances are immutable: all fields are assigned once in the constructor.
 *
 * @param <V> Type of value.
 */
public class CacheValue<V> {
    // Kept package-private so classes in this package that read the fields directly keep compiling;
    // made final because nothing reassigns them after construction.
    final V value;
    final TierType source;
    final TierRequestStats stats;

    /**
     * @param value  the cached value
     * @param source the tier that produced this value
     * @param stats  tier-specific stats recorded for this request
     */
    CacheValue(V value, TierType source, TierRequestStats stats) {
        this.value = value;
        this.source = source;
        this.stats = stats;
    }

    /** @return the cached value as passed to the constructor */
    public V getValue() {
        return value;
    }

    /** @return the tier this value was served from */
    public TierType getSource() {
        return source;
    }

    /** @return the tier-specific stats recorded for this request */
    public TierRequestStats getStats() {
        return stats;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/
public interface CachingTier<K, V> {

V get(K key);
CacheValue<V> get(K key);

void put(K key, V value);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.cache.tier;

/**
 * Per-request statistics produced by a disk tier implementation: how long the lookup took
 * and whether it reached the disk.
 */
public class DiskTierRequestStats implements TierRequestStats {

    private final long requestGetTimeNanos;
    private final boolean requestReachedDisk;

    /**
     * @param requestGetTimeNanos time taken by the get, in nanoseconds
     * @param requestReachedDisk  whether the request reached the disk
     */
    public DiskTierRequestStats(long requestGetTimeNanos, boolean requestReachedDisk) {
        this.requestGetTimeNanos = requestGetTimeNanos;
        this.requestReachedDisk = requestReachedDisk;
    }

    /** @return the get time in nanoseconds, as supplied at construction */
    public long getRequestGetTimeNanos() {
        return requestGetTimeNanos;
    }

    /** @return whether the request reached the disk, as supplied at construction */
    public boolean getRequestReachedDisk() {
        return requestReachedDisk;
    }

    /** @return always {@link TierType#DISK} */
    @Override
    public TierType getTierType() {
        return TierType.DISK;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.ehcache.core.spi.service.FileBasedPersistenceContext;
import org.ehcache.spi.serialization.SerializerException;
import org.opensearch.OpenSearchException;
import org.opensearch.common.Randomness;
import org.opensearch.common.cache.RemovalListener;
import org.opensearch.common.cache.RemovalNotification;
import org.opensearch.common.cache.RemovalReason;
Expand All @@ -26,6 +27,8 @@
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Optional;
import java.util.Random;
import java.util.UUID;
import java.util.function.Supplier;

import org.ehcache.Cache;
Expand All @@ -50,7 +53,7 @@
public class EhCacheDiskCachingTier<K, V> implements DiskCachingTier<K, V> {

// A Cache manager can create many caches.
private static PersistentCacheManager cacheManager = null;
private PersistentCacheManager cacheManager;

// Disk cache
private Cache<K, byte[]> cache;
Expand Down Expand Up @@ -104,7 +107,7 @@ private EhCacheDiskCachingTier(Builder<K, V> builder) {
this.valueSerializer = Objects.requireNonNull(builder.valueSerializer, "Value serializer shouldn't be null");
this.ehCacheEventListener = new EhCacheEventListener<K, V>(this.valueSerializer);
this.maxWeightInBytes = builder.maxWeightInBytes;
this.storagePath = Objects.requireNonNull(builder.storagePath, "Storage path shouldn't be null");
this.storagePath = Objects.requireNonNull(builder.storagePath, "Storage path shouldn't be null") + UUID.randomUUID(); // temporary fix
if (builder.threadPoolAlias == null || builder.threadPoolAlias.isBlank()) {
this.threadPoolAlias = THREAD_POOL_ALIAS_PREFIX + "DiskWrite";
} else {
Expand All @@ -119,9 +122,6 @@ private EhCacheDiskCachingTier(Builder<K, V> builder) {
// Default value is 16 within Ehcache.
this.DISK_SEGMENTS = Setting.intSetting(builder.settingPrefix + ".ehcache.disk.segments", 16, 1, 32);

// In test cases, there might be leftover cache managers and caches hanging around, from nodes created in the test case setup
// Destroy them before recreating them
close();
cacheManager = buildCacheManager();
this.cache = buildCache(Duration.ofMillis(expireAfterAccess.getMillis()), builder);
}
Expand Down Expand Up @@ -192,8 +192,19 @@ private CacheEventListenerConfigurationBuilder getListenerConfiguration(Builder<
}

@Override
public V get(K key) {
return valueSerializer.deserialize(cache.get(key));
public CacheValue<V> get(K key) {
    // Optimize it by adding key store.
    // TODO: Change this once we combine this with keystore integration
    boolean reachedDisk = true;

    // Nanoseconds required; milliseconds might be too slow on an SSD
    long startNanos = System.nanoTime();
    V value = valueSerializer.deserialize(cache.get(key));

    // -1 is a placeholder for requests that never reached the disk;
    // the stats accumulator ignores the time in that case anyway.
    long elapsedNanos = reachedDisk ? System.nanoTime() - startNanos : -1L;

    return new CacheValue<>(value, TierType.DISK, new DiskTierRequestStats(elapsedNanos, reachedDisk));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.cache.tier;

/**
 * Per-request stats marker for the on-heap tier. It carries no tier-specific measurements
 * and only identifies its tier as {@link TierType#ON_HEAP}.
 */
public class OnHeapTierRequestStats implements TierRequestStats {
// Always reports the on-heap tier.
@Override
public TierType getTierType() {
return TierType.ON_HEAP;
}
}
Loading