Skip to content

Commit

Permalink
Don't include known bad blobs in BlobCache
Browse files Browse the repository at this point in the history
  • Loading branch information
grote committed Nov 8, 2024
1 parent ae77ebe commit 47c3e65
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 7 deletions.
20 changes: 13 additions & 7 deletions app/src/main/java/com/stevesoltys/seedvault/repo/BlobCache.kt
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,20 @@ class BlobCache(
blobMap.clear()
MemoryLogger.log()
// create map of blobId to size of blob on backend
val blobIds = blobs.associate {
val allowedBlobIds = blobs.associate {
Pair(it.fileHandle.name, it.size.toInt())
}.toMutableMap()
// remove known bad blob IDs from allowedBlobIds
getDoNotUseBlobIds().forEach { knownBadId ->
if (allowedBlobIds.remove(knownBadId) != null) {
log.info { "Removed known bad blob: $knownBadId" }
}
}
// load local blob cache and include only blobs on backend
loadPersistentBlobCache(blobIds)
loadPersistentBlobCache(allowedBlobIds)
// build up mapping from chunkId to blob from available snapshots
snapshots.forEach { snapshot ->
onSnapshotLoaded(snapshot, blobIds)
onSnapshotLoaded(snapshot, allowedBlobIds)
}
MemoryLogger.log()
}
Expand Down Expand Up @@ -167,14 +173,14 @@ class BlobCache(

/**
* Used for populating local [blobMap] cache.
* Adds mapping from chunkId to [Blob], if it exists on backend, i.e. part of [blobIds]
* and its size matches the one on backend, i.e. value of [blobIds].
* Adds mapping from chunkId to [Blob], if it exists on backend, i.e. part of [allowedBlobIds]
* and its size matches the one on backend, i.e. value of [allowedBlobIds].
*/
private fun onSnapshotLoaded(snapshot: Snapshot, blobIds: Map<String, Int>) {
private fun onSnapshotLoaded(snapshot: Snapshot, allowedBlobIds: Map<String, Int>) {
snapshot.blobsMap.forEach { (chunkId, blob) ->
// check if referenced blob still exists on backend
val blobId = blob.id.hexFromProto()
val sizeOnBackend = blobIds[blobId]
val sizeOnBackend = allowedBlobIds[blobId]
if (sizeOnBackend == blob.length) {
// only add blob to our mapping, if it still exists
blobMap.putIfAbsent(chunkId, blob)?.let { previous ->
Expand Down
35 changes: 35 additions & 0 deletions app/src/test/java/com/stevesoltys/seedvault/repo/BlobCacheTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ internal class BlobCacheTest : TransportTest() {

// read saved blobs from cache
every { strictContext.openFileInput(CACHE_FILE_NAME) } returns file.inputStream()
every {
strictContext.openFileInput(DO_NOT_USE_FILE_NAME)
} throws FileNotFoundException()
cache.populateCache(listOf(fileInfo1, fileInfo2), emptyList())

// now both blobs are in the map
Expand All @@ -62,6 +65,9 @@ internal class BlobCacheTest : TransportTest() {
BlobCache(strictContext).let { cache ->
// read saved blobs from cache
every { strictContext.openFileInput(CACHE_FILE_NAME) } returns file.inputStream()
every {
strictContext.openFileInput(DO_NOT_USE_FILE_NAME)
} throws FileNotFoundException()
cache.populateCache(listOf(fileInfo2), emptyList()) // fileInfo1 is missing

// now only blob2 gets used, because blob1 wasn't on backend
Expand All @@ -80,6 +86,9 @@ internal class BlobCacheTest : TransportTest() {
BlobCache(strictContext).let { cache ->
// read saved blobs from cache
every { strictContext.openFileInput(CACHE_FILE_NAME) } returns file.inputStream()
every {
strictContext.openFileInput(DO_NOT_USE_FILE_NAME)
} throws FileNotFoundException()
cache.populateCache(listOf(info, fileInfo2), emptyList()) // info has different size now

// now only blob2 gets used, because blob1 wasn't on backend
Expand All @@ -88,6 +97,32 @@ internal class BlobCacheTest : TransportTest() {
}
}

@Test
fun `cached blob doesn't get used if known bad on do-not-use list`(@TempDir tmpDir: Path) {
val file = File(tmpDir.toString(), "tmpCache")
val doNotUseFile = File(tmpDir.toString(), "doNotUse")
BlobCache(strictContext).apply {
saveTwoBlobsToCache(file)
every { strictContext.openFileOutput(DO_NOT_USE_FILE_NAME, MODE_APPEND) } answers {
FileOutputStream(doNotUseFile, true)
}
doNotUseBlob(blob1.id)
}

BlobCache(strictContext).let { cache ->
// read saved blobs from cache
every { strictContext.openFileInput(CACHE_FILE_NAME) } returns file.inputStream()
every {
strictContext.openFileInput(DO_NOT_USE_FILE_NAME)
} answers { doNotUseFile.inputStream() }
cache.populateCache(listOf(fileInfo1, fileInfo2), emptyList())

// now only blob2 gets used, because blob1 is on do-not-use-list
assertNull(cache[chunkId1])
assertEquals(blob2, cache[chunkId2])
}
}

@Test
fun `blobs from snapshot get added to cache`() {
assertEquals(blob1, snapshot.blobsMap[chunkId1])
Expand Down

0 comments on commit 47c3e65

Please sign in to comment.