From 5891024c6208e214210fb39504b8ebf111495040 Mon Sep 17 00:00:00 2001 From: Torsten Grote Date: Wed, 13 Nov 2024 14:59:39 -0300 Subject: [PATCH] Store actual chunk size when repopulating ChunksCache We didn't need this info before, but for integrity checking the chunk size becomes important. --- .../calyxos/backup/storage/backup/Backup.kt | 11 ++++---- .../storage/backup/ChunksCacheRepopulater.kt | 27 +++++++++---------- .../backup/storage/backup/FileBackup.kt | 4 +-- .../backup/storage/backup/SmallFileBackup.kt | 2 +- .../backup/ChunksCacheRepopulaterTest.kt | 12 ++++----- 5 files changed, 27 insertions(+), 29 deletions(-) diff --git a/storage/lib/src/main/java/org/calyxos/backup/storage/backup/Backup.kt b/storage/lib/src/main/java/org/calyxos/backup/storage/backup/Backup.kt index ad8548b4e..6524ec8b7 100644 --- a/storage/lib/src/main/java/org/calyxos/backup/storage/backup/Backup.kt +++ b/storage/lib/src/main/java/org/calyxos/backup/storage/backup/Backup.kt @@ -99,13 +99,12 @@ internal class Backup( try { // get available chunks, so we do not need to rely solely on local cache // for checking if a chunk already exists on storage - val chunkIds = ArrayList() + val availableChunkIds = mutableMapOf() val topLevelFolder = TopLevelFolder.fromAndroidId(androidId) backend.list(topLevelFolder, FileBackupFileType.Blob::class) { fileInfo -> - chunkIds.add(fileInfo.fileHandle.name) + availableChunkIds[fileInfo.fileHandle.name] = fileInfo.size } - val availableChunkIds = chunkIds.toHashSet() - if (!chunksCache.areAllAvailableChunksCached(db, availableChunkIds)) { + if (!chunksCache.areAllAvailableChunksCached(db, availableChunkIds.keys)) { cacheRepopulater.repopulate(streamKey, availableChunkIds) } @@ -123,7 +122,7 @@ internal class Backup( // with its old (unreferenced) chunks eventually deleted. // If (one of) its chunk(s) is missing, it will count as changed and chunked again. duration = measure { - backupFiles(scanResult, availableChunkIds, backupObserver) + backupFiles(scanResult, availableChunkIds.keys, backupObserver) } Log.e(TAG, "Changed files backup took $duration") } finally { @@ -134,7 +133,7 @@ internal class Backup( @Throws(IOException::class, GeneralSecurityException::class) private suspend fun backupFiles( filesResult: FileScannerResult, - availableChunkIds: HashSet, + availableChunkIds: Set, backupObserver: BackupObserver?, ) { val startTime = System.currentTimeMillis() diff --git a/storage/lib/src/main/java/org/calyxos/backup/storage/backup/ChunksCacheRepopulater.kt b/storage/lib/src/main/java/org/calyxos/backup/storage/backup/ChunksCacheRepopulater.kt index 2edfd5cbb..e48d24679 100644 --- a/storage/lib/src/main/java/org/calyxos/backup/storage/backup/ChunksCacheRepopulater.kt +++ b/storage/lib/src/main/java/org/calyxos/backup/storage/backup/ChunksCacheRepopulater.kt @@ -6,11 +6,11 @@ package org.calyxos.backup.storage.backup import android.util.Log +import org.calyxos.backup.storage.SnapshotRetriever import org.calyxos.backup.storage.db.CachedChunk import org.calyxos.backup.storage.db.Db -import org.calyxos.backup.storage.measure -import org.calyxos.backup.storage.SnapshotRetriever import org.calyxos.backup.storage.getCurrentBackupSnapshots +import org.calyxos.backup.storage.measure import org.calyxos.seedvault.core.backends.Backend import org.calyxos.seedvault.core.backends.FileBackupFileType import java.io.IOException @@ -27,7 +27,7 @@ internal class ChunksCacheRepopulater( private val snapshotRetriever: SnapshotRetriever, ) { - suspend fun repopulate(streamKey: ByteArray, availableChunkIds: HashSet) { + suspend fun repopulate(streamKey: ByteArray, availableChunkIds: Map) { Log.i(TAG, "Starting to repopulate chunks cache") try { repopulateInternal(streamKey, availableChunkIds) @@ -40,7 +40,7 @@ internal class ChunksCacheRepopulater( @Throws(IOException::class) private suspend fun repopulateInternal( streamKey: ByteArray, - availableChunkIds: HashSet, + availableChunkIds: Map, ) { val start = System.currentTimeMillis() val snapshots = @@ -62,7 +62,7 @@ internal class ChunksCacheRepopulater( Log.i(TAG, "Repopulating chunks cache took $repopulateDuration") // delete chunks that are not references by any snapshot anymore - val chunksToDelete = availableChunkIds.subtract(cachedChunks.map { it.id }.toSet()) + val chunksToDelete = availableChunkIds.keys.subtract(cachedChunks.map { it.id }.toSet()) val deletionDuration = measure { chunksToDelete.forEach { chunkId -> val handle = FileBackupFileType.Blob(androidId, chunkId) @@ -74,7 +74,7 @@ internal class ChunksCacheRepopulater( private fun getCachedChunks( snapshots: List, - availableChunks: HashSet, + availableChunks: Map, ): Collection { val chunkMap = HashMap() snapshots.forEach { snapshot -> @@ -85,25 +85,24 @@ internal class ChunksCacheRepopulater( snapshot.documentFilesList.forEach { file -> file.chunkIdsList.forEach { chunkId -> chunksInSnapshot.add(chunkId) } } - addCachedChunksToMap(snapshot.timeStart, availableChunks, chunkMap, chunksInSnapshot) + addCachedChunksToMap(snapshot, availableChunks, chunkMap, chunksInSnapshot) } return chunkMap.values } private fun addCachedChunksToMap( - snapshotTimeStamp: Long, - availableChunks: HashSet, + snapshot: BackupSnapshot, + availableChunks: Map, chunkMap: HashMap, chunksInSnapshot: HashSet, ) = chunksInSnapshot.forEach { chunkId -> - if (!availableChunks.contains(chunkId)) { - Log.w(TAG, "ChunkId $chunkId referenced in $snapshotTimeStamp, but not in storage.") + val size = availableChunks[chunkId] + if (size == null) { + Log.w(TAG, "ChunkId $chunkId referenced in ${snapshot.timeStart}, but not in storage.") return@forEach } val cachedChunk = chunkMap.getOrElse(chunkId) { - // TODO get actual chunk size (isn't used for anything critical, yet) - val size = 0L - CachedChunk(chunkId, 0, size) + CachedChunk(chunkId, 0, size, snapshot.version.toByte()) } chunkMap[chunkId] = cachedChunk.copy(refCount = cachedChunk.refCount + 1) } diff --git a/storage/lib/src/main/java/org/calyxos/backup/storage/backup/FileBackup.kt b/storage/lib/src/main/java/org/calyxos/backup/storage/backup/FileBackup.kt index 6845e4129..ba24e0c5b 100644 --- a/storage/lib/src/main/java/org/calyxos/backup/storage/backup/FileBackup.kt +++ b/storage/lib/src/main/java/org/calyxos/backup/storage/backup/FileBackup.kt @@ -31,7 +31,7 @@ internal class FileBackup( suspend fun backupFiles( files: List, - availableChunkIds: HashSet, + availableChunkIds: Set, backupObserver: BackupObserver?, ): BackupResult { val chunkIds = HashSet() @@ -76,7 +76,7 @@ internal class FileBackup( @Throws(IOException::class, GeneralSecurityException::class) private suspend fun backupFile( file: ContentFile, - availableChunkIds: HashSet, + availableChunkIds: Set, ): FileBackupResult { val cachedFile = filesCache.getByUri(file.uri) val missingChunkIds = cachedFile?.chunks?.minus(availableChunkIds) ?: emptyList() diff --git a/storage/lib/src/main/java/org/calyxos/backup/storage/backup/SmallFileBackup.kt b/storage/lib/src/main/java/org/calyxos/backup/storage/backup/SmallFileBackup.kt index a1b7b4930..02b79a2f9 100644 --- a/storage/lib/src/main/java/org/calyxos/backup/storage/backup/SmallFileBackup.kt +++ b/storage/lib/src/main/java/org/calyxos/backup/storage/backup/SmallFileBackup.kt @@ -30,7 +30,7 @@ internal class SmallFileBackup( suspend fun backupFiles( files: List, - availableChunkIds: HashSet, + availableChunkIds: Set, backupObserver: BackupObserver?, ): BackupResult { val chunkIds = HashSet() diff --git a/storage/lib/src/test/java/org/calyxos/backup/storage/backup/ChunksCacheRepopulaterTest.kt b/storage/lib/src/test/java/org/calyxos/backup/storage/backup/ChunksCacheRepopulaterTest.kt index 17b656ed2..57b3005c6 100644 --- a/storage/lib/src/test/java/org/calyxos/backup/storage/backup/ChunksCacheRepopulaterTest.kt +++ b/storage/lib/src/test/java/org/calyxos/backup/storage/backup/ChunksCacheRepopulaterTest.kt @@ -14,14 +14,14 @@ import io.mockk.mockk import io.mockk.mockkStatic import io.mockk.slot import kotlinx.coroutines.runBlocking +import org.calyxos.backup.storage.SnapshotRetriever import org.calyxos.backup.storage.api.StoredSnapshot import org.calyxos.backup.storage.db.CachedChunk import org.calyxos.backup.storage.db.ChunksCache import org.calyxos.backup.storage.db.Db +import org.calyxos.backup.storage.getCurrentBackupSnapshots import org.calyxos.backup.storage.getRandomString import org.calyxos.backup.storage.mockLog -import org.calyxos.backup.storage.SnapshotRetriever -import org.calyxos.backup.storage.getCurrentBackupSnapshots import org.calyxos.seedvault.core.backends.Backend import org.calyxos.seedvault.core.backends.FileBackupFileType.Blob import org.junit.Assert.assertEquals @@ -59,7 +59,7 @@ internal class ChunksCacheRepopulaterTest { val chunk3 = getRandomString(6) // not referenced by any snapshot val chunk4 = getRandomString(6) // in 1 snapshot val chunk5 = getRandomString(6) // in 1 snapshot, but not available in storage - val availableChunkIds = hashSetOf(chunk1, chunk2, chunk3, chunk4) + val availableChunkIds = mapOf(chunk1 to 3L, chunk2 to 5L, chunk3 to 23L, chunk4 to 42L) val snapshot1 = BackupSnapshot.newBuilder() .setTimeStart(Random.nextLong()) .addMediaFiles(BackupMediaFile.newBuilder().addChunkIds(chunk1)) @@ -77,9 +77,9 @@ internal class ChunksCacheRepopulaterTest { val storedSnapshot2 = StoredSnapshot("bar", snapshot2.timeStart) val storedSnapshots = listOf(storedSnapshot1, storedSnapshot2) val cachedChunks = listOf( - CachedChunk(chunk1, 2, 0), - CachedChunk(chunk2, 2, 0), - CachedChunk(chunk4, 1, 0), + CachedChunk(chunk1, 2, availableChunkIds[chunk1]!!), + CachedChunk(chunk2, 2, availableChunkIds[chunk2]!!), + CachedChunk(chunk4, 1, availableChunkIds[chunk4]!!), ) // chunk3 is not referenced and should get deleted val cachedChunksSlot = slot>()