Skip to content

Commit

Permalink
Store actual chunk size when repopulating ChunksCache
Browse files Browse the repository at this point in the history
We didn't need this info before, but for integrity checking the chunk size becomes important.
  • Loading branch information
grote committed Nov 13, 2024
1 parent 47c3e65 commit 5891024
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,12 @@ internal class Backup(
try {
// get available chunks, so we do not need to rely solely on local cache
// for checking if a chunk already exists on storage
val chunkIds = ArrayList<String>()
val availableChunkIds = mutableMapOf<String, Long>()
val topLevelFolder = TopLevelFolder.fromAndroidId(androidId)
backend.list(topLevelFolder, FileBackupFileType.Blob::class) { fileInfo ->
chunkIds.add(fileInfo.fileHandle.name)
availableChunkIds[fileInfo.fileHandle.name] = fileInfo.size
}
val availableChunkIds = chunkIds.toHashSet()
if (!chunksCache.areAllAvailableChunksCached(db, availableChunkIds)) {
if (!chunksCache.areAllAvailableChunksCached(db, availableChunkIds.keys)) {
cacheRepopulater.repopulate(streamKey, availableChunkIds)
}

Expand All @@ -123,7 +122,7 @@ internal class Backup(
// with its old (unreferenced) chunks eventually deleted.
// If (one of) its chunk(s) is missing, it will count as changed and chunked again.
duration = measure {
backupFiles(scanResult, availableChunkIds, backupObserver)
backupFiles(scanResult, availableChunkIds.keys, backupObserver)
}
Log.e(TAG, "Changed files backup took $duration")
} finally {
Expand All @@ -134,7 +133,7 @@ internal class Backup(
@Throws(IOException::class, GeneralSecurityException::class)
private suspend fun backupFiles(
filesResult: FileScannerResult,
availableChunkIds: HashSet<String>,
availableChunkIds: Set<String>,
backupObserver: BackupObserver?,
) {
val startTime = System.currentTimeMillis()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
package org.calyxos.backup.storage.backup

import android.util.Log
import org.calyxos.backup.storage.SnapshotRetriever
import org.calyxos.backup.storage.db.CachedChunk
import org.calyxos.backup.storage.db.Db
import org.calyxos.backup.storage.measure
import org.calyxos.backup.storage.SnapshotRetriever
import org.calyxos.backup.storage.getCurrentBackupSnapshots
import org.calyxos.backup.storage.measure
import org.calyxos.seedvault.core.backends.Backend
import org.calyxos.seedvault.core.backends.FileBackupFileType
import java.io.IOException
Expand All @@ -27,7 +27,7 @@ internal class ChunksCacheRepopulater(
private val snapshotRetriever: SnapshotRetriever,
) {

suspend fun repopulate(streamKey: ByteArray, availableChunkIds: HashSet<String>) {
suspend fun repopulate(streamKey: ByteArray, availableChunkIds: Map<String, Long>) {
Log.i(TAG, "Starting to repopulate chunks cache")
try {
repopulateInternal(streamKey, availableChunkIds)
Expand All @@ -40,7 +40,7 @@ internal class ChunksCacheRepopulater(
@Throws(IOException::class)
private suspend fun repopulateInternal(
streamKey: ByteArray,
availableChunkIds: HashSet<String>,
availableChunkIds: Map<String, Long>,
) {
val start = System.currentTimeMillis()
val snapshots =
Expand All @@ -62,7 +62,7 @@ internal class ChunksCacheRepopulater(
Log.i(TAG, "Repopulating chunks cache took $repopulateDuration")

// delete chunks that are not referenced by any snapshot anymore
val chunksToDelete = availableChunkIds.subtract(cachedChunks.map { it.id }.toSet())
val chunksToDelete = availableChunkIds.keys.subtract(cachedChunks.map { it.id }.toSet())
val deletionDuration = measure {
chunksToDelete.forEach { chunkId ->
val handle = FileBackupFileType.Blob(androidId, chunkId)
Expand All @@ -74,7 +74,7 @@ internal class ChunksCacheRepopulater(

private fun getCachedChunks(
snapshots: List<BackupSnapshot>,
availableChunks: HashSet<String>,
availableChunks: Map<String, Long>,
): Collection<CachedChunk> {
val chunkMap = HashMap<String, CachedChunk>()
snapshots.forEach { snapshot ->
Expand All @@ -85,25 +85,24 @@ internal class ChunksCacheRepopulater(
snapshot.documentFilesList.forEach { file ->
file.chunkIdsList.forEach { chunkId -> chunksInSnapshot.add(chunkId) }
}
addCachedChunksToMap(snapshot.timeStart, availableChunks, chunkMap, chunksInSnapshot)
addCachedChunksToMap(snapshot, availableChunks, chunkMap, chunksInSnapshot)
}
return chunkMap.values
}

private fun addCachedChunksToMap(
snapshotTimeStamp: Long,
availableChunks: HashSet<String>,
snapshot: BackupSnapshot,
availableChunks: Map<String, Long>,
chunkMap: HashMap<String, CachedChunk>,
chunksInSnapshot: HashSet<String>,
) = chunksInSnapshot.forEach { chunkId ->
if (!availableChunks.contains(chunkId)) {
Log.w(TAG, "ChunkId $chunkId referenced in $snapshotTimeStamp, but not in storage.")
val size = availableChunks[chunkId]
if (size == null) {
Log.w(TAG, "ChunkId $chunkId referenced in ${snapshot.timeStart}, but not in storage.")
return@forEach
}
val cachedChunk = chunkMap.getOrElse(chunkId) {
// TODO get actual chunk size (isn't used for anything critical, yet)
val size = 0L
CachedChunk(chunkId, 0, size)
CachedChunk(chunkId, 0, size, snapshot.version.toByte())
}
chunkMap[chunkId] = cachedChunk.copy(refCount = cachedChunk.refCount + 1)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ internal class FileBackup(

suspend fun backupFiles(
files: List<ContentFile>,
availableChunkIds: HashSet<String>,
availableChunkIds: Set<String>,
backupObserver: BackupObserver?,
): BackupResult {
val chunkIds = HashSet<String>()
Expand Down Expand Up @@ -76,7 +76,7 @@ internal class FileBackup(
@Throws(IOException::class, GeneralSecurityException::class)
private suspend fun backupFile(
file: ContentFile,
availableChunkIds: HashSet<String>,
availableChunkIds: Set<String>,
): FileBackupResult {
val cachedFile = filesCache.getByUri(file.uri)
val missingChunkIds = cachedFile?.chunks?.minus(availableChunkIds) ?: emptyList()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ internal class SmallFileBackup(

suspend fun backupFiles(
files: List<ContentFile>,
availableChunkIds: HashSet<String>,
availableChunkIds: Set<String>,
backupObserver: BackupObserver?,
): BackupResult {
val chunkIds = HashSet<String>()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ import io.mockk.mockk
import io.mockk.mockkStatic
import io.mockk.slot
import kotlinx.coroutines.runBlocking
import org.calyxos.backup.storage.SnapshotRetriever
import org.calyxos.backup.storage.api.StoredSnapshot
import org.calyxos.backup.storage.db.CachedChunk
import org.calyxos.backup.storage.db.ChunksCache
import org.calyxos.backup.storage.db.Db
import org.calyxos.backup.storage.getCurrentBackupSnapshots
import org.calyxos.backup.storage.getRandomString
import org.calyxos.backup.storage.mockLog
import org.calyxos.backup.storage.SnapshotRetriever
import org.calyxos.backup.storage.getCurrentBackupSnapshots
import org.calyxos.seedvault.core.backends.Backend
import org.calyxos.seedvault.core.backends.FileBackupFileType.Blob
import org.junit.Assert.assertEquals
Expand Down Expand Up @@ -59,7 +59,7 @@ internal class ChunksCacheRepopulaterTest {
val chunk3 = getRandomString(6) // not referenced by any snapshot
val chunk4 = getRandomString(6) // in 1 snapshot
val chunk5 = getRandomString(6) // in 1 snapshot, but not available in storage
val availableChunkIds = hashSetOf(chunk1, chunk2, chunk3, chunk4)
val availableChunkIds = mapOf(chunk1 to 3L, chunk2 to 5L, chunk3 to 23L, chunk4 to 42L)
val snapshot1 = BackupSnapshot.newBuilder()
.setTimeStart(Random.nextLong())
.addMediaFiles(BackupMediaFile.newBuilder().addChunkIds(chunk1))
Expand All @@ -77,9 +77,9 @@ internal class ChunksCacheRepopulaterTest {
val storedSnapshot2 = StoredSnapshot("bar", snapshot2.timeStart)
val storedSnapshots = listOf(storedSnapshot1, storedSnapshot2)
val cachedChunks = listOf(
CachedChunk(chunk1, 2, 0),
CachedChunk(chunk2, 2, 0),
CachedChunk(chunk4, 1, 0),
CachedChunk(chunk1, 2, availableChunkIds[chunk1]!!),
CachedChunk(chunk2, 2, availableChunkIds[chunk2]!!),
CachedChunk(chunk4, 1, availableChunkIds[chunk4]!!),
) // chunk3 is not referenced and should get deleted
val cachedChunksSlot = slot<Collection<CachedChunk>>()

Expand Down

0 comments on commit 5891024

Please sign in to comment.