Skip to content

Commit

Permalink
WIP Files Backup Checker
Browse files Browse the repository at this point in the history
  • Loading branch information
grote committed Nov 14, 2024
1 parent 5891024 commit 98f1e8d
Show file tree
Hide file tree
Showing 6 changed files with 227 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* SPDX-FileCopyrightText: 2024 The Calyx Institute
* SPDX-License-Identifier: Apache-2.0
*/

package org.calyxos.backup.storage.api

import org.calyxos.backup.storage.backup.BackupSnapshot

/**
 * Outcome of a backup check. Exactly one of the nested result types is returned.
 */
public sealed class CheckerResult {

    /**
     * The check found no problems.
     *
     * @property snapshots the snapshots that were read during the check.
     * @property percent the percentage of the backup that was requested to be checked.
     * @property size NOTE(review): presumably the amount of data that was checked — confirm.
     */
    public data class Success(
        val snapshots: List<BackupSnapshot>,
        val percent: Int,
        val size: Long,
    ) : CheckerResult()

    /**
     * The check found problems with snapshots and/or chunks.
     *
     * @property snapshots the snapshots that could be read.
     * @property missingChunkIds IDs of chunks reported as missing.
     * @property malformedChunkIds IDs of chunks reported as malformed.
     */
    public data class Error(
        /**
         * The number of snapshots that exist.
         * It is greater than the size of [snapshots]
         * when one or more snapshots could not be read/decrypted.
         */
        val existingSnapshots: Int,
        val snapshots: List<BackupSnapshot>,
        val missingChunkIds: Set<String>,
        val malformedChunkIds: Set<String>,
    ) : CheckerResult() {
        /**
         * Returns true if [chunkId] is contained in [missingChunkIds] or [malformedChunkIds].
         */
        public fun isBadChunkId(chunkId: String): Boolean =
            chunkId in missingChunkIds || chunkId in malformedChunkIds
    }

    /**
     * The check could not be completed because of the exception [e].
     */
    public data class GeneralError(val e: Exception) : CheckerResult()
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import org.calyxos.backup.storage.SnapshotRetriever
import org.calyxos.backup.storage.backup.Backup
import org.calyxos.backup.storage.backup.BackupSnapshot
import org.calyxos.backup.storage.backup.ChunksCacheRepopulater
import org.calyxos.backup.storage.check.Checker
import org.calyxos.backup.storage.db.Db
import org.calyxos.backup.storage.getCurrentBackupSnapshots
import org.calyxos.backup.storage.getMediaType
Expand Down Expand Up @@ -88,6 +89,9 @@ public class StorageBackup(
// Created lazily, i.e. only when first accessed.
private val pruner by lazy {
Pruner(db, retention, pluginGetter, androidId, keyManager, snapshotRetriever)
}
// Performs the work of [checkBackups]; like the pruner, created only when first accessed.
private val checker by lazy {
Checker(db, pluginGetter, snapshotRetriever, keyManager, chunksCacheRepopulater, androidId)
}

private val backupRunning = AtomicBoolean(false)
private val restoreRunning = AtomicBoolean(false)
Expand Down Expand Up @@ -245,4 +249,15 @@ public class StorageBackup(
}
}

/**
 * Returns the size of cached chunks as reported by the local chunks cache.
 */
public fun getBackupSize(): Long = db.getChunksCache().getSizeOfCachedChunks()

/**
 * Runs a check of the stored backups on this class' dispatcher.
 *
 * @param percent how much of the backup to check; must be within 0..100.
 * @param backupObserver optional observer that is handed on to the checker.
 * @return the [CheckerResult] produced by the checker.
 * @throws IllegalStateException if [percent] is outside of 0..100.
 */
public suspend fun checkBackups(percent: Int, backupObserver: BackupObserver?): CheckerResult {
    if (percent !in 0..100) error("Invalid percentage: $percent")
    return withContext(dispatcher) { checker.check(percent, backupObserver) }
}

}
170 changes: 170 additions & 0 deletions storage/lib/src/main/java/org/calyxos/backup/storage/check/Checker.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/*
* SPDX-FileCopyrightText: 2024 The Calyx Institute
* SPDX-License-Identifier: Apache-2.0
*/

package org.calyxos.backup.storage.check

import android.util.Log
import com.google.protobuf.InvalidProtocolBufferException
import org.calyxos.backup.storage.SnapshotRetriever
import org.calyxos.backup.storage.api.BackupObserver
import org.calyxos.backup.storage.api.CheckerResult
import org.calyxos.backup.storage.api.StoredSnapshot
import org.calyxos.backup.storage.backup.Backup.Companion.VERSION
import org.calyxos.backup.storage.backup.ChunksCacheRepopulater
import org.calyxos.backup.storage.crypto.ChunkCrypto
import org.calyxos.backup.storage.crypto.StreamCrypto
import org.calyxos.backup.storage.db.Db
import org.calyxos.backup.storage.restore.readVersion
import org.calyxos.seedvault.core.backends.Backend
import org.calyxos.seedvault.core.backends.FileBackupFileType
import org.calyxos.seedvault.core.backends.TopLevelFolder
import org.calyxos.seedvault.core.crypto.KeyManager
import org.calyxos.seedvault.core.toHexString
import java.io.IOException
import java.security.GeneralSecurityException
import javax.crypto.Mac
import kotlin.math.roundToLong

// Tag used for the log output of Checker.
private val TAG = Checker::class.simpleName

/**
 * Checks the files backup that is stored on the [Backend]:
 * lists the stored snapshots and blobs, tries to read every snapshot,
 * determines referenced chunks that are not available on the backend
 * and re-computes the chunk ID for a random sample of the available chunks.
 */
internal class Checker(
    private val db: Db,
    private val backendGetter: () -> Backend,
    private val snapshotRetriever: SnapshotRetriever,
    private val keyManager: KeyManager,
    private val cacheRepopulater: ChunksCacheRepopulater,
    private val androidId: String,
    private val streamCrypto: StreamCrypto = StreamCrypto,
    private val chunkCrypto: ChunkCrypto = ChunkCrypto,
) {

    // ask the getter on every access instead of holding on to one backend instance
    private val backend get() = backendGetter()

    // derived from the main key on every access
    private val streamKey
        get() = try {
            streamCrypto.deriveStreamKey(keyManager.getMainKey())
        } catch (e: GeneralSecurityException) {
            throw AssertionError(e)
        }

    // a new Mac is created from the main key on every access
    private val mac
        get() = try {
            chunkCrypto.getMac(ChunkCrypto.deriveChunkIdKey(keyManager.getMainKey()))
        } catch (e: GeneralSecurityException) {
            throw AssertionError(e)
        }

    /**
     * Returns the size of cached chunks as reported by the local chunks cache.
     */
    fun getBackupSize(): Long {
        return db.getChunksCache().getSizeOfCachedChunks()
    }

    /**
     * Checks the backup on the backend.
     *
     * @param percent how much of the backup (measured by [getBackupSize]) should have its
     * chunks downloaded and verified; must be within 0..100.
     * @param backupObserver currently not used by this method.
     * @return [CheckerResult.Success] if there is at least one snapshot, all snapshots
     * could be read, no referenced chunk is missing and all sampled chunks were verified,
     * otherwise [CheckerResult.Error].
     * @throws IllegalStateException if [percent] is outside of 0..100.
     */
    @Throws(
        IOException::class,
        GeneralSecurityException::class,
        InvalidProtocolBufferException::class,
    )
    suspend fun check(percent: Int, backupObserver: BackupObserver?): CheckerResult {
        check(percent in 0..100) { "Invalid percentage: $percent" }

        // get all snapshots and blobs on storage
        val topLevelFolder = TopLevelFolder("$androidId.sv")
        val storedSnapshots = mutableListOf<StoredSnapshot>()
        val availableChunkIds = mutableMapOf<String, Long>()
        backend.list(
            topLevelFolder,
            FileBackupFileType.Snapshot::class,
            FileBackupFileType.Blob::class,
        ) { fileInfo ->
            // bind the handle once, so it gets smart-cast in the branches
            when (val handle = fileInfo.fileHandle) {
                is FileBackupFileType.Snapshot -> {
                    storedSnapshots.add(StoredSnapshot(handle.topLevelFolder.name, handle.time))
                }
                is FileBackupFileType.Blob -> availableChunkIds[handle.name] = fileInfo.size
                else -> error("Unexpected FileHandle: $fileInfo")
            }
        }
        // ensure our local ChunksCache is up to date
        if (!db.getChunksCache().areAllAvailableChunksCached(db, availableChunkIds.keys)) {
            Log.i(TAG, "Not all available chunks cached, rebuild local cache...")
            cacheRepopulater.repopulate(streamKey, availableChunkIds)
        }
        // parse snapshots, unreadable ones are logged and left out
        val snapshots = storedSnapshots.mapNotNull {
            try {
                snapshotRetriever.getSnapshot(streamKey, it)
            } catch (e: Exception) {
                Log.e(TAG, "Error getting snapshot for $it: ", e)
                null
            }
        }
        Log.i(TAG, "Found ${storedSnapshots.size} snapshots, ${snapshots.size} readable.")
        // get all referenced chunkIds
        val referencedChunkIds = mutableSetOf<String>()
        snapshots.forEach { snapshot ->
            snapshot.mediaFilesList.forEach { referencedChunkIds.addAll(it.chunkIdsList) }
            snapshot.documentFilesList.forEach { referencedChunkIds.addAll(it.chunkIdsList) }
        }
        // calculate chunks that are missing
        val missingChunkIds = referencedChunkIds - availableChunkIds.keys
        Log.i(
            TAG, "Found ${referencedChunkIds.size} referenced chunks, " +
                "${missingChunkIds.size} missing."
        )

        // Only sample chunks that exist on the backend. Missing ones are reported already,
        // loading them can only fail and would report them as malformed as well.
        val checkableChunkIds = referencedChunkIds - missingChunkIds
        val chunkIdMac = mac // keep a copy of the mac
        val badChunks = mutableSetOf<String>()
        checkBlobSample(checkableChunkIds, percent).forEach { chunkId ->
            try {
                val readId = checkChunk(chunkId, chunkIdMac)
                if (readId != chunkId) {
                    Log.w(TAG, "Wrong chunkId $readId for $chunkId")
                    badChunks.add(chunkId)
                } else {
                    Log.i(TAG, "Checked chunkId $chunkId")
                }
            } catch (e: Exception) {
                Log.e(TAG, "Error checking chunk $chunkId: ", e)
                badChunks.add(chunkId)
            }
        }
        val allSnapshotsReadable = snapshots.size == storedSnapshots.size
        // the result needs to be returned from both branches,
        // otherwise a successful check would get reported as an error
        return if (missingChunkIds.isEmpty() && badChunks.isEmpty() &&
            allSnapshotsReadable && storedSnapshots.isNotEmpty()
        ) {
            CheckerResult.Success(snapshots, percent, 0L) // TODO size
        } else {
            CheckerResult.Error(storedSnapshots.size, snapshots, missingChunkIds, badChunks)
        }
    }

    /**
     * Returns a random selection of the given [referencedChunkIds].
     * Chunks get added until their accumulated cached size reaches
     * [percent] percent of [getBackupSize] or until there are no more chunks left.
     */
    private fun checkBlobSample(referencedChunkIds: Set<String>, percent: Int): List<String> {
        val size = getBackupSize()
        val targetSize = (size * (percent.toDouble() / 100)).roundToLong()
        val chunksCache = db.getChunksCache()
        val blobSample = mutableListOf<String>()
        val iterator = referencedChunkIds.shuffled().iterator()
        var currentSize = 0L
        while (currentSize < targetSize && iterator.hasNext()) {
            val chunkId = iterator.next()
            blobSample.add(chunkId)
            // we ensure cache consistency above, so chunks not in cache don't exist anymore
            currentSize += chunksCache.get(chunkId)?.size ?: 0L
        }
        return blobSample
    }

    /**
     * Loads the chunk with the given [chunkId] from the backend, decrypts it
     * and returns the hex encoded result of running [chunkIdMac] over the decrypted bytes.
     * The caller compares the returned ID with [chunkId].
     */
    private suspend fun checkChunk(chunkId: String, chunkIdMac: Mac): String {
        val handle = FileBackupFileType.Blob(androidId, chunkId)
        val cachedChunk = db.getChunksCache().get(chunkId)
        // if chunk is not in DB, it isn't available on backend, so missing version doesn't matter
        val version = cachedChunk?.version ?: VERSION
        return backend.load(handle).use { inputStream ->
            inputStream.readVersion(version.toInt())
            val ad = streamCrypto.getAssociatedDataForChunk(chunkId, version)
            streamCrypto.newDecryptingStream(streamKey, inputStream, ad).use { decryptedStream ->
                // Kotlin's readBytes() reads the entire stream as well,
                // but doesn't depend on InputStream#readAllBytes() being available
                chunkIdMac.doFinal(decryptedStream.readBytes()).toHexString()
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ internal interface ChunksCache {
/**
 * Returns the number of rows in CachedChunk whose id is contained in [ids].
 */
@Query("SELECT COUNT(id) FROM CachedChunk WHERE id IN (:ids)")
fun getNumberOfCachedChunks(ids: Collection<String>): Int

/**
 * Returns the summed up size of all cached chunks with a reference count greater than zero.
 *
 * SUM() is NULL when no row matches, so COALESCE is used
 * to explicitly return 0 for the non-null return type in that case.
 */
@Query("SELECT COALESCE(SUM(size), 0) FROM CachedChunk WHERE ref_count > 0")
fun getSizeOfCachedChunks(): Long

/**
 * Returns all cached chunks whose reference count is zero or less.
 */
@Query("SELECT * FROM CachedChunk WHERE ref_count <= 0")
fun getUnreferencedChunks(): List<CachedChunk>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
package org.calyxos.backup.storage.prune

import android.util.Log
import org.calyxos.backup.storage.SnapshotRetriever
import org.calyxos.backup.storage.api.BackupObserver
import org.calyxos.backup.storage.api.StoredSnapshot
import org.calyxos.backup.storage.crypto.StreamCrypto
import org.calyxos.backup.storage.db.Db
import org.calyxos.backup.storage.measure
import org.calyxos.backup.storage.SnapshotRetriever
import org.calyxos.backup.storage.getCurrentBackupSnapshots
import org.calyxos.backup.storage.measure
import org.calyxos.seedvault.core.backends.Backend
import org.calyxos.seedvault.core.backends.FileBackupFileType
import org.calyxos.seedvault.core.crypto.KeyManager
Expand All @@ -24,14 +24,14 @@ private val TAG = Pruner::class.java.simpleName
internal class Pruner(
private val db: Db,
private val retentionManager: RetentionManager,
private val storagePluginGetter: () -> Backend,
private val backendGetter: () -> Backend,
private val androidId: String,
keyManager: KeyManager,
private val snapshotRetriever: SnapshotRetriever,
streamCrypto: StreamCrypto = StreamCrypto,
) {

private val backend get() = storagePluginGetter()
private val backend get() = backendGetter()
private val chunksCache = db.getChunksCache()
private val streamKey = try {
streamCrypto.deriveStreamKey(keyManager.getMainKey())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ internal class PrunerTest {
private val pruner = Pruner(
db = db,
retentionManager = retentionManager,
storagePluginGetter = backendGetter,
backendGetter = backendGetter,
androidId = androidId,
keyManager = keyManager,
snapshotRetriever = snapshotRetriever,
Expand Down

0 comments on commit 98f1e8d

Please sign in to comment.