Skip to content

Commit

Permalink
Store ciphertext size in the DB instead of the plaintext size
Browse files Browse the repository at this point in the history
This is useful as a first cheap check to see if backup data on the backend still has the expected size.
The plaintext size wasn't really used for anything anyway. Existing installation that still store the plaintext size will log different file size as suspicious and prefer those chunks in checks, but there shouldn't be other negative consequences from this.
  • Loading branch information
grote committed Nov 27, 2024
1 parent e5ba72e commit da26663
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ internal class ChunkWriter(
var writtenBytes = 0L
chunks.forEach { chunk ->
val cachedChunk = chunksCache.get(chunk.id)
// TODO missing chunks used by several files will get uploaded several times
val isMissing = chunk.id in missingChunkIds
val notCached = cachedChunk == null
if (isMissing) Log.w(TAG, "Chunk ${chunk.id} is missing (cached: ${!notCached})")
Expand All @@ -58,14 +59,16 @@ internal class ChunkWriter(
}
if (notCached) chunksCache.insert(chunk.toCachedChunk())
writtenChunks++
writtenBytes += chunk.size
writtenBytes += chunk.plaintextSize
} else { // chunk already uploaded
val skipped = inputStream.skip(chunk.size)
check(chunk.size == skipped) { "skipping error" }
val skipped = inputStream.skip(chunk.plaintextSize)
check(chunk.plaintextSize == skipped) { "skipping error" }
}
}
val endByte = inputStream.read()
check(endByte == -1) { "Stream did continue with $endByte" }
// FIXME the writtenBytes are based on plaintext size, not ciphertext size
// However, they don't seem to be really used for anything at the moment.
return ChunkWriterResult(writtenChunks, writtenBytes)
}

Expand All @@ -89,14 +92,14 @@ internal class ChunkWriter(
) {
var totalBytesRead = 0L
do {
val sizeLeft = (chunk.size - totalBytesRead).toInt()
val sizeLeft = (chunk.plaintextSize - totalBytesRead).toInt()
val bytesRead = inputStream.read(buffer, 0, min(bufferSize, sizeLeft))
if (bytesRead == -1) throw IOException("unexpected end of stream for ${chunk.id}")
outputStream.write(buffer, 0, bytesRead)
totalBytesRead += bytesRead
} while (bytesRead >= 0 && totalBytesRead < chunk.size)
check(totalBytesRead == chunk.size) {
"copyChunkFromInputStream: $totalBytesRead != ${chunk.size}"
} while (bytesRead >= 0 && totalBytesRead < chunk.plaintextSize)
check(totalBytesRead == chunk.plaintextSize) {
"copyChunkFromInputStream: $totalBytesRead != ${chunk.plaintextSize}"
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.calyxos.backup.storage.backup

import org.calyxos.backup.storage.db.CachedChunk
import org.calyxos.seedvault.core.crypto.CoreCrypto
import org.calyxos.seedvault.core.toHexString
import java.io.IOException
import java.io.InputStream
Expand All @@ -15,9 +16,14 @@ import kotlin.math.min
internal data class Chunk(
val id: String,
val offset: Long,
val size: Long,
val plaintextSize: Long,
) {
fun toCachedChunk() = CachedChunk(id, 0, size)
fun toCachedChunk() = CachedChunk(
id = id,
refCount = 0,
// FIXME sometimes, the ciphertext size is not as expected
size = 1 + CoreCrypto.expectedCiphertextSize(plaintextSize),
)
}

internal class Chunker(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import org.calyxos.backup.storage.restore.readVersion
import org.calyxos.seedvault.core.backends.FileBackupFileType
import org.calyxos.seedvault.core.backends.IBackendManager
import org.calyxos.seedvault.core.backends.TopLevelFolder
import org.calyxos.seedvault.core.crypto.CoreCrypto
import org.calyxos.seedvault.core.crypto.KeyManager
import org.calyxos.seedvault.core.toHexString
import java.io.IOException
Expand Down Expand Up @@ -219,11 +218,8 @@ internal class Checker(
Log.i(TAG, "Chunk ${chunk.id} known to be corrupted.")
return@forEach
}
val plaintextSize = chunk?.size ?: 0L
// expectedSize is one version byte plus ciphertext size
val expectedSize = 1 + CoreCrypto.expectedCiphertextSize(plaintextSize)
if (size != expectedSize) {
Log.i(TAG, "Expected size $expectedSize, but chunk had $size: $chunkId")
if (size != chunk?.size) {
Log.i(TAG, "Expected size ${chunk?.size}, but chunk had $size: $chunkId")
suspiciousChunkIds.add(chunkId)
}
}
Expand Down Expand Up @@ -257,7 +253,7 @@ internal class Checker(
val size = getBackupSize()
val targetSize = (size * (percent.toDouble() / 100)).roundToLong()
val blobSample = mutableListOf<String>()
val priorityChunksIds = referencedChunkIds.intersect(suspiciousChunkIds)
val priorityChunksIds = referencedChunkIds.intersect(suspiciousChunkIds).shuffled()
val iterator = (priorityChunksIds + referencedChunkIds.shuffled()).iterator()
var currentSize = 0L
while (currentSize < targetSize && iterator.hasNext()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ internal data class CachedChunk(
* Note that this is *not* about how many files across various snapshots are referencing it.
*/
@ColumnInfo(name = "ref_count") val refCount: Long,
/**
* The ciphertext size of the chunk on disk, including the version byte.
* Not the plaintext size.
*/
val size: Long,
val version: Byte = Backup.VERSION,
@ColumnInfo(defaultValue = "0")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,15 +214,15 @@ internal class ChunkWriterTest {
}

// check that output matches chunk data
assertEquals(1 + chunks[0].size.toInt(), chunk1Output.size())
assertEquals(1 + chunks[0].plaintextSize.toInt(), chunk1Output.size())
assertArrayEquals(
chunk1Bytes,
chunk1Output.toByteArray().copyOfRange(1, 1 + chunks[0].size.toInt())
chunk1Output.toByteArray().copyOfRange(1, 1 + chunks[0].plaintextSize.toInt())
)
assertEquals(1 + chunks[2].size.toInt(), chunk3Output.size())
assertEquals(1 + chunks[2].plaintextSize.toInt(), chunk3Output.size())
assertArrayEquals(
chunk3Bytes,
chunk3Output.toByteArray().copyOfRange(1, 1 + chunks[2].size.toInt())
chunk3Output.toByteArray().copyOfRange(1, 1 + chunks[2].plaintextSize.toInt())
)
}

Expand Down

0 comments on commit da26663

Please sign in to comment.