Skip to content

Commit

Permalink
Add in-toto format with hashes of shards as subjects (#267)
Browse files Browse the repository at this point in the history
This converts model serialization manifests that record every model file
shard hash into an in-toto payload that can then be passed to Sigstore's
`sign_intoto` for signing to generate a Sigstore `Bundle` (if using
Sigstore).

This time, we record every hash as part of the subject instead of in the
payload. We require verifiers to be aware of this and acknowledge that
verifiers that only check subject by subject (that is, they check if the
hash of a passed in argument is in the list of subjects and don't check
if all the hashes are present), can fail to fully detect if the model
integrity is compromised by renaming one file in the model,
interchanging two file names, deleting a file, or reordering two shards.
The signing library will have additional checks for this, but verifying
the signature with other tools might produce incorrect verification results.

Signed-off-by: Mihai Maruseac <[email protected]>
  • Loading branch information
mihaimaruseac authored Aug 2, 2024
1 parent c662544 commit b4a4f6a
Show file tree
Hide file tree
Showing 9 changed files with 368 additions and 0 deletions.
105 changes: 105 additions & 0 deletions model_signing/signing/in_toto.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,3 +481,108 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
manifest, predicate_type=cls.predicate_type
)
return cls(statement)


class ShardDigestsIntotoPayload(IntotoPayload):
    """In-toto payload where the subjects are the model shards themselves.

    This payload is supposed to be used for manifests where every file shard in
    the model is matched with a digest. Because existing tooling only supports
    established hashing algorithms, we annotate every subject with the actual
    hash algorithm used to compute the file digest, and use "sha256" as the
    algorithm name in the digest itself.

    Example:
    ```json
    {
      "_type": "https://in-toto.io/Statement/v1",
      "subject": [
        {
          "name": "d0/d1/d2/d3/d4/f0:0:16",
          "digest": {
            "sha256": "6efa14..."
          },
          "annotations": {
            "actual_hash_algorithm": "file-sha256-1000000"
          }
        },
        {
          "name": "d0/d1/d2/d3/d4/f1:0:16",
          "digest": {
            "sha256": "a9bc14..."
          },
          "annotations": {
            "actual_hash_algorithm": "file-sha256-1000000"
          }
        },
        {
          "name": "d0/d1/d2/d3/d4/f2:0:16",
          "digest": {
            "sha256": "5f597e..."
          },
          "annotations": {
            "actual_hash_algorithm": "file-sha256-1000000"
          }
        },
        {
          "name": "d0/d1/d2/d3/d4/f3:0:16",
          "digest": {
            "sha256": "eaf677..."
          },
          "annotations": {
            "actual_hash_algorithm": "file-sha256-1000000"
          }
        }
      ],
      "predicateType": "https://model_signing/ShardDigests/v0.1",
      "predicate": {
        "unused": "Unused, just passed due to API requirements"
      }
    }
    ```

    If the annotation for a subject is missing, or it does not contain
    actual_hash_algorithm, it should be assumed that the digest is computed via
    the algorithm listed in the digest dictionary (i.e., sha256).

    See also https://github.com/sigstore/sigstore-python/issues/1018.
    """

    # Identifies this payload flavor in the statement's `predicateType` field.
    predicate_type: Final[str] = (
        "https://model_signing/ShardDigests/v0.1"
    )

    def __init__(self, statement: statement.Statement):
        """Builds an instance of this in-toto payload.

        Don't call this directly in production. Use `from_manifest()` instead.

        Args:
            statement: The DSSE statement representing this in-toto payload.
        """
        self.statement = statement

    @classmethod
    @override
    def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
        """Converts a manifest to the signing payload used for signing.

        The manifest must be one where every model shard is paired with its own
        digest. Currently, this is only `ShardLevelManifest`.

        Args:
            manifest: the manifest to convert to signing payload.

        Returns:
            An instance of `ShardDigestsIntotoPayload`.

        Raises:
            TypeError: If the manifest is not `ShardLevelManifest`.
        """
        # Reject unsupported manifest kinds before building any statement.
        if not isinstance(manifest, manifest_module.ShardLevelManifest):
            raise TypeError("Only ShardLevelManifest is supported")

        statement = _convert_descriptors_to_direct_statement(
            manifest, predicate_type=cls.predicate_type
        )
        return cls(statement)
62 changes: 62 additions & 0 deletions model_signing/signing/in_toto_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,65 @@ def test_only_runs_on_expected_manifest_types(self):
match="Only FileLevelManifest is supported",
):
in_toto.DigestsIntotoPayload.from_manifest(manifest)


class TestShardDigestsIntotoPayload:
    """Tests for building `ShardDigestsIntotoPayload` from manifests."""

    def _hasher_factory(
        self, path: pathlib.Path, start: int, end: int
    ) -> file.ShardedFileHasher:
        """Returns a SHA256-backed sharded hasher for `path`.

        The shard is delimited by the `start` and `end` arguments, which are
        forwarded unchanged to `ShardedFileHasher`.
        """
        content_hasher = memory.SHA256()
        return file.ShardedFileHasher(
            path, content_hasher, start=start, end=end
        )

    def _payload_for(self, model):
        """Serializes `model` by file shard and wraps it as a payload."""
        shard_serializer = serialize_by_file_shard.ManifestSerializer(
            self._hasher_factory, allow_symlinks=True
        )
        shard_manifest = shard_serializer.serialize(model)
        return in_toto.ShardDigestsIntotoPayload.from_manifest(shard_manifest)

    @pytest.mark.parametrize("model_fixture_name", test_support.all_test_models)
    def test_known_models(self, request, model_fixture_name):
        """Checks the payload for each known model against its golden file.

        When the `update_goldens` option is set, the golden file is rewritten
        from the freshly computed payload instead of being compared.
        """
        # Arrange: locate the golden file and resolve the model fixture.
        golden_path = (
            request.path.parent
            / "testdata"
            / "in_toto"
            / "TestShardDigestsIntotoPayload"
            / model_fixture_name
        )
        should_update = request.config.getoption("update_goldens")
        model = request.getfixturevalue(model_fixture_name)

        # Act: build the payload from the shard-level manifest.
        payload = self._payload_for(model)

        # Update mode: overwrite the golden and skip the comparison.
        if should_update:
            serialized = json_format.MessageToJson(payload.statement.pb)
            with open(golden_path, "w", encoding="utf-8") as golden:
                golden.write(f"{serialized}\n")
            return

        # Assert: the computed statement must equal the stored golden.
        with open(golden_path, "r", encoding="utf-8") as golden:
            expected = json_format.Parse(
                golden.read(), statement_pb2.Statement()
            )

        assert payload.statement.pb == expected

    def test_produces_valid_statements(self, sample_model_folder):
        """Checks that the generated statement passes its own validation."""
        payload = self._payload_for(sample_model_folder)

        payload.statement.validate()

    def test_only_runs_on_expected_manifest_types(self):
        """Checks that a non shard-level manifest is rejected with TypeError."""
        unsupported_manifest = manifest_module.DigestManifest(
            hashing.Digest("test", b"test_digest")
        )

        with pytest.raises(
            TypeError,
            match="Only ShardLevelManifest is supported",
        ):
            in_toto.ShardDigestsIntotoPayload.from_manifest(
                unsupported_manifest
            )
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"name": "d0/d1/d2/d3/d4/f0:0:16",
"digest": {
"sha256": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "d0/d1/d2/d3/d4/f1:0:16",
"digest": {
"sha256": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "d0/d1/d2/d3/d4/f2:0:16",
"digest": {
"sha256": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "d0/d1/d2/d3/d4/f3:0:16",
"digest": {
"sha256": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
}
],
"predicateType": "https://model_signing/ShardDigests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"_type": "https://in-toto.io/Statement/v1",
"predicateType": "https://model_signing/ShardDigests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"_type": "https://in-toto.io/Statement/v1",
"predicateType": "https://model_signing/ShardDigests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"_type": "https://in-toto.io/Statement/v1",
"predicateType": "https://model_signing/ShardDigests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"name": ".:0:22",
"digest": {
"sha256": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
}
],
"predicateType": "https://model_signing/ShardDigests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"name": "d0/f00:0:23",
"digest": {
"sha256": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "d0/f01:0:23",
"digest": {
"sha256": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "d0/f02:0:23",
"digest": {
"sha256": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "d1/f10:0:23",
"digest": {
"sha256": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "d1/f11:0:23",
"digest": {
"sha256": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "d1/f12:0:23",
"digest": {
"sha256": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "f0:0:24",
"digest": {
"sha256": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "f1:0:24",
"digest": {
"sha256": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "f2:0:24",
"digest": {
"sha256": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
},
{
"name": "f3:0:24",
"digest": {
"sha256": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
}
],
"predicateType": "https://model_signing/ShardDigests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"name": "symlink_file:0:22",
"digest": {
"sha256": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
}
}
],
"predicateType": "https://model_signing/ShardDigests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}

0 comments on commit b4a4f6a

Please sign in to comment.