diff --git a/model_signing/signing/in_toto.py b/model_signing/signing/in_toto.py index c108b446..2249a013 100644 --- a/model_signing/signing/in_toto.py +++ b/model_signing/signing/in_toto.py @@ -481,3 +481,108 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self: manifest, predicate_type=cls.predicate_type ) return cls(statement) + + +class ShardDigestsIntotoPayload(IntotoPayload): + """In-toto payload where the subjects are the model shards themselves. + + This payload is supposed to be used for manifests where every file shard in + the model is matched with a digest. Because existing tooling only supports + established hashing algorithms, we annotate every subject with the actual + hash algorithm used to compute the file digest, and use "sha256" as the + algorithm name in the digest itself. + + Example: + ```json + { + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "name": "d0/d1/d2/d3/d4/f0:0:16", + "digest": { + "sha256": "6efa14..." + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d0/d1/d2/d3/d4/f1:0:16", + "digest": { + "sha256": "a9bc14..." + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d0/d1/d2/d3/d4/f2:0:16", + "digest": { + "sha256": "5f597e..." + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d0/d1/d2/d3/d4/f3:0:16", + "digest": { + "sha256": "eaf677..." + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + } + ], + "predicateType": "https://model_signing/ShardDigests/v0.1", + "predicate": { + "unused": "Unused, just passed due to API requirements" + } + } + ``` + + If the annotation for a subject is missing, or it does not contain + actual_hash_algorithm, it should be assumed that the digest is computed via + the algorithm listed in the digest dictionary (i.e., sha256). + + See also https://github.com/sigstore/sigstore-python/issues/1018. 
+ """ + + predicate_type: Final[str] = ( + "https://model_signing/ShardDigests/v0.1" + ) + + def __init__(self, statement: statement.Statement): + """Builds an instance of this in-toto payload. + + Don't call this directly in production. Use `from_manifest()` instead. + + Args: + statement: The DSSE statement representing this in-toto payload. + """ + self.statement = statement + + @classmethod + @override + def from_manifest(cls, manifest: manifest_module.Manifest) -> Self: + """Converts a manifest to the signing payload used for signing. + + The manifest must be one where every model shard is paired with its own + digest. Currently, this is only `ShardLevelManifest`. + + Args: + manifest: the manifest to convert to signing payload. + + Returns: + An instance of `ShardDigestsIntotoPayload`. + + Raises: + TypeError: If the manifest is not `ShardLevelManifest`. + """ + if not isinstance(manifest, manifest_module.ShardLevelManifest): + raise TypeError("Only ShardLevelManifest is supported") + + statement = _convert_descriptors_to_direct_statement( + manifest, predicate_type=cls.predicate_type + ) + return cls(statement) diff --git a/model_signing/signing/in_toto_test.py b/model_signing/signing/in_toto_test.py index e97b1ae5..90626631 100644 --- a/model_signing/signing/in_toto_test.py +++ b/model_signing/signing/in_toto_test.py @@ -264,3 +264,65 @@ def test_only_runs_on_expected_manifest_types(self): match="Only FileLevelManifest is supported", ): in_toto.DigestsIntotoPayload.from_manifest(manifest) + + +class TestShardDigestsIntotoPayload: + + def _hasher_factory( + self, path: pathlib.Path, start: int, end: int + ) -> file.ShardedFileHasher: + return file.ShardedFileHasher( + path, memory.SHA256(), start=start, end=end + ) + + @pytest.mark.parametrize("model_fixture_name", test_support.all_test_models) + def test_known_models(self, request, model_fixture_name): + # Set up variables (arrange) + testdata_path = request.path.parent / "testdata" + test_path = 
testdata_path / "in_toto" + test_class_path = test_path / "TestShardDigestsIntotoPayload" + golden_path = test_class_path / model_fixture_name + should_update = request.config.getoption("update_goldens") + model = request.getfixturevalue(model_fixture_name) + + # Compute payload (act) + serializer = serialize_by_file_shard.ManifestSerializer( + self._hasher_factory, allow_symlinks=True + ) + manifest = serializer.serialize(model) + payload = in_toto.ShardDigestsIntotoPayload.from_manifest(manifest) + + # Compare with golden, or write to golden (approximately "assert") + if should_update: + with open(golden_path, "w", encoding="utf-8") as f: + f.write(f"{json_format.MessageToJson(payload.statement.pb)}\n") + else: + with open(golden_path, "r", encoding="utf-8") as f: + json_contents = f.read() + proto = json_format.Parse( + json_contents, statement_pb2.Statement() + ) + + assert payload.statement.pb == proto + + def test_produces_valid_statements(self, sample_model_folder): + serializer = serialize_by_file_shard.ManifestSerializer( + self._hasher_factory, allow_symlinks=True + ) + manifest = serializer.serialize(sample_model_folder) + + payload = in_toto.ShardDigestsIntotoPayload.from_manifest( + manifest + ) + + payload.statement.validate() + + def test_only_runs_on_expected_manifest_types(self): + digest = hashing.Digest("test", b"test_digest") + manifest = manifest_module.DigestManifest(digest) + + with pytest.raises( + TypeError, + match="Only ShardLevelManifest is supported", + ): + in_toto.ShardDigestsIntotoPayload.from_manifest(manifest) diff --git a/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/deep_model_folder b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/deep_model_folder new file mode 100644 index 00000000..6c228aeb --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/deep_model_folder @@ -0,0 +1,45 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + 
"name": "d0/d1/d2/d3/d4/f0:0:16", + "digest": { + "sha256": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d0/d1/d2/d3/d4/f1:0:16", + "digest": { + "sha256": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d0/d1/d2/d3/d4/f2:0:16", + "digest": { + "sha256": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d0/d1/d2/d3/d4/f3:0:16", + "digest": { + "sha256": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + } + ], + "predicateType": "https://model_signing/ShardDigests/v0.1", + "predicate": { + "unused": "Unused, just passed due to API requirements" + } +} diff --git a/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/empty_model_file b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/empty_model_file new file mode 100644 index 00000000..4fa1c7fe --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/empty_model_file @@ -0,0 +1,7 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "predicateType": "https://model_signing/ShardDigests/v0.1", + "predicate": { + "unused": "Unused, just passed due to API requirements" + } +} diff --git a/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/empty_model_folder b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/empty_model_folder new file mode 100644 index 00000000..4fa1c7fe --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/empty_model_folder @@ -0,0 +1,7 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "predicateType": 
"https://model_signing/ShardDigests/v0.1", + "predicate": { + "unused": "Unused, just passed due to API requirements" + } +} diff --git a/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/model_folder_with_empty_file b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/model_folder_with_empty_file new file mode 100644 index 00000000..4fa1c7fe --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/model_folder_with_empty_file @@ -0,0 +1,7 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "predicateType": "https://model_signing/ShardDigests/v0.1", + "predicate": { + "unused": "Unused, just passed due to API requirements" + } +} diff --git a/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/sample_model_file b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/sample_model_file new file mode 100644 index 00000000..bd839714 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/sample_model_file @@ -0,0 +1,18 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "name": ".:0:22", + "digest": { + "sha256": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + } + ], + "predicateType": "https://model_signing/ShardDigests/v0.1", + "predicate": { + "unused": "Unused, just passed due to API requirements" + } +} diff --git a/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/sample_model_folder b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/sample_model_folder new file mode 100644 index 00000000..bc455dc8 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/sample_model_folder @@ -0,0 +1,99 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "name": "d0/f00:0:23", + "digest": { + "sha256": 
"fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d0/f01:0:23", + "digest": { + "sha256": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d0/f02:0:23", + "digest": { + "sha256": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d1/f10:0:23", + "digest": { + "sha256": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d1/f11:0:23", + "digest": { + "sha256": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "d1/f12:0:23", + "digest": { + "sha256": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "f0:0:24", + "digest": { + "sha256": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "f1:0:24", + "digest": { + "sha256": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "f2:0:24", + "digest": { + "sha256": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + }, + { + "name": "f3:0:24", + "digest": { + "sha256": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + } + ], + "predicateType": 
"https://model_signing/ShardDigests/v0.1", + "predicate": { + "unused": "Unused, just passed due to API requirements" + } +} diff --git a/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/symlink_model_folder b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/symlink_model_folder new file mode 100644 index 00000000..13776db7 --- /dev/null +++ b/model_signing/signing/testdata/in_toto/TestShardDigestsIntotoPayload/symlink_model_folder @@ -0,0 +1,18 @@ +{ + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "name": "symlink_file:0:22", + "digest": { + "sha256": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b" + }, + "annotations": { + "actual_hash_algorithm": "file-sha256-1000000" + } + } + ], + "predicateType": "https://model_signing/ShardDigests/v0.1", + "predicate": { + "unused": "Unused, just passed due to API requirements" + } +}