From ea71faf1fa1e4095f7d2b77aa88d3562aeaf103b Mon Sep 17 00:00:00 2001 From: ChickenLover Date: Mon, 15 Jul 2024 15:31:12 +0700 Subject: [PATCH] add keccak tree builder (#555) --- docs/docs/icicle/golang-bindings/keccak.md | 18 +- docs/docs/icicle/rust-bindings/keccak.md | 18 +- examples/c++/multi-gpu-poseidon/example.cu | 2 +- examples/c++/poseidon/example.cu | 8 +- examples/rust/poseidon/src/main.rs | 4 +- icicle/include/api/babybear.h | 10 +- icicle/include/api/bls12_377.h | 10 +- icicle/include/api/bls12_381.h | 10 +- icicle/include/api/bn254.h | 12 +- icicle/include/api/bw6_761.h | 10 +- icicle/include/api/grumpkin.h | 10 +- icicle/include/api/hash.h | 18 +- icicle/include/api/m31.h | 8 +- icicle/include/api/stark252.h | 8 +- .../include/api/templates/fields/poseidon.h | 2 +- .../include/api/templates/fields/poseidon2.h | 2 +- icicle/include/api/templates/fields/tree.h | 8 +- icicle/include/hash/hash.cuh | 32 +-- icicle/include/hash/keccak/keccak.cuh | 36 +-- icicle/include/merkle-tree/merkle.cuh | 10 +- icicle/include/poseidon/poseidon.cuh | 6 +- icicle/include/poseidon2/poseidon2.cuh | 6 +- icicle/src/hash/keccak/.gitignore | 3 +- icicle/src/hash/keccak/Makefile | 6 +- icicle/src/hash/keccak/extern.cu | 35 ++- icicle/src/hash/keccak/keccak.cu | 262 ++++++++++++++++-- icicle/src/hash/keccak/kernels.cu | 233 ---------------- icicle/src/hash/keccak/test.cu | 2 +- icicle/src/hash/keccak/test_tree.cu | 91 ++++++ icicle/src/merkle-tree/extern.cu | 4 +- icicle/src/merkle-tree/extern_mmcs.cu | 4 +- icicle/src/merkle-tree/merkle.cu | 99 ++++--- icicle/src/merkle-tree/mmcs.cu | 12 +- icicle/src/merkle-tree/tests/merkle/Makefile | 3 +- icicle/src/merkle-tree/tests/merkle/test.cu | 1 + .../tests/merkle/test_poseidon2.cu | 15 +- icicle/src/poseidon/extern.cu | 2 +- icicle/src/poseidon/test.cu | 2 +- icicle/src/poseidon2/extern.cu | 2 +- icicle/src/poseidon2/test.cu | 2 +- icicle/src/poseidon2/test_m31.cu | 2 +- wrappers/golang/core/sponge.go | 6 +- .../bls12377/poseidon/include/poseidon.h | 4 +- .../curves/bls12377/poseidon/poseidon.go | 8 +- .../curves/bls12377/tests/poseidon_test.go | 2 +- .../bls12381/poseidon/include/poseidon.h | 4 +- .../curves/bls12381/poseidon/poseidon.go | 8 +- .../curves/bls12381/tests/poseidon_test.go | 2 +- .../curves/bn254/poseidon/include/poseidon.h | 4 +- .../golang/curves/bn254/poseidon/poseidon.go | 8 +- .../curves/bn254/tests/poseidon_test.go | 2 +- .../curves/bw6761/poseidon/include/poseidon.h | 4 +- .../golang/curves/bw6761/poseidon/poseidon.go | 8 +- .../curves/bw6761/tests/poseidon_test.go | 2 +- .../grumpkin/poseidon/include/poseidon.h | 4 +- .../curves/grumpkin/poseidon/poseidon.go | 8 +- .../curves/grumpkin/tests/poseidon_test.go | 2 +- wrappers/golang/hash/keccak/hasher.go | 16 +- wrappers/golang/hash/keccak/include/keccak.h | 6 +- .../golang/hash/keccak/tests/hasher_test.go | 8 +- .../poseidon/templates/poseidon.go.tmpl | 8 +- .../poseidon/templates/poseidon.h.tmpl | 4 +- .../poseidon/templates/poseidon_test.go.tmpl | 2 +- wrappers/rust/icicle-core/src/hash.rs | 32 +-- wrappers/rust/icicle-core/src/poseidon/mod.rs | 19 +- .../rust/icicle-core/src/poseidon2/mod.rs | 32 +-- .../icicle-bls12-377/src/poseidon/mod.rs | 2 +- .../icicle-bls12-381/src/poseidon/mod.rs | 2 +- .../icicle-bn254/src/poseidon/mod.rs | 2 +- .../icicle-bn254/src/poseidon2/mod.rs | 2 +- .../icicle-grumpkin/src/poseidon/mod.rs | 2 +- .../icicle-babybear/src/poseidon2/mod.rs | 2 +- wrappers/rust/icicle-hash/src/keccak/mod.rs | 133 +++++---- wrappers/rust/icicle-hash/src/keccak/tests.rs | 47 ++++ 74 files changed, 779 insertions(+), 644 deletions(-) delete mode 100644 icicle/src/hash/keccak/kernels.cu create mode 100644 icicle/src/hash/keccak/test_tree.cu diff --git a/docs/docs/icicle/golang-bindings/keccak.md b/docs/docs/icicle/golang-bindings/keccak.md index fd81f7adc..1ecc80de1 100644 --- a/docs/docs/icicle/golang-bindings/keccak.md +++ b/docs/docs/icicle/golang-bindings/keccak.md @@ -25,7 +25,7 @@ func main() { input := createHostSliceFromHexString("1725b6") outHost256 := make(core.HostSlice[uint8], 32) - cfg := keccak.GetDefaultKeccakConfig() + cfg := keccak.GetDefaultHashConfig() e := keccak.Keccak256(input, int32(input.Len()), 1, outHost256, &cfg) if e.CudaErrorCode != cr.CudaSuccess { panic("Keccak256 hashing failed") @@ -49,8 +49,8 @@ func main() { ## Keccak Methods ```go -func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig) core.IcicleError -func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig) core.IcicleError +func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError +func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError ``` ### Parameters @@ -59,18 +59,18 @@ func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int3 - **`inputBlockSize`**: An integer specifying the size of the input data for a single hash. - **`numberOfBlocks`**: An integer specifying the number of results in the hash batch. - **`output`**: A slice where the resulting hash will be stored. This slice can be in host or device memory. -- **`config`**: A pointer to a `KeccakConfig` object, which contains various configuration options for the Keccak256 operation. +- **`config`**: A pointer to a `HashConfig` object, which contains various configuration options for the Keccak256 operation. ### Return Value - **`CudaError`**: Returns a CUDA error code indicating the success or failure of the Keccak256/Keccak512 operation. -## KeccakConfig +## HashConfig -The `KeccakConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware. +The `HashConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware. ```go -type KeccakConfig struct { +type HashConfig struct { Ctx cr.DeviceContext areInputsOnDevice bool areOutputsOnDevice bool @@ -87,8 +87,8 @@ type KeccakConfig struct { ### Default Configuration -Use `GetDefaultKeccakConfig` to obtain a default configuration, which can then be customized as needed. +Use `GetDefaultHashConfig` to obtain a default configuration, which can then be customized as needed. ```go -func GetDefaultKeccakConfig() KeccakConfig +func GetDefaultHashConfig() HashConfig ``` \ No newline at end of file diff --git a/docs/docs/icicle/rust-bindings/keccak.md b/docs/docs/icicle/rust-bindings/keccak.md index a2d648183..9c8b231c7 100644 --- a/docs/docs/icicle/rust-bindings/keccak.md +++ b/docs/docs/icicle/rust-bindings/keccak.md @@ -4,7 +4,7 @@ ```rust use icicle_cuda_runtime::memory::{DeviceVec, HostSlice}; -use icicle_hash::keccak::{keccak256, KeccakConfig}; +use icicle_hash::keccak::{keccak256, HashConfig}; use rand::{self, Rng}; fn main() { @@ -14,7 +14,7 @@ fn main() { let input = HostSlice::::from_slice(initial_data.as_slice()); let mut output = DeviceVec::::cuda_malloc(32).unwrap(); - let mut config = KeccakConfig::default(); + let mut config = HashConfig::default(); keccak256(input, initial_data.len() as i32, 1, &mut output[..], &mut config).expect("Failed to execute keccak256 hashing"); let mut output_host = vec![0_u8; 32]; @@ -32,7 +32,7 @@ pub fn keccak256( input_block_size: i32, number_of_blocks: i32, output: &mut (impl HostOrDeviceSlice + ?Sized), - config: &mut KeccakConfig, + config: &mut HashConfig, ) -> IcicleResult<()> pub fn keccak512( @@ -40,7 +40,7 @@ pub fn keccak512( input_block_size: i32, number_of_blocks: i32, output: &mut (impl HostOrDeviceSlice + ?Sized), - config: &mut KeccakConfig, + config: &mut HashConfig, ) -> IcicleResult<()> ``` @@ -50,18 +50,18 @@ pub fn keccak512( - **`input_block_size`**: An integer specifying the size of the input data for a single hash. - **`number_of_blocks`**: An integer specifying the number of results in the hash batch. - **`output`**: A slice where the resulting hash will be stored. This slice can be in host or device memory. -- **`config`**: A pointer to a `KeccakConfig` object, which contains various configuration options for the Keccak256 operation. +- **`config`**: A pointer to a `HashConfig` object, which contains various configuration options for the Keccak256 operation. ### Return Value - **`IcicleResult`**: Returns a CUDA error code indicating the success or failure of the Keccak256/Keccak512 operation. -## KeccakConfig +## HashConfig -The `KeccakConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware. +The `HashConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware. ```rust -pub struct KeccakConfig<'a> { +pub struct HashConfig<'a> { pub ctx: DeviceContext<'a>, pub are_inputs_on_device: bool, pub are_outputs_on_device: bool, @@ -81,7 +81,7 @@ pub struct KeccakConfig<'a> { Example initialization with default settings: ```rust -let default_config = KeccakConfig::default(); +let default_config = HashConfig::default(); ``` Customizing the configuration: diff --git a/examples/c++/multi-gpu-poseidon/example.cu b/examples/c++/multi-gpu-poseidon/example.cu index 054c7868c..fcc3d5275 100644 --- a/examples/c++/multi-gpu-poseidon/example.cu +++ b/examples/c++/multi-gpu-poseidon/example.cu @@ -35,7 +35,7 @@ void threadPoseidon( std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl; return; } - SpongeConfig column_config = default_sponge_config(ctx); + HashConfig column_config = default_hash_config(ctx); cudaError_t err = poseidon->hash_many(layers, column_hashes, (size_t) size_partition, size_col, 1, column_config); checkCudaError(err); } diff --git a/examples/c++/poseidon/example.cu b/examples/c++/poseidon/example.cu index c75e30217..edc408a03 100644 --- a/examples/c++/poseidon/example.cu +++ b/examples/c++/poseidon/example.cu @@ -16,7 +16,7 @@ inline uint32_t tree_index(uint32_t level, uint32_t offset) { return (1 << level // We assume the tree has leaves already set, compute all other levels void build_tree( - const uint32_t tree_height, scalar_t* tree, Poseidon &poseidon, SpongeConfig &config) + const uint32_t tree_height, scalar_t* tree, Poseidon &poseidon, HashConfig &config) { for (uint32_t level = tree_height - 1; level > 0; level--) { const uint32_t next_level = level - 1; @@ -67,7 +67,7 @@ uint32_t validate_proof( const uint32_t* proof_lr, const scalar_t* proof_hash, Poseidon &poseidon, - SpongeConfig &config) + HashConfig &config) { scalar_t hashes_in[2], hash_out[1], level_hash; level_hash = hash; @@ -112,12 +112,12 @@ int main(int argc, char* argv[]) std::cout << "Hashing blocks into tree leaves..." << std::endl; Poseidon poseidon(data_arity, ctx); - SpongeConfig config = default_sponge_config(ctx); + HashConfig config = default_hash_config(ctx); poseidon.hash_many(data, &tree[tree_index(leaf_level, 0)], tree_width, data_arity, 1, config); std::cout << "3. Building Merkle tree" << std::endl; Poseidon tree_poseidon(tree_arity, ctx); - SpongeConfig tree_config = default_sponge_config(ctx); + HashConfig tree_config = default_hash_config(ctx); build_tree(tree_height, tree, tree_poseidon, tree_config); std::cout << "4. Generate membership proof" << std::endl; diff --git a/examples/rust/poseidon/src/main.rs b/examples/rust/poseidon/src/main.rs index f767bc95f..17dd36f3d 100644 --- a/examples/rust/poseidon/src/main.rs +++ b/examples/rust/poseidon/src/main.rs @@ -2,7 +2,7 @@ use icicle_bls12_381::curve::ScalarField as F; use icicle_cuda_runtime::device_context::DeviceContext; -use icicle_core::hash::{SpongeHash, SpongeConfig}; +use icicle_core::hash::{SpongeHash, HashConfig}; use icicle_core::poseidon::Poseidon; use icicle_core::traits::FieldImpl; use icicle_cuda_runtime::memory::HostSlice; @@ -32,7 +32,7 @@ fn main() { ); let ctx = DeviceContext::default(); let poseidon = Poseidon::load(arity, &ctx).unwrap(); - let config = SpongeConfig::default(); + let config = HashConfig::default(); println!( "---------------------- Input size 2^{}={} ------------------------", diff --git a/icicle/include/api/babybear.h b/icicle/include/api/babybear.h index deaa464ad..64815ceb5 100644 --- a/icicle/include/api/babybear.h +++ b/icicle/include/api/babybear.h @@ -49,7 +49,7 @@ extern "C" cudaError_t babybear_poseidon2_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - hash::SpongeConfig& cfg); + hash::HashConfig& cfg); extern "C" cudaError_t babybear_poseidon2_delete_cuda(poseidon2::Poseidon2* poseidon, device_context::DeviceContext& ctx); @@ -59,16 +59,16 @@ extern "C" cudaError_t babybear_build_merkle_tree( babybear::scalar_t* digests, unsigned int height, unsigned int input_block_len, - const hash::SpongeHasher* compression, - const hash::SpongeHasher* bottom_layer, + const hash::Hasher* compression, + const hash::Hasher* bottom_layer, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t babybear_mmcs_commit_cuda( const matrix::Matrix* leaves, unsigned int number_of_inputs, babybear::scalar_t* digests, - const hash::SpongeHasher* hasher, - const hash::SpongeHasher* compression, + const hash::Hasher* hasher, + const hash::Hasher* compression, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t babybear_mul_cuda( diff --git a/icicle/include/api/bls12_377.h b/icicle/include/api/bls12_377.h index adde436f9..75a113d28 100644 --- a/icicle/include/api/bls12_377.h +++ b/icicle/include/api/bls12_377.h @@ -71,16 +71,16 @@ extern "C" cudaError_t bls12_377_build_merkle_tree( bls12_377::scalar_t* digests, unsigned int height, unsigned int input_block_len, - const hash::SpongeHasher* compression, - const hash::SpongeHasher* bottom_layer, + const hash::Hasher* compression, + const hash::Hasher* bottom_layer, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t bls12_377_mmcs_commit_cuda( const matrix::Matrix* leaves, unsigned int number_of_inputs, bls12_377::scalar_t* digests, - const hash::SpongeHasher* hasher, - const hash::SpongeHasher* compression, + const hash::Hasher* hasher, + const hash::Hasher* compression, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t bls12_377_poseidon_create_cuda( @@ -108,7 +108,7 @@ extern "C" cudaError_t bls12_377_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - hash::SpongeConfig& cfg); + hash::HashConfig& cfg); extern "C" cudaError_t bls12_377_poseidon_delete_cuda(poseidon::Poseidon* poseidon); diff --git a/icicle/include/api/bls12_381.h b/icicle/include/api/bls12_381.h index 35b615c3a..9f1a49cd0 100644 --- a/icicle/include/api/bls12_381.h +++ b/icicle/include/api/bls12_381.h @@ -71,16 +71,16 @@ extern "C" cudaError_t bls12_381_build_merkle_tree( bls12_381::scalar_t* digests, unsigned int height, unsigned int input_block_len, - const hash::SpongeHasher* compression, - const hash::SpongeHasher* bottom_layer, + const hash::Hasher* compression, + const hash::Hasher* bottom_layer, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t bls12_381_mmcs_commit_cuda( const matrix::Matrix* leaves, unsigned int number_of_inputs, bls12_381::scalar_t* digests, - const hash::SpongeHasher* hasher, - const hash::SpongeHasher* compression, + const hash::Hasher* hasher, + const hash::Hasher* compression, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t bls12_381_poseidon_create_cuda( @@ -108,7 +108,7 @@ extern "C" cudaError_t bls12_381_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - hash::SpongeConfig& cfg); + hash::HashConfig& cfg); extern "C" cudaError_t bls12_381_poseidon_delete_cuda(poseidon::Poseidon* poseidon); diff --git a/icicle/include/api/bn254.h b/icicle/include/api/bn254.h index f3a9cb7ca..1a9a46b5b 100644 --- a/icicle/include/api/bn254.h +++ b/icicle/include/api/bn254.h @@ -97,7 +97,7 @@ extern "C" cudaError_t bn254_poseidon2_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - hash::SpongeConfig& cfg); + hash::HashConfig& cfg); extern "C" cudaError_t bn254_poseidon2_delete_cuda(poseidon2::Poseidon2* poseidon, device_context::DeviceContext& ctx); @@ -107,16 +107,16 @@ extern "C" cudaError_t bn254_build_merkle_tree( bn254::scalar_t* digests, unsigned int height, unsigned int input_block_len, - const hash::SpongeHasher* compression, - const hash::SpongeHasher* bottom_layer, + const hash::Hasher* compression, + const hash::Hasher* bottom_layer, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t bn254_mmcs_commit_cuda( const matrix::Matrix* leaves, unsigned int number_of_inputs, bn254::scalar_t* digests, - const hash::SpongeHasher* hasher, - const hash::SpongeHasher* compression, + const hash::Hasher* hasher, + const hash::Hasher* compression, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t bn254_poseidon_create_cuda( @@ -144,7 +144,7 @@ extern "C" cudaError_t bn254_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - hash::SpongeConfig& cfg); + hash::HashConfig& cfg); extern "C" cudaError_t bn254_poseidon_delete_cuda(poseidon::Poseidon* poseidon); diff --git a/icicle/include/api/bw6_761.h b/icicle/include/api/bw6_761.h index 8d290189d..531e4514f 100644 --- a/icicle/include/api/bw6_761.h +++ b/icicle/include/api/bw6_761.h @@ -71,16 +71,16 @@ extern "C" cudaError_t bw6_761_build_merkle_tree( bw6_761::scalar_t* digests, unsigned int height, unsigned int input_block_len, - const hash::SpongeHasher* compression, - const hash::SpongeHasher* bottom_layer, + const hash::Hasher* compression, + const hash::Hasher* bottom_layer, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t bw6_761_mmcs_commit_cuda( const matrix::Matrix* leaves, unsigned int number_of_inputs, bw6_761::scalar_t* digests, - const hash::SpongeHasher* hasher, - const hash::SpongeHasher* compression, + const hash::Hasher* hasher, + const hash::Hasher* compression, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t bw6_761_poseidon_create_cuda( @@ -108,7 +108,7 @@ extern "C" cudaError_t bw6_761_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - hash::SpongeConfig& cfg); + hash::HashConfig& cfg); extern "C" cudaError_t bw6_761_poseidon_delete_cuda(poseidon::Poseidon* poseidon); diff --git a/icicle/include/api/grumpkin.h b/icicle/include/api/grumpkin.h index d40a08827..40241e591 100644 --- a/icicle/include/api/grumpkin.h +++ b/icicle/include/api/grumpkin.h @@ -44,16 +44,16 @@ extern "C" cudaError_t grumpkin_build_merkle_tree( grumpkin::scalar_t* digests, unsigned int height, unsigned int input_block_len, - const hash::SpongeHasher* compression, - const hash::SpongeHasher* bottom_layer, + const hash::Hasher* compression, + const hash::Hasher* bottom_layer, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t grumpkin_mmcs_commit_cuda( const matrix::Matrix* leaves, unsigned int number_of_inputs, grumpkin::scalar_t* digests, - const hash::SpongeHasher* hasher, - const hash::SpongeHasher* compression, + const hash::Hasher* hasher, + const hash::Hasher* compression, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t grumpkin_poseidon_create_cuda( @@ -81,7 +81,7 @@ extern "C" cudaError_t grumpkin_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - hash::SpongeConfig& cfg); + hash::HashConfig& cfg); extern "C" cudaError_t grumpkin_poseidon_delete_cuda(poseidon::Poseidon* poseidon); diff --git a/icicle/include/api/hash.h b/icicle/include/api/hash.h index a85e1b6c7..ffa40d812 100644 --- a/icicle/include/api/hash.h +++ b/icicle/include/api/hash.h @@ -6,11 +6,25 @@ #include #include "gpu-utils/device_context.cuh" #include "hash/keccak/keccak.cuh" +#include "merkle-tree/merkle.cuh" extern "C" cudaError_t - keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::KeccakConfig& config); + keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::HashConfig& config); extern "C" cudaError_t - keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::KeccakConfig& config); + keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::HashConfig& config); +extern "C" cudaError_t build_keccak256_merkle_tree_cuda( + const uint8_t* leaves, + uint64_t* digests, + unsigned int height, + unsigned int input_block_len, + const merkle_tree::TreeBuilderConfig& tree_config); + +extern "C" cudaError_t build_keccak512_merkle_tree_cuda( + const uint8_t* leaves, + uint64_t* digests, + unsigned int height, + unsigned int input_block_len, + const merkle_tree::TreeBuilderConfig& tree_config); #endif \ No newline at end of file diff --git a/icicle/include/api/m31.h b/icicle/include/api/m31.h index a0e38be6e..277268a1e 100644 --- a/icicle/include/api/m31.h +++ b/icicle/include/api/m31.h @@ -19,16 +19,16 @@ extern "C" cudaError_t m31_build_merkle_tree( m31::scalar_t* digests, unsigned int height, unsigned int input_block_len, - const hash::SpongeHasher* compression, - const hash::SpongeHasher* bottom_layer, + const hash::Hasher* compression, + const hash::Hasher* bottom_layer, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t m31_mmcs_commit_cuda( const matrix::Matrix* leaves, unsigned int number_of_inputs, m31::scalar_t* digests, - const hash::SpongeHasher* hasher, - const hash::SpongeHasher* compression, + const hash::Hasher* hasher, + const hash::Hasher* compression, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t m31_mul_cuda( diff --git a/icicle/include/api/stark252.h b/icicle/include/api/stark252.h index 5271b0b25..867f98e69 100644 --- a/icicle/include/api/stark252.h +++ b/icicle/include/api/stark252.h @@ -20,16 +20,16 @@ extern "C" cudaError_t stark252_build_merkle_tree( stark252::scalar_t* digests, unsigned int height, unsigned int input_block_len, - const hash::SpongeHasher* compression, - const hash::SpongeHasher* bottom_layer, + const hash::Hasher* compression, + const hash::Hasher* bottom_layer, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t stark252_mmcs_commit_cuda( const matrix::Matrix* leaves, unsigned int number_of_inputs, stark252::scalar_t* digests, - const hash::SpongeHasher* hasher, - const hash::SpongeHasher* compression, + const hash::Hasher* hasher, + const hash::Hasher* compression, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t stark252_mul_cuda( diff --git a/icicle/include/api/templates/fields/poseidon.h b/icicle/include/api/templates/fields/poseidon.h index faa2f5a16..f8ea1ae4e 100644 --- a/icicle/include/api/templates/fields/poseidon.h +++ b/icicle/include/api/templates/fields/poseidon.h @@ -23,7 +23,7 @@ extern "C" cudaError_t ${FIELD}_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - hash::SpongeConfig& cfg); + hash::HashConfig& cfg); extern "C" cudaError_t ${FIELD}_poseidon_delete_cuda(poseidon::Poseidon<${FIELD}::scalar_t>* poseidon); \ No newline at end of file diff --git a/icicle/include/api/templates/fields/poseidon2.h b/icicle/include/api/templates/fields/poseidon2.h index 30e4cf9b6..a6d5ea15f 100644 --- a/icicle/include/api/templates/fields/poseidon2.h +++ b/icicle/include/api/templates/fields/poseidon2.h @@ -28,7 +28,7 @@ extern "C" cudaError_t ${FIELD}_poseidon2_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - hash::SpongeConfig& cfg); + hash::HashConfig& cfg); extern "C" cudaError_t ${FIELD}_poseidon2_delete_cuda(poseidon2::Poseidon2<${FIELD}::scalar_t>* poseidon, device_context::DeviceContext& ctx); \ No newline at end of file diff --git a/icicle/include/api/templates/fields/tree.h b/icicle/include/api/templates/fields/tree.h index c3ca20f83..06f102975 100644 --- a/icicle/include/api/templates/fields/tree.h +++ b/icicle/include/api/templates/fields/tree.h @@ -3,14 +3,14 @@ extern "C" cudaError_t ${FIELD}_build_merkle_tree( ${FIELD}::scalar_t* digests, unsigned int height, unsigned int input_block_len, - const hash::SpongeHasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression, - const hash::SpongeHasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* bottom_layer, + const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression, + const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* bottom_layer, const merkle_tree::TreeBuilderConfig& tree_config); extern "C" cudaError_t ${FIELD}_mmcs_commit_cuda( const matrix::Matrix<${FIELD}::scalar_t>* leaves, unsigned int number_of_inputs, ${FIELD}::scalar_t* digests, - const hash::SpongeHasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* hasher, - const hash::SpongeHasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression, + const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* hasher, + const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression, const merkle_tree::TreeBuilderConfig& tree_config); \ No newline at end of file diff --git a/icicle/include/hash/hash.cuh b/icicle/include/hash/hash.cuh index 3c8324141..338d337dd 100644 --- a/icicle/include/hash/hash.cuh +++ b/icicle/include/hash/hash.cuh @@ -16,10 +16,10 @@ using matrix::Matrix; namespace hash { /** - * @struct SpongeConfig - * Encodes sponge hash operations parameters. + * @struct HashConfig + * Encodes hash operations parameters. */ - struct SpongeConfig { + struct HashConfig { device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */ bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */ bool @@ -31,14 +31,14 @@ namespace hash { }; /** - * A function that returns the default value of [SpongeConfig](@ref SpongeConfig) for the [SpongeHasher](@ref - * SpongeHasher) class. - * @return Default value of [SpongeConfig](@ref SpongeConfig). + * A function that returns the default value of [HashConfig](@ref HashConfig) for the [Hasher](@ref + * Hasher) class. + * @return Default value of [HashConfig](@ref HashConfig). */ - static SpongeConfig - default_sponge_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context()) + static HashConfig + default_hash_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context()) { - SpongeConfig config = { + HashConfig config = { ctx, // ctx false, // are_inputs_on_device false, // are_outputs_on_device @@ -48,16 +48,15 @@ namespace hash { } /** - * @class SpongeHasher + * @class Hasher * - * Can be inherited by a cryptographic permutation function to create a - * [sponge](https://en.wikipedia.org/wiki/Sponge_function) construction out of it. + * An interface containing methods for hashing * * @tparam PreImage type of inputs elements * @tparam Image type of state elements. Also used to describe the type of hash output */ template - class SpongeHasher + class Hasher { public: /// @brief the width of permutation state @@ -72,7 +71,7 @@ namespace hash { /// @brief start squeezing from this offset. Used with domain separation. const unsigned int offset; - SpongeHasher(unsigned int width, unsigned int preimage_max_length, unsigned int rate, unsigned int offset) + Hasher(unsigned int width, unsigned int preimage_max_length, unsigned int rate, unsigned int offset) : width(width), preimage_max_length(preimage_max_length), rate(rate), offset(offset) { assert( @@ -105,7 +104,6 @@ namespace hash { return cudaError_t::cudaSuccess; } - /// @brief Permute aligned input and do squeeze /// @param input pointer to input allocated on-device /// @param out pointer to output allocated on-device cudaError_t compress_many( @@ -113,7 +111,7 @@ namespace hash { Image* out, unsigned int number_of_states, unsigned int output_len, - const SpongeConfig& cfg) const + const HashConfig& cfg) const { return hash_many((const PreImage*)input, out, number_of_states, width, output_len, cfg); } @@ -136,7 +134,7 @@ namespace hash { unsigned int number_of_states, unsigned int input_len, unsigned int output_len, - const SpongeConfig& cfg) const + const HashConfig& cfg) const { const PreImage* d_input; PreImage* d_alloc_input; diff --git a/icicle/include/hash/keccak/keccak.cuh b/icicle/include/hash/keccak/keccak.cuh index f05d3736e..1a916288d 100644 --- a/icicle/include/hash/keccak/keccak.cuh +++ b/icicle/include/hash/keccak/keccak.cuh @@ -11,31 +11,19 @@ using namespace hash; namespace keccak { - /** - * @struct KeccakConfig - * Struct that encodes various Keccak parameters. - */ - struct KeccakConfig { - device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */ - bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */ - bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */ - bool is_async; /**< Whether to run the Keccak asynchronously. If set to `true`, the keccak_hash function will be - * non-blocking and you'd need to synchronize it explicitly by running - * `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, keccak_hash - * function will block the current CPU thread. */ - }; - - KeccakConfig default_keccak_config() + class Keccak : public Hasher { - device_context::DeviceContext ctx = device_context::get_default_device_context(); - KeccakConfig config = { - ctx, // ctx - false, // are_inputes_on_device - false, // are_outputs_on_device - false, // is_async - }; - return config; - } + public: + cudaError_t run_hash_many_kernel( + const uint8_t* input, + uint64_t* output, + unsigned int number_of_states, + unsigned int input_len, + unsigned int output_len, + const device_context::DeviceContext& ctx) const override; + + Keccak(unsigned int rate) : Hasher(25, 25, rate, 0) {} + }; } // namespace keccak #endif \ No newline at end of file diff --git a/icicle/include/merkle-tree/merkle.cuh b/icicle/include/merkle-tree/merkle.cuh index 68e729ce1..ba50ff760 100644 --- a/icicle/include/merkle-tree/merkle.cuh +++ b/icicle/include/merkle-tree/merkle.cuh @@ -111,8 +111,10 @@ namespace merkle_tree { cudaError_t build_merkle_tree( const Leaf* inputs, Digest* digests, - const SpongeHasher& compression, - const SpongeHasher& bottom_layer, + unsigned int height, + unsigned int input_block_len, + const Hasher& compression, + const Hasher& bottom_layer, const TreeBuilderConfig& config); template @@ -120,8 +122,8 @@ namespace merkle_tree { const Matrix* inputs, const unsigned int number_of_inputs, Digest* digests, - const SpongeHasher& hasher, - const SpongeHasher& compression, + const Hasher& hasher, + const Hasher& compression, const TreeBuilderConfig& tree_config); } // namespace merkle_tree diff --git a/icicle/include/poseidon/poseidon.cuh b/icicle/include/poseidon/poseidon.cuh index 8025057d0..24727c21e 100644 --- a/icicle/include/poseidon/poseidon.cuh +++ b/icicle/include/poseidon/poseidon.cuh @@ -20,7 +20,7 @@ using namespace hash; */ namespace poseidon { template - class Poseidon : public SpongeHasher + class Poseidon : public Hasher { public: const std::size_t device_id; @@ -65,7 +65,7 @@ namespace poseidon { const S* sparse_matrices, const S domain_tag, device_context::DeviceContext& ctx) - : SpongeHasher(arity + 1, arity, arity, 1), device_id(ctx.device_id) + : Hasher(arity + 1, arity, arity, 1), device_id(ctx.device_id) { PoseidonConstants constants; CHK_STICKY(create_optimized_poseidon_constants( @@ -75,7 +75,7 @@ namespace poseidon { } Poseidon(int arity, device_context::DeviceContext& ctx) - : SpongeHasher(arity + 1, arity, arity, 1), device_id(ctx.device_id) + : Hasher(arity + 1, arity, arity, 1), device_id(ctx.device_id) { PoseidonConstants constants{}; CHK_STICKY(init_optimized_poseidon_constants(arity, ctx, &constants)); diff --git a/icicle/include/poseidon2/poseidon2.cuh b/icicle/include/poseidon2/poseidon2.cuh index e81d88b4c..016740e7a 100644 --- a/icicle/include/poseidon2/poseidon2.cuh +++ b/icicle/include/poseidon2/poseidon2.cuh @@ -23,7 +23,7 @@ using matrix::Matrix; */ namespace poseidon2 { template - class Poseidon2 : public hash::SpongeHasher + class Poseidon2 : public hash::Hasher { static const int POSEIDON_BLOCK_SIZE = 32; @@ -144,7 +144,7 @@ namespace poseidon2 { MdsType mds_type, DiffusionStrategy diffusion, device_context::DeviceContext& ctx) - : hash::SpongeHasher(width, width, rate, 0), device_id(ctx.device_id) + : hash::Hasher(width, width, rate, 0), device_id(ctx.device_id) { Poseidon2Constants constants; CHK_STICKY(create_poseidon2_constants( @@ -159,7 +159,7 @@ namespace poseidon2 { MdsType mds_type, DiffusionStrategy diffusion, device_context::DeviceContext& ctx) - : hash::SpongeHasher(width, width, rate, 0), device_id(ctx.device_id) + : hash::Hasher(width, width, rate, 0), device_id(ctx.device_id) { Poseidon2Constants constants; CHK_STICKY(init_poseidon2_constants(width, mds_type, diffusion, ctx, &constants)); diff --git a/icicle/src/hash/keccak/.gitignore b/icicle/src/hash/keccak/.gitignore index bdbfc9de6..8059bd2f2 100644 --- a/icicle/src/hash/keccak/.gitignore +++ b/icicle/src/hash/keccak/.gitignore @@ -1 +1,2 @@ -test_keccak \ No newline at end of file +test_keccak +test_keccak_tree \ No newline at end of file diff --git a/icicle/src/hash/keccak/Makefile b/icicle/src/hash/keccak/Makefile index 347efd98b..c401bf7cd 100644 --- a/icicle/src/hash/keccak/Makefile +++ b/icicle/src/hash/keccak/Makefile @@ -1,6 +1,10 @@ +test_keccak_tree: test_tree.cu keccak.cu ../../merkle-tree/merkle.cu + nvcc -DMERKLE_DEBUG -o test_keccak_tree -I../../../include test_tree.cu + ./test_keccak_tree + test_keccak: test.cu keccak.cu nvcc -o test_keccak -I../../../include test.cu ./test_keccak clear: - rm test_keccak \ No newline at end of file + rm test_keccak test_keccak_tree \ No newline at end of file diff --git a/icicle/src/hash/keccak/extern.cu b/icicle/src/hash/keccak/extern.cu index 40339d218..54ddeced6 100644 --- a/icicle/src/hash/keccak/extern.cu +++ b/icicle/src/hash/keccak/extern.cu @@ -4,17 +4,44 @@ #include "hash/hash.cuh" #include "hash/keccak/keccak.cuh" #include "keccak.cu" +#include "../../merkle-tree/merkle.cu" +#include "merkle-tree/merkle.cuh" namespace keccak { extern "C" cudaError_t - keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config) + keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config) { - return keccak_hash<512, 256>(input, input_block_size, number_of_blocks, output, config); + return Keccak(136).hash_many(input, (uint64_t*)output, number_of_blocks, input_block_size, 4, config); } extern "C" cudaError_t - keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config) + keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config) { - return keccak_hash<1024, 512>(input, input_block_size, number_of_blocks, output, config); + return Keccak(72).hash_many(input, (uint64_t*)output, number_of_blocks, input_block_size, 8, config); } + + extern "C" cudaError_t build_keccak256_merkle_tree_cuda( + const uint8_t* leaves, + uint64_t* digests, + unsigned int height, + unsigned int input_block_len, + const merkle_tree::TreeBuilderConfig& tree_config) + { + Keccak keccak(136); + return merkle_tree::build_merkle_tree( + leaves, digests, height, input_block_len, keccak, keccak, tree_config); + } + + extern "C" cudaError_t build_keccak512_merkle_tree_cuda( + const uint8_t* leaves, + uint64_t* digests, + unsigned int height, + unsigned int input_block_len, + const merkle_tree::TreeBuilderConfig& tree_config) + { + Keccak keccak(72); + return merkle_tree::build_merkle_tree( + leaves, digests, height, input_block_len, keccak, keccak, tree_config); + } + } // namespace keccak \ No newline at end of file diff --git a/icicle/src/hash/keccak/keccak.cu b/icicle/src/hash/keccak/keccak.cu index 074940122..e2754bd70 100644 --- a/icicle/src/hash/keccak/keccak.cu +++ b/icicle/src/hash/keccak/keccak.cu @@ -1,50 +1,256 @@ #include #include "gpu-utils/device_context.cuh" #include "gpu-utils/error_handler.cuh" +#include "gpu-utils/modifiers.cuh" #include "hash/hash.cuh" #include "hash/keccak/keccak.cuh" -#include "kernels.cu" using namespace hash; namespace keccak { - template - cudaError_t - keccak_hash(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config) + using u64 = uint64_t; + +#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) + +#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) \ + { \ + t = ROTL64((d0 ^ d1 ^ d2 ^ d3 ^ d4), 1) ^ (c0 ^ c1 ^ c2 ^ c3 ^ c4); \ + } + +#define THETA( \ + s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \ + s43, s44) \ + { \ + TH_ELT(t0, s40, s41, s42, s43, s44, s10, s11, s12, s13, s14); \ + TH_ELT(t1, s00, s01, s02, s03, s04, s20, s21, s22, s23, s24); \ + TH_ELT(t2, s10, s11, s12, s13, s14, s30, s31, s32, s33, s34); \ + TH_ELT(t3, s20, s21, s22, s23, s24, s40, s41, s42, s43, s44); \ + TH_ELT(t4, s30, s31, s32, s33, s34, s00, s01, s02, s03, s04); \ + s00 ^= t0; \ + s01 ^= t0; \ + s02 ^= t0; \ + s03 ^= t0; \ + s04 ^= t0; \ + \ + s10 ^= t1; \ + s11 ^= t1; \ + s12 ^= t1; \ + s13 ^= t1; \ + s14 ^= t1; \ + \ + s20 ^= t2; \ + s21 ^= t2; \ + s22 ^= t2; \ + s23 ^= t2; \ + s24 ^= t2; \ + \ + s30 ^= t3; \ + s31 ^= t3; \ + s32 ^= t3; \ + s33 ^= t3; \ + s34 ^= t3; \ + \ + s40 ^= t4; \ + s41 ^= t4; \ + s42 ^= t4; \ + s43 ^= t4; \ + s44 ^= t4; \ + } + +#define RHOPI( \ + s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \ + s43, s44) \ + { \ + t0 = ROTL64(s10, (uint64_t)1); \ + s10 = ROTL64(s11, (uint64_t)44); \ + s11 = ROTL64(s41, (uint64_t)20); \ + s41 = ROTL64(s24, (uint64_t)61); \ + s24 = ROTL64(s42, (uint64_t)39); \ + s42 = ROTL64(s04, (uint64_t)18); \ + s04 = ROTL64(s20, (uint64_t)62); \ + s20 = ROTL64(s22, (uint64_t)43); \ + s22 = ROTL64(s32, (uint64_t)25); \ + s32 = ROTL64(s43, (uint64_t)8); \ + s43 = ROTL64(s34, (uint64_t)56); \ + s34 = ROTL64(s03, (uint64_t)41); \ + s03 = ROTL64(s40, (uint64_t)27); \ + s40 = ROTL64(s44, (uint64_t)14); \ + s44 = ROTL64(s14, (uint64_t)2); \ + s14 = ROTL64(s31, (uint64_t)55); \ + s31 = ROTL64(s13, (uint64_t)45); \ + s13 = ROTL64(s01, (uint64_t)36); \ + s01 = ROTL64(s30, (uint64_t)28); \ + s30 = ROTL64(s33, (uint64_t)21); \ + s33 = ROTL64(s23, (uint64_t)15); \ + s23 = ROTL64(s12, (uint64_t)10); \ + s12 = ROTL64(s21, (uint64_t)6); \ + s21 = ROTL64(s02, (uint64_t)3); \ + s02 = t0; \ + } + +#define KHI( \ + s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \ + s43, s44) \ + { \ + t0 = s00 ^ (~s10 & s20); \ + t1 = s10 ^ (~s20 & s30); \ + t2 = s20 ^ (~s30 & s40); \ + t3 = s30 ^ (~s40 & s00); \ + t4 = s40 ^ (~s00 & s10); \ + s00 = t0; \ + s10 = t1; \ + s20 = t2; \ + s30 = t3; \ + s40 = t4; \ + \ + t0 = s01 ^ (~s11 & s21); \ + t1 = s11 ^ (~s21 & s31); \ + t2 = s21 ^ (~s31 & s41); \ + t3 = s31 ^ (~s41 & s01); \ + t4 = s41 ^ (~s01 & s11); \ + s01 = t0; \ + s11 = t1; \ + s21 = t2; \ + s31 = t3; \ + s41 = t4; \ + \ + t0 = s02 ^ (~s12 & s22); \ + t1 = s12 ^ (~s22 & s32); \ + t2 = s22 ^ (~s32 & s42); \ + t3 = s32 ^ (~s42 & s02); \ + t4 = s42 ^ (~s02 & s12); \ + s02 = t0; \ + s12 = t1; \ + s22 = t2; \ + s32 = t3; \ + s42 = t4; \ + \ + t0 = s03 ^ (~s13 & s23); \ + t1 = s13 ^ (~s23 & s33); \ + t2 = s23 ^ (~s33 & s43); \ + t3 = s33 ^ (~s43 & s03); \ + t4 = s43 ^ (~s03 & s13); \ + s03 = t0; \ + s13 = t1; \ + s23 = t2; \ + s33 = t3; \ + s43 = t4; \ + \ + t0 = s04 ^ (~s14 & s24); \ + t1 = s14 ^ (~s24 & s34); \ + t2 = s24 ^ (~s34 & s44); \ + t3 = s34 ^ (~s44 & s04); \ + t4 = s44 ^ (~s04 & s14); \ + s04 = t0; \ + s14 = t1; \ + s24 = t2; \ + s34 = t3; \ + s44 = t4; \ + } + +#define IOTA(element, rc) \ + { \ + element ^= rc; \ + } + + __device__ const u64 RC[24] = {0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000, + 0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009, + 0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003, + 0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a, + 0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008}; + + __device__ void keccakf(u64 s[25]) + { + u64 t0, t1, t2, t3, t4; + + for (int i = 0; i < 24; i++) { + THETA( + s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8], + s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]); + RHOPI( + s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8], + s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]); + KHI( + s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8], + s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]); + IOTA(s[0], RC[i]); + } + } + + template + __global__ void + keccak_hash_blocks(const uint8_t* input, int input_block_size, int output_len, int number_of_blocks, uint64_t* output) { - CHK_INIT_IF_RETURN(); - cudaStream_t& stream = config.ctx.stream; - - uint8_t* input_device; - if (config.are_inputs_on_device) { - input_device = input; - } else { - CHK_IF_RETURN(cudaMallocAsync(&input_device, number_of_blocks * input_block_size, stream)); - CHK_IF_RETURN( - cudaMemcpyAsync(input_device, input, number_of_blocks * input_block_size, cudaMemcpyHostToDevice, stream)); + int sid = (blockIdx.x * blockDim.x) + threadIdx.x; + if (sid >= number_of_blocks) { return; } + + const uint8_t* b_input = input + sid * input_block_size; + uint64_t* b_output = output + sid * output_len; + uint64_t state[25] = {}; // Initialize with zeroes + + int input_len = input_block_size; + + // absorb + while (input_len >= R) { + for (int i = 0; i < R; i += 8) { + state[i / 8] ^= *(uint64_t*)(b_input + i); + } + keccakf(state); + b_input += R; + input_len -= R; } - uint8_t* output_device; - if (config.are_outputs_on_device) { - output_device = output; - } else { - CHK_IF_RETURN(cudaMallocAsync(&output_device, number_of_blocks * (D / 8), stream)); + // last block (if any) + uint8_t last_block[R]; + for (int i = 0; i < input_len; i++) { + last_block[i] = b_input[i]; } - int number_of_threads = 512; - int number_of_gpu_blocks = (number_of_blocks - 1) / number_of_threads + 1; - keccak_hash_blocks<<>>( - input_device, input_block_size, number_of_blocks, output_device); + // pad 10*1 + last_block[input_len] = 1; + for (int i = 0; i < R - input_len - 1; i++) { + last_block[input_len + i + 1] = 0; + } + // last bit + last_block[R - 1] |= 0x80; + + for (int i = 0; i < R; i += 8) { + state[i / 8] ^= *(uint64_t*)(last_block + i); + } + keccakf(state); - if (!config.are_inputs_on_device) CHK_IF_RETURN(cudaFreeAsync(input_device, stream)); + for (int i = 0; i < output_len; i++) { + b_output[i] = state[i]; + } + } + + cudaError_t Keccak::run_hash_many_kernel( + const uint8_t* input, + uint64_t* output, + unsigned int number_of_states, + unsigned int input_len, + unsigned int output_len, + const device_context::DeviceContext& ctx) const + { + int number_of_threads = 256; + int number_of_gpu_blocks = (number_of_states - 1) / number_of_threads + 1; - if (!config.are_outputs_on_device) { - CHK_IF_RETURN(cudaMemcpyAsync(output, output_device, number_of_blocks * (D / 8), cudaMemcpyDeviceToHost, stream)); - CHK_IF_RETURN(cudaFreeAsync(output_device, stream)); + switch (rate) { + case 136: + keccak_hash_blocks<136><<>>( + input, input_len, output_len, number_of_states, output); + break; + case 72: + keccak_hash_blocks<72><<>>( + input, input_len, output_len, number_of_states, output); + break; + default: + THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "KeccakHash: #rate must be one of [136, 72]"); } - if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream)); + CHK_IF_RETURN(cudaPeekAtLastError()); return CHK_LAST(); } } // namespace keccak \ No newline at end of file diff --git a/icicle/src/hash/keccak/kernels.cu b/icicle/src/hash/keccak/kernels.cu deleted file mode 100644 index 5d3e4864e..000000000 --- a/icicle/src/hash/keccak/kernels.cu +++ /dev/null @@ -1,233 +0,0 @@ -#pragma once -#ifndef KECCAK_KERNELS_H -#define KECCAK_KERNELS_H - -#include -#include "gpu-utils/modifiers.cuh" - -namespace keccak { - using u64 = uint64_t; - -#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) - -#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) \ - { \ - t = ROTL64((d0 ^ d1 ^ d2 ^ d3 ^ d4), 1) ^ (c0 ^ c1 ^ c2 ^ c3 ^ c4); \ - } - -#define THETA( \ - s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \ - s43, s44) \ - { \ - TH_ELT(t0, s40, s41, s42, s43, s44, s10, s11, s12, s13, s14); \ - TH_ELT(t1, s00, s01, s02, s03, s04, s20, s21, s22, s23, s24); \ - TH_ELT(t2, s10, s11, s12, s13, s14, s30, s31, s32, s33, s34); \ - TH_ELT(t3, s20, s21, s22, s23, s24, s40, s41, s42, s43, s44); \ - TH_ELT(t4, s30, s31, s32, s33, s34, s00, s01, s02, s03, s04); \ - s00 ^= t0; \ - s01 ^= t0; \ - s02 ^= t0; \ - s03 ^= t0; \ - s04 ^= t0; \ - \ - s10 ^= t1; \ - s11 ^= t1; \ - s12 ^= t1; \ - s13 ^= t1; \ - s14 ^= t1; \ - \ - s20 ^= t2; \ - s21 ^= t2; \ - s22 ^= t2; \ - s23 ^= t2; \ - s24 ^= t2; \ - \ - s30 ^= t3; \ - s31 ^= t3; \ - s32 ^= t3; \ - s33 ^= t3; \ - s34 ^= t3; \ - \ - s40 ^= t4; \ - s41 ^= t4; \ - s42 ^= t4; \ - s43 ^= t4; \ - s44 ^= t4; \ - } - -#define RHOPI( \ - s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \ - s43, s44) \ - { \ - t0 = ROTL64(s10, (uint64_t)1); \ - s10 = ROTL64(s11, (uint64_t)44); \ - s11 = ROTL64(s41, (uint64_t)20); \ - s41 = ROTL64(s24, (uint64_t)61); \ - s24 = ROTL64(s42, (uint64_t)39); \ - s42 = ROTL64(s04, (uint64_t)18); \ - s04 = ROTL64(s20, (uint64_t)62); \ - s20 = ROTL64(s22, (uint64_t)43); \ - s22 = ROTL64(s32, (uint64_t)25); \ - s32 = ROTL64(s43, (uint64_t)8); \ - s43 = ROTL64(s34, (uint64_t)56); \ - s34 = ROTL64(s03, (uint64_t)41); \ - s03 = ROTL64(s40, (uint64_t)27); \ - s40 = ROTL64(s44, (uint64_t)14); \ - s44 = ROTL64(s14, (uint64_t)2); \ - s14 = ROTL64(s31, (uint64_t)55); \ - s31 = ROTL64(s13, (uint64_t)45); \ - s13 = ROTL64(s01, (uint64_t)36); \ - s01 = ROTL64(s30, (uint64_t)28); \ - s30 = ROTL64(s33, (uint64_t)21); \ - s33 = ROTL64(s23, (uint64_t)15); \ - s23 = ROTL64(s12, (uint64_t)10); \ - s12 = ROTL64(s21, (uint64_t)6); \ - s21 = ROTL64(s02, (uint64_t)3); \ - s02 = t0; \ - } - -#define KHI( \ - s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \ - s43, s44) \ - { \ - t0 = s00 ^ (~s10 & s20); \ - t1 = s10 ^ (~s20 & s30); \ - t2 = s20 ^ (~s30 & s40); \ - t3 = s30 ^ (~s40 & s00); \ - t4 = s40 ^ (~s00 & s10); \ - s00 = t0; \ - s10 = t1; \ - s20 = t2; \ - s30 = t3; \ - s40 = t4; \ - \ - t0 = s01 ^ (~s11 & s21); \ - t1 = s11 ^ (~s21 & s31); \ - t2 = s21 ^ (~s31 & s41); \ - t3 = s31 ^ (~s41 & s01); \ - t4 = s41 ^ (~s01 & s11); \ - s01 = t0; \ - s11 = t1; \ - s21 = t2; \ - s31 = t3; \ - s41 = t4; \ - \ - t0 = s02 ^ (~s12 & s22); \ - t1 = s12 ^ (~s22 & s32); \ - t2 = s22 ^ (~s32 & s42); \ - t3 = s32 ^ (~s42 & s02); \ - t4 = s42 ^ (~s02 & s12); \ - s02 = t0; \ - s12 = t1; \ - s22 = t2; \ - s32 = t3; \ - s42 = t4; \ - \ - t0 = s03 ^ (~s13 & s23); \ - t1 = s13 ^ (~s23 & s33); \ - t2 = s23 ^ (~s33 & s43); \ - t3 = s33 ^ (~s43 & s03); \ - t4 = s43 ^ (~s03 & s13); \ - s03 = t0; \ - s13 = t1; \ - s23 = t2; \ - s33 = t3; \ - s43 = t4; \ - \ - t0 = s04 ^ (~s14 & s24); \ - t1 = s14 ^ (~s24 & s34); \ - t2 = s24 ^ (~s34 & s44); \ - t3 = s34 ^ (~s44 & s04); \ - t4 = s44 ^ (~s04 & s14); \ - s04 = t0; \ - s14 = t1; \ - s24 = t2; \ - s34 = t3; \ - s44 = t4; \ - } - -#define IOTA(element, rc) \ - { \ - element ^= rc; \ - } - - __device__ const u64 RC[24] = {0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000, - 0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009, - 0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003, - 0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a, - 0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008}; - - __device__ void keccakf(u64 s[25]) - { - u64 t0, t1, t2, t3, t4; - - for (int i = 0; i < 24; i++) { - THETA( - s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8], - s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]); - RHOPI( - s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8], - s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]); - KHI( - s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8], - s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]); - IOTA(s[0], RC[i]); - } - } - - template - __global__ void keccak_hash_blocks(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output) - { - int bid = (blockIdx.x * blockDim.x) + threadIdx.x; - if (bid >= number_of_blocks) { return; } - - const int r_bits = 1600 - C; - const int r_bytes = r_bits / 8; - const int d_bytes = D / 8; - - uint8_t* b_input = input + bid * input_block_size; - uint8_t* b_output = output + bid * d_bytes; - uint64_t state[25] = {}; // Initialize with zeroes - - int input_len = input_block_size; - - // absorb - while (input_len >= r_bytes) { - // #pragma unroll - for (int i = 0; i < r_bytes; i += 8) { - state[i / 8] ^= *(uint64_t*)(b_input + i); - } - keccakf(state); - b_input += r_bytes; - input_len -= r_bytes; - } - - // last block (if any) - uint8_t last_block[r_bytes]; - for (int i = 0; i < input_len; i++) { - last_block[i] = b_input[i]; - } - - // pad 10*1 - last_block[input_len] = 1; - for (int i = 0; i < r_bytes - input_len - 1; i++) { - last_block[input_len + i + 1] = 0; - } - // last bit - last_block[r_bytes - 1] |= 0x80; - - // #pragma unroll - for (int i = 0; i < r_bytes; i += 8) { - state[i / 8] ^= *(uint64_t*)(last_block + i); - } - keccakf(state); - -#pragma unroll - for (int i = 0; i < d_bytes; i += 8) { - *(uint64_t*)(b_output + i) = state[i / 8]; - } - } -} // namespace keccak - -#endif \ No newline at end of file diff --git a/icicle/src/hash/keccak/test.cu b/icicle/src/hash/keccak/test.cu index 2268820d1..03293f1ea 100644 --- a/icicle/src/hash/keccak/test.cu +++ b/icicle/src/hash/keccak/test.cu @@ -50,7 +50,7 @@ int main(int argc, char* argv[]) uint8_t* out_ptr = static_cast(malloc(number_of_blocks * (D / 8))); START_TIMER(keccak_timer); - KeccakConfig config = default_keccak_config(); + HashConfig config = default_hash_config(); keccak256_cuda(in_ptr, input_block_size, number_of_blocks, out_ptr, config); END_TIMER(keccak_timer, "Keccak") diff --git a/icicle/src/hash/keccak/test_tree.cu b/icicle/src/hash/keccak/test_tree.cu new file mode 100644 index 000000000..ed1de95fe --- /dev/null +++ b/icicle/src/hash/keccak/test_tree.cu @@ -0,0 +1,91 @@ +#include "gpu-utils/device_context.cuh" +#include "merkle-tree/merkle.cuh" +#include "extern.cu" + +#ifndef __CUDA_ARCH__ +#include +#include +#include +#include +#include + +using namespace keccak; + +#define D 256 + +#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now(); +#define END_TIMER(timer, msg) \ + printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count()); + +void uint8_to_hex_string(const uint8_t* values, int size) +{ + std::stringstream ss; + + for (int i = 0; i < size; ++i) { + ss << std::hex << std::setw(2) << std::setfill('0') << (int)values[i]; + } + + std::string hexString = ss.str(); + std::cout << hexString << std::endl; +} + +#define A 2 + +int main(int argc, char* argv[]) +{ + using FpMilliseconds = std::chrono::duration; + using FpMicroseconds = std::chrono::duration; + + /// Tree of height N and arity A contains \sum{A^i} for i in 0..N-1 elements + uint32_t input_block_len = 136; + uint32_t tree_height = argc > 1 ? atoi(argv[1]) : 10; + uint32_t number_of_leaves = pow(A, tree_height); + uint32_t total_number_of_leaves = number_of_leaves * input_block_len; + + /// Use keep_rows to specify how many rows do you want to store + int keep_rows = argc > 2 ? atoi(argv[2]) : 7; + size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, A, 1); + + /// Fill leaves with scalars [0, 1, ... 2^tree_height - 1] + START_TIMER(timer_allocation); + uint8_t input = 0; + uint8_t* leaves = static_cast(malloc(total_number_of_leaves)); + for (uint64_t i = 0; i < total_number_of_leaves; i++) { + leaves[i] = (uint8_t)i; + } + END_TIMER(timer_allocation, "Allocated memory for leaves: "); + + /// Allocate memory for digests of {keep_rows} rows of a tree + START_TIMER(timer_digests); + size_t digests_mem = digests_len * sizeof(uint64_t); + uint64_t* digests = static_cast(malloc(digests_mem)); + END_TIMER(timer_digests, "Allocated memory for digests"); + + std::cout << "Memory for leaves = " << total_number_of_leaves / 1024 / 1024 << " MB; " + << total_number_of_leaves / 1024 / 1024 / 1024 << " GB" << std::endl; + std::cout << "Number of leaves = " << number_of_leaves << std::endl; + std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl; + std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024 + << " GB" << std::endl; + std::cout << "Number of digest elements = " << digests_len << std::endl; + + std::cout << "Total RAM consumption = " << (digests_mem + total_number_of_leaves) / 1024 / 1024 << " MB; " + << (digests_mem + total_number_of_leaves) / 1024 / 1024 / 1024 << " GB" << std::endl; + + merkle_tree::TreeBuilderConfig config = merkle_tree::default_merkle_config(); + config.arity = A; + config.keep_rows = keep_rows; + START_TIMER(keccak_timer); + build_keccak256_merkle_tree_cuda(leaves, digests, tree_height, input_block_len, config); + END_TIMER(keccak_timer, "Keccak") + + for (int i = 0; i < digests_len; i++) { + uint64_t root = digests[i]; + std::cout << root << std::endl; + // assert(root == expected[i]); + } + free(digests); + free(leaves); +} + +#endif \ No newline at end of file diff --git a/icicle/src/merkle-tree/extern.cu b/icicle/src/merkle-tree/extern.cu index a3d082c81..060afe870 100644 --- a/icicle/src/merkle-tree/extern.cu +++ b/icicle/src/merkle-tree/extern.cu @@ -15,8 +15,8 @@ namespace merkle_tree { scalar_t* digests, unsigned int height, unsigned int input_block_len, - const hash::SpongeHasher* compression, - const hash::SpongeHasher* bottom_layer, + const hash::Hasher* compression, + const hash::Hasher* bottom_layer, const TreeBuilderConfig& tree_config) { return build_merkle_tree( diff --git a/icicle/src/merkle-tree/extern_mmcs.cu b/icicle/src/merkle-tree/extern_mmcs.cu index 299c1942b..3b6fc5ccb 100644 --- a/icicle/src/merkle-tree/extern_mmcs.cu +++ b/icicle/src/merkle-tree/extern_mmcs.cu @@ -17,8 +17,8 @@ namespace merkle_tree { const Matrix* leaves, unsigned int number_of_inputs, scalar_t* digests, - const hash::SpongeHasher* hasher, - const hash::SpongeHasher* compression, + const hash::Hasher* hasher, + const hash::Hasher* compression, const TreeBuilderConfig& tree_config) { return mmcs_commit(leaves, number_of_inputs, digests, *hasher, *compression, tree_config); diff --git a/icicle/src/merkle-tree/merkle.cu b/icicle/src/merkle-tree/merkle.cu index 07da4b528..53f3b8f6f 100644 --- a/icicle/src/merkle-tree/merkle.cu +++ b/icicle/src/merkle-tree/merkle.cu @@ -78,37 +78,37 @@ namespace merkle_tree { template cudaError_t build_merkle_subtree( const L* leaves, - D* states, + L* d_leaves, D* digests, size_t subtree_idx, size_t subtree_height, - L* big_tree_digests, + D* big_tree_digests, size_t start_segment_size, size_t start_segment_offset, uint64_t keep_rows, uint64_t input_block_len, - const SpongeHasher& bottom_layer, - const SpongeHasher& compression, + const Hasher& bottom_layer, + const Hasher& compression, const TreeBuilderConfig& tree_config, device_context::DeviceContext& ctx) { uint64_t arity = tree_config.arity; - SpongeConfig sponge_config = default_sponge_config(ctx); - sponge_config.are_inputs_on_device = true; - sponge_config.are_outputs_on_device = true; - sponge_config.is_async = true; + HashConfig hash_config = default_hash_config(ctx); + hash_config.are_inputs_on_device = true; + hash_config.are_outputs_on_device = true; + hash_config.is_async = true; size_t bottom_layer_states = pow(arity, subtree_height); if (!tree_config.are_inputs_on_device) { CHK_IF_RETURN(cudaMemcpyAsync( - states, leaves, bottom_layer_states * input_block_len * sizeof(L), cudaMemcpyHostToDevice, ctx.stream)); + d_leaves, leaves, bottom_layer_states * input_block_len * sizeof(L), cudaMemcpyHostToDevice, ctx.stream)); } bottom_layer.hash_many( - tree_config.are_inputs_on_device ? leaves : states, digests, bottom_layer_states, input_block_len, - tree_config.digest_elements, sponge_config); + tree_config.are_inputs_on_device ? leaves : d_leaves, digests, bottom_layer_states, input_block_len, + tree_config.digest_elements, hash_config); uint64_t number_of_states = bottom_layer_states / arity; size_t segment_size = start_segment_size; @@ -123,21 +123,24 @@ namespace merkle_tree { } segment_size /= arity; subtree_height--; - swap(&digests, &states); + + D* prev_layer = digests; + D* next_layer = (D*)d_leaves; while (number_of_states > 0) { - CHK_IF_RETURN( - compression.compress_many(states, digests, number_of_states, tree_config.digest_elements, sponge_config)); + CHK_IF_RETURN(compression.run_hash_many_kernel( + (L*)prev_layer, next_layer, number_of_states, tree_config.digest_elements * tree_config.arity, + tree_config.digest_elements, hash_config.ctx)); if (!keep_rows || subtree_height < keep_rows) { D* digests_with_offset = big_tree_digests + segment_offset + subtree_idx * number_of_states * tree_config.digest_elements; CHK_IF_RETURN(cudaMemcpyAsync( - digests_with_offset, digests, number_of_states * tree_config.digest_elements * sizeof(D), + digests_with_offset, next_layer, number_of_states * tree_config.digest_elements * sizeof(D), cudaMemcpyDeviceToHost, ctx.stream)); segment_offset += segment_size; } - if (number_of_states > 1) { swap(&digests, &states); } + swap(&prev_layer, &next_layer); segment_size /= arity; subtree_height--; number_of_states /= arity; @@ -152,17 +155,13 @@ namespace merkle_tree { D* digests, unsigned int height, unsigned int input_block_len, - const SpongeHasher& compression, - const SpongeHasher& bottom_layer, + const Hasher& compression, + const Hasher& bottom_layer, const TreeBuilderConfig& tree_config) { CHK_INIT_IF_RETURN(); cudaStream_t& stream = tree_config.ctx.stream; - if (input_block_len * sizeof(L) > bottom_layer.rate * sizeof(D)) - THROW_ICICLE_ERR( - IcicleError_t::InvalidArgument, - "Sponge construction at the bottom of the tree doesn't support inputs bigger than hash rate"); if (compression.preimage_max_length < tree_config.arity * tree_config.digest_elements) THROW_ICICLE_ERR( IcicleError_t::InvalidArgument, @@ -176,24 +175,21 @@ namespace merkle_tree { uint64_t number_of_subtrees = 1; uint64_t subtree_height = height; uint64_t subtree_bottom_layer_states = number_of_bottom_layer_states; - uint64_t subtree_states_size = subtree_bottom_layer_states * bottom_layer.width; - - uint64_t subtree_digests_size; - if (compression.width != compression.preimage_max_length) { - // In that case, the states on layer 1 will require extending the states by (width / preimage_max_len) factor - subtree_digests_size = - subtree_states_size * bottom_layer.preimage_max_length / bottom_layer.width * tree_config.digest_elements; - } else { - subtree_digests_size = subtree_states_size / bottom_layer.width * tree_config.digest_elements; - } - size_t subtree_memory_required = sizeof(D) * (subtree_states_size + subtree_digests_size); + uint64_t subtree_leaves_size = subtree_bottom_layer_states * input_block_len; + uint64_t subtree_digests_size = subtree_bottom_layer_states * tree_config.digest_elements; + + size_t subtree_d_leaves_memory = std::max( + tree_config.are_inputs_on_device ? 0 : (sizeof(L) * subtree_leaves_size), + subtree_digests_size * sizeof(D) / tree_config.arity); + size_t subtree_memory_required = sizeof(D) * subtree_digests_size + subtree_d_leaves_memory; while (subtree_memory_required > STREAM_CHUNK_SIZE) { number_of_subtrees *= tree_config.arity; subtree_height--; subtree_bottom_layer_states /= tree_config.arity; - subtree_states_size /= tree_config.arity; subtree_digests_size /= tree_config.arity; - subtree_memory_required = sizeof(D) * (subtree_states_size + subtree_digests_size); + subtree_leaves_size /= tree_config.arity; + subtree_d_leaves_memory /= tree_config.arity; + subtree_memory_required = sizeof(D) * subtree_digests_size + subtree_d_leaves_memory; } int cap_height = height - subtree_height; size_t caps_len = pow(tree_config.arity, cap_height) * tree_config.digest_elements; @@ -221,19 +217,18 @@ namespace merkle_tree { std::cout << "Height of a subtree = " << subtree_height << std::endl; std::cout << "Cutoff height = " << height - subtree_height << std::endl; std::cout << "Number of leaves in a subtree = " << subtree_bottom_layer_states << std::endl; - std::cout << "State of a subtree = " << subtree_states_size << std::endl; std::cout << "Digest elements for a subtree = " << subtree_digests_size << std::endl; - std::cout << "Size of 1 subtree states = " << subtree_states_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl; std::cout << "Size of 1 subtree digests = " << subtree_digests_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl; std::cout << "Cap height = " << cap_height << std::endl; std::cout << "Enabling caps mode? " << caps_mode << std::endl; + std::cout << "Allocated " << subtree_d_leaves_memory << " bytes for d_leaves" << std::endl; #endif // Allocate memory for the leaves and digests // These are shared by streams in a pool - D *states_ptr, *digests_ptr; - CHK_IF_RETURN(cudaMallocAsync(&states_ptr, subtree_states_size * number_of_streams * sizeof(D), stream)); - CHK_IF_RETURN(cudaMemsetAsync(states_ptr, 0, subtree_states_size * number_of_streams * sizeof(D), stream)); + L* d_leaves_ptr; + D* digests_ptr; + CHK_IF_RETURN(cudaMallocAsync(&d_leaves_ptr, subtree_d_leaves_memory * number_of_streams, stream)); CHK_IF_RETURN(cudaMallocAsync(&digests_ptr, subtree_digests_size * number_of_streams * sizeof(D), stream)); // Wait for these allocations to finish CHK_IF_RETURN(cudaStreamSynchronize(stream)); @@ -244,7 +239,7 @@ namespace merkle_tree { cudaStream_t subtree_stream = streams[stream_idx]; const L* subtree_leaves = leaves + subtree_idx * subtree_bottom_layer_states * input_block_len; - D* subtree_state = states_ptr + stream_idx * subtree_states_size; + L* subtree_d_leaves = (L*)((unsigned char*)d_leaves_ptr + stream_idx * subtree_d_leaves_memory); D* subtree_digests = digests_ptr + stream_idx * subtree_digests_size; int subtree_keep_rows = 0; @@ -257,7 +252,7 @@ namespace merkle_tree { uint64_t start_segment_size = number_of_bottom_layer_states * tree_config.digest_elements; cudaError_t subtree_result = build_merkle_subtree( subtree_leaves, // leaves - subtree_state, // state + subtree_d_leaves, // d_leves subtree_digests, // digests subtree_idx, // subtree_idx subtree_height, // subtree_height @@ -278,10 +273,6 @@ namespace merkle_tree { CHK_IF_RETURN(cudaStreamSynchronize(streams[i])); } - SpongeConfig sponge_config = default_sponge_config(tree_config.ctx); - sponge_config.are_inputs_on_device = tree_config.are_inputs_on_device; - sponge_config.are_outputs_on_device = true; - sponge_config.is_async = true; // Finish the top-level tree if any if (cap_height > 0) { size_t start_segment_size = caps_len / tree_config.arity; @@ -295,25 +286,29 @@ namespace merkle_tree { } } CHK_IF_RETURN(cudaMemcpyAsync( - states_ptr, caps_mode ? caps : (digests + start_segment_offset - caps_len), caps_len * sizeof(D), + d_leaves_ptr, caps_mode ? caps : (digests + start_segment_offset - caps_len), caps_len * sizeof(D), (caps_mode || !tree_config.are_outputs_on_device) ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice, stream)); uint64_t number_of_states = caps_len / tree_config.arity / tree_config.digest_elements; + D* prev_layer = (D*)d_leaves_ptr; + D* next_layer = digests_ptr; + size_t segment_size = start_segment_size; size_t segment_offset = start_segment_offset; while (number_of_states > 0) { - CHK_IF_RETURN(compression.compress_many( - states_ptr, digests_ptr, number_of_states, tree_config.digest_elements, sponge_config)); + CHK_IF_RETURN(compression.run_hash_many_kernel( + (L*)prev_layer, next_layer, number_of_states, tree_config.digest_elements * tree_config.arity, + tree_config.digest_elements, tree_config.ctx)); if (!tree_config.keep_rows || cap_height < tree_config.keep_rows + (int)caps_mode) { D* digests_with_offset = digests + segment_offset; CHK_IF_RETURN(cudaMemcpyAsync( - digests_with_offset, digests_ptr, number_of_states * tree_config.digest_elements * sizeof(D), + digests_with_offset, next_layer, number_of_states * tree_config.digest_elements * sizeof(D), cudaMemcpyDeviceToHost, stream)); segment_offset += segment_size; } - if (number_of_states > 1) { swap(&digests_ptr, &states_ptr); } + swap(&prev_layer, &next_layer); segment_size /= tree_config.arity; cap_height--; @@ -322,7 +317,7 @@ namespace merkle_tree { if (caps_mode) { free(caps); } } - CHK_IF_RETURN(cudaFreeAsync(states_ptr, stream)); + CHK_IF_RETURN(cudaFreeAsync(d_leaves_ptr, stream)); CHK_IF_RETURN(cudaFreeAsync(digests_ptr, stream)); if (!tree_config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream)); for (size_t i = 0; i < number_of_streams; i++) { diff --git a/icicle/src/merkle-tree/mmcs.cu b/icicle/src/merkle-tree/mmcs.cu index 5121a9a20..f7acbb653 100644 --- a/icicle/src/merkle-tree/mmcs.cu +++ b/icicle/src/merkle-tree/mmcs.cu @@ -16,10 +16,10 @@ namespace merkle_tree { uint64_t number_of_rows, D* digests, unsigned int digest_elements, - const SpongeHasher& hasher, + const Hasher& hasher, const device_context::DeviceContext& ctx) { - SpongeConfig sponge_config = default_sponge_config(ctx); + HashConfig sponge_config = default_hash_config(ctx); sponge_config.are_inputs_on_device = true; sponge_config.are_outputs_on_device = true; sponge_config.is_async = true; @@ -57,8 +57,8 @@ namespace merkle_tree { unsigned int keep_rows; // Number of rows to keep bool are_inputs_on_device; bool caps_mode; - const SpongeHasher* hasher = nullptr; - const SpongeHasher* compression = nullptr; + const Hasher* hasher = nullptr; + const Hasher* compression = nullptr; const device_context::DeviceContext* ctx = nullptr; }; @@ -189,8 +189,8 @@ namespace merkle_tree { const Matrix* inputs, const unsigned int number_of_inputs, D* digests, - const SpongeHasher& hasher, - const SpongeHasher& compression, + const Hasher& hasher, + const Hasher& compression, const TreeBuilderConfig& tree_config) { CHK_INIT_IF_RETURN(); diff --git a/icicle/src/merkle-tree/tests/merkle/Makefile b/icicle/src/merkle-tree/tests/merkle/Makefile index b3e3cfbc6..15b491f65 100644 --- a/icicle/src/merkle-tree/tests/merkle/Makefile +++ b/icicle/src/merkle-tree/tests/merkle/Makefile @@ -8,8 +8,7 @@ merkle_bls.o: ../../extern.cu ../../merkle.cu poseidon.o: ../../../poseidon/extern.cu nvcc -o poseidon.o -I../../../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE=bls12_381 -c ../../../poseidon/extern.cu - -test_merkle: poseidon2.o merkle.o +test_merkle: test_poseidon2.cu poseidon2.o merkle.o nvcc -o test_merkle -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG poseidon2.o merkle.o test_poseidon2.cu ./test_merkle diff --git a/icicle/src/merkle-tree/tests/merkle/test.cu b/icicle/src/merkle-tree/tests/merkle/test.cu index 544e23c4d..0fb348673 100644 --- a/icicle/src/merkle-tree/tests/merkle/test.cu +++ b/icicle/src/merkle-tree/tests/merkle/test.cu @@ -5,6 +5,7 @@ #include #include +#define DEBUG #include "merkle-tree/merkle.cuh" #include "poseidon/poseidon.cuh" diff --git a/icicle/src/merkle-tree/tests/merkle/test_poseidon2.cu b/icicle/src/merkle-tree/tests/merkle/test_poseidon2.cu index 7bcb444ce..7bd35c92a 100644 --- a/icicle/src/merkle-tree/tests/merkle/test_poseidon2.cu +++ b/icicle/src/merkle-tree/tests/merkle/test_poseidon2.cu @@ -89,16 +89,17 @@ int main(int argc, char* argv[]) // } scalar_t expected[64] = { - {1198029810}, {1114813365}, {241588005}, {735332587}, {201392606}, {623383436}, {60086186}, {1225304654}, - {1501472115}, {891216097}, {184481194}, {855632748}, {1503541944}, {1483537725}, {1023563730}, {698957505}, - {1322038939}, {1132881200}, {104782797}, {68847168}, {420051722}, {126069919}, {1350263697}, {1711085395}, - {1322038939}, {1132881200}, {104782797}, {68847168}, {420051722}, {126069919}, {1350263697}, {1711085395}, - {1019525203}, {127215304}, {1199733491}, {1473997036}, {548538385}, {364347137}, {570748364}, {426431873}, - {926562920}, {6278762}, {1894248581}, {1304248433}, {1635020421}, {719342960}, {1373719279}, {700539301}, - {708916911}, {925660920}, {994927540}, {1925434995}, {208534303}, {69614512}, {1701199215}, {1825115630}}; + {876845485}, {1982055884}, {1232961929}, {1502814326}, {1731913687}, {351564698}, {449044700}, {656218013}, + {1616800877}, {1324365320}, {651075613}, {1679193452}, {218302636}, {283697394}, {1141456517}, {253630808}, + {936036237}, {1020969125}, {597252945}, {32839064}, {957901845}, {1137914369}, {155933167}, {986924657}, + {1553746264}, {1007314324}, {1208763331}, {110389244}, {118704360}, {607471513}, {834479233}, {914998571}, + {1086906039}, {1673233108}, {431115765}, {233068973}, {1974449092}, {1296268875}, {538093590}, {104288129}, + {1011605567}, {53314351}, {1461404090}, {870754513}, {1212389386}, {1363519118}, {799527383}, {1258384762}, + {678820782}, {1940801563}, {887764924}, {1006362075}, {2003940909}, {1213396717}, {1332793191}, {440259232}}; for (int i = 0; i < digests_len; i++) { scalar_t root = digests[i]; + // std::cout << root << std::endl; assert(root == expected[i]); } free(digests); diff --git a/icicle/src/poseidon/extern.cu b/icicle/src/poseidon/extern.cu index d9c8211ae..16145bb21 100644 --- a/icicle/src/poseidon/extern.cu +++ b/icicle/src/poseidon/extern.cu @@ -52,7 +52,7 @@ namespace poseidon { unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - const SpongeConfig& cfg) + const HashConfig& cfg) { return poseidon->hash_many(inputs, output, number_of_states, input_block_len, output_len, cfg); } diff --git a/icicle/src/poseidon/test.cu b/icicle/src/poseidon/test.cu index 8617b7e48..461b57ae0 100644 --- a/icicle/src/poseidon/test.cu +++ b/icicle/src/poseidon/test.cu @@ -48,7 +48,7 @@ int main(int argc, char* argv[]) scalar_t* out_ptr = static_cast(malloc(number_of_blocks * sizeof(scalar_t))); - SpongeConfig cfg = default_sponge_config(); + HashConfig cfg = default_hash_config(); START_TIMER(poseidon_timer); poseidon.hash_many(in_ptr, out_ptr, number_of_blocks, A, 1, cfg); diff --git a/icicle/src/poseidon2/extern.cu b/icicle/src/poseidon2/extern.cu index d5ff39328..7741d29a4 100644 --- a/icicle/src/poseidon2/extern.cu +++ b/icicle/src/poseidon2/extern.cu @@ -56,7 +56,7 @@ namespace poseidon2 { unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - hash::SpongeConfig& cfg) + hash::HashConfig& cfg) { return poseidon->hash_many(inputs, output, number_of_states, input_block_len, output_len, cfg); } diff --git a/icicle/src/poseidon2/test.cu b/icicle/src/poseidon2/test.cu index 5e78dee4b..c98bfd63e 100644 --- a/icicle/src/poseidon2/test.cu +++ b/icicle/src/poseidon2/test.cu @@ -42,7 +42,7 @@ int main(int argc, char* argv[]) scalar_t* out_ptr = static_cast(malloc(number_of_blocks * sizeof(scalar_t))); - SpongeConfig cfg = default_sponge_config(); + HashConfig cfg = default_hash_config(); START_TIMER(poseidon_timer); poseidon.hash_many(in_ptr, out_ptr, number_of_blocks, T, 1, cfg); diff --git a/icicle/src/poseidon2/test_m31.cu b/icicle/src/poseidon2/test_m31.cu index 8d39e943c..378d281f0 100644 --- a/icicle/src/poseidon2/test_m31.cu +++ b/icicle/src/poseidon2/test_m31.cu @@ -36,7 +36,7 @@ int main(int argc, char* argv[]) scalar_t* out_ptr = static_cast(malloc(number_of_blocks * sizeof(scalar_t))); scalar_t input = scalar_t::zero(); - hash::SpongeConfig cfg = hash::default_sponge_config(); + hash::HashConfig cfg = hash::default_hash_config(); size_t number_of_repetitions = argc > 2 ? 1 << atoi(argv[2]) : 32; diff --git a/wrappers/golang/core/sponge.go b/wrappers/golang/core/sponge.go index aec2c674b..2e2a84ef0 100644 --- a/wrappers/golang/core/sponge.go +++ b/wrappers/golang/core/sponge.go @@ -6,7 +6,7 @@ import ( cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" ) -type SpongeConfig struct { +type HashConfig struct { /// Details related to the device such as its id and stream. Ctx cr.DeviceContext @@ -31,9 +31,9 @@ type SpongeConfig struct { IsAsync bool } -func GetDefaultSpongeConfig() SpongeConfig { +func GetDefaultHashConfig() HashConfig { ctx, _ := cr.GetDefaultDeviceContext() - return SpongeConfig{ + return HashConfig{ ctx, false, false, diff --git a/wrappers/golang/curves/bls12377/poseidon/include/poseidon.h b/wrappers/golang/curves/bls12377/poseidon/include/poseidon.h index eb31b50d5..49a1c18a4 100644 --- a/wrappers/golang/curves/bls12377/poseidon/include/poseidon.h +++ b/wrappers/golang/curves/bls12377/poseidon/include/poseidon.h @@ -12,7 +12,7 @@ typedef struct scalar_t scalar_t; typedef struct DeviceContext DeviceContext; typedef struct TreeBuilderConfig TreeBuilderConfig; typedef struct PoseidonInst PoseidonInst; -typedef struct SpongeConfig SpongeConfig; +typedef struct HashConfig HashConfig; cudaError_t bls12_377_poseidon_create_cuda( @@ -40,7 +40,7 @@ cudaError_t bls12_377_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - SpongeConfig* cfg); + HashConfig* cfg); cudaError_t bls12_377_poseidon_delete_cuda(PoseidonInst* poseidon); diff --git a/wrappers/golang/curves/bls12377/poseidon/poseidon.go b/wrappers/golang/curves/bls12377/poseidon/poseidon.go index da57bbdec..62c23121c 100644 --- a/wrappers/golang/curves/bls12377/poseidon/poseidon.go +++ b/wrappers/golang/curves/bls12377/poseidon/poseidon.go @@ -58,7 +58,7 @@ func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) { return &p, err } -func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError { +func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.HashConfig) core.IcicleError { core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx) core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx) @@ -67,7 +67,7 @@ func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.Ho cNumberOfStates := (C.uint)(numberOfStates) cInputBlockLen := (C.uint)(inputBlockLen) cOutputLen := (C.uint)(outputLen) - cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg)) + cCfg := (*C.HashConfig)(unsafe.Pointer(cfg)) __ret := C.bls12_377_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg) err := (cr.CudaError)(__ret) return core.FromCudaError(err) @@ -79,8 +79,8 @@ func (poseidon *Poseidon) Delete() core.IcicleError { return core.FromCudaError(err) } -func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig { - cfg := core.GetDefaultSpongeConfig() +func (poseidon *Poseidon) GetDefaultHashConfig() core.HashConfig { + cfg := core.GetDefaultHashConfig() cfg.InputRate = poseidon.width - 1 cfg.OutputRate = poseidon.width return cfg diff --git a/wrappers/golang/curves/bls12377/tests/poseidon_test.go b/wrappers/golang/curves/bls12377/tests/poseidon_test.go index 773fe868d..d14954090 100644 --- a/wrappers/golang/curves/bls12377/tests/poseidon_test.go +++ b/wrappers/golang/curves/bls12377/tests/poseidon_test.go @@ -19,7 +19,7 @@ func TestPoseidon(t *testing.T) { p, err := poseidon.Load(uint32(arity), &ctx) assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode) - cfg := p.GetDefaultSpongeConfig() + cfg := p.GetDefaultHashConfig() scalars := bls12_377.GenerateScalars(numberOfStates * arity) scalars[0] = scalars[0].Zero() diff --git a/wrappers/golang/curves/bls12381/poseidon/include/poseidon.h b/wrappers/golang/curves/bls12381/poseidon/include/poseidon.h index 2bc41ddae..9b32d5f13 100644 --- a/wrappers/golang/curves/bls12381/poseidon/include/poseidon.h +++ b/wrappers/golang/curves/bls12381/poseidon/include/poseidon.h @@ -12,7 +12,7 @@ typedef struct scalar_t scalar_t; typedef struct DeviceContext DeviceContext; typedef struct TreeBuilderConfig TreeBuilderConfig; typedef struct PoseidonInst PoseidonInst; -typedef struct SpongeConfig SpongeConfig; +typedef struct HashConfig HashConfig; cudaError_t bls12_381_poseidon_create_cuda( @@ -40,7 +40,7 @@ cudaError_t bls12_381_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - SpongeConfig* cfg); + HashConfig* cfg); cudaError_t bls12_381_poseidon_delete_cuda(PoseidonInst* poseidon); diff --git a/wrappers/golang/curves/bls12381/poseidon/poseidon.go b/wrappers/golang/curves/bls12381/poseidon/poseidon.go index 615898f25..2439c621f 100644 --- a/wrappers/golang/curves/bls12381/poseidon/poseidon.go +++ b/wrappers/golang/curves/bls12381/poseidon/poseidon.go @@ -58,7 +58,7 @@ func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) { return &p, err } -func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError { +func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.HashConfig) core.IcicleError { core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx) core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx) @@ -67,7 +67,7 @@ func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.Ho cNumberOfStates := (C.uint)(numberOfStates) cInputBlockLen := (C.uint)(inputBlockLen) cOutputLen := (C.uint)(outputLen) - cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg)) + cCfg := (*C.HashConfig)(unsafe.Pointer(cfg)) __ret := C.bls12_381_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg) err := (cr.CudaError)(__ret) return core.FromCudaError(err) @@ -79,8 +79,8 @@ func (poseidon *Poseidon) Delete() core.IcicleError { return core.FromCudaError(err) } -func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig { - cfg := core.GetDefaultSpongeConfig() +func (poseidon *Poseidon) GetDefaultHashConfig() core.HashConfig { + cfg := core.GetDefaultHashConfig() cfg.InputRate = poseidon.width - 1 cfg.OutputRate = poseidon.width return cfg diff --git a/wrappers/golang/curves/bls12381/tests/poseidon_test.go b/wrappers/golang/curves/bls12381/tests/poseidon_test.go index 55fe02dec..7ef7c62e9 100644 --- a/wrappers/golang/curves/bls12381/tests/poseidon_test.go +++ b/wrappers/golang/curves/bls12381/tests/poseidon_test.go @@ -19,7 +19,7 @@ func TestPoseidon(t *testing.T) { p, err := poseidon.Load(uint32(arity), &ctx) assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode) - cfg := p.GetDefaultSpongeConfig() + cfg := p.GetDefaultHashConfig() scalars := bls12_381.GenerateScalars(numberOfStates * arity) scalars[0] = scalars[0].Zero() diff --git a/wrappers/golang/curves/bn254/poseidon/include/poseidon.h b/wrappers/golang/curves/bn254/poseidon/include/poseidon.h index 2bce4006c..045c27f72 100644 --- a/wrappers/golang/curves/bn254/poseidon/include/poseidon.h +++ b/wrappers/golang/curves/bn254/poseidon/include/poseidon.h @@ -12,7 +12,7 @@ typedef struct scalar_t scalar_t; typedef struct DeviceContext DeviceContext; typedef struct TreeBuilderConfig TreeBuilderConfig; typedef struct PoseidonInst PoseidonInst; -typedef struct SpongeConfig SpongeConfig; +typedef struct HashConfig HashConfig; cudaError_t bn254_poseidon_create_cuda( @@ -40,7 +40,7 @@ cudaError_t bn254_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - SpongeConfig* cfg); + HashConfig* cfg); cudaError_t bn254_poseidon_delete_cuda(PoseidonInst* poseidon); diff --git a/wrappers/golang/curves/bn254/poseidon/poseidon.go b/wrappers/golang/curves/bn254/poseidon/poseidon.go index 23744d0ba..9239060ee 100644 --- a/wrappers/golang/curves/bn254/poseidon/poseidon.go +++ b/wrappers/golang/curves/bn254/poseidon/poseidon.go @@ -58,7 +58,7 @@ func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) { return &p, err } -func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError { +func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.HashConfig) core.IcicleError { core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx) core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx) @@ -67,7 +67,7 @@ func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.Ho cNumberOfStates := (C.uint)(numberOfStates) cInputBlockLen := (C.uint)(inputBlockLen) cOutputLen := (C.uint)(outputLen) - cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg)) + cCfg := (*C.HashConfig)(unsafe.Pointer(cfg)) __ret := C.bn254_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg) err := (cr.CudaError)(__ret) return core.FromCudaError(err) @@ -79,8 +79,8 @@ func (poseidon *Poseidon) Delete() core.IcicleError { return core.FromCudaError(err) } -func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig { - cfg := core.GetDefaultSpongeConfig() +func (poseidon *Poseidon) GetDefaultHashConfig() core.HashConfig { + cfg := core.GetDefaultHashConfig() cfg.InputRate = poseidon.width - 1 cfg.OutputRate = poseidon.width return cfg diff --git a/wrappers/golang/curves/bn254/tests/poseidon_test.go b/wrappers/golang/curves/bn254/tests/poseidon_test.go index 24c80ec88..fd0241eb6 100644 --- a/wrappers/golang/curves/bn254/tests/poseidon_test.go +++ b/wrappers/golang/curves/bn254/tests/poseidon_test.go @@ -19,7 +19,7 @@ func TestPoseidon(t *testing.T) { p, err := poseidon.Load(uint32(arity), &ctx) assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode) - cfg := p.GetDefaultSpongeConfig() + cfg := p.GetDefaultHashConfig() scalars := bn254.GenerateScalars(numberOfStates * arity) scalars[0] = scalars[0].Zero() diff --git a/wrappers/golang/curves/bw6761/poseidon/include/poseidon.h b/wrappers/golang/curves/bw6761/poseidon/include/poseidon.h index 0bfcb554e..381579341 100644 --- a/wrappers/golang/curves/bw6761/poseidon/include/poseidon.h +++ b/wrappers/golang/curves/bw6761/poseidon/include/poseidon.h @@ -12,7 +12,7 @@ typedef struct scalar_t scalar_t; typedef struct DeviceContext DeviceContext; typedef struct TreeBuilderConfig TreeBuilderConfig; typedef struct PoseidonInst PoseidonInst; -typedef struct SpongeConfig SpongeConfig; +typedef struct HashConfig HashConfig; cudaError_t bw6_761_poseidon_create_cuda( @@ -40,7 +40,7 @@ cudaError_t bw6_761_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - SpongeConfig* cfg); + HashConfig* cfg); cudaError_t bw6_761_poseidon_delete_cuda(PoseidonInst* poseidon); diff --git a/wrappers/golang/curves/bw6761/poseidon/poseidon.go b/wrappers/golang/curves/bw6761/poseidon/poseidon.go index 7d75ddd4c..aa8842874 100644 --- a/wrappers/golang/curves/bw6761/poseidon/poseidon.go +++ b/wrappers/golang/curves/bw6761/poseidon/poseidon.go @@ -58,7 +58,7 @@ func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) { return &p, err } -func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError { +func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.HashConfig) core.IcicleError { core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx) core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx) @@ -67,7 +67,7 @@ func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.Ho cNumberOfStates := (C.uint)(numberOfStates) cInputBlockLen := (C.uint)(inputBlockLen) cOutputLen := (C.uint)(outputLen) - cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg)) + cCfg := (*C.HashConfig)(unsafe.Pointer(cfg)) __ret := C.bw6_761_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg) err := (cr.CudaError)(__ret) return core.FromCudaError(err) @@ -79,8 +79,8 @@ func (poseidon *Poseidon) Delete() core.IcicleError { return core.FromCudaError(err) } -func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig { - cfg := core.GetDefaultSpongeConfig() +func (poseidon *Poseidon) GetDefaultHashConfig() core.HashConfig { + cfg := core.GetDefaultHashConfig() cfg.InputRate = poseidon.width - 1 cfg.OutputRate = poseidon.width return cfg diff --git a/wrappers/golang/curves/bw6761/tests/poseidon_test.go b/wrappers/golang/curves/bw6761/tests/poseidon_test.go index c9222202f..54956fb0e 100644 --- a/wrappers/golang/curves/bw6761/tests/poseidon_test.go +++ b/wrappers/golang/curves/bw6761/tests/poseidon_test.go @@ -19,7 +19,7 @@ func TestPoseidon(t *testing.T) { p, err := poseidon.Load(uint32(arity), &ctx) assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode) - cfg := p.GetDefaultSpongeConfig() + cfg := p.GetDefaultHashConfig() scalars := bw6_761.GenerateScalars(numberOfStates * arity) scalars[0] = scalars[0].Zero() diff --git a/wrappers/golang/curves/grumpkin/poseidon/include/poseidon.h b/wrappers/golang/curves/grumpkin/poseidon/include/poseidon.h index 6263a9830..139aed78a 100644 --- a/wrappers/golang/curves/grumpkin/poseidon/include/poseidon.h +++ b/wrappers/golang/curves/grumpkin/poseidon/include/poseidon.h @@ -12,7 +12,7 @@ typedef struct scalar_t scalar_t; typedef struct DeviceContext DeviceContext; typedef struct TreeBuilderConfig TreeBuilderConfig; typedef struct PoseidonInst PoseidonInst; -typedef struct SpongeConfig SpongeConfig; +typedef struct HashConfig HashConfig; cudaError_t grumpkin_poseidon_create_cuda( @@ -40,7 +40,7 @@ cudaError_t grumpkin_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - SpongeConfig* cfg); + HashConfig* cfg); cudaError_t grumpkin_poseidon_delete_cuda(PoseidonInst* poseidon); diff --git a/wrappers/golang/curves/grumpkin/poseidon/poseidon.go b/wrappers/golang/curves/grumpkin/poseidon/poseidon.go index 994960667..b8ffecda5 100644 --- a/wrappers/golang/curves/grumpkin/poseidon/poseidon.go +++ b/wrappers/golang/curves/grumpkin/poseidon/poseidon.go @@ -58,7 +58,7 @@ func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) { return &p, err } -func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError { +func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.HashConfig) core.IcicleError { core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx) core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx) @@ -67,7 +67,7 @@ func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.Ho cNumberOfStates := (C.uint)(numberOfStates) cInputBlockLen := (C.uint)(inputBlockLen) cOutputLen := (C.uint)(outputLen) - cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg)) + cCfg := (*C.HashConfig)(unsafe.Pointer(cfg)) __ret := C.grumpkin_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg) err := (cr.CudaError)(__ret) return core.FromCudaError(err) @@ -79,8 +79,8 @@ func (poseidon *Poseidon) Delete() core.IcicleError { return core.FromCudaError(err) } -func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig { - cfg := core.GetDefaultSpongeConfig() +func (poseidon *Poseidon) GetDefaultHashConfig() core.HashConfig { + cfg := core.GetDefaultHashConfig() cfg.InputRate = poseidon.width - 1 cfg.OutputRate = poseidon.width return cfg diff --git a/wrappers/golang/curves/grumpkin/tests/poseidon_test.go b/wrappers/golang/curves/grumpkin/tests/poseidon_test.go index f869fc6ca..525212454 100644 --- a/wrappers/golang/curves/grumpkin/tests/poseidon_test.go +++ b/wrappers/golang/curves/grumpkin/tests/poseidon_test.go @@ -19,7 +19,7 @@ func TestPoseidon(t *testing.T) { p, err := poseidon.Load(uint32(arity), &ctx) assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode) - cfg := p.GetDefaultSpongeConfig() + cfg := p.GetDefaultHashConfig() scalars := grumpkin.GenerateScalars(numberOfStates * arity) scalars[0] = scalars[0].Zero() diff --git a/wrappers/golang/hash/keccak/hasher.go b/wrappers/golang/hash/keccak/hasher.go index 304342a8f..acac7c318 100644 --- a/wrappers/golang/hash/keccak/hasher.go +++ b/wrappers/golang/hash/keccak/hasher.go @@ -19,16 +19,16 @@ const ( Hash512 HashSize = 512 ) -type KeccakConfig struct { +type HashConfig struct { Ctx cr.DeviceContext areInputsOnDevice bool areOutputsOnDevice bool IsAsync bool } -func GetDefaultKeccakConfig() KeccakConfig { +func GetDefaultHashConfig() HashConfig { ctx, _ := cr.GetDefaultDeviceContext() - return KeccakConfig{ + return HashConfig{ ctx, false, false, @@ -36,7 +36,7 @@ func GetDefaultKeccakConfig() KeccakConfig { } } -func keccakCheck(input core.HostOrDeviceSlice, output core.HostOrDeviceSlice, cfg *KeccakConfig, hashSize HashSize, numberOfBlocks int32) (unsafe.Pointer, unsafe.Pointer, unsafe.Pointer) { +func keccakCheck(input core.HostOrDeviceSlice, output core.HostOrDeviceSlice, cfg *HashConfig, hashSize HashSize, numberOfBlocks int32) (unsafe.Pointer, unsafe.Pointer, unsafe.Pointer) { cfg.areInputsOnDevice = input.IsOnDevice() cfg.areOutputsOnDevice = output.IsOnDevice() @@ -61,13 +61,13 @@ func keccakCheck(input core.HostOrDeviceSlice, output core.HostOrDeviceSlice, cf return input.AsUnsafePointer(), output.AsUnsafePointer(), unsafe.Pointer(cfg) } -func keccak(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig, hashSize HashSize) (ret core.IcicleError) { +func keccak(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig, hashSize HashSize) (ret core.IcicleError) { inputPointer, outputPointer, cfgPointer := keccakCheck(input, output, config, hashSize, numberOfBlocks) cInput := (*C.uint8_t)(inputPointer) cOutput := (*C.uint8_t)(outputPointer) cInputBlockSize := (C.int)(inputBlockSize) cNumberOfBlocks := (C.int)(numberOfBlocks) - cConfig := (*C.KeccakConfig)(cfgPointer) + cConfig := (*C.HashConfig)(cfgPointer) switch hashSize { case Hash256: @@ -79,10 +79,10 @@ func keccak(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, return ret } -func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig) core.IcicleError { +func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError { return keccak(input, inputBlockSize, numberOfBlocks, output, config, Hash256) } -func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig) core.IcicleError { +func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError { return keccak(input, inputBlockSize, numberOfBlocks, output, config, Hash512) } diff --git a/wrappers/golang/hash/keccak/include/keccak.h b/wrappers/golang/hash/keccak/include/keccak.h index e2202e768..5f847467e 100644 --- a/wrappers/golang/hash/keccak/include/keccak.h +++ b/wrappers/golang/hash/keccak/include/keccak.h @@ -8,10 +8,10 @@ extern "C" { #endif -typedef struct KeccakConfig KeccakConfig; +typedef struct HashConfig HashConfig; -cudaError_t keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig* config); -cudaError_t keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig* config); +cudaError_t keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig* config); +cudaError_t keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig* config); #ifdef __cplusplus } diff --git a/wrappers/golang/hash/keccak/tests/hasher_test.go b/wrappers/golang/hash/keccak/tests/hasher_test.go index 040715bed..9c897b295 100644 --- a/wrappers/golang/hash/keccak/tests/hasher_test.go +++ b/wrappers/golang/hash/keccak/tests/hasher_test.go @@ -23,7 +23,7 @@ func TestSimpleHash256(t *testing.T) { input := createHostSliceFromHexString("1725b6679911bfe75ad7e248fbeec4a01034feace99aca43cd95d338a37db8d04b4aa5d83c8f8f5bdb8f7f98cec9a658f7f8061a6de07dcfd61db169cc7e666e1971adb4c7e97c43361c9a9eed8bb7b5c33cfe576a383a7440803996fd28148b") outHost := make(core.HostSlice[uint8], 32) - cfg := keccak.GetDefaultKeccakConfig() + cfg := keccak.GetDefaultHashConfig() e := keccak.Keccak256(input, int32(input.Len()), 1, outHost, &cfg) assert.Equal(t, e.CudaErrorCode, cr.CudaSuccess, "Hashing failed") t.Log(outHost) @@ -34,7 +34,7 @@ func TestBatchHash256(t *testing.T) { input := createHostSliceFromHexString("1725b6679911bfe75ad7e248fbeec4a01034feace99aca43cd95d338a37db8d04b4aa5d83c8f8f5bdb8f7f98cec9a658f7f8061a6de07dcfd61db169cc7e666e1971adb4c7e97c43361c9a9eed8bb7b5c33cfe576a383a7440803996fd28148b") outHost := make(core.HostSlice[uint8], 32*2) - cfg := keccak.GetDefaultKeccakConfig() + cfg := keccak.GetDefaultHashConfig() e := keccak.Keccak256(input, int32(input.Len()/2), 2, outHost, &cfg) assert.Equal(t, e.CudaErrorCode, cr.CudaSuccess, "Hashing failed") t.Log(outHost) @@ -46,7 +46,7 @@ func TestSimpleHash512(t *testing.T) { input := createHostSliceFromHexString("1725b6679911bfe75ad7e248fbeec4a01034feace99aca43cd95d338a37db8d04b4aa5d83c8f8f5bdb8f7f98cec9a658f7f8061a6de07dcfd61db169cc7e666e1971adb4c7e97c43361c9a9eed8bb7b5c33cfe576a383a7440803996fd28148b") outHost := make(core.HostSlice[uint8], 64) - cfg := keccak.GetDefaultKeccakConfig() + cfg := keccak.GetDefaultHashConfig() e := keccak.Keccak512(input, int32(input.Len()), 1, outHost, &cfg) assert.Equal(t, e.CudaErrorCode, cr.CudaSuccess, "Hashing failed") t.Log(outHost) @@ -57,7 +57,7 @@ func TestBatchHash512(t *testing.T) { input := createHostSliceFromHexString("1725b6679911bfe75ad7e248fbeec4a01034feace99aca43cd95d338a37db8d04b4aa5d83c8f8f5bdb8f7f98cec9a658f7f8061a6de07dcfd61db169cc7e666e1971adb4c7e97c43361c9a9eed8bb7b5c33cfe576a383a7440803996fd28148b") outHost := make(core.HostSlice[uint8], 64*2) - cfg := keccak.GetDefaultKeccakConfig() + cfg := keccak.GetDefaultHashConfig() e := keccak.Keccak512(input, int32(input.Len()/2), 2, outHost, &cfg) assert.Equal(t, e.CudaErrorCode, cr.CudaSuccess, "Hashing failed") t.Log(outHost) diff --git a/wrappers/golang/internal/generator/poseidon/templates/poseidon.go.tmpl b/wrappers/golang/internal/generator/poseidon/templates/poseidon.go.tmpl index 9a3a66f8d..f26b6c5a7 100644 --- a/wrappers/golang/internal/generator/poseidon/templates/poseidon.go.tmpl +++ b/wrappers/golang/internal/generator/poseidon/templates/poseidon.go.tmpl @@ -58,7 +58,7 @@ func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) { return &p, err } -func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError { +func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.HashConfig) core.IcicleError { core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx) core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx) @@ -67,7 +67,7 @@ func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.Ho cNumberOfStates := (C.uint)(numberOfStates) cInputBlockLen := (C.uint)(inputBlockLen) cOutputLen := (C.uint)(outputLen) - cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg)) + cCfg := (*C.HashConfig)(unsafe.Pointer(cfg)) __ret := C.{{.Field}}_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg) err := (cr.CudaError)(__ret) return core.FromCudaError(err) @@ -79,8 +79,8 @@ func (poseidon *Poseidon) Delete() core.IcicleError { return core.FromCudaError(err) } -func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig { - cfg := core.GetDefaultSpongeConfig() +func (poseidon *Poseidon) GetDefaultHashConfig() core.HashConfig { + cfg := core.GetDefaultHashConfig() cfg.InputRate = poseidon.width - 1 cfg.OutputRate = poseidon.width return cfg diff --git a/wrappers/golang/internal/generator/poseidon/templates/poseidon.h.tmpl b/wrappers/golang/internal/generator/poseidon/templates/poseidon.h.tmpl index 16803581b..df62f5c32 100644 --- a/wrappers/golang/internal/generator/poseidon/templates/poseidon.h.tmpl +++ b/wrappers/golang/internal/generator/poseidon/templates/poseidon.h.tmpl @@ -12,7 +12,7 @@ typedef struct scalar_t scalar_t; typedef struct DeviceContext DeviceContext; typedef struct TreeBuilderConfig TreeBuilderConfig; typedef struct PoseidonInst PoseidonInst; -typedef struct SpongeConfig SpongeConfig; +typedef struct HashConfig HashConfig; cudaError_t {{.Field}}_poseidon_create_cuda( @@ -40,7 +40,7 @@ cudaError_t {{.Field}}_poseidon_hash_many_cuda( unsigned int number_of_states, unsigned int input_block_len, unsigned int output_len, - SpongeConfig* cfg); + HashConfig* cfg); cudaError_t {{.Field}}_poseidon_delete_cuda(PoseidonInst* poseidon); diff --git a/wrappers/golang/internal/generator/poseidon/templates/poseidon_test.go.tmpl b/wrappers/golang/internal/generator/poseidon/templates/poseidon_test.go.tmpl index a587fb562..a6661b196 100644 --- a/wrappers/golang/internal/generator/poseidon/templates/poseidon_test.go.tmpl +++ b/wrappers/golang/internal/generator/poseidon/templates/poseidon_test.go.tmpl @@ -19,7 +19,7 @@ func TestPoseidon(t *testing.T) { p, err := poseidon.Load(uint32(arity), &ctx) assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode) - cfg := p.GetDefaultSpongeConfig() + cfg := p.GetDefaultHashConfig() scalars := {{.Field}}.GenerateScalars(numberOfStates * arity) scalars[0] = scalars[0].Zero() diff --git a/wrappers/rust/icicle-core/src/hash.rs b/wrappers/rust/icicle-core/src/hash.rs index df67cea92..8ced6e23e 100644 --- a/wrappers/rust/icicle-core/src/hash.rs +++ b/wrappers/rust/icicle-core/src/hash.rs @@ -11,44 +11,28 @@ use crate::ntt::IcicleResult; /// Struct that encodes Sponge hash parameters. #[repr(C)] #[derive(Debug, Clone)] -pub struct SpongeConfig<'a> { +pub struct HashConfig<'a> { /// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext). pub ctx: DeviceContext<'a>, - pub(crate) are_inputs_on_device: bool, - pub(crate) are_outputs_on_device: bool, - pub input_rate: u32, - pub output_rate: u32, - pub offset: u32, - - /// If true - input should be already aligned for poseidon permutation. - /// Aligned format: [0, A, B, 0, C, D, ...] (as you might get by using loop_state) - /// not aligned format: [A, B, 0, C, D, 0, ...] (as you might get from cudaMemcpy2D) - pub recursive_squeeze: bool, - - /// If true, hash results will also be copied in the input pointer in aligned format - pub aligned: bool, + pub are_inputs_on_device: bool, + pub are_outputs_on_device: bool, /// Whether to run the sponge operations asynchronously. If set to `true`, the functions will be non-blocking and you'd need to synchronize /// it explicitly by running `stream.synchronize()`. If set to false, the functions will block the current CPU thread. pub is_async: bool, } -impl<'a> Default for SpongeConfig<'a> { +impl<'a> Default for HashConfig<'a> { fn default() -> Self { Self::default_for_device(DEFAULT_DEVICE_ID) } } -impl<'a> SpongeConfig<'a> { +impl<'a> HashConfig<'a> { pub(crate) fn default_for_device(device_id: usize) -> Self { - SpongeConfig { + HashConfig { ctx: DeviceContext::default_for_device(device_id), are_inputs_on_device: false, are_outputs_on_device: false, - input_rate: 0, - output_rate: 0, - offset: 0, - recursive_squeeze: false, - aligned: false, is_async: false, } } @@ -62,10 +46,10 @@ pub trait SpongeHash { number_of_states: usize, input_block_len: usize, output_len: usize, - cfg: &SpongeConfig, + cfg: &HashConfig, ) -> IcicleResult<()>; - fn default_config<'a>(&self) -> SpongeConfig<'a>; + fn default_config<'a>(&self) -> HashConfig<'a>; fn get_handle(&self) -> *const c_void; } diff --git a/wrappers/rust/icicle-core/src/poseidon/mod.rs b/wrappers/rust/icicle-core/src/poseidon/mod.rs index 39ba26b48..cd1828eef 100644 --- a/wrappers/rust/icicle-core/src/poseidon/mod.rs +++ b/wrappers/rust/icicle-core/src/poseidon/mod.rs @@ -7,7 +7,7 @@ use icicle_cuda_runtime::{device_context::DeviceContext, memory::HostOrDeviceSli use crate::{ error::IcicleResult, - hash::{sponge_check_input, sponge_check_outputs, SpongeConfig, SpongeHash}, + hash::{sponge_check_input, sponge_check_outputs, HashConfig, SpongeHash}, traits::FieldImpl, }; @@ -87,7 +87,7 @@ where number_of_states: usize, input_block_len: usize, output_len: usize, - cfg: &SpongeConfig, + cfg: &HashConfig, ) -> IcicleResult<()> { sponge_check_input(inputs, number_of_states, input_block_len, self.width - 1, &cfg.ctx); sponge_check_outputs(output, number_of_states, output_len, self.width, false, &cfg.ctx); @@ -107,11 +107,8 @@ where ) } - fn default_config<'a>(&self) -> SpongeConfig<'a> { - let mut cfg = SpongeConfig::default(); - cfg.input_rate = self.width as u32 - 1; - cfg.output_rate = self.width as u32; - cfg + fn default_config<'a>(&self) -> HashConfig<'a> { + HashConfig::default() } } @@ -148,7 +145,7 @@ pub trait PoseidonImpl { input_block_len: u32, output_len: u32, poseidon: PoseidonHandle, - cfg: &SpongeConfig, + cfg: &HashConfig, ) -> IcicleResult<()>; fn delete(poseidon: PoseidonHandle) -> IcicleResult<()>; @@ -163,7 +160,7 @@ macro_rules! impl_poseidon { $field_config:ident ) => { mod $field_prefix_ident { - use crate::poseidon::{$field, $field_config, CudaError, DeviceContext, PoseidonHandle, SpongeConfig}; + use crate::poseidon::{$field, $field_config, CudaError, DeviceContext, HashConfig, PoseidonHandle}; extern "C" { #[link_name = concat!($field_prefix, "_poseidon_create_cuda")] pub(crate) fn create( @@ -194,7 +191,7 @@ macro_rules! impl_poseidon { number_of_states: u32, input_block_len: u32, output_len: u32, - cfg: &SpongeConfig, + cfg: &HashConfig, ) -> CudaError; } } @@ -248,7 +245,7 @@ macro_rules! impl_poseidon { input_block_len: u32, output_len: u32, poseidon: PoseidonHandle, - cfg: &SpongeConfig, + cfg: &HashConfig, ) -> IcicleResult<()> { unsafe { $field_prefix_ident::hash_many( diff --git a/wrappers/rust/icicle-core/src/poseidon2/mod.rs b/wrappers/rust/icicle-core/src/poseidon2/mod.rs index dcbbdee85..2e08ecbc0 100644 --- a/wrappers/rust/icicle-core/src/poseidon2/mod.rs +++ b/wrappers/rust/icicle-core/src/poseidon2/mod.rs @@ -7,7 +7,7 @@ use icicle_cuda_runtime::{device_context::DeviceContext, memory::HostOrDeviceSli use crate::{ error::IcicleResult, - hash::{sponge_check_input, sponge_check_outputs, SpongeConfig, SpongeHash}, + hash::{sponge_check_input, sponge_check_outputs, HashConfig, SpongeHash}, traits::FieldImpl, }; @@ -32,6 +32,7 @@ where ::Config: Poseidon2Impl, { width: usize, + rate: usize, handle: Poseidon2Handle, phantom: PhantomData, } @@ -52,6 +53,7 @@ where .and_then(|handle| { Ok(Self { width, + rate, handle, phantom: PhantomData, }) @@ -85,6 +87,7 @@ where .and_then(|handle| { Ok(Self { width, + rate, handle, phantom: PhantomData, }) @@ -108,15 +111,9 @@ where number_of_states: usize, input_block_len: usize, output_len: usize, - cfg: &SpongeConfig, + cfg: &HashConfig, ) -> IcicleResult<()> { - sponge_check_input( - inputs, - number_of_states, - input_block_len, - cfg.input_rate as usize, - &cfg.ctx, - ); + sponge_check_input(inputs, number_of_states, input_block_len, self.rate, &cfg.ctx); sponge_check_outputs(output, number_of_states, output_len, self.width, false, &cfg.ctx); let mut local_cfg = cfg.clone(); @@ -134,11 +131,8 @@ where ) } - fn default_config<'a>(&self) -> SpongeConfig<'a> { - let mut cfg = SpongeConfig::default(); - cfg.input_rate = self.width as u32; - cfg.output_rate = self.width as u32; - cfg + fn default_config<'a>(&self) -> HashConfig<'a> { + HashConfig::default() } } @@ -181,7 +175,7 @@ pub trait Poseidon2Impl { input_block_len: u32, output_len: u32, poseidon: Poseidon2Handle, - cfg: &SpongeConfig, + cfg: &HashConfig, ) -> IcicleResult<()>; fn delete(poseidon: Poseidon2Handle) -> IcicleResult<()>; @@ -197,8 +191,8 @@ macro_rules! impl_poseidon2 { ) => { mod $field_prefix_ident { use crate::poseidon2::{ - $field, $field_config, CudaError, DeviceContext, DiffusionStrategy, MdsType, Poseidon2Handle, - SpongeConfig, + $field, $field_config, CudaError, DeviceContext, DiffusionStrategy, HashConfig, MdsType, + Poseidon2Handle, }; use icicle_core::error::IcicleError; extern "C" { @@ -238,7 +232,7 @@ macro_rules! impl_poseidon2 { number_of_states: u32, input_block_len: u32, output_len: u32, - cfg: &SpongeConfig, + cfg: &HashConfig, ) -> CudaError; } } @@ -298,7 +292,7 @@ macro_rules! impl_poseidon2 { input_block_len: u32, output_len: u32, poseidon: Poseidon2Handle, - cfg: &SpongeConfig, + cfg: &HashConfig, ) -> IcicleResult<()> { unsafe { $field_prefix_ident::hash_many( diff --git a/wrappers/rust/icicle-curves/icicle-bls12-377/src/poseidon/mod.rs b/wrappers/rust/icicle-curves/icicle-bls12-377/src/poseidon/mod.rs index 0136c3030..909f1602d 100644 --- a/wrappers/rust/icicle-curves/icicle-bls12-377/src/poseidon/mod.rs +++ b/wrappers/rust/icicle-curves/icicle-bls12-377/src/poseidon/mod.rs @@ -3,7 +3,7 @@ use crate::curve::{BaseCfg, BaseField}; use crate::curve::{ScalarCfg, ScalarField}; use icicle_core::error::IcicleResult; -use icicle_core::hash::SpongeConfig; +use icicle_core::hash::HashConfig; use icicle_core::impl_poseidon; use icicle_core::poseidon::{PoseidonHandle, PoseidonImpl}; use icicle_core::traits::IcicleResultWrap; diff --git a/wrappers/rust/icicle-curves/icicle-bls12-381/src/poseidon/mod.rs b/wrappers/rust/icicle-curves/icicle-bls12-381/src/poseidon/mod.rs index 1ec41a41d..617504568 100644 --- a/wrappers/rust/icicle-curves/icicle-bls12-381/src/poseidon/mod.rs +++ b/wrappers/rust/icicle-curves/icicle-bls12-381/src/poseidon/mod.rs @@ -1,7 +1,7 @@ use crate::curve::{ScalarCfg, ScalarField}; use icicle_core::error::IcicleResult; -use icicle_core::hash::SpongeConfig; +use icicle_core::hash::HashConfig; use icicle_core::impl_poseidon; use icicle_core::poseidon::{PoseidonHandle, PoseidonImpl}; use icicle_core::traits::IcicleResultWrap; diff --git a/wrappers/rust/icicle-curves/icicle-bn254/src/poseidon/mod.rs b/wrappers/rust/icicle-curves/icicle-bn254/src/poseidon/mod.rs index bf555199c..d4e2bc977 100644 --- a/wrappers/rust/icicle-curves/icicle-bn254/src/poseidon/mod.rs +++ b/wrappers/rust/icicle-curves/icicle-bn254/src/poseidon/mod.rs @@ -1,7 +1,7 @@ use crate::curve::{ScalarCfg, ScalarField}; use icicle_core::error::IcicleResult; -use icicle_core::hash::SpongeConfig; +use icicle_core::hash::HashConfig; use icicle_core::impl_poseidon; use icicle_core::poseidon::{PoseidonHandle, PoseidonImpl}; use icicle_core::traits::IcicleResultWrap; diff --git a/wrappers/rust/icicle-curves/icicle-bn254/src/poseidon2/mod.rs b/wrappers/rust/icicle-curves/icicle-bn254/src/poseidon2/mod.rs index 063de3657..ef261acea 100644 --- a/wrappers/rust/icicle-curves/icicle-bn254/src/poseidon2/mod.rs +++ b/wrappers/rust/icicle-curves/icicle-bn254/src/poseidon2/mod.rs @@ -1,7 +1,7 @@ use crate::curve::{ScalarCfg, ScalarField}; use icicle_core::error::IcicleResult; -use icicle_core::hash::SpongeConfig; +use icicle_core::hash::HashConfig; use icicle_core::impl_poseidon2; use icicle_core::poseidon2::{DiffusionStrategy, MdsType, Poseidon2Handle, Poseidon2Impl}; use icicle_core::traits::IcicleResultWrap; diff --git a/wrappers/rust/icicle-curves/icicle-grumpkin/src/poseidon/mod.rs b/wrappers/rust/icicle-curves/icicle-grumpkin/src/poseidon/mod.rs index 4857be0d1..633a45805 100644 --- a/wrappers/rust/icicle-curves/icicle-grumpkin/src/poseidon/mod.rs +++ b/wrappers/rust/icicle-curves/icicle-grumpkin/src/poseidon/mod.rs @@ -1,7 +1,7 @@ use crate::curve::{ScalarCfg, ScalarField}; use icicle_core::error::IcicleResult; -use icicle_core::hash::SpongeConfig; +use icicle_core::hash::HashConfig; use icicle_core::impl_poseidon; use icicle_core::poseidon::{PoseidonHandle, PoseidonImpl}; use icicle_core::traits::IcicleResultWrap; diff --git a/wrappers/rust/icicle-fields/icicle-babybear/src/poseidon2/mod.rs b/wrappers/rust/icicle-fields/icicle-babybear/src/poseidon2/mod.rs index c6159cb8d..3512791d0 100644 --- a/wrappers/rust/icicle-fields/icicle-babybear/src/poseidon2/mod.rs +++ b/wrappers/rust/icicle-fields/icicle-babybear/src/poseidon2/mod.rs @@ -1,7 +1,7 @@ use crate::field::{ScalarCfg, ScalarField}; use icicle_core::error::IcicleResult; -use icicle_core::hash::SpongeConfig; +use icicle_core::hash::HashConfig; use icicle_core::impl_poseidon2; use icicle_core::poseidon2::{DiffusionStrategy, MdsType, Poseidon2Handle, Poseidon2Impl}; use icicle_core::traits::IcicleResultWrap; diff --git a/wrappers/rust/icicle-hash/src/keccak/mod.rs b/wrappers/rust/icicle-hash/src/keccak/mod.rs index c24ae7d02..88fc5a2c7 100644 --- a/wrappers/rust/icicle-hash/src/keccak/mod.rs +++ b/wrappers/rust/icicle-hash/src/keccak/mod.rs @@ -1,84 +1,64 @@ +use icicle_core::hash::HashConfig; +use icicle_core::tree::TreeBuilderConfig; use icicle_cuda_runtime::error::CudaError; -use icicle_cuda_runtime::{ - device_context::{DeviceContext, DEFAULT_DEVICE_ID}, - memory::HostOrDeviceSlice, -}; +use icicle_cuda_runtime::memory::HostOrDeviceSlice; use icicle_core::error::IcicleResult; use icicle_core::traits::IcicleResultWrap; pub mod tests; -#[repr(C)] -#[derive(Debug, Clone)] -pub struct KeccakConfig<'a> { - /// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext). - pub ctx: DeviceContext<'a>, - - /// True if inputs are on device and false if they're on host. Default value: false. - pub are_inputs_on_device: bool, - - /// If true, output is preserved on device, otherwise on host. Default value: false. - pub are_outputs_on_device: bool, - - /// Whether to run the Keccak asynchronously. If set to `true`, the keccak_hash function will be - /// non-blocking and you'd need to synchronize it explicitly by running - /// `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, keccak_hash - /// function will block the current CPU thread. - pub is_async: bool, -} - -impl<'a> Default for KeccakConfig<'a> { - fn default() -> Self { - Self::default_for_device(DEFAULT_DEVICE_ID) - } -} - -impl<'a> KeccakConfig<'a> { - pub fn default_for_device(device_id: usize) -> Self { - KeccakConfig { - ctx: DeviceContext::default_for_device(device_id), - are_inputs_on_device: false, - are_outputs_on_device: false, - is_async: false, - } - } -} - extern "C" { pub(crate) fn keccak256_cuda( input: *const u8, - input_block_size: i32, - number_of_blocks: i32, + input_block_size: u32, + number_of_blocks: u32, output: *mut u8, - config: &KeccakConfig, + config: &HashConfig, ) -> CudaError; pub(crate) fn keccak512_cuda( input: *const u8, - input_block_size: i32, - number_of_blocks: i32, + input_block_size: u32, + number_of_blocks: u32, output: *mut u8, - config: &KeccakConfig, + config: &HashConfig, + ) -> CudaError; + + pub(crate) fn build_keccak256_merkle_tree_cuda( + leaves: *const u8, + digests: *mut u64, + height: u32, + input_block_len: u32, + config: &TreeBuilderConfig, + ) -> CudaError; + + pub(crate) fn build_keccak512_merkle_tree_cuda( + leaves: *const u8, + digests: *mut u64, + height: u32, + input_block_len: u32, + config: &TreeBuilderConfig, ) -> CudaError; } pub fn keccak256( input: &(impl HostOrDeviceSlice + ?Sized), - input_block_size: i32, - number_of_blocks: i32, + input_block_size: u32, + number_of_blocks: u32, output: &mut (impl HostOrDeviceSlice + ?Sized), - config: &mut KeccakConfig, + config: &HashConfig, ) -> IcicleResult<()> { - config.are_inputs_on_device = input.is_on_device(); - config.are_outputs_on_device = output.is_on_device(); + let mut local_cfg = config.clone(); + local_cfg.are_inputs_on_device = input.is_on_device(); + local_cfg.are_outputs_on_device = output.is_on_device(); unsafe { keccak256_cuda( input.as_ptr(), input_block_size, number_of_blocks, output.as_mut_ptr(), - config, + &local_cfg, ) .wrap() } @@ -86,19 +66,58 @@ pub fn keccak256( pub fn keccak512( input: &(impl HostOrDeviceSlice + ?Sized), - input_block_size: i32, - number_of_blocks: i32, + input_block_size: u32, + number_of_blocks: u32, output: &mut (impl HostOrDeviceSlice + ?Sized), - config: &mut KeccakConfig, + config: &HashConfig, ) -> IcicleResult<()> { - config.are_inputs_on_device = input.is_on_device(); - config.are_outputs_on_device = output.is_on_device(); + let mut local_cfg = config.clone(); + local_cfg.are_inputs_on_device = input.is_on_device(); + local_cfg.are_outputs_on_device = output.is_on_device(); unsafe { keccak512_cuda( input.as_ptr(), input_block_size, number_of_blocks, output.as_mut_ptr(), + &local_cfg, + ) + .wrap() + } +} + +pub fn build_keccak256_merkle_tree( + leaves: &(impl HostOrDeviceSlice + ?Sized), + digests: &mut (impl HostOrDeviceSlice + ?Sized), + height: usize, + input_block_len: usize, + config: &TreeBuilderConfig, +) -> IcicleResult<()> { + unsafe { + build_keccak256_merkle_tree_cuda( + leaves.as_ptr(), + digests.as_mut_ptr(), + height as u32, + input_block_len as u32, + config, + ) + .wrap() + } +} + +pub fn build_keccak512_merkle_tree( + leaves: &(impl HostOrDeviceSlice + ?Sized), + digests: &mut (impl HostOrDeviceSlice + ?Sized), + height: usize, + input_block_len: usize, + config: &TreeBuilderConfig, +) -> IcicleResult<()> { + unsafe { + build_keccak512_merkle_tree_cuda( + leaves.as_ptr(), + digests.as_mut_ptr(), + height as u32, + input_block_len as u32, config, ) .wrap() diff --git a/wrappers/rust/icicle-hash/src/keccak/tests.rs b/wrappers/rust/icicle-hash/src/keccak/tests.rs index 8b1378917..5675e9158 100644 --- a/wrappers/rust/icicle-hash/src/keccak/tests.rs +++ b/wrappers/rust/icicle-hash/src/keccak/tests.rs @@ -1 +1,48 @@ +#[cfg(test)] +pub(crate) mod tests { + use icicle_core::{ + hash::HashConfig, + tree::{merkle_tree_digests_len, TreeBuilderConfig}, + }; + use icicle_cuda_runtime::memory::HostSlice; + use crate::keccak::{build_keccak256_merkle_tree, keccak256}; + + #[test] + fn keccak_hash_test() { + let config = HashConfig::default(); + let input_block_len = 136; + let number_of_hashes = 1024; + + let preimages = vec![1u8; number_of_hashes * input_block_len]; + let mut digests = vec![0u8; number_of_hashes * 64]; + + let preimages_slice = HostSlice::from_slice(&preimages); + let digests_slice = HostSlice::from_mut_slice(&mut digests); + + keccak256( + preimages_slice, + input_block_len as u32, + number_of_hashes as u32, + digests_slice, + &config, + ) + .unwrap(); + } + + #[test] + fn keccak_merkle_tree_test() { + let mut config = TreeBuilderConfig::default(); + config.arity = 2; + let height = 22; + let input_block_len = 136; + let leaves = vec![1u8; (1 << height) * input_block_len]; + let mut digests = vec![0u64; merkle_tree_digests_len((height + 1) as u32, 2, 1)]; + + let leaves_slice = HostSlice::from_slice(&leaves); + let digests_slice = HostSlice::from_mut_slice(&mut digests); + + build_keccak256_merkle_tree(leaves_slice, digests_slice, height, input_block_len, &config).unwrap(); + println!("Root: {:?}", digests_slice[0]); + } +}