From 5b7b85e499b5c6c52a35d5631cff3619e131805b Mon Sep 17 00:00:00 2001 From: Maamoun TK Date: Thu, 14 Dec 2023 11:42:57 +0200 Subject: [PATCH 1/6] Change order of SHA3 Scalar/Simd256 parameters --- benchmarks/sha3.cc | 188 +-- include/Hacl_Hash_SHA3_Scalar.h | 20 +- include/Hacl_Hash_SHA3_Simd256.h | 64 +- include/msvc/Hacl_Hash_SHA3_Scalar.h | 20 +- include/msvc/Hacl_Hash_SHA3_Simd256.h | 64 +- src/Hacl_Hash_SHA3_Scalar.c | 20 +- src/Hacl_Hash_SHA3_Simd256.c | 1654 +++++++++++++------------ tests/sha3.cc | 104 +- 8 files changed, 1124 insertions(+), 1010 deletions(-) diff --git a/benchmarks/sha3.cc b/benchmarks/sha3.cc index 9caf2e54..1deb82c8 100644 --- a/benchmarks/sha3.cc +++ b/benchmarks/sha3.cc @@ -65,7 +65,7 @@ Hacl_Sha3_224_Scalar(benchmark::State& state) { for (auto _ : state) { Hacl_Hash_SHA3_Scalar_sha3_224( - input.size(), (uint8_t*)input.data(), digest224_0.data()); + digest224_0.data(), (uint8_t*)input.data(), input.size()); } if (digest224_0 != expected_digest_sha3_224) { state.SkipWithError("Incorrect digest."); @@ -85,15 +85,15 @@ Hacl_Sha3_224_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_sha3_224(input.size(), + Hacl_Hash_SHA3_Simd256_sha3_224(digest224_0.data(), + digest224_1.data(), + digest224_2.data(), + digest224_3.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest224_0.data(), - digest224_1.data(), - digest224_2.data(), - digest224_3.data()); + input.size()); } if (digest224_0 != expected_digest_sha3_224 || digest224_1 != expected_digest_sha3_224 || @@ -132,12 +132,39 @@ Hacl_Sha3_256(benchmark::State& state) BENCHMARK(Hacl_Sha3_256)->Setup(DoSetup); +#include "sha3.h" + +static void +Digestif_sha3_256(benchmark::State& state) +{ + bytes digest(32, 0); + + for (auto _ : state) { + + sha3_ctx ctx; + digestif_sha3_init(&ctx, 256); + + for (auto chunk : chunk(input, chunk_len)) { + digestif_sha3_update(&ctx, chunk.data(), chunk.size()); + } + + digestif_sha3_finalize(&ctx, digest.data(), 0x06); + } + + if (digest != expected_digest_sha3_256) { + state.SkipWithError("Incorrect digest."); + return; + } +} + +BENCHMARK(Digestif_sha3_256)->Setup(DoSetup); + static void Hacl_Sha3_256_Scalar(benchmark::State& state) { for (auto _ : state) { Hacl_Hash_SHA3_Scalar_sha3_256( - input.size(), (uint8_t*)input.data(), digest256_0.data()); + digest256_0.data(), (uint8_t*)input.data(), input.size()); } if (digest256_0 != expected_digest_sha3_256) { state.SkipWithError("Incorrect digest."); @@ -157,15 +184,15 @@ Hacl_Sha3_256_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_sha3_256(input.size(), + Hacl_Hash_SHA3_Simd256_sha3_256(digest256_0.data(), + digest256_1.data(), + digest256_2.data(), + digest256_3.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest256_0.data(), - digest256_1.data(), - digest256_2.data(), - digest256_3.data()); + input.size()); } if (digest256_0 != expected_digest_sha3_256 || digest256_1 != expected_digest_sha3_256 || @@ -179,33 +206,6 @@ Hacl_Sha3_256_Simd256(benchmark::State& state) BENCHMARK(Hacl_Sha3_256_Simd256)->Setup(DoSetup); #endif -#include "sha3.h" - -static void -Digestif_sha3_256(benchmark::State& state) -{ - bytes digest(32, 0); - - for (auto _ : state) { - - sha3_ctx ctx; - digestif_sha3_init(&ctx, 256); - - for (auto chunk : chunk(input, chunk_len)) { - digestif_sha3_update(&ctx, chunk.data(), chunk.size()); - } - - digestif_sha3_finalize(&ctx, 
digest.data(), 0x06); - } - - if (digest != expected_digest_sha3_256) { - state.SkipWithError("Incorrect digest."); - return; - } -} - -BENCHMARK(Digestif_sha3_256)->Setup(DoSetup); - #ifndef NO_OPENSSL BENCHMARK_CAPTURE(OpenSSL_hash_oneshot, sha3_256, @@ -236,7 +236,7 @@ Hacl_Sha3_384_Scalar(benchmark::State& state) { for (auto _ : state) { Hacl_Hash_SHA3_Scalar_sha3_384( - input.size(), (uint8_t*)input.data(), digest384_0.data()); + digest384_0.data(), (uint8_t*)input.data(), input.size()); } if (digest384_0 != expected_digest_sha3_384) { state.SkipWithError("Incorrect digest."); @@ -256,15 +256,15 @@ Hacl_Sha3_384_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_sha3_384(input.size(), + Hacl_Hash_SHA3_Simd256_sha3_384(digest384_0.data(), + digest384_1.data(), + digest384_2.data(), + digest384_3.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest384_0.data(), - digest384_1.data(), - digest384_2.data(), - digest384_3.data()); + input.size()); } if (digest384_0 != expected_digest_sha3_384 || digest384_1 != expected_digest_sha3_384 || @@ -303,12 +303,37 @@ Hacl_Sha3_512(benchmark::State& state) BENCHMARK(Hacl_Sha3_512)->Setup(DoSetup); +static void +Digestif_sha3_512(benchmark::State& state) +{ + bytes digest(64, 0); + + for (auto _ : state) { + + sha3_ctx ctx; + digestif_sha3_init(&ctx, 512); + + for (auto chunk : chunk(input, chunk_len)) { + digestif_sha3_update(&ctx, chunk.data(), chunk.size()); + } + + digestif_sha3_finalize(&ctx, digest.data(), 0x06); + } + + if (digest != expected_digest_sha3_512) { + state.SkipWithError("Incorrect digest."); + return; + } +} + +BENCHMARK(Digestif_sha3_512)->Setup(DoSetup); + static void Hacl_Sha3_512_Scalar(benchmark::State& state) { for (auto _ : state) { Hacl_Hash_SHA3_Scalar_sha3_512( - input.size(), (uint8_t*)input.data(), digest512_0.data()); + digest512_0.data(), (uint8_t*)input.data(), input.size()); } if (digest512_0 != expected_digest_sha3_512) { state.SkipWithError("Incorrect digest."); @@ -328,15 +353,15 @@ Hacl_Sha3_512_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_sha3_512(input.size(), + Hacl_Hash_SHA3_Simd256_sha3_512(digest512_0.data(), + digest512_1.data(), + digest512_2.data(), + digest512_3.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest512_0.data(), - digest512_1.data(), - digest512_2.data(), - digest512_3.data()); + input.size()); } if (digest512_0 != expected_digest_sha3_512 || digest512_1 != expected_digest_sha3_512 || @@ -350,31 +375,6 @@ Hacl_Sha3_512_Simd256(benchmark::State& state) BENCHMARK(Hacl_Sha3_512_Simd256)->Setup(DoSetup); #endif -static void -Digestif_sha3_512(benchmark::State& state) -{ - bytes digest(64, 0); - - for (auto _ : state) { - - sha3_ctx ctx; - digestif_sha3_init(&ctx, 512); - - for (auto chunk : chunk(input, chunk_len)) { - digestif_sha3_update(&ctx, chunk.data(), chunk.size()); - } - - digestif_sha3_finalize(&ctx, digest.data(), 0x06); - } - - if (digest != expected_digest_sha3_512) { - state.SkipWithError("Incorrect digest."); - return; - } -} - -BENCHMARK(Digestif_sha3_512)->Setup(DoSetup); - #ifndef NO_OPENSSL BENCHMARK_CAPTURE(OpenSSL_hash_oneshot, sha3_512, @@ -469,10 +469,10 @@ static void Hacl_Sha3_shake128_Scalar(benchmark::State& state) { for (auto _ : state) { - Hacl_Hash_SHA3_Scalar_shake128(input.size(), - (uint8_t*)input.data(), + Hacl_Hash_SHA3_Scalar_shake128(digest_shake_0.data(), 
digest_shake_0.size(), - digest_shake_0.data()); + (uint8_t*)input.data(), + input.size()); } } @@ -488,16 +488,16 @@ Hacl_Sha3_shake128_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_shake128(input.size(), + Hacl_Hash_SHA3_Simd256_shake128(digest_shake_0.data(), + digest_shake_1.data(), + digest_shake_2.data(), + digest_shake_3.data(), + digest_shake_0.size(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest_shake_0.size(), - digest_shake_0.data(), - digest_shake_1.data(), - digest_shake_2.data(), - digest_shake_3.data()); + input.size()); } } @@ -521,10 +521,10 @@ static void Hacl_Sha3_shake256_Scalar(benchmark::State& state) { for (auto _ : state) { - Hacl_Hash_SHA3_Scalar_shake256(input.size(), - (uint8_t*)input.data(), + Hacl_Hash_SHA3_Scalar_shake256(digest_shake_0.data(), digest_shake_0.size(), - digest_shake_0.data()); + (uint8_t*)input.data(), + input.size()); } } @@ -540,16 +540,16 @@ Hacl_Sha3_shake256_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_shake256(input.size(), + Hacl_Hash_SHA3_Simd256_shake256(digest_shake_0.data(), + digest_shake_1.data(), + digest_shake_2.data(), + digest_shake_3.data(), + digest_shake_0.size(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest_shake_0.size(), - digest_shake_0.data(), - digest_shake_1.data(), - digest_shake_2.data(), - digest_shake_3.data()); + input.size()); } } diff --git a/include/Hacl_Hash_SHA3_Scalar.h b/include/Hacl_Hash_SHA3_Scalar.h index e49f1967..2063da71 100644 --- a/include/Hacl_Hash_SHA3_Scalar.h +++ b/include/Hacl_Hash_SHA3_Scalar.h @@ -37,27 +37,27 @@ extern "C" { void Hacl_Hash_SHA3_Scalar_shake128( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Scalar_shake256( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); -void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen); #if defined(__cplusplus) } diff --git a/include/Hacl_Hash_SHA3_Simd256.h b/include/Hacl_Hash_SHA3_Simd256.h index 3dd3772d..22efc736 100644 --- a/include/Hacl_Hash_SHA3_Simd256.h +++ b/include/Hacl_Hash_SHA3_Simd256.h @@ -58,82 +58,82 @@ K____uint8_t___uint8_t____K____uint8_t___uint8_t_; void Hacl_Hash_SHA3_Simd256_shake128( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void 
Hacl_Hash_SHA3_Simd256_shake256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_sha3_224( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_sha3_256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_sha3_384( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_sha3_512( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); #if defined(__cplusplus) diff --git a/include/msvc/Hacl_Hash_SHA3_Scalar.h b/include/msvc/Hacl_Hash_SHA3_Scalar.h index e49f1967..2063da71 100644 --- a/include/msvc/Hacl_Hash_SHA3_Scalar.h +++ b/include/msvc/Hacl_Hash_SHA3_Scalar.h @@ -37,27 +37,27 @@ extern "C" { void Hacl_Hash_SHA3_Scalar_shake128( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Scalar_shake256( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); -void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen); #if defined(__cplusplus) } diff --git a/include/msvc/Hacl_Hash_SHA3_Simd256.h b/include/msvc/Hacl_Hash_SHA3_Simd256.h index 3dd3772d..22efc736 100644 --- a/include/msvc/Hacl_Hash_SHA3_Simd256.h +++ b/include/msvc/Hacl_Hash_SHA3_Simd256.h @@ -58,82 +58,82 @@ K____uint8_t___uint8_t____K____uint8_t___uint8_t_; void Hacl_Hash_SHA3_Simd256_shake128( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, 
uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_shake256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_sha3_224( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_sha3_256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_sha3_384( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_sha3_512( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); #if defined(__cplusplus) diff --git a/src/Hacl_Hash_SHA3_Scalar.c b/src/Hacl_Hash_SHA3_Scalar.c index 43d57482..724426eb 100644 --- a/src/Hacl_Hash_SHA3_Scalar.c +++ b/src/Hacl_Hash_SHA3_Scalar.c @@ -55,10 +55,10 @@ Hacl_Impl_SHA3_Vec_keccak_rndc[24U] = void Hacl_Hash_SHA3_Scalar_shake128( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ) { uint32_t rateInBytes = 168U; @@ -447,10 +447,10 @@ Hacl_Hash_SHA3_Scalar_shake128( void Hacl_Hash_SHA3_Scalar_shake256( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ) { uint32_t rateInBytes = 136U; @@ -837,7 +837,7 @@ Hacl_Hash_SHA3_Scalar_shake256( memcpy(output + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 144U; uint64_t s[25U] = { 0U }; @@ -1223,7 +1223,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 136U; uint64_t s[25U] = { 0U }; @@ -1609,7 +1609,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, 
uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 104U; uint64_t s[25U] = { 0U }; @@ -1995,7 +1995,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 72U; uint64_t s[25U] = { 0U }; diff --git a/src/Hacl_Hash_SHA3_Simd256.c b/src/Hacl_Hash_SHA3_Simd256.c index b9bfcee5..fbe195f5 100644 --- a/src/Hacl_Hash_SHA3_Simd256.c +++ b/src/Hacl_Hash_SHA3_Simd256.c @@ -30,16 +30,16 @@ void Hacl_Hash_SHA3_Simd256_shake128( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -438,63 +438,63 @@ Hacl_Hash_SHA3_Simd256_shake128( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -723,57 +723,57 @@ Hacl_Hash_SHA3_Simd256_shake128( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b36 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -1332,25 +1332,34 @@ Hacl_Hash_SHA3_Simd256_shake128( } for (uint32_t i = 0U; i < rateInBytes / 32U; i++) { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 
32U, 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + } + if (rateInBytes % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 32U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 64U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 96U, + rateInBytes % 32U * sizeof (uint8_t)); } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -1691,30 +1700,40 @@ Hacl_Hash_SHA3_Simd256_shake128( memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + if (remOut % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + outputByteLen - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 32U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 64U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b3 + outputByteLen - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 96U, + remOut % 32U * sizeof (uint8_t)); + return; + } } void Hacl_Hash_SHA3_Simd256_shake256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -2113,63 +2132,63 @@ 
Hacl_Hash_SHA3_Simd256_shake256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -2398,57 +2417,57 @@ Hacl_Hash_SHA3_Simd256_shake256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) 
Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b36 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -3007,25 +3026,34 @@ Hacl_Hash_SHA3_Simd256_shake256( } for (uint32_t i = 0U; i < rateInBytes / 32U; i++) { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + } + if (rateInBytes % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 32U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 64U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 96U, + rateInBytes % 32U * sizeof (uint8_t)); } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 
* sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -3366,29 +3394,39 @@ Hacl_Hash_SHA3_Simd256_shake256( memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + if (remOut % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + outputByteLen - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 32U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 64U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b3 + outputByteLen - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 96U, + remOut % 32U * sizeof (uint8_t)); + return; + } } void Hacl_Hash_SHA3_Simd256_sha3_224( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -3787,63 +3825,63 @@ Hacl_Hash_SHA3_Simd256_sha3_224( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) 
Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + 
ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -4072,57 +4110,57 @@ Hacl_Hash_SHA3_Simd256_sha3_224( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b36 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); Lib_IntVector_Intrinsics_vec256 
v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -4681,25 +4719,34 @@ Hacl_Hash_SHA3_Simd256_sha3_224( } for (uint32_t i = 0U; i < rateInBytes / 32U; i++) { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + } + if (rateInBytes % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 32U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 64U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 96U, + rateInBytes % 32U * sizeof (uint8_t)); } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -5040,29 +5087,39 @@ Hacl_Hash_SHA3_Simd256_sha3_224( memcpy(b2 + 28U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); memcpy(b3 + 28U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + if (remOut % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 28U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U, + remOut % 32U * 
sizeof (uint8_t)); + memcpy(b1 + 28U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 32U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b2 + 28U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 64U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b3 + 28U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 96U, + remOut % 32U * sizeof (uint8_t)); + return; + } } void Hacl_Hash_SHA3_Simd256_sha3_256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -5461,63 +5518,63 @@ Hacl_Hash_SHA3_Simd256_sha3_256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -5746,57 +5803,57 @@ 
Hacl_Hash_SHA3_Simd256_sha3_256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b36 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); 
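+  /* Descriptive note: these loads gather each lane's padded final block
+     32 bytes at a time, so ws34[4U * k + j] receives bytes 32*k .. 32*k + 31
+     of lane j, where lanes 0..3 are b06, b16, b26 and b36. */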
+ ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -6355,25 +6412,34 @@ Hacl_Hash_SHA3_Simd256_sha3_256( } for (uint32_t i = 0U; i < rateInBytes / 32U; i++) { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + } + if (rateInBytes % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U, + 
rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 32U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 64U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 96U, + rateInBytes % 32U * sizeof (uint8_t)); } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -6714,29 +6780,39 @@ Hacl_Hash_SHA3_Simd256_sha3_256( memcpy(b2 + 32U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); memcpy(b3 + 32U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + if (remOut % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 32U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b1 + 32U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 32U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b2 + 32U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 64U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b3 + 32U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 96U, + remOut % 32U * sizeof (uint8_t)); + return; + } } void Hacl_Hash_SHA3_Simd256_sha3_384( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -7135,63 +7211,63 @@ Hacl_Hash_SHA3_Simd256_sha3_384( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - 
memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -7420,57 +7496,57 @@ Hacl_Hash_SHA3_Simd256_sha3_384( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b36 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + 
ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -8029,25 +8105,34 @@ Hacl_Hash_SHA3_Simd256_sha3_384( } for (uint32_t i = 0U; i < rateInBytes / 32U; i++) { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + } + if (rateInBytes % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 32U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 64U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 96U, + rateInBytes % 32U * sizeof (uint8_t)); } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -8388,29 +8473,39 @@ Hacl_Hash_SHA3_Simd256_sha3_384( memcpy(b2 + 48U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); memcpy(b3 + 48U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); } - uint32_t rem0 = remOut % 32U; - uint32_t j 
= remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + if (remOut % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 48U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b1 + 48U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 32U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b2 + 48U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 64U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b3 + 48U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 96U, + remOut % 32U * sizeof (uint8_t)); + return; + } } void Hacl_Hash_SHA3_Simd256_sha3_512( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -8809,63 +8904,63 @@ Hacl_Hash_SHA3_Simd256_sha3_512( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -9094,57 +9189,57 @@ Hacl_Hash_SHA3_Simd256_sha3_512( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b36 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -9703,25 +9798,34 @@ Hacl_Hash_SHA3_Simd256_sha3_512( } for (uint32_t i = 0U; i < rateInBytes / 32U; i++) { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof 
(uint8_t)); + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); + } + if (rateInBytes % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 32U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 64U, + rateInBytes % 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, + hbuf + rateInBytes / 32U * 128U + 96U, + rateInBytes % 32U * sizeof (uint8_t)); } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -10062,15 +10166,25 @@ Hacl_Hash_SHA3_Simd256_sha3_512( memcpy(b2 + 64U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); memcpy(b3 + 64U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + if (remOut % 32U > 0U) + { + uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 64U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b1 + 64U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 32U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b2 + 64U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 64U, + remOut % 32U * sizeof (uint8_t)); + memcpy(b3 + 64U - remOut + remOut / 32U * 32U, + hbuf + remOut / 32U * 128U + 96U, + remOut % 32U * sizeof (uint8_t)); + return; + } } diff --git a/tests/sha3.cc b/tests/sha3.cc index 858c1542..cc88a2e8 100644 --- a/tests/sha3.cc +++ b/tests/sha3.cc @@ -178,7 +178,7 @@ TEST(ApiSuite, ApiTest) uint8_t digest[HACL_HASH_SHA3_256_DIGEST_LENGTH]; - Hacl_Hash_SHA3_Scalar_sha3_256(message_size, (uint8_t*)message, digest); + 
Hacl_Hash_SHA3_Scalar_sha3_256(digest, (uint8_t*)message, message_size); // ANCHOR_END(example scalar_sha3_256) bytes expected_digest = from_hex( @@ -204,7 +204,7 @@ TEST(ApiSuite, ApiTest) uint8_t digest[42]; Hacl_Hash_SHA3_Scalar_shake128( - message_size, (uint8_t*)message, digest_size, digest); + digest, digest_size, (uint8_t*)message, message_size); // ANCHOR_END(example scalar_shake128) bytes expected_digest = @@ -232,15 +232,15 @@ TEST(ApiSuite, ApiTest) uint8_t digest2[HACL_HASH_SHA3_256_DIGEST_LENGTH]; uint8_t digest3[HACL_HASH_SHA3_256_DIGEST_LENGTH]; - Hacl_Hash_SHA3_Simd256_sha3_256(message_size, + Hacl_Hash_SHA3_Simd256_sha3_256(digest0, + digest1, + digest2, + digest3, (uint8_t*)message, (uint8_t*)message, (uint8_t*)message, (uint8_t*)message, - digest0, - digest1, - digest2, - digest3); + message_size); // ANCHOR_END(example vec256_sha3_256) bytes expected_digest = from_hex( @@ -283,16 +283,16 @@ TEST(ApiSuite, ApiTest) uint8_t digest2[42]; uint8_t digest3[42]; - Hacl_Hash_SHA3_Simd256_shake128(message_size, + Hacl_Hash_SHA3_Simd256_shake128(digest0, + digest1, + digest2, + digest3, + digest_size, (uint8_t*)message0, (uint8_t*)message1, (uint8_t*)message2, (uint8_t*)message3, - digest_size, - digest0, - digest1, - digest2, - digest3); + message_size); // ANCHOR_END(example vec256_shake128) bytes expected_digest0 = from_hex( @@ -356,16 +356,16 @@ TEST_P(Sha3KAT, TryKAT) bytes digest(test_case.md.size(), 0); if (test_case.md.size() == 224 / 8) { Hacl_Hash_SHA3_Scalar_sha3_224( - test_case.msg.size(), test_case.msg.data(), digest.data()); + digest.data(), test_case.msg.data(), test_case.msg.size()); } else if (test_case.md.size() == 256 / 8) { Hacl_Hash_SHA3_Scalar_sha3_256( - test_case.msg.size(), test_case.msg.data(), digest.data()); + digest.data(), test_case.msg.data(), test_case.msg.size()); } else if (test_case.md.size() == 384 / 8) { Hacl_Hash_SHA3_Scalar_sha3_384( - test_case.msg.size(), test_case.msg.data(), digest.data()); + digest.data(), test_case.msg.data(), test_case.msg.size()); } else if (test_case.md.size() == 512 / 8) { Hacl_Hash_SHA3_Scalar_sha3_512( - test_case.msg.size(), test_case.msg.data(), digest.data()); + digest.data(), test_case.msg.data(), test_case.msg.size()); } EXPECT_EQ(test_case.md, digest) << bytes_to_hex(test_case.md) << std::endl @@ -380,45 +380,45 @@ TEST_P(Sha3KAT, TryKAT) bytes digest2(test_case.md.size(), 0); bytes digest3(test_case.md.size(), 0); if (test_case.md.size() == 224 / 8) { - Hacl_Hash_SHA3_Simd256_sha3_224(test_case.msg.size(), + Hacl_Hash_SHA3_Simd256_sha3_224(digest0.data(), + digest1.data(), + digest2.data(), + digest3.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.data(), + test_case.msg.size()); + } else if (test_case.md.size() == 256 / 8) { + Hacl_Hash_SHA3_Simd256_sha3_256(digest0.data(), digest1.data(), digest2.data(), - digest3.data()); - } else if (test_case.md.size() == 256 / 8) { - Hacl_Hash_SHA3_Simd256_sha3_256(test_case.msg.size(), + digest3.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.data(), + test_case.msg.size()); + } else if (test_case.md.size() == 384 / 8) { + Hacl_Hash_SHA3_Simd256_sha3_384(digest0.data(), digest1.data(), digest2.data(), - digest3.data()); - } else if (test_case.md.size() == 384 / 8) { - Hacl_Hash_SHA3_Simd256_sha3_384(test_case.msg.size(), + digest3.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.data(), + 
test_case.msg.size()); + } else if (test_case.md.size() == 512 / 8) { + Hacl_Hash_SHA3_Simd256_sha3_512(digest0.data(), digest1.data(), digest2.data(), - digest3.data()); - } else if (test_case.md.size() == 512 / 8) { - Hacl_Hash_SHA3_Simd256_sha3_512(test_case.msg.size(), + digest3.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.data(), - digest1.data(), - digest2.data(), - digest3.data()); + test_case.msg.size()); } EXPECT_EQ(test_case.md, digest0) << bytes_to_hex(test_case.md) << std::endl @@ -468,20 +468,20 @@ TEST_P(ShakeKAT, TryKAT) if (test_case.md.size() == 128 / 8) { bytes digest(test_case.md.size(), 128 / 8); - Hacl_Hash_SHA3_Scalar_shake128(test_case.msg.size(), - test_case.msg.data(), + Hacl_Hash_SHA3_Scalar_shake128(digest.data(), digest.size(), - digest.data()); + test_case.msg.data(), + test_case.msg.size()); EXPECT_EQ(test_case.md, digest) << bytes_to_hex(test_case.md) << std::endl << bytes_to_hex(digest) << std::endl; } else if (test_case.md.size() == 256 / 8) { bytes digest(test_case.md.size(), 256 / 8); - Hacl_Hash_SHA3_Scalar_shake256(test_case.msg.size(), - test_case.msg.data(), + Hacl_Hash_SHA3_Scalar_shake256(digest.data(), digest.size(), - digest.data()); + test_case.msg.data(), + test_case.msg.size()); EXPECT_EQ(test_case.md, digest) << bytes_to_hex(test_case.md) << std::endl << bytes_to_hex(digest) << std::endl; @@ -497,16 +497,16 @@ TEST_P(ShakeKAT, TryKAT) bytes digest2(test_case.md.size(), 128 / 8); bytes digest3(test_case.md.size(), 128 / 8); - Hacl_Hash_SHA3_Simd256_shake128(test_case.msg.size(), + Hacl_Hash_SHA3_Simd256_shake128(digest0.data(), + digest1.data(), + digest2.data(), + digest3.data(), + digest0.size(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.size(), - digest0.data(), - digest1.data(), - digest2.data(), - digest3.data()); + test_case.msg.size()); EXPECT_EQ(test_case.md, digest0) << bytes_to_hex(test_case.md) << std::endl @@ -526,16 +526,16 @@ TEST_P(ShakeKAT, TryKAT) bytes digest2(test_case.md.size(), 256 / 8); bytes digest3(test_case.md.size(), 256 / 8); - Hacl_Hash_SHA3_Simd256_shake256(test_case.msg.size(), + Hacl_Hash_SHA3_Simd256_shake256(digest0.data(), + digest1.data(), + digest2.data(), + digest3.data(), + digest0.size(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.size(), - digest0.data(), - digest1.data(), - digest2.data(), - digest3.data()); + test_case.msg.size()); EXPECT_EQ(test_case.md, digest0) << bytes_to_hex(test_case.md) << std::endl From 34a673651b5ca720793c6e86d0ca43f75616b5ba Mon Sep 17 00:00:00 2001 From: Maamoun TK Date: Thu, 14 Dec 2023 18:41:34 +0200 Subject: [PATCH 2/6] Expose shake128_absorb/squeeze functions --- include/Hacl_Hash_SHA3_Scalar.h | 14 + include/Hacl_Hash_SHA3_Simd256.h | 24 + include/msvc/Hacl_Hash_SHA3_Scalar.h | 14 + include/msvc/Hacl_Hash_SHA3_Simd256.h | 24 + src/Hacl_Hash_SHA3_Scalar.c | 402 +++++++ src/Hacl_Hash_SHA3_Simd256.c | 1434 +++++++++++++++++++++++++ 6 files changed, 1912 insertions(+) diff --git a/include/Hacl_Hash_SHA3_Scalar.h b/include/Hacl_Hash_SHA3_Scalar.h index 2063da71..4b893cd8 100644 --- a/include/Hacl_Hash_SHA3_Scalar.h +++ b/include/Hacl_Hash_SHA3_Scalar.h @@ -59,6 +59,20 @@ void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t in void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen); +uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void); + +void 
Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s); + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen); + +void +Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +); + #if defined(__cplusplus) } #endif diff --git a/include/Hacl_Hash_SHA3_Simd256.h b/include/Hacl_Hash_SHA3_Simd256.h index 22efc736..534a8899 100644 --- a/include/Hacl_Hash_SHA3_Simd256.h +++ b/include/Hacl_Hash_SHA3_Simd256.h @@ -136,6 +136,30 @@ Hacl_Hash_SHA3_Simd256_sha3_512( uint32_t inputByteLen ); +uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void); + +void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s); + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +); + #if defined(__cplusplus) } #endif diff --git a/include/msvc/Hacl_Hash_SHA3_Scalar.h b/include/msvc/Hacl_Hash_SHA3_Scalar.h index 2063da71..4b893cd8 100644 --- a/include/msvc/Hacl_Hash_SHA3_Scalar.h +++ b/include/msvc/Hacl_Hash_SHA3_Scalar.h @@ -59,6 +59,20 @@ void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t in void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen); +uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void); + +void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s); + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen); + +void +Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +); + #if defined(__cplusplus) } #endif diff --git a/include/msvc/Hacl_Hash_SHA3_Simd256.h b/include/msvc/Hacl_Hash_SHA3_Simd256.h index 22efc736..534a8899 100644 --- a/include/msvc/Hacl_Hash_SHA3_Simd256.h +++ b/include/msvc/Hacl_Hash_SHA3_Simd256.h @@ -136,6 +136,30 @@ Hacl_Hash_SHA3_Simd256_sha3_512( uint32_t inputByteLen ); +uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void); + +void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s); + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +); + #if defined(__cplusplus) } #endif diff --git a/src/Hacl_Hash_SHA3_Scalar.c b/src/Hacl_Hash_SHA3_Scalar.c index 724426eb..19a1936a 100644 --- a/src/Hacl_Hash_SHA3_Scalar.c +++ b/src/Hacl_Hash_SHA3_Scalar.c @@ -2381,3 +2381,405 @@ void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t in memcpy(output + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); } +uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t)); + return buf; +} + +void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint8_t *b0 = input; + 
uint8_t *bl0 = b_; + memcpy(bl0, b0 + i0 * 168U, 168U * sizeof (uint8_t)); + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } + uint32_t rem = inputByteLen % 168U; + uint8_t b2[256U] = { 0U }; + uint8_t *b_ = b2; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + 
inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[rem] = 0x1FU; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u0 = load64_le(b); + ws[0U] = u0; + uint64_t u1 = load64_le(b + 8U); + ws[1U] = u1; + uint64_t u2 = load64_le(b + 16U); + ws[2U] = u2; + uint64_t u3 = load64_le(b + 24U); + ws[3U] = u3; + uint64_t u4 = load64_le(b + 32U); + ws[4U] = u4; + uint64_t u5 = load64_le(b + 40U); + ws[5U] = u5; + uint64_t u6 = load64_le(b + 48U); + ws[6U] = u6; + uint64_t u7 = load64_le(b + 56U); + ws[7U] = u7; + uint64_t u8 = load64_le(b + 64U); + ws[8U] = u8; + uint64_t u9 = load64_le(b + 72U); + ws[9U] = u9; + uint64_t u10 = load64_le(b + 80U); + ws[10U] = u10; + uint64_t u11 = load64_le(b + 88U); + ws[11U] = u11; + uint64_t u12 = load64_le(b + 96U); + ws[12U] = u12; + uint64_t u13 = load64_le(b + 104U); + ws[13U] = u13; + uint64_t u14 = load64_le(b + 112U); + ws[14U] = u14; + uint64_t u15 = load64_le(b + 120U); + ws[15U] = u15; + uint64_t u16 = load64_le(b + 128U); + ws[16U] = u16; + uint64_t u17 = load64_le(b + 136U); + ws[17U] = u17; + uint64_t u18 = load64_le(b + 144U); + ws[18U] = u18; + uint64_t u19 = load64_le(b + 152U); + ws[19U] = u19; + uint64_t u20 = load64_le(b + 160U); + ws[20U] = u20; + uint64_t u21 = load64_le(b + 168U); + ws[21U] = u21; + uint64_t u22 = load64_le(b + 176U); + ws[22U] = u22; + uint64_t u23 = load64_le(b + 184U); + ws[23U] = u23; + uint64_t u24 = load64_le(b + 192U); + ws[24U] = u24; + uint64_t u25 = load64_le(b + 200U); + ws[25U] = u25; + uint64_t u26 = load64_le(b + 208U); + ws[26U] = u26; + uint64_t u27 = load64_le(b + 216U); + ws[27U] = u27; + uint64_t u28 = load64_le(b + 224U); + ws[28U] = u28; + uint64_t u29 = load64_le(b + 232U); + ws[29U] = u29; + uint64_t u30 = load64_le(b + 240U); + ws[30U] = u30; + uint64_t u31 = load64_le(b + 248U); + ws[31U] = u31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[167U] = 0x80U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b1 = b4; + uint64_t u = load64_le(b1); + ws0[0U] = u; + uint64_t u32 = load64_le(b1 + 8U); + ws0[1U] = u32; + uint64_t u33 = load64_le(b1 + 16U); + ws0[2U] = u33; + uint64_t u34 = load64_le(b1 + 24U); + ws0[3U] = u34; + uint64_t u35 = load64_le(b1 + 32U); + ws0[4U] = u35; + uint64_t u36 = load64_le(b1 + 40U); + ws0[5U] = u36; + uint64_t u37 = load64_le(b1 + 48U); + ws0[6U] = u37; + uint64_t u38 = load64_le(b1 + 56U); + ws0[7U] = u38; + uint64_t u39 = load64_le(b1 + 64U); + ws0[8U] = u39; + uint64_t u40 = load64_le(b1 + 72U); + ws0[9U] = u40; + uint64_t u41 = load64_le(b1 + 80U); + ws0[10U] = u41; + uint64_t u42 = load64_le(b1 + 88U); + ws0[11U] = u42; + uint64_t u43 = load64_le(b1 + 96U); + ws0[12U] = u43; + uint64_t u44 = load64_le(b1 + 104U); + ws0[13U] = u44; + uint64_t u45 = load64_le(b1 + 112U); + ws0[14U] = u45; + uint64_t u46 = load64_le(b1 + 120U); + ws0[15U] = u46; + uint64_t u47 = load64_le(b1 + 128U); + ws0[16U] = u47; + uint64_t u48 = load64_le(b1 + 136U); + ws0[17U] = u48; + uint64_t u49 = load64_le(b1 + 144U); + ws0[18U] = u49; + uint64_t u50 = load64_le(b1 + 152U); + ws0[19U] = u50; + uint64_t u51 = load64_le(b1 + 160U); + ws0[20U] = u51; + uint64_t u52 = load64_le(b1 + 168U); + ws0[21U] = u52; + uint64_t u53 = load64_le(b1 + 176U); + ws0[22U] = u53; + uint64_t u54 = load64_le(b1 + 184U); + ws0[23U] = u54; + uint64_t u55 = load64_le(b1 + 192U); + ws0[24U] = u55; + uint64_t u56 = load64_le(b1 + 200U); + ws0[25U] = u56; + uint64_t u57 = load64_le(b1 + 208U); + 
ws0[26U] = u57; + uint64_t u58 = load64_le(b1 + 216U); + ws0[27U] = u58; + uint64_t u59 = load64_le(b1 + 224U); + ws0[28U] = u59; + uint64_t u60 = load64_le(b1 + 232U); + ws0[29U] = u60; + uint64_t u61 = load64_le(b1 + 240U); + ws0[30U] = u61; + uint64_t u62 = load64_le(b1 + 248U); + ws0[31U] = u62; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws0[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____2 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____2 << 1U | uu____2 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i1 + 5U * i] = state[i1 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____3 = current; + state[_Y] = uu____3 << r | uu____3 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + state[0U] = state[0U] ^ c; + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, state, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(output + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = 
v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + diff --git a/src/Hacl_Hash_SHA3_Simd256.c b/src/Hacl_Hash_SHA3_Simd256.c index fbe195f5..a401bd71 100644 --- a/src/Hacl_Hash_SHA3_Simd256.c +++ b/src/Hacl_Hash_SHA3_Simd256.c @@ -10188,3 +10188,1437 @@ Hacl_Hash_SHA3_Simd256_sha3_512( } } +uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(100U, sizeof (uint64_t)); + return buf; +} + +void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl1, b11 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl2, b21 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl3, b31 + i0 * 168U, 168U * sizeof (uint8_t)); + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b_.snd.snd.snd; + uint8_t *b2 = b_.snd.snd.fst; + uint8_t *b1 = b_.snd.fst; + uint8_t *b0 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws[27U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + 
Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = 
ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + 
state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } + uint32_t rem = inputByteLen % 168U; + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + 
ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws32 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); 
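+  /* Note on the interleave sequence used throughout this function: each
+     group of four ws registers initially holds four consecutive 8-byte
+     words from one of the four input buffers. The interleave_low64/
+     interleave_high64 pair followed by interleave_low128/
+     interleave_high128 performs a 4x4 transpose of 64-bit lanes, so that
+     afterwards each register holds the word at one common offset across
+     all four buffers -- the lane layout the vectorized Keccak state
+     expects when the loop below XORs ws[i] into state[i]. The swapped
+     write-back (ws1 takes v2__, ws2 takes v1__) compensates for the
+     low128/high128 steps grouping even and odd word indices. */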
+ Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws[0U] = ws00; + ws[1U] = ws110; + ws[2U] = ws210; + ws[3U] = ws32; + ws[4U] = ws40; + ws[5U] = ws50; + ws[6U] = ws60; + ws[7U] = ws70; + ws[8U] = ws80; + ws[9U] = ws90; + ws[10U] = ws100; + ws[11U] = ws111; + ws[12U] = ws120; + ws[13U] = ws130; + ws[14U] = ws140; + ws[15U] = ws150; + ws[16U] = ws160; + ws[17U] = ws170; + ws[18U] = ws180; + ws[19U] = ws190; + ws[20U] = ws200; + ws[21U] = ws211; + ws[22U] = ws220; + ws[23U] = ws230; + ws[24U] = ws240; + ws[25U] = ws250; + ws[26U] = ws260; + ws[27U] = ws270; + ws[28U] = ws280; + ws[29U] = ws290; + ws[30U] = ws300; + ws[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = 
Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[167U] = 0x80U; + b15[167U] = 0x80U; + b25[167U] = 0x80U; + b35[167U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws33[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b2 = b.snd.snd.fst; + uint8_t *b1 = b.snd.fst; + uint8_t *b0 = b.fst; + ws33[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws33[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws33[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws33[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws33[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws33[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws33[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws33[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws33[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws33[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws33[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws33[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws33[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws33[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws33[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws33[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws33[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws33[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws33[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws33[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws33[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws33[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws33[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws33[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws33[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws33[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws33[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws33[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws33[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws33[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws33[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws33[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v08 = ws33[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws33[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws33[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws33[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, 
v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws33[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws33[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws33[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws33[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws33[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws33[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws33[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws33[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws33[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws33[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws33[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws33[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + 
Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws33[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws33[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws33[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws33[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws33[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws33[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws33[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws33[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws33[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws33[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws33[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws33[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws33[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws33[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws33[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws33[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws33[0U] = ws0; + ws33[1U] = ws1; + ws33[2U] = ws2; + ws33[3U] = ws3; + ws33[4U] = ws4; + ws33[5U] = ws5; + ws33[6U] = ws6; + ws33[7U] = ws7; + ws33[8U] = ws8; + ws33[9U] = ws9; + ws33[10U] = ws10; + ws33[11U] = ws11; + ws33[12U] = ws12; + ws33[13U] = ws13; + ws33[14U] = ws14; + ws33[15U] = ws15; + ws33[16U] = ws16; + ws33[17U] = ws17; + ws33[18U] = ws18; + ws33[19U] = ws19; + ws33[20U] = ws20; + ws33[21U] = ws21; + ws33[22U] = ws22; + ws33[23U] = ws23; + ws33[24U] = ws24; + ws33[25U] = ws25; + ws33[26U] = ws26; + ws33[27U] = ws27; + ws33[28U] = ws28; + ws33[29U] = ws29; + ws33[30U] = ws30; + ws33[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws33[i]); + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____17 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____18 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____19 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____17, + Lib_IntVector_Intrinsics_vec256_xor(uu____18, + Lib_IntVector_Intrinsics_vec256_xor(uu____19, + 
Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____20 = _C[(i1 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____21 = _C[(i1 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____20, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____21, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____21, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i1 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i1 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____22 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____22, r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____22, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____23 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____24 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____23, + Lib_IntVector_Intrinsics_vec256_and(uu____24, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____25 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____26 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____25, + Lib_IntVector_Intrinsics_vec256_and(uu____26, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____27 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____28 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____27, + Lib_IntVector_Intrinsics_vec256_and(uu____28, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____29 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____30 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____29, + Lib_IntVector_Intrinsics_vec256_and(uu____30, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____31 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____32 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____31, + Lib_IntVector_Intrinsics_vec256_and(uu____32, state[1U + 5U * i])); + state[0U + 5U * i] = v015; + state[1U + 5U * i] = v115; + state[2U + 5U * i] = v215; + state[3U + 5U * i] = v315; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + Lib_IntVector_Intrinsics_vec256 uu____33 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____33, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + 
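  /* Descriptive note on the block that follows: each squeeze round copies the
     interleaved 25-lane state into ws, undoes the 4-way lane interleaving with
     the 64-/128-bit interleave shuffles below, stores the de-interleaved lanes
     to hbuf, and then emits one 168-byte rate block to each of the four output
     streams before running the 24-round permutation. */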
KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, state, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + 
v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + 
Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + KRML_MAYBE_FOR5(i, + 0U, 
+ 5U, + 1U, + uint8_t *b0 = output0; + uint8_t *b1 = output1; + uint8_t *b2 = output2; + uint8_t *b3 = output3; + memcpy(b0 + i0 * 168U + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); + memcpy(b1 + i0 * 168U + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); + memcpy(b2 + i0 * 168U + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); + memcpy(b3 + i0 * 168U + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t));); + uint8_t *b0 = output0; + uint8_t *b1 = output1; + uint8_t *b2 = output2; + uint8_t *b3 = output3; + memcpy(b0 + i0 * 168U + 160U, hbuf + 640U, 8U * sizeof (uint8_t)); + memcpy(b1 + i0 * 168U + 160U, hbuf + 672U, 8U * sizeof (uint8_t)); + memcpy(b2 + i0 * 168U + 160U, hbuf + 704U, 8U * sizeof (uint8_t)); + memcpy(b3 + i0 * 168U + 160U, hbuf + 736U, 8U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 
uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } +} + From f1d02ee1245a46b2b52150719c6a9feb2e0a3642 Mon Sep 17 00:00:00 2001 From: mamonet Date: Mon, 15 Jan 2024 20:21:19 +0200 Subject: [PATCH 3/6] Add SHA3 absorb nblocks/last API --- include/Hacl_Hash_SHA3_Scalar.h | 14 + include/Hacl_Hash_SHA3_Simd256.h | 14 +- include/msvc/Hacl_Hash_SHA3_Scalar.h | 14 + include/msvc/Hacl_Hash_SHA3_Simd256.h | 14 +- src/Hacl_Hash_SHA3_Scalar.c | 338 +++ src/Hacl_Hash_SHA3_Simd256.c | 1457 ++++------ src/msvc/Hacl_Hash_SHA3_Scalar.c | 760 ++++- src/msvc/Hacl_Hash_SHA3_Simd256.c | 3685 +++++++++++++++++-------- 8 files changed, 4216 insertions(+), 2080 deletions(-) diff --git a/include/Hacl_Hash_SHA3_Scalar.h b/include/Hacl_Hash_SHA3_Scalar.h index 4b893cd8..d0b7f253 100644 --- a/include/Hacl_Hash_SHA3_Scalar.h +++ b/include/Hacl_Hash_SHA3_Scalar.h @@ -63,6 +63,20 @@ uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void); void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s); +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_last( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +); + void Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen); diff --git a/include/Hacl_Hash_SHA3_Simd256.h b/include/Hacl_Hash_SHA3_Simd256.h index 534a8899..d231e273 100644 --- a/include/Hacl_Hash_SHA3_Simd256.h +++ b/include/Hacl_Hash_SHA3_Simd256.h @@ -35,6 +35,8 @@ extern "C" { #include "krml/lowstar_endianness.h" #include "krml/internal/target.h" +#include "libintvector.h" + typedef struct K____uint8_t___uint8_t__s { uint8_t *fst; @@ -141,7 +143,17 @@ uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void); void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s); void -Hacl_Hash_SHA3_Simd256_shake128_absorb( +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_last( Lib_IntVector_Intrinsics_vec256 *state, uint8_t *input0, uint8_t *input1, diff --git a/include/msvc/Hacl_Hash_SHA3_Scalar.h b/include/msvc/Hacl_Hash_SHA3_Scalar.h index 4b893cd8..d0b7f253 100644 --- a/include/msvc/Hacl_Hash_SHA3_Scalar.h +++ b/include/msvc/Hacl_Hash_SHA3_Scalar.h @@ -63,6 +63,20 @@ uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void); void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s); +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( + uint64_t *state, + uint8_t *input, + 
uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_last( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +); + void Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen); diff --git a/include/msvc/Hacl_Hash_SHA3_Simd256.h b/include/msvc/Hacl_Hash_SHA3_Simd256.h index 534a8899..d231e273 100644 --- a/include/msvc/Hacl_Hash_SHA3_Simd256.h +++ b/include/msvc/Hacl_Hash_SHA3_Simd256.h @@ -35,6 +35,8 @@ extern "C" { #include "krml/lowstar_endianness.h" #include "krml/internal/target.h" +#include "libintvector.h" + typedef struct K____uint8_t___uint8_t__s { uint8_t *fst; @@ -141,7 +143,17 @@ uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void); void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s); void -Hacl_Hash_SHA3_Simd256_shake128_absorb( +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_last( Lib_IntVector_Intrinsics_vec256 *state, uint8_t *input0, uint8_t *input1, diff --git a/src/Hacl_Hash_SHA3_Scalar.c b/src/Hacl_Hash_SHA3_Scalar.c index 19a1936a..7393ebf2 100644 --- a/src/Hacl_Hash_SHA3_Scalar.c +++ b/src/Hacl_Hash_SHA3_Scalar.c @@ -2392,6 +2392,344 @@ void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s) KRML_HOST_FREE(s); } +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint8_t *b0 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i0 * 168U, 168U * sizeof (uint8_t)); + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = 
state[i] ^ ws[i]; + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_last( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +) +{ + uint32_t rem = inputByteLen % 168U; + uint8_t b2[256U] = { 0U }; + uint8_t *b_ = b2; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[rem] = 0x1FU; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u0 = load64_le(b); + ws[0U] = u0; + uint64_t u1 = load64_le(b + 8U); + ws[1U] = u1; + uint64_t u2 = load64_le(b + 16U); + ws[2U] = u2; + uint64_t u3 = load64_le(b + 24U); + ws[3U] = u3; + uint64_t u4 = load64_le(b + 32U); + ws[4U] = u4; + uint64_t u5 = load64_le(b + 40U); + ws[5U] = u5; + uint64_t u6 = load64_le(b + 48U); + ws[6U] = u6; + uint64_t u7 = load64_le(b + 56U); + ws[7U] = u7; + uint64_t u8 = load64_le(b + 64U); + ws[8U] = u8; + uint64_t u9 = load64_le(b + 72U); + ws[9U] = u9; + uint64_t u10 = load64_le(b + 80U); + ws[10U] = u10; + uint64_t u11 = load64_le(b + 88U); + ws[11U] = u11; + uint64_t u12 = load64_le(b + 96U); + ws[12U] = u12; + uint64_t u13 = load64_le(b + 104U); + ws[13U] = u13; + uint64_t u14 = load64_le(b + 112U); + ws[14U] = u14; + uint64_t u15 = load64_le(b + 120U); + ws[15U] = u15; + uint64_t u16 = load64_le(b + 128U); + ws[16U] = u16; + uint64_t u17 = load64_le(b + 136U); + ws[17U] = u17; + uint64_t u18 = load64_le(b + 144U); + ws[18U] = u18; + uint64_t u19 = load64_le(b + 152U); + ws[19U] = u19; + uint64_t u20 = load64_le(b + 160U); + ws[20U] = u20; + uint64_t u21 = load64_le(b + 168U); + ws[21U] = u21; + uint64_t u22 = load64_le(b + 176U); + ws[22U] = u22; + uint64_t u23 = load64_le(b + 184U); + ws[23U] = u23; + uint64_t u24 = load64_le(b + 192U); + ws[24U] = u24; + uint64_t u25 = load64_le(b + 200U); + ws[25U] = u25; + uint64_t u26 = load64_le(b + 208U); + ws[26U] = u26; + uint64_t u27 = load64_le(b + 216U); + ws[27U] = u27; + uint64_t u28 = load64_le(b + 224U); + ws[28U] = u28; + uint64_t u29 = load64_le(b + 232U); + ws[29U] = u29; + uint64_t u30 = load64_le(b + 
240U); + ws[30U] = u30; + uint64_t u31 = load64_le(b + 248U); + ws[31U] = u31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[167U] = 0x80U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b1 = b4; + uint64_t u = load64_le(b1); + ws0[0U] = u; + uint64_t u32 = load64_le(b1 + 8U); + ws0[1U] = u32; + uint64_t u33 = load64_le(b1 + 16U); + ws0[2U] = u33; + uint64_t u34 = load64_le(b1 + 24U); + ws0[3U] = u34; + uint64_t u35 = load64_le(b1 + 32U); + ws0[4U] = u35; + uint64_t u36 = load64_le(b1 + 40U); + ws0[5U] = u36; + uint64_t u37 = load64_le(b1 + 48U); + ws0[6U] = u37; + uint64_t u38 = load64_le(b1 + 56U); + ws0[7U] = u38; + uint64_t u39 = load64_le(b1 + 64U); + ws0[8U] = u39; + uint64_t u40 = load64_le(b1 + 72U); + ws0[9U] = u40; + uint64_t u41 = load64_le(b1 + 80U); + ws0[10U] = u41; + uint64_t u42 = load64_le(b1 + 88U); + ws0[11U] = u42; + uint64_t u43 = load64_le(b1 + 96U); + ws0[12U] = u43; + uint64_t u44 = load64_le(b1 + 104U); + ws0[13U] = u44; + uint64_t u45 = load64_le(b1 + 112U); + ws0[14U] = u45; + uint64_t u46 = load64_le(b1 + 120U); + ws0[15U] = u46; + uint64_t u47 = load64_le(b1 + 128U); + ws0[16U] = u47; + uint64_t u48 = load64_le(b1 + 136U); + ws0[17U] = u48; + uint64_t u49 = load64_le(b1 + 144U); + ws0[18U] = u49; + uint64_t u50 = load64_le(b1 + 152U); + ws0[19U] = u50; + uint64_t u51 = load64_le(b1 + 160U); + ws0[20U] = u51; + uint64_t u52 = load64_le(b1 + 168U); + ws0[21U] = u52; + uint64_t u53 = load64_le(b1 + 176U); + ws0[22U] = u53; + uint64_t u54 = load64_le(b1 + 184U); + ws0[23U] = u54; + uint64_t u55 = load64_le(b1 + 192U); + ws0[24U] = u55; + uint64_t u56 = load64_le(b1 + 200U); + ws0[25U] = u56; + uint64_t u57 = load64_le(b1 + 208U); + ws0[26U] = u57; + uint64_t u58 = load64_le(b1 + 216U); + ws0[27U] = u58; + uint64_t u59 = load64_le(b1 + 224U); + ws0[28U] = u59; + uint64_t u60 = load64_le(b1 + 232U); + ws0[29U] = u60; + uint64_t u61 = load64_le(b1 + 240U); + ws0[30U] = u61; + uint64_t u62 = load64_le(b1 + 248U); + ws0[31U] = u62; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws0[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i1 + 5U * i] = state[i1 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = 
Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + state[0U] = state[0U] ^ c; + } +} + void Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen) { diff --git a/src/Hacl_Hash_SHA3_Simd256.c b/src/Hacl_Hash_SHA3_Simd256.c index a401bd71..9748a375 100644 --- a/src/Hacl_Hash_SHA3_Simd256.c +++ b/src/Hacl_Hash_SHA3_Simd256.c @@ -26,7 +26,6 @@ #include "Hacl_Hash_SHA3_Simd256.h" #include "internal/Hacl_Hash_SHA3_Scalar.h" -#include "libintvector.h" void Hacl_Hash_SHA3_Simd256_shake128( @@ -738,42 +737,42 @@ Hacl_Hash_SHA3_Simd256_shake128( b25[rateInBytes - 1U] = 0x80U; b35[rateInBytes - 1U] = 0x80U; KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b36 = b.snd.snd.snd; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; 
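/* Illustrative sketch (not part of the generated code): one way the absorb
   nblocks/last API introduced by this patch can be driven for a streaming
   4-way SHAKE128, using only functions whose signatures appear in this series.
   All other identifiers (shake128_x4_stream, in0..in3, out0..out3, inLen,
   outLen) are placeholders; outLen is assumed to be a multiple of the 168-byte
   rate, since squeeze_nblocks emits whole rate blocks only. */
#include "Hacl_Hash_SHA3_Simd256.h"

static void
shake128_x4_stream(uint8_t *in0, uint8_t *in1, uint8_t *in2, uint8_t *in3,
                   uint32_t inLen,
                   uint8_t *out0, uint8_t *out1, uint8_t *out2, uint8_t *out3,
                   uint32_t outLen)
{
  /* A zeroed 25-lane interleaved state is the Keccak initial state. */
  KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256
  st[25U] KRML_POST_ALIGN(32) = { 0U };
  /* Absorbs the inLen / 168 full blocks of each input stream. */
  Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(st, in0, in1, in2, in3, inLen);
  /* Absorbs the inLen % 168 trailing bytes and applies the 0x1F / 0x80
     padding; both calls take the whole input pointer and total length,
     mirroring the Scalar implementations above. */
  Hacl_Hash_SHA3_Simd256_shake128_absorb_last(st, in0, in1, in2, in3, inLen);
  /* Squeezes outLen / 168 rate blocks into each output stream. */
  Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks(st, out0, out1, out2, out3, outLen);
}
/* The Scalar variants (Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks/_last)
   follow the same pattern with a single input/output and a uint64_t state,
   e.g. one obtained from Hacl_Hash_SHA3_Scalar_state_malloc(). */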
Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -1295,71 +1294,49 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (rateInBytes % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 32U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 64U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 96U, - rateInBytes % 32U * sizeof (uint8_t)); - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -1654,72 +1631,49 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] 
= ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (remOut % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 32U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 64U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 96U, - remOut % 32U * sizeof (uint8_t)); - return; - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void @@ -2432,42 +2386,42 @@ Hacl_Hash_SHA3_Simd256_shake256( b25[rateInBytes - 1U] = 0x80U; b35[rateInBytes - 1U] = 0x80U; KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b36 = b.snd.snd.snd; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); - ws34[7U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -2989,71 +2943,49 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { 
Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (rateInBytes % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 32U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 64U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 96U, - rateInBytes % 32U * sizeof (uint8_t)); - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -3348,72 +3280,49 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - 
memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (remOut % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 32U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 64U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 96U, - remOut % 32U * sizeof (uint8_t)); - return; - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void @@ -4125,42 +4034,42 @@ Hacl_Hash_SHA3_Simd256_sha3_224( b25[rateInBytes - 1U] = 0x80U; b35[rateInBytes - 1U] = 0x80U; KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b36 = b.snd.snd.snd; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); 
+ ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -4682,71 +4591,49 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (rateInBytes % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 32U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 64U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 96U, - rateInBytes % 32U * sizeof (uint8_t)); - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + 
uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -5041,72 +4928,49 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (remOut % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 32U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 64U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 96U, - remOut % 32U * sizeof (uint8_t)); - return; - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 28U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 28U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 28U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void @@ -5818,42 +5682,42 @@ Hacl_Hash_SHA3_Simd256_sha3_256( b25[rateInBytes - 1U] = 0x80U; b35[rateInBytes - 1U] = 0x80U; KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b36 
= b.snd.snd.snd; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -6375,71 +6239,49 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] 
= ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (rateInBytes % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 32U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 64U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 96U, - rateInBytes % 32U * sizeof (uint8_t)); - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -6734,72 +6576,49 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + 
ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (remOut % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 32U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 64U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 96U, - remOut % 32U * sizeof (uint8_t)); - return; - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 32U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 32U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 32U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void @@ -7511,42 +7330,42 @@ Hacl_Hash_SHA3_Simd256_sha3_384( b25[rateInBytes - 1U] = 0x80U; b35[rateInBytes - 1U] = 0x80U; KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b36 = b.snd.snd.snd; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); ws34[17U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -8068,71 +7887,49 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (rateInBytes % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U, - rateInBytes % 32U * sizeof 
(uint8_t)); - memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 32U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 64U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 96U, - rateInBytes % 32U * sizeof (uint8_t)); - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -8427,72 +8224,49 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (remOut % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 32U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 64U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 96U, - remOut % 32U * sizeof (uint8_t)); - return; - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = 
rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 48U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 48U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 48U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void @@ -9204,42 +8978,42 @@ Hacl_Hash_SHA3_Simd256_sha3_512( b25[rateInBytes - 1U] = 0x80U; b35[rateInBytes - 1U] = 0x80U; KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b36 = b.snd.snd.snd; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b36 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -9761,71 +9535,49 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; 
Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (rateInBytes % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 32U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 64U, - rateInBytes % 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * rateInBytes + rateInBytes / 32U * 32U, - hbuf + rateInBytes / 32U * 128U + 96U, - rateInBytes % 32U * sizeof (uint8_t)); - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -10120,72 +9872,49 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] 
= ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - if (remOut % 32U > 0U) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 32U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 64U, - remOut % 32U * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + remOut / 32U * 32U, - hbuf + remOut / 32U * 128U + 96U, - remOut % 32U * sizeof (uint8_t)); - return; - } + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 64U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 64U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 64U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void) @@ -10200,7 +9929,7 @@ void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s) } void -Hacl_Hash_SHA3_Simd256_shake128_absorb( +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( Lib_IntVector_Intrinsics_vec256 *state, uint8_t *input0, uint8_t *input1, @@ -10591,6 +10320,18 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb( Lib_IntVector_Intrinsics_vec256_load64(c)); } } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_last( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ uint32_t rem = inputByteLen % 168U; uint8_t b00[256U] = { 0U }; uint8_t b10[256U] = { 0U }; @@ -11170,26 +10911,26 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb( 0U, 5U, 1U, - Lib_IntVector_Intrinsics_vec256 uu____17 = state[i + 0U]; - Lib_IntVector_Intrinsics_vec256 uu____18 = state[i + 5U]; - Lib_IntVector_Intrinsics_vec256 uu____19 = state[i + 10U]; + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; _C[i] = - Lib_IntVector_Intrinsics_vec256_xor(uu____17, - 
Lib_IntVector_Intrinsics_vec256_xor(uu____18, - Lib_IntVector_Intrinsics_vec256_xor(uu____19, + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); KRML_MAYBE_FOR5(i1, 0U, 5U, 1U, - Lib_IntVector_Intrinsics_vec256 uu____20 = _C[(i1 + 4U) % 5U]; - Lib_IntVector_Intrinsics_vec256 uu____21 = _C[(i1 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i1 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i1 + 1U) % 5U]; Lib_IntVector_Intrinsics_vec256 _D = - Lib_IntVector_Intrinsics_vec256_xor(uu____20, - Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____21, + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, 1U), - Lib_IntVector_Intrinsics_vec256_shift_right64(uu____21, 63U))); + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); KRML_MAYBE_FOR5(i, 0U, 5U, @@ -11202,60 +10943,60 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb( uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; - Lib_IntVector_Intrinsics_vec256 uu____22 = current; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; state[_Y] = - Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____22, r), - Lib_IntVector_Intrinsics_vec256_shift_right64(uu____22, 64U - r)); + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); current = temp; } KRML_MAYBE_FOR5(i, 0U, 5U, 1U, - Lib_IntVector_Intrinsics_vec256 uu____23 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; Lib_IntVector_Intrinsics_vec256 - uu____24 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); Lib_IntVector_Intrinsics_vec256 v015 = - Lib_IntVector_Intrinsics_vec256_xor(uu____23, - Lib_IntVector_Intrinsics_vec256_and(uu____24, state[2U + 5U * i])); - Lib_IntVector_Intrinsics_vec256 uu____25 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; Lib_IntVector_Intrinsics_vec256 - uu____26 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); Lib_IntVector_Intrinsics_vec256 v115 = - Lib_IntVector_Intrinsics_vec256_xor(uu____25, - Lib_IntVector_Intrinsics_vec256_and(uu____26, state[3U + 5U * i])); - Lib_IntVector_Intrinsics_vec256 uu____27 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; Lib_IntVector_Intrinsics_vec256 - uu____28 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); Lib_IntVector_Intrinsics_vec256 v215 = - Lib_IntVector_Intrinsics_vec256_xor(uu____27, - Lib_IntVector_Intrinsics_vec256_and(uu____28, state[4U + 5U * i])); - Lib_IntVector_Intrinsics_vec256 uu____29 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + 
Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; Lib_IntVector_Intrinsics_vec256 - uu____30 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); Lib_IntVector_Intrinsics_vec256 v315 = - Lib_IntVector_Intrinsics_vec256_xor(uu____29, - Lib_IntVector_Intrinsics_vec256_and(uu____30, state[0U + 5U * i])); - Lib_IntVector_Intrinsics_vec256 uu____31 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; Lib_IntVector_Intrinsics_vec256 - uu____32 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); Lib_IntVector_Intrinsics_vec256 v4 = - Lib_IntVector_Intrinsics_vec256_xor(uu____31, - Lib_IntVector_Intrinsics_vec256_and(uu____32, state[1U + 5U * i])); + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); state[0U + 5U * i] = v015; state[1U + 5U * i] = v115; state[2U + 5U * i] = v215; state[3U + 5U * i] = v315; state[4U + 5U * i] = v4;); uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; - Lib_IntVector_Intrinsics_vec256 uu____33 = state[0U]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; state[0U] = - Lib_IntVector_Intrinsics_vec256_xor(uu____33, + Lib_IntVector_Intrinsics_vec256_xor(uu____16, Lib_IntVector_Intrinsics_vec256_load64(c)); } } @@ -11468,61 +11209,49 @@ Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - KRML_MAYBE_FOR5(i, - 0U, - 5U, - 1U, - uint8_t *b0 = output0; - uint8_t *b1 = output1; - uint8_t *b2 = output2; - uint8_t *b3 = output3; - memcpy(b0 + i0 * 168U + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + i0 * 168U + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + i0 * 168U + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + i0 * 168U + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t));); uint8_t *b0 = output0; uint8_t *b1 = output1; uint8_t *b2 = output2; uint8_t *b3 = output3; - memcpy(b0 + i0 * 168U + 
160U, hbuf + 640U, 8U * sizeof (uint8_t)); - memcpy(b1 + i0 * 168U + 160U, hbuf + 672U, 8U * sizeof (uint8_t)); - memcpy(b2 + i0 * 168U + 160U, hbuf + 704U, 8U * sizeof (uint8_t)); - memcpy(b3 + i0 * 168U + 160U, hbuf + 736U, 8U * sizeof (uint8_t)); + memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + memcpy(b1 + i0 * 168U, hbuf + 256U, 168U * sizeof (uint8_t)); + memcpy(b2 + i0 * 168U, hbuf + 512U, 168U * sizeof (uint8_t)); + memcpy(b3 + i0 * 168U, hbuf + 768U, 168U * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; diff --git a/src/msvc/Hacl_Hash_SHA3_Scalar.c b/src/msvc/Hacl_Hash_SHA3_Scalar.c index 43d57482..7393ebf2 100644 --- a/src/msvc/Hacl_Hash_SHA3_Scalar.c +++ b/src/msvc/Hacl_Hash_SHA3_Scalar.c @@ -55,10 +55,10 @@ Hacl_Impl_SHA3_Vec_keccak_rndc[24U] = void Hacl_Hash_SHA3_Scalar_shake128( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ) { uint32_t rateInBytes = 168U; @@ -447,10 +447,10 @@ Hacl_Hash_SHA3_Scalar_shake128( void Hacl_Hash_SHA3_Scalar_shake256( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ) { uint32_t rateInBytes = 136U; @@ -837,7 +837,7 @@ Hacl_Hash_SHA3_Scalar_shake256( memcpy(output + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 144U; uint64_t s[25U] = { 0U }; @@ -1223,7 +1223,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 136U; uint64_t s[25U] = { 0U }; @@ -1609,7 +1609,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 104U; uint64_t s[25U] = { 0U }; @@ -1995,7 +1995,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 72U; uint64_t s[25U] = { 0U }; @@ -2381,3 +2381,743 @@ void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); } +uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t)); + return buf; +} + +void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + 
uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint8_t *b0 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i0 * 168U, 168U * sizeof (uint8_t)); + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_last( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +) +{ + 
uint32_t rem = inputByteLen % 168U; + uint8_t b2[256U] = { 0U }; + uint8_t *b_ = b2; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[rem] = 0x1FU; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u0 = load64_le(b); + ws[0U] = u0; + uint64_t u1 = load64_le(b + 8U); + ws[1U] = u1; + uint64_t u2 = load64_le(b + 16U); + ws[2U] = u2; + uint64_t u3 = load64_le(b + 24U); + ws[3U] = u3; + uint64_t u4 = load64_le(b + 32U); + ws[4U] = u4; + uint64_t u5 = load64_le(b + 40U); + ws[5U] = u5; + uint64_t u6 = load64_le(b + 48U); + ws[6U] = u6; + uint64_t u7 = load64_le(b + 56U); + ws[7U] = u7; + uint64_t u8 = load64_le(b + 64U); + ws[8U] = u8; + uint64_t u9 = load64_le(b + 72U); + ws[9U] = u9; + uint64_t u10 = load64_le(b + 80U); + ws[10U] = u10; + uint64_t u11 = load64_le(b + 88U); + ws[11U] = u11; + uint64_t u12 = load64_le(b + 96U); + ws[12U] = u12; + uint64_t u13 = load64_le(b + 104U); + ws[13U] = u13; + uint64_t u14 = load64_le(b + 112U); + ws[14U] = u14; + uint64_t u15 = load64_le(b + 120U); + ws[15U] = u15; + uint64_t u16 = load64_le(b + 128U); + ws[16U] = u16; + uint64_t u17 = load64_le(b + 136U); + ws[17U] = u17; + uint64_t u18 = load64_le(b + 144U); + ws[18U] = u18; + uint64_t u19 = load64_le(b + 152U); + ws[19U] = u19; + uint64_t u20 = load64_le(b + 160U); + ws[20U] = u20; + uint64_t u21 = load64_le(b + 168U); + ws[21U] = u21; + uint64_t u22 = load64_le(b + 176U); + ws[22U] = u22; + uint64_t u23 = load64_le(b + 184U); + ws[23U] = u23; + uint64_t u24 = load64_le(b + 192U); + ws[24U] = u24; + uint64_t u25 = load64_le(b + 200U); + ws[25U] = u25; + uint64_t u26 = load64_le(b + 208U); + ws[26U] = u26; + uint64_t u27 = load64_le(b + 216U); + ws[27U] = u27; + uint64_t u28 = load64_le(b + 224U); + ws[28U] = u28; + uint64_t u29 = load64_le(b + 232U); + ws[29U] = u29; + uint64_t u30 = load64_le(b + 240U); + ws[30U] = u30; + uint64_t u31 = load64_le(b + 248U); + ws[31U] = u31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[167U] = 0x80U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b1 = b4; + uint64_t u = load64_le(b1); + ws0[0U] = u; + uint64_t u32 = load64_le(b1 + 8U); + ws0[1U] = u32; + uint64_t u33 = load64_le(b1 + 16U); + ws0[2U] = u33; + uint64_t u34 = load64_le(b1 + 24U); + ws0[3U] = u34; + uint64_t u35 = load64_le(b1 + 32U); + ws0[4U] = u35; + uint64_t u36 = load64_le(b1 + 40U); + ws0[5U] = u36; + uint64_t u37 = load64_le(b1 + 48U); + ws0[6U] = u37; + uint64_t u38 = load64_le(b1 + 56U); + ws0[7U] = u38; + uint64_t u39 = load64_le(b1 + 64U); + ws0[8U] = u39; + uint64_t u40 = load64_le(b1 + 72U); + ws0[9U] = u40; + uint64_t u41 = load64_le(b1 + 80U); + ws0[10U] = u41; + uint64_t u42 = load64_le(b1 + 88U); + ws0[11U] = u42; + uint64_t u43 = load64_le(b1 + 96U); + ws0[12U] = u43; + uint64_t u44 = load64_le(b1 + 104U); + ws0[13U] = u44; + uint64_t u45 = load64_le(b1 + 112U); + ws0[14U] = u45; + uint64_t u46 = load64_le(b1 + 120U); + ws0[15U] = u46; + uint64_t u47 = load64_le(b1 + 128U); + ws0[16U] = u47; + uint64_t u48 = load64_le(b1 + 136U); + ws0[17U] = u48; + uint64_t u49 = load64_le(b1 + 144U); + ws0[18U] = u49; + uint64_t u50 = load64_le(b1 + 152U); + ws0[19U] = u50; + uint64_t u51 = load64_le(b1 + 160U); + ws0[20U] = u51; + uint64_t u52 = load64_le(b1 + 168U); + ws0[21U] = u52; + uint64_t u53 = load64_le(b1 + 176U); + ws0[22U] = u53; + uint64_t u54 = 
load64_le(b1 + 184U); + ws0[23U] = u54; + uint64_t u55 = load64_le(b1 + 192U); + ws0[24U] = u55; + uint64_t u56 = load64_le(b1 + 200U); + ws0[25U] = u56; + uint64_t u57 = load64_le(b1 + 208U); + ws0[26U] = u57; + uint64_t u58 = load64_le(b1 + 216U); + ws0[27U] = u58; + uint64_t u59 = load64_le(b1 + 224U); + ws0[28U] = u59; + uint64_t u60 = load64_le(b1 + 232U); + ws0[29U] = u60; + uint64_t u61 = load64_le(b1 + 240U); + ws0[30U] = u61; + uint64_t u62 = load64_le(b1 + 248U); + ws0[31U] = u62; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws0[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i1 + 5U * i] = state[i1 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + state[0U] = state[0U] ^ c; + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint8_t *b0 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i0 * 168U, 168U * sizeof (uint8_t)); + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = 
load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } + uint32_t rem = inputByteLen % 168U; + uint8_t b2[256U] = { 0U }; + uint8_t *b_ = b2; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[rem] = 0x1FU; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u0 = load64_le(b); + ws[0U] = u0; + uint64_t u1 = load64_le(b + 8U); + ws[1U] = u1; + uint64_t u2 = load64_le(b + 16U); + ws[2U] = u2; + uint64_t u3 = load64_le(b + 24U); + ws[3U] = u3; + uint64_t u4 = load64_le(b + 32U); + ws[4U] = u4; + uint64_t u5 = load64_le(b + 40U); + ws[5U] = u5; + uint64_t u6 = load64_le(b + 48U); + ws[6U] = u6; + uint64_t u7 = load64_le(b + 56U); + ws[7U] = u7; + uint64_t u8 = load64_le(b + 64U); + ws[8U] = u8; + uint64_t u9 = load64_le(b + 72U); + ws[9U] = u9; + uint64_t u10 = load64_le(b + 80U); + ws[10U] = u10; + uint64_t u11 = load64_le(b + 88U); + ws[11U] = u11; + uint64_t u12 = load64_le(b + 96U); + ws[12U] = u12; + uint64_t u13 = load64_le(b + 104U); + ws[13U] = u13; + uint64_t u14 = load64_le(b + 112U); + ws[14U] = u14; + uint64_t u15 = load64_le(b + 120U); + ws[15U] = u15; + uint64_t u16 = load64_le(b + 128U); + ws[16U] = u16; + uint64_t u17 = load64_le(b + 136U); + ws[17U] = u17; + uint64_t u18 = load64_le(b + 144U); + ws[18U] = u18; + uint64_t u19 = load64_le(b + 152U); + ws[19U] = u19; + uint64_t u20 = load64_le(b + 160U); + ws[20U] = u20; + uint64_t u21 = load64_le(b + 168U); + ws[21U] = u21; + uint64_t 
u22 = load64_le(b + 176U); + ws[22U] = u22; + uint64_t u23 = load64_le(b + 184U); + ws[23U] = u23; + uint64_t u24 = load64_le(b + 192U); + ws[24U] = u24; + uint64_t u25 = load64_le(b + 200U); + ws[25U] = u25; + uint64_t u26 = load64_le(b + 208U); + ws[26U] = u26; + uint64_t u27 = load64_le(b + 216U); + ws[27U] = u27; + uint64_t u28 = load64_le(b + 224U); + ws[28U] = u28; + uint64_t u29 = load64_le(b + 232U); + ws[29U] = u29; + uint64_t u30 = load64_le(b + 240U); + ws[30U] = u30; + uint64_t u31 = load64_le(b + 248U); + ws[31U] = u31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[167U] = 0x80U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b1 = b4; + uint64_t u = load64_le(b1); + ws0[0U] = u; + uint64_t u32 = load64_le(b1 + 8U); + ws0[1U] = u32; + uint64_t u33 = load64_le(b1 + 16U); + ws0[2U] = u33; + uint64_t u34 = load64_le(b1 + 24U); + ws0[3U] = u34; + uint64_t u35 = load64_le(b1 + 32U); + ws0[4U] = u35; + uint64_t u36 = load64_le(b1 + 40U); + ws0[5U] = u36; + uint64_t u37 = load64_le(b1 + 48U); + ws0[6U] = u37; + uint64_t u38 = load64_le(b1 + 56U); + ws0[7U] = u38; + uint64_t u39 = load64_le(b1 + 64U); + ws0[8U] = u39; + uint64_t u40 = load64_le(b1 + 72U); + ws0[9U] = u40; + uint64_t u41 = load64_le(b1 + 80U); + ws0[10U] = u41; + uint64_t u42 = load64_le(b1 + 88U); + ws0[11U] = u42; + uint64_t u43 = load64_le(b1 + 96U); + ws0[12U] = u43; + uint64_t u44 = load64_le(b1 + 104U); + ws0[13U] = u44; + uint64_t u45 = load64_le(b1 + 112U); + ws0[14U] = u45; + uint64_t u46 = load64_le(b1 + 120U); + ws0[15U] = u46; + uint64_t u47 = load64_le(b1 + 128U); + ws0[16U] = u47; + uint64_t u48 = load64_le(b1 + 136U); + ws0[17U] = u48; + uint64_t u49 = load64_le(b1 + 144U); + ws0[18U] = u49; + uint64_t u50 = load64_le(b1 + 152U); + ws0[19U] = u50; + uint64_t u51 = load64_le(b1 + 160U); + ws0[20U] = u51; + uint64_t u52 = load64_le(b1 + 168U); + ws0[21U] = u52; + uint64_t u53 = load64_le(b1 + 176U); + ws0[22U] = u53; + uint64_t u54 = load64_le(b1 + 184U); + ws0[23U] = u54; + uint64_t u55 = load64_le(b1 + 192U); + ws0[24U] = u55; + uint64_t u56 = load64_le(b1 + 200U); + ws0[25U] = u56; + uint64_t u57 = load64_le(b1 + 208U); + ws0[26U] = u57; + uint64_t u58 = load64_le(b1 + 216U); + ws0[27U] = u58; + uint64_t u59 = load64_le(b1 + 224U); + ws0[28U] = u59; + uint64_t u60 = load64_le(b1 + 232U); + ws0[29U] = u60; + uint64_t u61 = load64_le(b1 + 240U); + ws0[30U] = u61; + uint64_t u62 = load64_le(b1 + 248U); + ws0[31U] = u62; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws0[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____2 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____2 << 1U | uu____2 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i1 + 5U * i] = state[i1 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____3 = current; + state[_Y] = uu____3 << r | uu____3 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ 
(~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + state[0U] = state[0U] ^ c; + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, state, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(output + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + diff --git a/src/msvc/Hacl_Hash_SHA3_Simd256.c b/src/msvc/Hacl_Hash_SHA3_Simd256.c index b9bfcee5..9748a375 100644 --- a/src/msvc/Hacl_Hash_SHA3_Simd256.c +++ b/src/msvc/Hacl_Hash_SHA3_Simd256.c @@ -26,20 +26,19 @@ #include "Hacl_Hash_SHA3_Simd256.h" #include "internal/Hacl_Hash_SHA3_Scalar.h" -#include "libintvector.h" void Hacl_Hash_SHA3_Simd256_shake128( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -438,63 +437,63 @@ Hacl_Hash_SHA3_Simd256_shake128( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = 
ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -723,57 +722,57 @@ Hacl_Hash_SHA3_Simd256_shake128( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - 
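For reference, the 0x1F and 0x80 writes in these hunks are Keccak's pad10*1 padding with SHAKE domain separation: the suffix byte 0x1F (the four domain bits plus the first padding bit) goes at offset inputByteLen % rateInBytes of a zeroed final block, and the closing padding bit 0x80 goes at the last byte of the rate; the fixed-output SHA-3 functions further down use 0x06 instead. Both blocks are XORed into the state, so the two bytes combine correctly even when rem == rateInBytes - 1. A minimal scalar sketch of the same step, under a hypothetical helper name:

/* Hypothetical illustration only; not part of this patch. */
static inline void
shake_pad_last_block(uint8_t *block, uint32_t rateInBytes, uint32_t rem)
{
  /* block is rateInBytes bytes, zero-initialized; rem = inputByteLen % rateInBytes. */
  block[rem] = 0x1FU;               /* SHAKE suffix bits + first pad bit */
  block[rateInBytes - 1U] ^= 0x80U; /* last pad bit; XOR also covers rem == rateInBytes - 1U */
}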
KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + 
ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -1295,62 +1294,49 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = 
rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -1645,76 +1631,63 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_shake256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - 
uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -2113,63 +2086,63 @@ Hacl_Hash_SHA3_Simd256_shake256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -2398,57 +2371,57 @@ Hacl_Hash_SHA3_Simd256_shake256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t 
*b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + 
ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -2970,62 +2943,49 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; 
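The ws index remap above (ws[1U] = ws4, ws[2U] = ws8, and so on) finishes un-interleaving the four SIMD lanes, so after the store64_le loop hbuf holds each state's rate block contiguously: the bytes destined for output k start at hbuf + k * 256U. That is what lets the old per-32-byte scatter loop, with its rem0/j tail handling, collapse into one memcpy per output in the added lines. Schematically, with rb, hbuf, i0 and rateInBytes as in the surrounding code:

/* Sketch of the simplified copy-out; rb.fst .. rb.snd.snd.snd are the
   four destination buffers and i0 indexes the current rate-sized block. */
memcpy(rb.fst         + i0 * rateInBytes, hbuf,        rateInBytes * sizeof (uint8_t));
memcpy(rb.snd.fst     + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t));
memcpy(rb.snd.snd.fst + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t));
memcpy(rb.snd.snd.snd + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t));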
- uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -3320,75 +3280,62 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 
outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_224( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -3787,63 +3734,63 @@ Hacl_Hash_SHA3_Simd256_sha3_224( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -4072,57 +4019,57 @@ Hacl_Hash_SHA3_Simd256_sha3_224( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t 
b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -4644,62 +4591,49 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - 
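Each for (uint32_t i1 = 0U; i1 < 24U; i1++) block repeated through this file is one unrolled round of Keccak-f[1600] over the 5x5 lane matrix (lane index x + 5*y): the _C/_D computation is theta, the piln/rotc walk is rho and pi combined, the v0..v4 block is chi, and the closing XOR with keccak_rndc[i1] is iota. A compact scalar sketch of one round, using the same tables under hypothetical short names:

/* Compact sketch of one Keccak-f[1600] round; st is uint64_t[25];
   piln, rotc and rndc stand for the Hacl_Impl_SHA3_Vec tables above,
   and i1 is the round index. Illustration only. */
#define ROTL64(v, n) (((v) << (n)) | ((v) >> (64U - (n))))
uint64_t C[5U], D, t, cur;
for (uint32_t x = 0U; x < 5U; x++)                                   /* theta */
  C[x] = st[x] ^ st[x + 5U] ^ st[x + 10U] ^ st[x + 15U] ^ st[x + 20U];
for (uint32_t x = 0U; x < 5U; x++)
{
  D = C[(x + 4U) % 5U] ^ ROTL64(C[(x + 1U) % 5U], 1U);
  for (uint32_t y = 0U; y < 5U; y++) st[x + 5U * y] ^= D;
}
cur = st[1U];
for (uint32_t i = 0U; i < 24U; i++)                                  /* rho + pi */
{
  t = st[piln[i]]; st[piln[i]] = ROTL64(cur, rotc[i]); cur = t;
}
for (uint32_t y = 0U; y < 5U; y++)                                   /* chi */
{
  uint64_t row[5U];
  for (uint32_t x = 0U; x < 5U; x++) row[x] = st[x + 5U * y];
  for (uint32_t x = 0U; x < 5U; x++)
    st[x + 5U * y] = row[x] ^ (~row[(x + 1U) % 5U] & row[(x + 2U) % 5U]);
}
st[0U] ^= rndc[i1];                                                  /* iota */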
uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -4994,75 +4928,62 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; 
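The 28U - remOut offsets in this hunk are sha3_224's fixed 28-byte digest length. After this patch, every Simd256 one-shot entry point takes its four digest buffers first, then (for the XOFs) the output length, then the four equal-length inputs, and the shared input byte length last. A hypothetical call with caller-owned buffers:

/* Hypothetical usage; in0..in3 must all be inputByteLen bytes long. */
uint8_t d0[28U], d1[28U], d2[28U], d3[28U];
Hacl_Hash_SHA3_Simd256_sha3_224(d0, d1, d2, d3, in0, in1, in2, in3, inputByteLen);

/* The XOF variants take outputByteLen right after the digest pointers: */
uint8_t o0[64U], o1[64U], o2[64U], o3[64U];
Hacl_Hash_SHA3_Simd256_shake128(o0, o1, o2, o3, 64U, in0, in1, in2, in3, inputByteLen);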
uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 28U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 28U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 28U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -5461,63 +5382,63 @@ Hacl_Hash_SHA3_Simd256_sha3_256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -5746,57 +5667,57 @@ Hacl_Hash_SHA3_Simd256_sha3_256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] 
= Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -6318,62 +6239,49 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + 
ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -6668,75 +6576,62 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof 
(uint8_t)); - memcpy(b1 + 32U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 32U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 32U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 32U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_384( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -7135,63 +7030,63 @@ Hacl_Hash_SHA3_Simd256_sha3_384( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -7420,57 +7315,57 @@ Hacl_Hash_SHA3_Simd256_sha3_384( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] 
= Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -7992,62 +7887,49 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - 
ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -8342,75 +8224,62 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + 
ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 48U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 48U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 48U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_512( - uint32_t inputByteLen, - uint8_t *input0, - uint8_t *input1, - uint8_t *input2, - uint8_t *input3, uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -8809,63 +8678,63 @@ Hacl_Hash_SHA3_Simd256_sha3_512( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] 
KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -9094,57 +8963,57 @@ Hacl_Hash_SHA3_Simd256_sha3_512( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] 
= Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -9666,62 +9535,49 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - 
ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -10016,61 +9872,1482 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + 
ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 64U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 64U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 64U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(100U, sizeof (uint64_t)); + return buf; +} + +void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl1, b11 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl2, b21 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl3, b31 + i0 * 168U, 168U * sizeof (uint8_t)); + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b_.snd.snd.snd; + uint8_t *b2 = b_.snd.snd.fst; + uint8_t *b1 = b_.snd.fst; + uint8_t *b0 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws[3U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + 
Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + 
Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + 
uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_last( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + uint32_t rem = inputByteLen % 168U; + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws[3U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws32 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + 
v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, 
v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = 
ws[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws[0U] = ws00; + ws[1U] = ws110; + ws[2U] = ws210; + ws[3U] = ws32; + ws[4U] = ws40; + ws[5U] = ws50; + ws[6U] = ws60; + ws[7U] = ws70; + ws[8U] = ws80; + ws[9U] = ws90; + ws[10U] = ws100; + ws[11U] = ws111; + ws[12U] = ws120; + ws[13U] = ws130; + ws[14U] = ws140; + ws[15U] = ws150; + ws[16U] = ws160; + ws[17U] = ws170; + ws[18U] = ws180; + ws[19U] = ws190; + ws[20U] = ws200; + ws[21U] = ws211; + ws[22U] = ws220; + ws[23U] = ws230; + ws[24U] = ws240; + ws[25U] = ws250; + ws[26U] = ws260; + ws[27U] = ws270; + ws[28U] = ws280; + ws[29U] = ws290; + ws[30U] = ws300; + ws[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[167U] = 0x80U; + b15[167U] = 0x80U; + b25[167U] = 0x80U; + b35[167U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws33[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b2 = b.snd.snd.fst; + uint8_t *b1 = b.snd.fst; + uint8_t *b0 = b.fst; + ws33[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws33[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws33[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws33[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws33[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws33[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws33[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws33[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws33[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws33[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws33[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws33[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws33[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws33[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws33[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws33[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws33[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 
+ 128U); + ws33[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws33[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws33[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws33[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws33[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws33[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws33[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws33[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws33[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws33[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws33[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws33[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws33[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws33[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws33[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v08 = ws33[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws33[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws33[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws33[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws33[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws33[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws33[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws33[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws33[8U]; + 
Lib_IntVector_Intrinsics_vec256 v110 = ws33[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws33[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws33[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws33[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws33[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws33[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws33[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws33[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws33[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws33[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws33[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + 
Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws33[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws33[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws33[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws33[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws33[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws33[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws33[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws33[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws33[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws33[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws33[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws33[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws33[0U] = ws0; + ws33[1U] = ws1; + ws33[2U] = ws2; + ws33[3U] = ws3; + ws33[4U] = ws4; + ws33[5U] = ws5; + ws33[6U] = ws6; + ws33[7U] = ws7; + ws33[8U] = ws8; + ws33[9U] = ws9; + ws33[10U] = ws10; + ws33[11U] = ws11; + ws33[12U] = ws12; + ws33[13U] = ws13; + ws33[14U] = ws14; + ws33[15U] = ws15; + ws33[16U] = ws16; + ws33[17U] = ws17; + ws33[18U] = ws18; + ws33[19U] = ws19; + ws33[20U] = ws20; + ws33[21U] = ws21; + ws33[22U] = ws22; + ws33[23U] = ws23; + ws33[24U] = ws24; + ws33[25U] = ws25; + ws33[26U] = ws26; + ws33[27U] = ws27; + ws33[28U] = ws28; + ws33[29U] = ws29; + ws33[30U] = ws30; + ws33[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws33[i]); + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i1 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i1 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i1 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i1 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + 
Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v015; + state[1U + 5U * i] = v115; + state[2U + 5U * i] = v215; + state[3U + 5U * i] = v315; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, state, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + 
Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + 
Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b0 = output0; + uint8_t *b1 = output1; + uint8_t *b2 = output2; + uint8_t *b3 = output3; + memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + memcpy(b1 + i0 * 168U, hbuf + 256U, 168U * sizeof (uint8_t)); + memcpy(b2 + i0 * 168U, hbuf + 512U, 168U * sizeof (uint8_t)); + memcpy(b3 + i0 * 168U, hbuf + 768U, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = 
current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } } From 9586ae756e7983ae63dedcf133e80c2b67f3bd61 Mon Sep 17 00:00:00 2001 From: mamonet Date: Mon, 15 Jan 2024 20:58:44 +0200 Subject: [PATCH 4/6] Remove shake128_absorb API --- include/Hacl_Hash_SHA3_Scalar.h | 5 +- include/Hacl_Hash_SHA3_Simd256.h | 2 +- include/msvc/Hacl_Hash_SHA3_Scalar.h | 5 +- include/msvc/Hacl_Hash_SHA3_Simd256.h | 2 +- src/Hacl_Hash_SHA3_Scalar.c | 327 +------------------------- src/Hacl_Hash_SHA3_Simd256.c | 2 +- src/msvc/Hacl_Hash_SHA3_Scalar.c | 327 +------------------------- src/msvc/Hacl_Hash_SHA3_Simd256.c | 2 +- 8 files changed, 8 insertions(+), 664 deletions(-) diff --git a/include/Hacl_Hash_SHA3_Scalar.h b/include/Hacl_Hash_SHA3_Scalar.h index d0b7f253..63cf8710 100644 --- a/include/Hacl_Hash_SHA3_Scalar.h +++ b/include/Hacl_Hash_SHA3_Scalar.h @@ -71,15 +71,12 @@ Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( ); void -Hacl_Hash_SHA3_Scalar_shake128_absorb_last( +Hacl_Hash_SHA3_Scalar_shake128_absorb_final( uint64_t *state, uint8_t *input, uint32_t inputByteLen ); -void -Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen); - void Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( uint64_t *state, diff --git a/include/Hacl_Hash_SHA3_Simd256.h b/include/Hacl_Hash_SHA3_Simd256.h index d231e273..25c1a166 
100644 --- a/include/Hacl_Hash_SHA3_Simd256.h +++ b/include/Hacl_Hash_SHA3_Simd256.h @@ -153,7 +153,7 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( ); void -Hacl_Hash_SHA3_Simd256_shake128_absorb_last( +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( Lib_IntVector_Intrinsics_vec256 *state, uint8_t *input0, uint8_t *input1, diff --git a/include/msvc/Hacl_Hash_SHA3_Scalar.h b/include/msvc/Hacl_Hash_SHA3_Scalar.h index d0b7f253..63cf8710 100644 --- a/include/msvc/Hacl_Hash_SHA3_Scalar.h +++ b/include/msvc/Hacl_Hash_SHA3_Scalar.h @@ -71,15 +71,12 @@ Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( ); void -Hacl_Hash_SHA3_Scalar_shake128_absorb_last( +Hacl_Hash_SHA3_Scalar_shake128_absorb_final( uint64_t *state, uint8_t *input, uint32_t inputByteLen ); -void -Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen); - void Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( uint64_t *state, diff --git a/include/msvc/Hacl_Hash_SHA3_Simd256.h b/include/msvc/Hacl_Hash_SHA3_Simd256.h index d231e273..25c1a166 100644 --- a/include/msvc/Hacl_Hash_SHA3_Simd256.h +++ b/include/msvc/Hacl_Hash_SHA3_Simd256.h @@ -153,7 +153,7 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( ); void -Hacl_Hash_SHA3_Simd256_shake128_absorb_last( +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( Lib_IntVector_Intrinsics_vec256 *state, uint8_t *input0, uint8_t *input1, diff --git a/src/Hacl_Hash_SHA3_Scalar.c b/src/Hacl_Hash_SHA3_Scalar.c index 7393ebf2..6d6806a3 100644 --- a/src/Hacl_Hash_SHA3_Scalar.c +++ b/src/Hacl_Hash_SHA3_Scalar.c @@ -2526,7 +2526,7 @@ Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( } void -Hacl_Hash_SHA3_Scalar_shake128_absorb_last( +Hacl_Hash_SHA3_Scalar_shake128_absorb_final( uint64_t *state, uint8_t *input, uint32_t inputByteLen @@ -2730,331 +2730,6 @@ Hacl_Hash_SHA3_Scalar_shake128_absorb_last( } } -void -Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen) -{ - for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) - { - uint8_t b1[256U] = { 0U }; - uint8_t *b_ = b1; - uint8_t *b0 = input; - uint8_t *bl0 = b_; - memcpy(bl0, b0 + i0 * 168U, 168U * sizeof (uint8_t)); - uint64_t ws[32U] = { 0U }; - uint8_t *b = b_; - uint64_t u = load64_le(b); - ws[0U] = u; - uint64_t u0 = load64_le(b + 8U); - ws[1U] = u0; - uint64_t u1 = load64_le(b + 16U); - ws[2U] = u1; - uint64_t u2 = load64_le(b + 24U); - ws[3U] = u2; - uint64_t u3 = load64_le(b + 32U); - ws[4U] = u3; - uint64_t u4 = load64_le(b + 40U); - ws[5U] = u4; - uint64_t u5 = load64_le(b + 48U); - ws[6U] = u5; - uint64_t u6 = load64_le(b + 56U); - ws[7U] = u6; - uint64_t u7 = load64_le(b + 64U); - ws[8U] = u7; - uint64_t u8 = load64_le(b + 72U); - ws[9U] = u8; - uint64_t u9 = load64_le(b + 80U); - ws[10U] = u9; - uint64_t u10 = load64_le(b + 88U); - ws[11U] = u10; - uint64_t u11 = load64_le(b + 96U); - ws[12U] = u11; - uint64_t u12 = load64_le(b + 104U); - ws[13U] = u12; - uint64_t u13 = load64_le(b + 112U); - ws[14U] = u13; - uint64_t u14 = load64_le(b + 120U); - ws[15U] = u14; - uint64_t u15 = load64_le(b + 128U); - ws[16U] = u15; - uint64_t u16 = load64_le(b + 136U); - ws[17U] = u16; - uint64_t u17 = load64_le(b + 144U); - ws[18U] = u17; - uint64_t u18 = load64_le(b + 152U); - ws[19U] = u18; - uint64_t u19 = load64_le(b + 160U); - ws[20U] = u19; - uint64_t u20 = load64_le(b + 168U); - ws[21U] = u20; - uint64_t u21 = load64_le(b + 176U); - ws[22U] = u21; - uint64_t u22 = load64_le(b + 184U); - ws[23U] = u22; - uint64_t u23 = load64_le(b + 192U); - ws[24U] = u23; - uint64_t u24 = 
load64_le(b + 200U); - ws[25U] = u24; - uint64_t u25 = load64_le(b + 208U); - ws[26U] = u25; - uint64_t u26 = load64_le(b + 216U); - ws[27U] = u26; - uint64_t u27 = load64_le(b + 224U); - ws[28U] = u27; - uint64_t u28 = load64_le(b + 232U); - ws[29U] = u28; - uint64_t u29 = load64_le(b + 240U); - ws[30U] = u29; - uint64_t u30 = load64_le(b + 248U); - ws[31U] = u30; - for (uint32_t i = 0U; i < 25U; i++) - { - state[i] = state[i] ^ ws[i]; - } - for (uint32_t i1 = 0U; i1 < 24U; i1++) - { - uint64_t _C[5U] = { 0U }; - KRML_MAYBE_FOR5(i, - 0U, - 5U, - 1U, - _C[i] = - state[i - + 0U] - ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); - KRML_MAYBE_FOR5(i2, - 0U, - 5U, - 1U, - uint64_t uu____0 = _C[(i2 + 1U) % 5U]; - uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); - uint64_t x = state[1U]; - uint64_t current = x; - for (uint32_t i = 0U; i < 24U; i++) - { - uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; - uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; - uint64_t temp = state[_Y]; - uint64_t uu____1 = current; - state[_Y] = uu____1 << r | uu____1 >> (64U - r); - current = temp; - } - KRML_MAYBE_FOR5(i, - 0U, - 5U, - 1U, - uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); - uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); - uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); - uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); - uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); - state[0U + 5U * i] = v0; - state[1U + 5U * i] = v1; - state[2U + 5U * i] = v2; - state[3U + 5U * i] = v3; - state[4U + 5U * i] = v4;); - uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; - state[0U] = state[0U] ^ c; - } - } - uint32_t rem = inputByteLen % 168U; - uint8_t b2[256U] = { 0U }; - uint8_t *b_ = b2; - uint32_t rem1 = inputByteLen % 168U; - uint8_t *b00 = input; - uint8_t *bl0 = b_; - memcpy(bl0, b00 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - uint8_t *b01 = b_; - b01[rem] = 0x1FU; - uint64_t ws[32U] = { 0U }; - uint8_t *b = b_; - uint64_t u0 = load64_le(b); - ws[0U] = u0; - uint64_t u1 = load64_le(b + 8U); - ws[1U] = u1; - uint64_t u2 = load64_le(b + 16U); - ws[2U] = u2; - uint64_t u3 = load64_le(b + 24U); - ws[3U] = u3; - uint64_t u4 = load64_le(b + 32U); - ws[4U] = u4; - uint64_t u5 = load64_le(b + 40U); - ws[5U] = u5; - uint64_t u6 = load64_le(b + 48U); - ws[6U] = u6; - uint64_t u7 = load64_le(b + 56U); - ws[7U] = u7; - uint64_t u8 = load64_le(b + 64U); - ws[8U] = u8; - uint64_t u9 = load64_le(b + 72U); - ws[9U] = u9; - uint64_t u10 = load64_le(b + 80U); - ws[10U] = u10; - uint64_t u11 = load64_le(b + 88U); - ws[11U] = u11; - uint64_t u12 = load64_le(b + 96U); - ws[12U] = u12; - uint64_t u13 = load64_le(b + 104U); - ws[13U] = u13; - uint64_t u14 = load64_le(b + 112U); - ws[14U] = u14; - uint64_t u15 = load64_le(b + 120U); - ws[15U] = u15; - uint64_t u16 = load64_le(b + 128U); - ws[16U] = u16; - uint64_t u17 = load64_le(b + 136U); - ws[17U] = u17; - uint64_t u18 = load64_le(b + 144U); - ws[18U] = u18; - uint64_t u19 = load64_le(b + 152U); - ws[19U] = u19; - uint64_t u20 = load64_le(b + 160U); - ws[20U] = u20; - uint64_t u21 = load64_le(b + 168U); - ws[21U] = u21; - uint64_t u22 = load64_le(b + 176U); - ws[22U] = u22; - uint64_t u23 = load64_le(b + 184U); - ws[23U] = u23; - uint64_t u24 = load64_le(b + 192U); - ws[24U] = u24; - uint64_t 
u25 = load64_le(b + 200U); - ws[25U] = u25; - uint64_t u26 = load64_le(b + 208U); - ws[26U] = u26; - uint64_t u27 = load64_le(b + 216U); - ws[27U] = u27; - uint64_t u28 = load64_le(b + 224U); - ws[28U] = u28; - uint64_t u29 = load64_le(b + 232U); - ws[29U] = u29; - uint64_t u30 = load64_le(b + 240U); - ws[30U] = u30; - uint64_t u31 = load64_le(b + 248U); - ws[31U] = u31; - for (uint32_t i = 0U; i < 25U; i++) - { - state[i] = state[i] ^ ws[i]; - } - uint8_t b3[256U] = { 0U }; - uint8_t *b4 = b3; - uint8_t *b0 = b4; - b0[167U] = 0x80U; - uint64_t ws0[32U] = { 0U }; - uint8_t *b1 = b4; - uint64_t u = load64_le(b1); - ws0[0U] = u; - uint64_t u32 = load64_le(b1 + 8U); - ws0[1U] = u32; - uint64_t u33 = load64_le(b1 + 16U); - ws0[2U] = u33; - uint64_t u34 = load64_le(b1 + 24U); - ws0[3U] = u34; - uint64_t u35 = load64_le(b1 + 32U); - ws0[4U] = u35; - uint64_t u36 = load64_le(b1 + 40U); - ws0[5U] = u36; - uint64_t u37 = load64_le(b1 + 48U); - ws0[6U] = u37; - uint64_t u38 = load64_le(b1 + 56U); - ws0[7U] = u38; - uint64_t u39 = load64_le(b1 + 64U); - ws0[8U] = u39; - uint64_t u40 = load64_le(b1 + 72U); - ws0[9U] = u40; - uint64_t u41 = load64_le(b1 + 80U); - ws0[10U] = u41; - uint64_t u42 = load64_le(b1 + 88U); - ws0[11U] = u42; - uint64_t u43 = load64_le(b1 + 96U); - ws0[12U] = u43; - uint64_t u44 = load64_le(b1 + 104U); - ws0[13U] = u44; - uint64_t u45 = load64_le(b1 + 112U); - ws0[14U] = u45; - uint64_t u46 = load64_le(b1 + 120U); - ws0[15U] = u46; - uint64_t u47 = load64_le(b1 + 128U); - ws0[16U] = u47; - uint64_t u48 = load64_le(b1 + 136U); - ws0[17U] = u48; - uint64_t u49 = load64_le(b1 + 144U); - ws0[18U] = u49; - uint64_t u50 = load64_le(b1 + 152U); - ws0[19U] = u50; - uint64_t u51 = load64_le(b1 + 160U); - ws0[20U] = u51; - uint64_t u52 = load64_le(b1 + 168U); - ws0[21U] = u52; - uint64_t u53 = load64_le(b1 + 176U); - ws0[22U] = u53; - uint64_t u54 = load64_le(b1 + 184U); - ws0[23U] = u54; - uint64_t u55 = load64_le(b1 + 192U); - ws0[24U] = u55; - uint64_t u56 = load64_le(b1 + 200U); - ws0[25U] = u56; - uint64_t u57 = load64_le(b1 + 208U); - ws0[26U] = u57; - uint64_t u58 = load64_le(b1 + 216U); - ws0[27U] = u58; - uint64_t u59 = load64_le(b1 + 224U); - ws0[28U] = u59; - uint64_t u60 = load64_le(b1 + 232U); - ws0[29U] = u60; - uint64_t u61 = load64_le(b1 + 240U); - ws0[30U] = u61; - uint64_t u62 = load64_le(b1 + 248U); - ws0[31U] = u62; - for (uint32_t i = 0U; i < 25U; i++) - { - state[i] = state[i] ^ ws0[i]; - } - for (uint32_t i0 = 0U; i0 < 24U; i0++) - { - uint64_t _C[5U] = { 0U }; - KRML_MAYBE_FOR5(i, - 0U, - 5U, - 1U, - _C[i] = state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); - KRML_MAYBE_FOR5(i1, - 0U, - 5U, - 1U, - uint64_t uu____2 = _C[(i1 + 1U) % 5U]; - uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____2 << 1U | uu____2 >> 63U); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i1 + 5U * i] = state[i1 + 5U * i] ^ _D;);); - uint64_t x = state[1U]; - uint64_t current = x; - for (uint32_t i = 0U; i < 24U; i++) - { - uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; - uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; - uint64_t temp = state[_Y]; - uint64_t uu____3 = current; - state[_Y] = uu____3 << r | uu____3 >> (64U - r); - current = temp; - } - KRML_MAYBE_FOR5(i, - 0U, - 5U, - 1U, - uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); - uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); - uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); - uint64_t v3 = state[3U + 5U * i] ^ 
(~state[4U + 5U * i] & state[0U + 5U * i]); - uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); - state[0U + 5U * i] = v0; - state[1U + 5U * i] = v1; - state[2U + 5U * i] = v2; - state[3U + 5U * i] = v3; - state[4U + 5U * i] = v4;); - uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; - state[0U] = state[0U] ^ c; - } -} - void Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( uint64_t *state, diff --git a/src/Hacl_Hash_SHA3_Simd256.c b/src/Hacl_Hash_SHA3_Simd256.c index 9748a375..9046f3db 100644 --- a/src/Hacl_Hash_SHA3_Simd256.c +++ b/src/Hacl_Hash_SHA3_Simd256.c @@ -10323,7 +10323,7 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( } void -Hacl_Hash_SHA3_Simd256_shake128_absorb_last( +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( Lib_IntVector_Intrinsics_vec256 *state, uint8_t *input0, uint8_t *input1, diff --git a/src/msvc/Hacl_Hash_SHA3_Scalar.c b/src/msvc/Hacl_Hash_SHA3_Scalar.c index 7393ebf2..6d6806a3 100644 --- a/src/msvc/Hacl_Hash_SHA3_Scalar.c +++ b/src/msvc/Hacl_Hash_SHA3_Scalar.c @@ -2526,7 +2526,7 @@ Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( } void -Hacl_Hash_SHA3_Scalar_shake128_absorb_last( +Hacl_Hash_SHA3_Scalar_shake128_absorb_final( uint64_t *state, uint8_t *input, uint32_t inputByteLen @@ -2730,331 +2730,6 @@ Hacl_Hash_SHA3_Scalar_shake128_absorb_last( } } -void -Hacl_Hash_SHA3_Scalar_shake128_absorb(uint64_t *state, uint8_t *input, uint32_t inputByteLen) -{ - for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) - { - uint8_t b1[256U] = { 0U }; - uint8_t *b_ = b1; - uint8_t *b0 = input; - uint8_t *bl0 = b_; - memcpy(bl0, b0 + i0 * 168U, 168U * sizeof (uint8_t)); - uint64_t ws[32U] = { 0U }; - uint8_t *b = b_; - uint64_t u = load64_le(b); - ws[0U] = u; - uint64_t u0 = load64_le(b + 8U); - ws[1U] = u0; - uint64_t u1 = load64_le(b + 16U); - ws[2U] = u1; - uint64_t u2 = load64_le(b + 24U); - ws[3U] = u2; - uint64_t u3 = load64_le(b + 32U); - ws[4U] = u3; - uint64_t u4 = load64_le(b + 40U); - ws[5U] = u4; - uint64_t u5 = load64_le(b + 48U); - ws[6U] = u5; - uint64_t u6 = load64_le(b + 56U); - ws[7U] = u6; - uint64_t u7 = load64_le(b + 64U); - ws[8U] = u7; - uint64_t u8 = load64_le(b + 72U); - ws[9U] = u8; - uint64_t u9 = load64_le(b + 80U); - ws[10U] = u9; - uint64_t u10 = load64_le(b + 88U); - ws[11U] = u10; - uint64_t u11 = load64_le(b + 96U); - ws[12U] = u11; - uint64_t u12 = load64_le(b + 104U); - ws[13U] = u12; - uint64_t u13 = load64_le(b + 112U); - ws[14U] = u13; - uint64_t u14 = load64_le(b + 120U); - ws[15U] = u14; - uint64_t u15 = load64_le(b + 128U); - ws[16U] = u15; - uint64_t u16 = load64_le(b + 136U); - ws[17U] = u16; - uint64_t u17 = load64_le(b + 144U); - ws[18U] = u17; - uint64_t u18 = load64_le(b + 152U); - ws[19U] = u18; - uint64_t u19 = load64_le(b + 160U); - ws[20U] = u19; - uint64_t u20 = load64_le(b + 168U); - ws[21U] = u20; - uint64_t u21 = load64_le(b + 176U); - ws[22U] = u21; - uint64_t u22 = load64_le(b + 184U); - ws[23U] = u22; - uint64_t u23 = load64_le(b + 192U); - ws[24U] = u23; - uint64_t u24 = load64_le(b + 200U); - ws[25U] = u24; - uint64_t u25 = load64_le(b + 208U); - ws[26U] = u25; - uint64_t u26 = load64_le(b + 216U); - ws[27U] = u26; - uint64_t u27 = load64_le(b + 224U); - ws[28U] = u27; - uint64_t u28 = load64_le(b + 232U); - ws[29U] = u28; - uint64_t u29 = load64_le(b + 240U); - ws[30U] = u29; - uint64_t u30 = load64_le(b + 248U); - ws[31U] = u30; - for (uint32_t i = 0U; i < 25U; i++) - { - state[i] = state[i] ^ ws[i]; - } - for (uint32_t i1 = 0U; i1 < 24U; i1++) - { - uint64_t _C[5U] = { 0U }; - 
KRML_MAYBE_FOR5(i, - 0U, - 5U, - 1U, - _C[i] = - state[i - + 0U] - ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); - KRML_MAYBE_FOR5(i2, - 0U, - 5U, - 1U, - uint64_t uu____0 = _C[(i2 + 1U) % 5U]; - uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); - uint64_t x = state[1U]; - uint64_t current = x; - for (uint32_t i = 0U; i < 24U; i++) - { - uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; - uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; - uint64_t temp = state[_Y]; - uint64_t uu____1 = current; - state[_Y] = uu____1 << r | uu____1 >> (64U - r); - current = temp; - } - KRML_MAYBE_FOR5(i, - 0U, - 5U, - 1U, - uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); - uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); - uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); - uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); - uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); - state[0U + 5U * i] = v0; - state[1U + 5U * i] = v1; - state[2U + 5U * i] = v2; - state[3U + 5U * i] = v3; - state[4U + 5U * i] = v4;); - uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; - state[0U] = state[0U] ^ c; - } - } - uint32_t rem = inputByteLen % 168U; - uint8_t b2[256U] = { 0U }; - uint8_t *b_ = b2; - uint32_t rem1 = inputByteLen % 168U; - uint8_t *b00 = input; - uint8_t *bl0 = b_; - memcpy(bl0, b00 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - uint8_t *b01 = b_; - b01[rem] = 0x1FU; - uint64_t ws[32U] = { 0U }; - uint8_t *b = b_; - uint64_t u0 = load64_le(b); - ws[0U] = u0; - uint64_t u1 = load64_le(b + 8U); - ws[1U] = u1; - uint64_t u2 = load64_le(b + 16U); - ws[2U] = u2; - uint64_t u3 = load64_le(b + 24U); - ws[3U] = u3; - uint64_t u4 = load64_le(b + 32U); - ws[4U] = u4; - uint64_t u5 = load64_le(b + 40U); - ws[5U] = u5; - uint64_t u6 = load64_le(b + 48U); - ws[6U] = u6; - uint64_t u7 = load64_le(b + 56U); - ws[7U] = u7; - uint64_t u8 = load64_le(b + 64U); - ws[8U] = u8; - uint64_t u9 = load64_le(b + 72U); - ws[9U] = u9; - uint64_t u10 = load64_le(b + 80U); - ws[10U] = u10; - uint64_t u11 = load64_le(b + 88U); - ws[11U] = u11; - uint64_t u12 = load64_le(b + 96U); - ws[12U] = u12; - uint64_t u13 = load64_le(b + 104U); - ws[13U] = u13; - uint64_t u14 = load64_le(b + 112U); - ws[14U] = u14; - uint64_t u15 = load64_le(b + 120U); - ws[15U] = u15; - uint64_t u16 = load64_le(b + 128U); - ws[16U] = u16; - uint64_t u17 = load64_le(b + 136U); - ws[17U] = u17; - uint64_t u18 = load64_le(b + 144U); - ws[18U] = u18; - uint64_t u19 = load64_le(b + 152U); - ws[19U] = u19; - uint64_t u20 = load64_le(b + 160U); - ws[20U] = u20; - uint64_t u21 = load64_le(b + 168U); - ws[21U] = u21; - uint64_t u22 = load64_le(b + 176U); - ws[22U] = u22; - uint64_t u23 = load64_le(b + 184U); - ws[23U] = u23; - uint64_t u24 = load64_le(b + 192U); - ws[24U] = u24; - uint64_t u25 = load64_le(b + 200U); - ws[25U] = u25; - uint64_t u26 = load64_le(b + 208U); - ws[26U] = u26; - uint64_t u27 = load64_le(b + 216U); - ws[27U] = u27; - uint64_t u28 = load64_le(b + 224U); - ws[28U] = u28; - uint64_t u29 = load64_le(b + 232U); - ws[29U] = u29; - uint64_t u30 = load64_le(b + 240U); - ws[30U] = u30; - uint64_t u31 = load64_le(b + 248U); - ws[31U] = u31; - for (uint32_t i = 0U; i < 25U; i++) - { - state[i] = state[i] ^ ws[i]; - } - uint8_t b3[256U] = { 0U }; - uint8_t *b4 = b3; - uint8_t *b0 = b4; 
- b0[167U] = 0x80U; - uint64_t ws0[32U] = { 0U }; - uint8_t *b1 = b4; - uint64_t u = load64_le(b1); - ws0[0U] = u; - uint64_t u32 = load64_le(b1 + 8U); - ws0[1U] = u32; - uint64_t u33 = load64_le(b1 + 16U); - ws0[2U] = u33; - uint64_t u34 = load64_le(b1 + 24U); - ws0[3U] = u34; - uint64_t u35 = load64_le(b1 + 32U); - ws0[4U] = u35; - uint64_t u36 = load64_le(b1 + 40U); - ws0[5U] = u36; - uint64_t u37 = load64_le(b1 + 48U); - ws0[6U] = u37; - uint64_t u38 = load64_le(b1 + 56U); - ws0[7U] = u38; - uint64_t u39 = load64_le(b1 + 64U); - ws0[8U] = u39; - uint64_t u40 = load64_le(b1 + 72U); - ws0[9U] = u40; - uint64_t u41 = load64_le(b1 + 80U); - ws0[10U] = u41; - uint64_t u42 = load64_le(b1 + 88U); - ws0[11U] = u42; - uint64_t u43 = load64_le(b1 + 96U); - ws0[12U] = u43; - uint64_t u44 = load64_le(b1 + 104U); - ws0[13U] = u44; - uint64_t u45 = load64_le(b1 + 112U); - ws0[14U] = u45; - uint64_t u46 = load64_le(b1 + 120U); - ws0[15U] = u46; - uint64_t u47 = load64_le(b1 + 128U); - ws0[16U] = u47; - uint64_t u48 = load64_le(b1 + 136U); - ws0[17U] = u48; - uint64_t u49 = load64_le(b1 + 144U); - ws0[18U] = u49; - uint64_t u50 = load64_le(b1 + 152U); - ws0[19U] = u50; - uint64_t u51 = load64_le(b1 + 160U); - ws0[20U] = u51; - uint64_t u52 = load64_le(b1 + 168U); - ws0[21U] = u52; - uint64_t u53 = load64_le(b1 + 176U); - ws0[22U] = u53; - uint64_t u54 = load64_le(b1 + 184U); - ws0[23U] = u54; - uint64_t u55 = load64_le(b1 + 192U); - ws0[24U] = u55; - uint64_t u56 = load64_le(b1 + 200U); - ws0[25U] = u56; - uint64_t u57 = load64_le(b1 + 208U); - ws0[26U] = u57; - uint64_t u58 = load64_le(b1 + 216U); - ws0[27U] = u58; - uint64_t u59 = load64_le(b1 + 224U); - ws0[28U] = u59; - uint64_t u60 = load64_le(b1 + 232U); - ws0[29U] = u60; - uint64_t u61 = load64_le(b1 + 240U); - ws0[30U] = u61; - uint64_t u62 = load64_le(b1 + 248U); - ws0[31U] = u62; - for (uint32_t i = 0U; i < 25U; i++) - { - state[i] = state[i] ^ ws0[i]; - } - for (uint32_t i0 = 0U; i0 < 24U; i0++) - { - uint64_t _C[5U] = { 0U }; - KRML_MAYBE_FOR5(i, - 0U, - 5U, - 1U, - _C[i] = state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); - KRML_MAYBE_FOR5(i1, - 0U, - 5U, - 1U, - uint64_t uu____2 = _C[(i1 + 1U) % 5U]; - uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____2 << 1U | uu____2 >> 63U); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i1 + 5U * i] = state[i1 + 5U * i] ^ _D;);); - uint64_t x = state[1U]; - uint64_t current = x; - for (uint32_t i = 0U; i < 24U; i++) - { - uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; - uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; - uint64_t temp = state[_Y]; - uint64_t uu____3 = current; - state[_Y] = uu____3 << r | uu____3 >> (64U - r); - current = temp; - } - KRML_MAYBE_FOR5(i, - 0U, - 5U, - 1U, - uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); - uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); - uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); - uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); - uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); - state[0U + 5U * i] = v0; - state[1U + 5U * i] = v1; - state[2U + 5U * i] = v2; - state[3U + 5U * i] = v3; - state[4U + 5U * i] = v4;); - uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; - state[0U] = state[0U] ^ c; - } -} - void Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( uint64_t *state, diff --git a/src/msvc/Hacl_Hash_SHA3_Simd256.c b/src/msvc/Hacl_Hash_SHA3_Simd256.c index 
9748a375..9046f3db 100644
--- a/src/msvc/Hacl_Hash_SHA3_Simd256.c
+++ b/src/msvc/Hacl_Hash_SHA3_Simd256.c
@@ -10323,7 +10323,7 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(
 }
 
 void
-Hacl_Hash_SHA3_Simd256_shake128_absorb_last(
+Hacl_Hash_SHA3_Simd256_shake128_absorb_final(
   Lib_IntVector_Intrinsics_vec256 *state,
   uint8_t *input0,
   uint8_t *input1,

From 60488ea2f8679cd9aec5960d297363716d1fd198 Mon Sep 17 00:00:00 2001
From: mamonet
Date: Wed, 17 Jan 2024 13:18:42 +0200
Subject: [PATCH 5/6] Document some API in SHA3 Scalar/SIMD256 headers

---
 include/Hacl_Hash_SHA3_Scalar.h       | 43 +++++++++++++++++++++++++
 include/Hacl_Hash_SHA3_Simd256.h      | 46 +++++++++++++++++++++++++++
 include/msvc/Hacl_Hash_SHA3_Scalar.h  | 43 +++++++++++++++++++++++++
 include/msvc/Hacl_Hash_SHA3_Simd256.h | 46 +++++++++++++++++++++++++++
 4 files changed, 178 insertions(+)

diff --git a/include/Hacl_Hash_SHA3_Scalar.h b/include/Hacl_Hash_SHA3_Scalar.h
index 63cf8710..a40c2d04 100644
--- a/include/Hacl_Hash_SHA3_Scalar.h
+++ b/include/Hacl_Hash_SHA3_Scalar.h
@@ -59,10 +59,27 @@ void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t in
 
 void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
 
+/**
+Allocate a state buffer of 200 bytes
+*/
 uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void);
 
+/**
+Free a state buffer
+*/
 void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s);
 
+/**
+Absorb a number of input blocks and write the output state
+
+  This function is intended to receive a hash state and an input buffer.
+  It processes an input that is a multiple of 168 bytes (the SHAKE128 block size);
+  any additional bytes of a final partial block are ignored.
+
+  The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+*/
 void
 Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
   uint64_t *state,
@@ -70,6 +87,21 @@ Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
   uint32_t inputByteLen
 );
 
+/**
+Absorb the final partial block of input and write the output state
+
+  This function is intended to receive a hash state and an input buffer.
+  It processes the sequence of bytes at the end of the input buffer that is
+  shorter than 168 bytes (the SHAKE128 block size);
+  any bytes of full blocks at the start of the input buffer are ignored.
+
+  The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+
+  Note: The full size of the input buffer must be passed in `inputByteLen`,
+  including the full-block bytes at the start of the input buffer that are ignored
+*/
 void
 Hacl_Hash_SHA3_Scalar_shake128_absorb_final(
   uint64_t *state,
@@ -77,6 +109,17 @@ Hacl_Hash_SHA3_Scalar_shake128_absorb_final(
   uint32_t inputByteLen
 );
 
+/**
+Squeeze a hash state to an output buffer
+
+  This function is intended to receive a hash state and an output buffer.
+  It produces an output that is a multiple of 168 bytes (the SHAKE128 block size);
+  any additional bytes of a final partial block are ignored.
+
+  The argument `state` (IN) points to the hash state, i.e., uint64_t[25]
+  The argument `output` (OUT) points to `outputByteLen` bytes of valid memory,
+  i.e., uint8_t[outputByteLen]
+*/
 void
 Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks(
   uint64_t *state,
diff --git a/include/Hacl_Hash_SHA3_Simd256.h b/include/Hacl_Hash_SHA3_Simd256.h
index 25c1a166..302094a4 100644
--- a/include/Hacl_Hash_SHA3_Simd256.h
+++ b/include/Hacl_Hash_SHA3_Simd256.h
@@ -138,10 +138,28 @@ Hacl_Hash_SHA3_Simd256_sha3_512(
   uint32_t inputByteLen
 );
 
+/**
+Allocate a quadruple state buffer (200 bytes for each)
+*/
 uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void);
 
+/**
+Free a quadruple state buffer
+*/
 void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s);
 
+/**
+Absorb a number of blocks from 4 input buffers and write the output states
+
+  This function is intended to receive a quadruple hash state and 4 input buffers.
+  It processes inputs that are a multiple of 168 bytes (the SHAKE128 block size);
+  any additional bytes of a final partial block in each buffer are ignored.
+
+  The argument `state` (IN/OUT) points to the quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[inputByteLen]
+*/
 void
 Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(
   Lib_IntVector_Intrinsics_vec256 *state,
@@ -152,6 +170,22 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(
   uint32_t inputByteLen
 );
 
+/**
+Absorb the final partial blocks of 4 input buffers and write the output states
+
+  This function is intended to receive a quadruple hash state and 4 input buffers.
+  It processes the sequence of bytes at the end of each input buffer that is
+  shorter than 168 bytes (the SHAKE128 block size);
+  any bytes of full blocks at the start of the input buffers are ignored.
+
+  The argument `state` (IN/OUT) points to the quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[inputByteLen]
+
+  Note: The full size of the input buffers must be passed in `inputByteLen`,
+  including the full-block bytes at the start of each input buffer that are ignored
+*/
 void
 Hacl_Hash_SHA3_Simd256_shake128_absorb_final(
   Lib_IntVector_Intrinsics_vec256 *state,
@@ -162,6 +196,18 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_final(
   uint32_t inputByteLen
 );
 
+/**
+Squeeze a quadruple hash state to 4 output buffers
+
+  This function is intended to receive a quadruple hash state and 4 output buffers.
+  It produces 4 outputs, each a multiple of 168 bytes (the SHAKE128 block size);
+  any additional bytes of a final partial block in each buffer are ignored.
+
+  The argument `state` (IN) points to the quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[outputByteLen]
+*/
 void
 Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks(
   Lib_IntVector_Intrinsics_vec256 *state,
diff --git a/include/msvc/Hacl_Hash_SHA3_Scalar.h b/include/msvc/Hacl_Hash_SHA3_Scalar.h
index 63cf8710..a40c2d04 100644
--- a/include/msvc/Hacl_Hash_SHA3_Scalar.h
+++ b/include/msvc/Hacl_Hash_SHA3_Scalar.h
@@ -59,10 +59,27 @@ void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t in
 
 void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
 
+/**
+Allocate a state buffer of 200 bytes
+*/
 uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void);
 
+/**
+Free a state buffer
+*/
 void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s);
 
+/**
+Absorb a number of input blocks and write the output state
+
+  This function is intended to receive a hash state and an input buffer.
+  It processes an input that is a multiple of 168 bytes (the SHAKE128 block size);
+  any additional bytes of a final partial block are ignored.
+
+  The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+*/
 void
 Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
   uint64_t *state,
@@ -70,6 +87,21 @@ Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
   uint32_t inputByteLen
 );
 
+/**
+Absorb the final partial block of input and write the output state
+
+  This function is intended to receive a hash state and an input buffer.
+  It processes the sequence of bytes at the end of the input buffer that is
+  shorter than 168 bytes (the SHAKE128 block size);
+  any bytes of full blocks at the start of the input buffer are ignored.
+
+  The argument `state` (IN/OUT) points to the hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+
+  Note: The full size of the input buffer must be passed in `inputByteLen`,
+  including the full-block bytes at the start of the input buffer that are ignored
+*/
 void
 Hacl_Hash_SHA3_Scalar_shake128_absorb_final(
   uint64_t *state,
@@ -77,6 +109,17 @@ Hacl_Hash_SHA3_Scalar_shake128_absorb_final(
   uint32_t inputByteLen
 );
 
+/**
+Squeeze a hash state to an output buffer
+
+  This function is intended to receive a hash state and an output buffer.
+  It produces an output that is a multiple of 168 bytes (the SHAKE128 block size);
+  any additional bytes of a final partial block are ignored.
+
+  The argument `state` (IN) points to the hash state, i.e., uint64_t[25]
+  The argument `output` (OUT) points to `outputByteLen` bytes of valid memory,
+  i.e., uint8_t[outputByteLen]
+*/
 void
 Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks(
   uint64_t *state,
diff --git a/include/msvc/Hacl_Hash_SHA3_Simd256.h b/include/msvc/Hacl_Hash_SHA3_Simd256.h
index 25c1a166..302094a4 100644
--- a/include/msvc/Hacl_Hash_SHA3_Simd256.h
+++ b/include/msvc/Hacl_Hash_SHA3_Simd256.h
@@ -138,10 +138,28 @@ Hacl_Hash_SHA3_Simd256_sha3_512(
   uint32_t inputByteLen
 );
 
+/**
+Allocate a quadruple state buffer (200 bytes for each)
+*/
 uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void);
 
+/**
+Free a quadruple state buffer
+*/
 void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s);
 
+/**
+Absorb a number of blocks from 4 input buffers and write the output states
+
+  This function is intended to receive a quadruple hash state and 4 input buffers.
+  It processes inputs that are a multiple of 168 bytes (the SHAKE128 block size);
+  any additional bytes of a final partial block in each buffer are ignored.
+
+  The argument `state` (IN/OUT) points to the quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[inputByteLen]
+*/
 void
 Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(
   Lib_IntVector_Intrinsics_vec256 *state,
@@ -152,6 +170,22 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(
   uint32_t inputByteLen
 );
 
+/**
+Absorb the final partial blocks of 4 input buffers and write the output states
+
+  This function is intended to receive a quadruple hash state and 4 input buffers.
+  It processes the sequence of bytes at the end of each input buffer that is
+  shorter than 168 bytes (the SHAKE128 block size);
+  any bytes of full blocks at the start of the input buffers are ignored.
+
+  The argument `state` (IN/OUT) points to the quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[inputByteLen]
+
+  Note: The full size of the input buffers must be passed in `inputByteLen`,
+  including the full-block bytes at the start of each input buffer that are ignored
+*/
 void
 Hacl_Hash_SHA3_Simd256_shake128_absorb_final(
   Lib_IntVector_Intrinsics_vec256 *state,
@@ -162,6 +196,18 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_final(
   uint32_t inputByteLen
 );
 
+/**
+Squeeze a quadruple hash state to 4 output buffers
+
+  This function is intended to receive a quadruple hash state and 4 output buffers.
+  It produces 4 outputs, each a multiple of 168 bytes (the SHAKE128 block size);
+  any additional bytes of a final partial block in each buffer are ignored.
+
+  The argument `state` (IN) points to the quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[outputByteLen]
+*/
 void
 Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks(
   Lib_IntVector_Intrinsics_vec256 *state,

From 856e97ad6f1b487813bd36ddd5e76ab0c564f5ab Mon Sep 17 00:00:00 2001
From: mamonet
Date: Wed, 17 Jan 2024 13:56:00 +0200
Subject: [PATCH 6/6] Fix calling Hacl_Hash_SHA3_Simd256_shake128 in libcrux_hacl_glue.c

---
 libcrux/src/libcrux_hacl_glue.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libcrux/src/libcrux_hacl_glue.c b/libcrux/src/libcrux_hacl_glue.c
index 4346053f..1c02291b 100644
--- a/libcrux/src/libcrux_hacl_glue.c
+++ b/libcrux/src/libcrux_hacl_glue.c
@@ -39,16 +39,16 @@ libcrux_digest_shake128x4f(size_t len,
   };
 #ifdef HACL_CAN_COMPILE_VEC256
   if (libcrux_platform_simd256_support() == true) {
-    Hacl_Hash_SHA3_Simd256_shake128(input0.len,
+    Hacl_Hash_SHA3_Simd256_shake128(out.fst,
+                                    out.snd,
+                                    out.thd,
+                                    out.f3,
+                                    (uint32_t)len,
                                     input0.ptr,
                                     input1.ptr,
                                     input2.ptr,
                                     input3.ptr,
-                                    (uint32_t)len,
-                                    out.fst,
-                                    out.snd,
-                                    out.thd,
-                                    out.f3);
+                                    input0.len);
   } else {
     Hacl_Hash_SHA3_shake128_hacl(
      input0.len, input0.ptr, (uint32_t)len, out.fst);
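Migration note for the one-shot Hacl_Hash_SHA3_Scalar_shake128_absorb removed in
PATCH 4/6: per the header documentation added in PATCH 5/6,
shake128_absorb_nblocks consumes only the full 168-byte blocks of the input, and
shake128_absorb_final pads and absorbs only the trailing partial block (while
still being passed the full inputByteLen), so calling the two back to back on
the same buffer reproduces the removed one-shot absorb. Below is a minimal
sketch, not part of this series: the wrapper name shake128_oneshot is
hypothetical, and it assumes that state_malloc returns a freshly zeroed state,
that squeeze_nblocks takes (state, output, outputByteLen) as its documentation
block suggests, and that outputByteLen is a multiple of the 168-byte rate, since
squeeze_nblocks emits whole blocks only.

    #include <stdint.h>
    #include "Hacl_Hash_SHA3_Scalar.h"

    /* Hypothetical wrapper: absorb `input` and squeeze `outputByteLen` bytes
       of SHAKE128 output, mimicking the removed one-shot absorb API. */
    static void
    shake128_oneshot(uint8_t *output, uint32_t outputByteLen,
                     uint8_t *input, uint32_t inputByteLen)
    {
      uint64_t *state = Hacl_Hash_SHA3_Scalar_state_malloc();
      /* Absorbs the inputByteLen / 168 full blocks; the tail is ignored. */
      Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(state, input, inputByteLen);
      /* Pads and absorbs the final inputByteLen % 168 bytes; the full length
         is passed, and the full blocks at the start are skipped. When the
         input is an exact multiple of 168 bytes, this absorbs the padding
         block alone, matching the removed one-shot code path. */
      Hacl_Hash_SHA3_Scalar_shake128_absorb_final(state, input, inputByteLen);
      /* Squeezes outputByteLen / 168 whole blocks into `output`. */
      Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks(state, output, outputByteLen);
      Hacl_Hash_SHA3_Scalar_state_free(state);
    }

For example, shake128_oneshot(output, 336U, input, inputByteLen) would produce
two full SHAKE128 blocks of output under these assumptions.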