diff --git a/benchmarks/sha3.cc b/benchmarks/sha3.cc index 9caf2e54..1deb82c8 100644 --- a/benchmarks/sha3.cc +++ b/benchmarks/sha3.cc @@ -65,7 +65,7 @@ Hacl_Sha3_224_Scalar(benchmark::State& state) { for (auto _ : state) { Hacl_Hash_SHA3_Scalar_sha3_224( - input.size(), (uint8_t*)input.data(), digest224_0.data()); + digest224_0.data(), (uint8_t*)input.data(), input.size()); } if (digest224_0 != expected_digest_sha3_224) { state.SkipWithError("Incorrect digest."); @@ -85,15 +85,15 @@ Hacl_Sha3_224_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_sha3_224(input.size(), + Hacl_Hash_SHA3_Simd256_sha3_224(digest224_0.data(), + digest224_1.data(), + digest224_2.data(), + digest224_3.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest224_0.data(), - digest224_1.data(), - digest224_2.data(), - digest224_3.data()); + input.size()); } if (digest224_0 != expected_digest_sha3_224 || digest224_1 != expected_digest_sha3_224 || @@ -132,12 +132,39 @@ Hacl_Sha3_256(benchmark::State& state) BENCHMARK(Hacl_Sha3_256)->Setup(DoSetup); +#include "sha3.h" + +static void +Digestif_sha3_256(benchmark::State& state) +{ + bytes digest(32, 0); + + for (auto _ : state) { + + sha3_ctx ctx; + digestif_sha3_init(&ctx, 256); + + for (auto chunk : chunk(input, chunk_len)) { + digestif_sha3_update(&ctx, chunk.data(), chunk.size()); + } + + digestif_sha3_finalize(&ctx, digest.data(), 0x06); + } + + if (digest != expected_digest_sha3_256) { + state.SkipWithError("Incorrect digest."); + return; + } +} + +BENCHMARK(Digestif_sha3_256)->Setup(DoSetup); + static void Hacl_Sha3_256_Scalar(benchmark::State& state) { for (auto _ : state) { Hacl_Hash_SHA3_Scalar_sha3_256( - input.size(), (uint8_t*)input.data(), digest256_0.data()); + digest256_0.data(), (uint8_t*)input.data(), input.size()); } if (digest256_0 != expected_digest_sha3_256) { state.SkipWithError("Incorrect digest."); @@ -157,15 +184,15 @@ 
Hacl_Sha3_256_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_sha3_256(input.size(), + Hacl_Hash_SHA3_Simd256_sha3_256(digest256_0.data(), + digest256_1.data(), + digest256_2.data(), + digest256_3.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest256_0.data(), - digest256_1.data(), - digest256_2.data(), - digest256_3.data()); + input.size()); } if (digest256_0 != expected_digest_sha3_256 || digest256_1 != expected_digest_sha3_256 || @@ -179,33 +206,6 @@ Hacl_Sha3_256_Simd256(benchmark::State& state) BENCHMARK(Hacl_Sha3_256_Simd256)->Setup(DoSetup); #endif -#include "sha3.h" - -static void -Digestif_sha3_256(benchmark::State& state) -{ - bytes digest(32, 0); - - for (auto _ : state) { - - sha3_ctx ctx; - digestif_sha3_init(&ctx, 256); - - for (auto chunk : chunk(input, chunk_len)) { - digestif_sha3_update(&ctx, chunk.data(), chunk.size()); - } - - digestif_sha3_finalize(&ctx, digest.data(), 0x06); - } - - if (digest != expected_digest_sha3_256) { - state.SkipWithError("Incorrect digest."); - return; - } -} - -BENCHMARK(Digestif_sha3_256)->Setup(DoSetup); - #ifndef NO_OPENSSL BENCHMARK_CAPTURE(OpenSSL_hash_oneshot, sha3_256, @@ -236,7 +236,7 @@ Hacl_Sha3_384_Scalar(benchmark::State& state) { for (auto _ : state) { Hacl_Hash_SHA3_Scalar_sha3_384( - input.size(), (uint8_t*)input.data(), digest384_0.data()); + digest384_0.data(), (uint8_t*)input.data(), input.size()); } if (digest384_0 != expected_digest_sha3_384) { state.SkipWithError("Incorrect digest."); @@ -256,15 +256,15 @@ Hacl_Sha3_384_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_sha3_384(input.size(), + Hacl_Hash_SHA3_Simd256_sha3_384(digest384_0.data(), + digest384_1.data(), + digest384_2.data(), + digest384_3.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest384_0.data(), - digest384_1.data(), - digest384_2.data(), - 
digest384_3.data()); + input.size()); } if (digest384_0 != expected_digest_sha3_384 || digest384_1 != expected_digest_sha3_384 || @@ -303,12 +303,37 @@ Hacl_Sha3_512(benchmark::State& state) BENCHMARK(Hacl_Sha3_512)->Setup(DoSetup); +static void +Digestif_sha3_512(benchmark::State& state) +{ + bytes digest(64, 0); + + for (auto _ : state) { + + sha3_ctx ctx; + digestif_sha3_init(&ctx, 512); + + for (auto chunk : chunk(input, chunk_len)) { + digestif_sha3_update(&ctx, chunk.data(), chunk.size()); + } + + digestif_sha3_finalize(&ctx, digest.data(), 0x06); + } + + if (digest != expected_digest_sha3_512) { + state.SkipWithError("Incorrect digest."); + return; + } +} + +BENCHMARK(Digestif_sha3_512)->Setup(DoSetup); + static void Hacl_Sha3_512_Scalar(benchmark::State& state) { for (auto _ : state) { Hacl_Hash_SHA3_Scalar_sha3_512( - input.size(), (uint8_t*)input.data(), digest512_0.data()); + digest512_0.data(), (uint8_t*)input.data(), input.size()); } if (digest512_0 != expected_digest_sha3_512) { state.SkipWithError("Incorrect digest."); @@ -328,15 +353,15 @@ Hacl_Sha3_512_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_sha3_512(input.size(), + Hacl_Hash_SHA3_Simd256_sha3_512(digest512_0.data(), + digest512_1.data(), + digest512_2.data(), + digest512_3.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest512_0.data(), - digest512_1.data(), - digest512_2.data(), - digest512_3.data()); + input.size()); } if (digest512_0 != expected_digest_sha3_512 || digest512_1 != expected_digest_sha3_512 || @@ -350,31 +375,6 @@ Hacl_Sha3_512_Simd256(benchmark::State& state) BENCHMARK(Hacl_Sha3_512_Simd256)->Setup(DoSetup); #endif -static void -Digestif_sha3_512(benchmark::State& state) -{ - bytes digest(64, 0); - - for (auto _ : state) { - - sha3_ctx ctx; - digestif_sha3_init(&ctx, 512); - - for (auto chunk : chunk(input, chunk_len)) { - digestif_sha3_update(&ctx, chunk.data(), chunk.size()); 
- } - - digestif_sha3_finalize(&ctx, digest.data(), 0x06); - } - - if (digest != expected_digest_sha3_512) { - state.SkipWithError("Incorrect digest."); - return; - } -} - -BENCHMARK(Digestif_sha3_512)->Setup(DoSetup); - #ifndef NO_OPENSSL BENCHMARK_CAPTURE(OpenSSL_hash_oneshot, sha3_512, @@ -469,10 +469,10 @@ static void Hacl_Sha3_shake128_Scalar(benchmark::State& state) { for (auto _ : state) { - Hacl_Hash_SHA3_Scalar_shake128(input.size(), - (uint8_t*)input.data(), + Hacl_Hash_SHA3_Scalar_shake128(digest_shake_0.data(), digest_shake_0.size(), - digest_shake_0.data()); + (uint8_t*)input.data(), + input.size()); } } @@ -488,16 +488,16 @@ Hacl_Sha3_shake128_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_shake128(input.size(), + Hacl_Hash_SHA3_Simd256_shake128(digest_shake_0.data(), + digest_shake_1.data(), + digest_shake_2.data(), + digest_shake_3.data(), + digest_shake_0.size(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest_shake_0.size(), - digest_shake_0.data(), - digest_shake_1.data(), - digest_shake_2.data(), - digest_shake_3.data()); + input.size()); } } @@ -521,10 +521,10 @@ static void Hacl_Sha3_shake256_Scalar(benchmark::State& state) { for (auto _ : state) { - Hacl_Hash_SHA3_Scalar_shake256(input.size(), - (uint8_t*)input.data(), + Hacl_Hash_SHA3_Scalar_shake256(digest_shake_0.data(), digest_shake_0.size(), - digest_shake_0.data()); + (uint8_t*)input.data(), + input.size()); } } @@ -540,16 +540,16 @@ Hacl_Sha3_shake256_Simd256(benchmark::State& state) } for (auto _ : state) { - Hacl_Hash_SHA3_Simd256_shake256(input.size(), + Hacl_Hash_SHA3_Simd256_shake256(digest_shake_0.data(), + digest_shake_1.data(), + digest_shake_2.data(), + digest_shake_3.data(), + digest_shake_0.size(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), (uint8_t*)input.data(), - digest_shake_0.size(), - digest_shake_0.data(), - digest_shake_1.data(), - 
digest_shake_2.data(), - digest_shake_3.data()); + input.size()); } } diff --git a/include/Hacl_Hash_SHA3_Scalar.h b/include/Hacl_Hash_SHA3_Scalar.h index e49f1967..a40c2d04 100644 --- a/include/Hacl_Hash_SHA3_Scalar.h +++ b/include/Hacl_Hash_SHA3_Scalar.h @@ -37,27 +37,95 @@ extern "C" { void Hacl_Hash_SHA3_Scalar_shake128( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Scalar_shake256( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); -void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen); + +void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen); + +void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +/** +Allocate state buffer of 200-bytes +*/ +uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void); -void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +/** +Free state buffer +*/ +void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s); + +/** +Absorb number of input blocks and write the output state + + This function is intended to receive a hash state and input buffer. + It prcoesses an input of multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block are ignored. 
+ + The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] + The argument `input` (IN) points to `inputByteLen` bytes of valid memory, + i.e., uint8_t[inputByteLen] +*/ +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +); + +/** +Absorb a final partial block of input and write the output state + + This function is intended to receive a hash state and input buffer. + It processes a sequence of bytes at end of input buffer that is less + than 168-bytes (SHAKE128 block size), + any bytes of full blocks at start of input buffer are ignored. + + The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] + The argument `input` (IN) points to `inputByteLen` bytes of valid memory, + i.e., uint8_t[inputByteLen] + + Note: Full size of input buffer must be passed to `inputByteLen` including + the number of full-block bytes at start of input buffer that are ignored +*/ +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_final( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +); + +/** +Squeeze a hash state to output buffer + + This function is intended to receive a hash state and output buffer. + It produces an output of multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block are ignored. 
+ + The argument `state` (IN) points to hash state, i.e., uint64_t[25] + The argument `output` (OUT) points to `outputByteLen` bytes of valid memory, + i.e., uint8_t[outputByteLen] +*/ +void +Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +); #if defined(__cplusplus) } diff --git a/include/Hacl_Hash_SHA3_Simd256.h b/include/Hacl_Hash_SHA3_Simd256.h index 3dd3772d..302094a4 100644 --- a/include/Hacl_Hash_SHA3_Simd256.h +++ b/include/Hacl_Hash_SHA3_Simd256.h @@ -35,6 +35,8 @@ extern "C" { #include "krml/lowstar_endianness.h" #include "krml/internal/target.h" +#include "libintvector.h" + typedef struct K____uint8_t___uint8_t__s { uint8_t *fst; @@ -58,82 +60,162 @@ K____uint8_t___uint8_t____K____uint8_t___uint8_t_; void Hacl_Hash_SHA3_Simd256_shake128( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_shake256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_sha3_224( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_sha3_256( uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen ); 
void -Hacl_Hash_SHA3_Simd256_sha3_256( - uint32_t inputByteLen, +Hacl_Hash_SHA3_Simd256_sha3_384( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_sha3_512( uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen ); +/** +Allocate quadruple state buffer (200-bytes for each) +*/ +uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void); + +/** +Free quadruple state buffer +*/ +void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s); + +/** +Absorb number of blocks of 4 input buffers and write the output states + + This function is intended to receive a quadruple hash state and 4 input buffers. + It prcoesses an inputs of multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block for each buffer are ignored. + + The argument `state` (IN/OUT) points to quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[inputByteLen] +*/ void -Hacl_Hash_SHA3_Simd256_sha3_384( - uint32_t inputByteLen, +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); +/** +Absorb a final partial blocks of 4 input buffers and write the output states + + This function is intended to receive a quadruple hash state and 4 input buffers. + It prcoesses a sequence of bytes at end of each input buffer that is less + than 168-bytes (SHAKE128 block size), + any bytes of full blocks at start of input buffers are ignored. 
+ + The argument `state` (IN/OUT) points to quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[inputByteLen] + + Note: Full size of input buffers must be passed to `inputByteLen` including + the number of full-block bytes at start of each input buffer that are ignored +*/ void -Hacl_Hash_SHA3_Simd256_sha3_512( - uint32_t inputByteLen, +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( + Lib_IntVector_Intrinsics_vec256 *state, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, + uint32_t inputByteLen +); + +/** +Squeeze a quadruple hash state to 4 output buffers + + This function is intended to receive a quadruple hash state and 4 output buffers. + It produces 4 outputs, each is multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block for each buffer are ignored. + + The argument `state` (IN) points to quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[outputByteLen] +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint32_t outputByteLen ); #if defined(__cplusplus) diff --git a/include/msvc/Hacl_Hash_SHA3_Scalar.h b/include/msvc/Hacl_Hash_SHA3_Scalar.h index e49f1967..a40c2d04 100644 --- a/include/msvc/Hacl_Hash_SHA3_Scalar.h +++ b/include/msvc/Hacl_Hash_SHA3_Scalar.h @@ -37,27 +37,95 @@ extern "C" { void Hacl_Hash_SHA3_Scalar_shake128( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Scalar_shake256( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, 
uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); -void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen); + +void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen); + +void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +/** +Allocate state buffer of 200-bytes +*/ +uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void); -void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output); +/** +Free state buffer +*/ +void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s); + +/** +Absorb number of input blocks and write the output state + + This function is intended to receive a hash state and input buffer. + It processes an input of multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block are ignored. + + The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] + The argument `input` (IN) points to `inputByteLen` bytes of valid memory, + i.e., uint8_t[inputByteLen] +*/ +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +); + +/** +Absorb a final partial block of input and write the output state + + This function is intended to receive a hash state and input buffer. + It processes a sequence of bytes at end of input buffer that is less + than 168-bytes (SHAKE128 block size), + any bytes of full blocks at start of input buffer are ignored. 
+ + The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] + The argument `input` (IN) points to `inputByteLen` bytes of valid memory, + i.e., uint8_t[inputByteLen] + + Note: Full size of input buffer must be passed to `inputByteLen` including + the number of full-block bytes at start of input buffer that are ignored +*/ +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_final( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +); + +/** +Squeeze a hash state to output buffer + + This function is intended to receive a hash state and output buffer. + It produces an output of multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block are ignored. + + The argument `state` (IN) points to hash state, i.e., uint64_t[25] + The argument `output` (OUT) points to `outputByteLen` bytes of valid memory, + i.e., uint8_t[outputByteLen] +*/ +void +Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +); #if defined(__cplusplus) } diff --git a/include/msvc/Hacl_Hash_SHA3_Simd256.h b/include/msvc/Hacl_Hash_SHA3_Simd256.h index 3dd3772d..302094a4 100644 --- a/include/msvc/Hacl_Hash_SHA3_Simd256.h +++ b/include/msvc/Hacl_Hash_SHA3_Simd256.h @@ -35,6 +35,8 @@ extern "C" { #include "krml/lowstar_endianness.h" #include "krml/internal/target.h" +#include "libintvector.h" + typedef struct K____uint8_t___uint8_t__s { uint8_t *fst; @@ -58,82 +60,162 @@ K____uint8_t___uint8_t____K____uint8_t___uint8_t_; void Hacl_Hash_SHA3_Simd256_shake128( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_shake256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t 
*output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); void Hacl_Hash_SHA3_Simd256_sha3_224( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_sha3_256( uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen ); void -Hacl_Hash_SHA3_Simd256_sha3_256( - uint32_t inputByteLen, +Hacl_Hash_SHA3_Simd256_sha3_384( + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, + uint32_t inputByteLen +); + +void +Hacl_Hash_SHA3_Simd256_sha3_512( uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen ); +/** +Allocate quadruple state buffer (200-bytes for each) +*/ +uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void); + +/** +Free quadruple state buffer +*/ +void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s); + +/** +Absorb number of blocks of 4 input buffers and write the output states + + This function is intended to receive a quadruple hash state and 4 input buffers. + It prcoesses an inputs of multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block for each buffer are ignored. 
+ + The argument `state` (IN/OUT) points to quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[inputByteLen] +*/ void -Hacl_Hash_SHA3_Simd256_sha3_384( - uint32_t inputByteLen, +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ); +/** +Absorb a final partial blocks of 4 input buffers and write the output states + + This function is intended to receive a quadruple hash state and 4 input buffers. + It prcoesses a sequence of bytes at end of each input buffer that is less + than 168-bytes (SHAKE128 block size), + any bytes of full blocks at start of input buffers are ignored. + + The argument `state` (IN/OUT) points to quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[inputByteLen] + + Note: Full size of input buffers must be passed to `inputByteLen` including + the number of full-block bytes at start of each input buffer that are ignored +*/ void -Hacl_Hash_SHA3_Simd256_sha3_512( - uint32_t inputByteLen, +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( + Lib_IntVector_Intrinsics_vec256 *state, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, + uint32_t inputByteLen +); + +/** +Squeeze a quadruple hash state to 4 output buffers + + This function is intended to receive a quadruple hash state and 4 output buffers. + It produces 4 outputs, each is multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block for each buffer are ignored. 
+ + The argument `state` (IN) points to quadruple hash state, + i.e., Lib_IntVector_Intrinsics_vec256[25] + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + of valid memory for each buffer, i.e., uint8_t[outputByteLen] +*/ +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint32_t outputByteLen ); #if defined(__cplusplus) diff --git a/libcrux/src/libcrux_hacl_glue.c b/libcrux/src/libcrux_hacl_glue.c index 4346053f..1c02291b 100644 --- a/libcrux/src/libcrux_hacl_glue.c +++ b/libcrux/src/libcrux_hacl_glue.c @@ -39,16 +39,16 @@ libcrux_digest_shake128x4f(size_t len, }; #ifdef HACL_CAN_COMPILE_VEC256 if (libcrux_platform_simd256_support() == true) { - Hacl_Hash_SHA3_Simd256_shake128(input0.len, + Hacl_Hash_SHA3_Simd256_shake128(out.fst, + out.snd, + out.thd, + out.f3, + (uint32_t)len, input0.ptr, input1.ptr, input2.ptr, input3.ptr, - (uint32_t)len, - out.fst, - out.snd, - out.thd, - out.f3); + input0.len); } else { Hacl_Hash_SHA3_shake128_hacl( input0.len, input0.ptr, (uint32_t)len, out.fst); diff --git a/src/Hacl_Hash_SHA3_Scalar.c b/src/Hacl_Hash_SHA3_Scalar.c index 43d57482..6d6806a3 100644 --- a/src/Hacl_Hash_SHA3_Scalar.c +++ b/src/Hacl_Hash_SHA3_Scalar.c @@ -55,10 +55,10 @@ Hacl_Impl_SHA3_Vec_keccak_rndc[24U] = void Hacl_Hash_SHA3_Scalar_shake128( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ) { uint32_t rateInBytes = 168U; @@ -447,10 +447,10 @@ Hacl_Hash_SHA3_Scalar_shake128( void Hacl_Hash_SHA3_Scalar_shake256( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ) { uint32_t rateInBytes = 136U; @@ -837,7 +837,7 @@ Hacl_Hash_SHA3_Scalar_shake256( memcpy(output + outputByteLen - remOut, hbuf, 
remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 144U; uint64_t s[25U] = { 0U }; @@ -1223,7 +1223,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 136U; uint64_t s[25U] = { 0U }; @@ -1609,7 +1609,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 104U; uint64_t s[25U] = { 0U }; @@ -1995,7 +1995,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 72U; uint64_t s[25U] = { 0U }; @@ -2381,3 +2381,418 @@ void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); } +uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t)); + return buf; +} + +void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( + uint64_t *state, + uint8_t *input, + 
uint32_t inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint8_t *b0 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i0 * 168U, 168U * sizeof (uint8_t)); + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for 
(uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_final( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +) +{ + uint32_t rem = inputByteLen % 168U; + uint8_t b2[256U] = { 0U }; + uint8_t *b_ = b2; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[rem] = 0x1FU; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u0 = load64_le(b); 
+ ws[0U] = u0; + uint64_t u1 = load64_le(b + 8U); + ws[1U] = u1; + uint64_t u2 = load64_le(b + 16U); + ws[2U] = u2; + uint64_t u3 = load64_le(b + 24U); + ws[3U] = u3; + uint64_t u4 = load64_le(b + 32U); + ws[4U] = u4; + uint64_t u5 = load64_le(b + 40U); + ws[5U] = u5; + uint64_t u6 = load64_le(b + 48U); + ws[6U] = u6; + uint64_t u7 = load64_le(b + 56U); + ws[7U] = u7; + uint64_t u8 = load64_le(b + 64U); + ws[8U] = u8; + uint64_t u9 = load64_le(b + 72U); + ws[9U] = u9; + uint64_t u10 = load64_le(b + 80U); + ws[10U] = u10; + uint64_t u11 = load64_le(b + 88U); + ws[11U] = u11; + uint64_t u12 = load64_le(b + 96U); + ws[12U] = u12; + uint64_t u13 = load64_le(b + 104U); + ws[13U] = u13; + uint64_t u14 = load64_le(b + 112U); + ws[14U] = u14; + uint64_t u15 = load64_le(b + 120U); + ws[15U] = u15; + uint64_t u16 = load64_le(b + 128U); + ws[16U] = u16; + uint64_t u17 = load64_le(b + 136U); + ws[17U] = u17; + uint64_t u18 = load64_le(b + 144U); + ws[18U] = u18; + uint64_t u19 = load64_le(b + 152U); + ws[19U] = u19; + uint64_t u20 = load64_le(b + 160U); + ws[20U] = u20; + uint64_t u21 = load64_le(b + 168U); + ws[21U] = u21; + uint64_t u22 = load64_le(b + 176U); + ws[22U] = u22; + uint64_t u23 = load64_le(b + 184U); + ws[23U] = u23; + uint64_t u24 = load64_le(b + 192U); + ws[24U] = u24; + uint64_t u25 = load64_le(b + 200U); + ws[25U] = u25; + uint64_t u26 = load64_le(b + 208U); + ws[26U] = u26; + uint64_t u27 = load64_le(b + 216U); + ws[27U] = u27; + uint64_t u28 = load64_le(b + 224U); + ws[28U] = u28; + uint64_t u29 = load64_le(b + 232U); + ws[29U] = u29; + uint64_t u30 = load64_le(b + 240U); + ws[30U] = u30; + uint64_t u31 = load64_le(b + 248U); + ws[31U] = u31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[167U] = 0x80U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b1 = b4; + uint64_t u = load64_le(b1); + ws0[0U] = u; + uint64_t u32 = load64_le(b1 + 8U); + ws0[1U] = u32; + 
uint64_t u33 = load64_le(b1 + 16U); + ws0[2U] = u33; + uint64_t u34 = load64_le(b1 + 24U); + ws0[3U] = u34; + uint64_t u35 = load64_le(b1 + 32U); + ws0[4U] = u35; + uint64_t u36 = load64_le(b1 + 40U); + ws0[5U] = u36; + uint64_t u37 = load64_le(b1 + 48U); + ws0[6U] = u37; + uint64_t u38 = load64_le(b1 + 56U); + ws0[7U] = u38; + uint64_t u39 = load64_le(b1 + 64U); + ws0[8U] = u39; + uint64_t u40 = load64_le(b1 + 72U); + ws0[9U] = u40; + uint64_t u41 = load64_le(b1 + 80U); + ws0[10U] = u41; + uint64_t u42 = load64_le(b1 + 88U); + ws0[11U] = u42; + uint64_t u43 = load64_le(b1 + 96U); + ws0[12U] = u43; + uint64_t u44 = load64_le(b1 + 104U); + ws0[13U] = u44; + uint64_t u45 = load64_le(b1 + 112U); + ws0[14U] = u45; + uint64_t u46 = load64_le(b1 + 120U); + ws0[15U] = u46; + uint64_t u47 = load64_le(b1 + 128U); + ws0[16U] = u47; + uint64_t u48 = load64_le(b1 + 136U); + ws0[17U] = u48; + uint64_t u49 = load64_le(b1 + 144U); + ws0[18U] = u49; + uint64_t u50 = load64_le(b1 + 152U); + ws0[19U] = u50; + uint64_t u51 = load64_le(b1 + 160U); + ws0[20U] = u51; + uint64_t u52 = load64_le(b1 + 168U); + ws0[21U] = u52; + uint64_t u53 = load64_le(b1 + 176U); + ws0[22U] = u53; + uint64_t u54 = load64_le(b1 + 184U); + ws0[23U] = u54; + uint64_t u55 = load64_le(b1 + 192U); + ws0[24U] = u55; + uint64_t u56 = load64_le(b1 + 200U); + ws0[25U] = u56; + uint64_t u57 = load64_le(b1 + 208U); + ws0[26U] = u57; + uint64_t u58 = load64_le(b1 + 216U); + ws0[27U] = u58; + uint64_t u59 = load64_le(b1 + 224U); + ws0[28U] = u59; + uint64_t u60 = load64_le(b1 + 232U); + ws0[29U] = u60; + uint64_t u61 = load64_le(b1 + 240U); + ws0[30U] = u61; + uint64_t u62 = load64_le(b1 + 248U); + ws0[31U] = u62; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws0[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + 
KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i1 + 5U * i] = state[i1 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + state[0U] = state[0U] ^ c; + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, state, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(output + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ 
(uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + diff --git a/src/Hacl_Hash_SHA3_Simd256.c b/src/Hacl_Hash_SHA3_Simd256.c index b9bfcee5..9046f3db 100644 --- a/src/Hacl_Hash_SHA3_Simd256.c +++ b/src/Hacl_Hash_SHA3_Simd256.c @@ -26,20 +26,19 @@ #include "Hacl_Hash_SHA3_Simd256.h" #include "internal/Hacl_Hash_SHA3_Scalar.h" -#include "libintvector.h" void Hacl_Hash_SHA3_Simd256_shake128( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -438,63 +437,63 @@ Hacl_Hash_SHA3_Simd256_shake128( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = 
b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -723,57 +722,57 @@ Hacl_Hash_SHA3_Simd256_shake128( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = 
ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -1295,62 +1294,49 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = 
rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -1645,76 +1631,63 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + 
ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_shake256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + 
uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -2113,63 +2086,63 @@ Hacl_Hash_SHA3_Simd256_shake256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - 
uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); 
- ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -2398,57 +2371,57 @@ Hacl_Hash_SHA3_Simd256_shake256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = 
b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); 
+ ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -2970,62 +2943,49 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - 
memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -3320,75 +3280,62 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] 
= ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof 
(uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_224( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -3787,63 +3734,63 @@ Hacl_Hash_SHA3_Simd256_sha3_224( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 
ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 
160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -4072,57 +4019,57 @@ Hacl_Hash_SHA3_Simd256_sha3_224( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - 
b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 
160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -4644,62 +4591,49 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for 
(uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -4994,75 +4928,62 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] 
= ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + j * 32U, 
hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 28U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 28U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 28U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -5461,63 +5382,63 @@ Hacl_Hash_SHA3_Simd256_sha3_256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t 
*b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - 
ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -5746,57 +5667,57 @@ Hacl_Hash_SHA3_Simd256_sha3_256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 
ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 
128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -6318,62 +6239,49 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] 
= ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -6668,75 +6576,62 @@ 
Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; 
uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 32U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 32U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 32U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_384( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -7135,63 +7030,63 @@ Hacl_Hash_SHA3_Simd256_sha3_384( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + 
memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 
+ 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -7420,57 +7315,57 @@ Hacl_Hash_SHA3_Simd256_sha3_384( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t 
*b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 
128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -7992,62 +7887,49 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = 
ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, 
rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -8342,75 +8224,62 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); 
- } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 48U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 48U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 48U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_512( - uint32_t inputByteLen, - uint8_t *input0, - uint8_t *input1, - uint8_t *input2, - uint8_t *input3, uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -8809,63 +8678,63 @@ Hacl_Hash_SHA3_Simd256_sha3_512( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - 
memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 
+ 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -9094,57 +8963,57 @@ Hacl_Hash_SHA3_Simd256_sha3_512( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, 
.snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 
96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -9666,62 +9535,49 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; 
- ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + 
memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -10016,61 +9872,1482 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + i * 32U, hbuf + i * 128U + 
32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 64U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 64U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 64U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(100U, sizeof (uint64_t)); + return buf; +} + +void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + 
memcpy(bl0, b01 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl1, b11 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl2, b21 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl3, b31 + i0 * 168U, 168U * sizeof (uint8_t)); + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b_.snd.snd.snd; + uint8_t *b2 = b_.snd.snd.fst; + uint8_t *b1 = b_.snd.fst; + uint8_t *b0 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws[24U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + 
Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = 
ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, 
v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + 
Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = 
Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } +} +/* Hacl_Hash_SHA3_Simd256_shake128_absorb_final: absorbs the trailing partial block (inputByteLen % 168 bytes) of each of the four inputs into state, XORs in two padding bytes (0x1F right after the data, 0x80 at byte index 167), then runs the 24-iteration permutation loop at the end of the function. Parameters: state - array of which elements 0..24 are read and updated in place; input0..input3 - four input buffers, each read starting at offset inputByteLen - (inputByteLen % 168); inputByteLen - length in bytes, the same value is used for all four inputs. Returns nothing; the result is left in state. */ +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + uint32_t rem = inputByteLen % 168U;/* number of trailing bytes that do not fill a whole 168-byte block */ + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem1 = inputByteLen % 168U;/* same value as rem */ + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst;/* below: copy the last rem1 bytes of each input to the start of its zero-initialised 256-byte scratch buffer */ + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 =
b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU;/* 0x1F is written directly after the copied data in all four buffers (presumably the SHAKE domain-separation/padding-start byte of FIPS 202 - TODO confirm) */ + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst;/* below: ws[4*j + k] is loaded from scratch buffer k at byte offset 32*j, for j = 0..7 and k = 0..3 */ + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws[25U] =
Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U);/* below: every group of four vectors ws[4*j .. 4*j+3] is rearranged with interleave_low64/high64 followed by interleave_low128/high128 (looks like a 4x4 transpose of 64-bit words, so that one vector holds the same word of all four inputs - TODO confirm against Lib_IntVector_Intrinsics) */ + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws32 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 +
v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; +
Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); +
Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 =
ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 =
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6;/* below: write the rearranged vectors back into ws; within each group, slot 4*j+1 takes the v2__ result and slot 4*j+2 takes the v1__ result */ + ws[0U] = ws00; + ws[1U] = ws110; + ws[2U] = ws210; + ws[3U] = ws32; + ws[4U] = ws40; + ws[5U] = ws50; + ws[6U] = ws60; + ws[7U] = ws70; + ws[8U] = ws80; + ws[9U] = ws90; + ws[10U] = ws100; + ws[11U] = ws111; + ws[12U] = ws120; + ws[13U] = ws130; + ws[14U] = ws140; + ws[15U] = ws150; + ws[16U] = ws160; + ws[17U] = ws170; + ws[18U] = ws180; + ws[19U] = ws190; + ws[20U] = ws200; + ws[21U] = ws211; + ws[22U] = ws220; + ws[23U] = ws230; + ws[24U] = ws240; + ws[25U] = ws250; + ws[26U] = ws260; + ws[27U] = ws270; + ws[28U] = ws280; + ws[29U] = ws290; + ws[30U] = ws300; + ws[31U] = ws310;/* below: only the first 25 of the 32 vectors are XORed into state */ + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[167U] = 0x80U; + b15[167U] = 0x80U; + b25[167U] = 0x80U; + b35[167U] = 0x80U;/* second set of all-zero buffers whose only non-zero byte is 0x80 at index 167, i.e. the last byte of a 168-byte block (presumably the final padding bit - TODO confirm); it goes through the same load / rearrange / XOR-into-state sequence as the first set */ + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws33[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b2 = b.snd.snd.fst; + uint8_t *b1 = b.snd.fst; + uint8_t *b0 = b.fst; + ws33[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws33[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws33[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws33[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws33[4U] =
Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws33[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws33[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws33[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws33[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws33[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws33[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws33[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws33[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws33[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws33[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws33[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws33[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws33[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws33[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws33[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws33[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws33[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws33[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws33[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws33[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws33[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws33[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws33[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws33[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws33[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws33[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws33[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v08 = ws33[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws33[1U]; +
Lib_IntVector_Intrinsics_vec256 v28 = ws33[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws33[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws33[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws33[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws33[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws33[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 +
v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws33[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws33[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws33[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws33[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws33[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws33[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws33[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws33[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 =
Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws33[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws33[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws33[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws33[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); +
Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws33[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws33[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws33[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws33[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws33[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws33[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws33[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws33[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); +
Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws33[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws33[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws33[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws33[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; +
ws33[0U] = ws0; + ws33[1U] = ws1; + ws33[2U] = ws2; + ws33[3U] = ws3; + ws33[4U] = ws4; + ws33[5U] = ws5; + ws33[6U] = ws6; + ws33[7U] = ws7; + ws33[8U] = ws8; + ws33[9U] = ws9; + ws33[10U] = ws10; + ws33[11U] = ws11; + ws33[12U] = ws12; + ws33[13U] = ws13; + ws33[14U] = ws14; + ws33[15U] = ws15; + ws33[16U] = ws16; + ws33[17U] = ws17; + ws33[18U] = ws18; + ws33[19U] = ws19; + ws33[20U] = ws20; + ws33[21U] = ws21; + ws33[22U] = ws22; + ws33[23U] = ws23; + ws33[24U] = ws24; + ws33[25U] = ws25; + ws33[26U] = ws26; + ws33[27U] = ws27; + ws33[28U] = ws28; + ws33[29U] = ws29; + ws33[30U] = ws30; + ws33[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws33[i]); + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + {/* 24 iterations over state; each one (1) XORs the five words of every column into _C, (2) XORs _C[(i1 + 4) % 5] ^ rotate-left-by-1(_C[(i1 + 1) % 5]) into every word of column i1, (3) walks the Hacl_Impl_SHA3_Vec_keccak_piln / keccak_rotc tables, moving each word to index _Y rotated left by r, (4) replaces every word of a row of five by x ^ (~y & z) taken from its two successors, and (5) XORs load64(Hacl_Impl_SHA3_Vec_keccak_rndc[i0]) into state[0] */ + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i1 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i1 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i1 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i1 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { +
uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = +
Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v015; + state[1U + 5U * i] = v115; + state[2U + 5U * i] = v215; + state[3U + 5U * i] = v315; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, state, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); +
Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 
= ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + 
Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b0 = output0; + uint8_t *b1 = output1; + uint8_t *b2 = output2; + uint8_t *b3 = output3; + memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + memcpy(b1 + i0 * 168U, hbuf + 256U, 168U * sizeof (uint8_t)); + memcpy(b2 + i0 * 168U, hbuf + 512U, 168U * sizeof (uint8_t)); + memcpy(b3 + i0 * 168U, hbuf + 768U, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) 
Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + 
Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } } diff --git a/src/msvc/Hacl_Hash_SHA3_Scalar.c b/src/msvc/Hacl_Hash_SHA3_Scalar.c index 43d57482..6d6806a3 100644 --- a/src/msvc/Hacl_Hash_SHA3_Scalar.c +++ b/src/msvc/Hacl_Hash_SHA3_Scalar.c @@ -55,10 +55,10 @@ Hacl_Impl_SHA3_Vec_keccak_rndc[24U] = void Hacl_Hash_SHA3_Scalar_shake128( - uint32_t inputByteLen, - 
uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ) { uint32_t rateInBytes = 168U; @@ -447,10 +447,10 @@ Hacl_Hash_SHA3_Scalar_shake128( void Hacl_Hash_SHA3_Scalar_shake256( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ) { uint32_t rateInBytes = 136U; @@ -837,7 +837,7 @@ Hacl_Hash_SHA3_Scalar_shake256( memcpy(output + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 144U; uint64_t s[25U] = { 0U }; @@ -1223,7 +1223,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 136U; uint64_t s[25U] = { 0U }; @@ -1609,7 +1609,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 104U; uint64_t s[25U] = { 0U }; @@ -1995,7 +1995,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 
72U; uint64_t s[25U] = { 0U }; @@ -2381,3 +2381,418 @@ void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); } +uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t)); + return buf; +} + +void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint8_t *b0 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i0 * 168U, 168U * sizeof (uint8_t)); + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 
176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = 
Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_final( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +) +{ + uint32_t rem = inputByteLen % 168U; + uint8_t b2[256U] = { 0U }; + uint8_t *b_ = b2; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[rem] = 0x1FU; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u0 = load64_le(b); + ws[0U] = u0; + uint64_t u1 = load64_le(b + 8U); + ws[1U] = u1; + uint64_t u2 = load64_le(b + 16U); + ws[2U] = u2; + uint64_t u3 = load64_le(b + 24U); + ws[3U] = u3; + uint64_t u4 = load64_le(b + 32U); + ws[4U] = u4; + uint64_t u5 = load64_le(b + 40U); + ws[5U] = u5; + uint64_t u6 = load64_le(b + 48U); + ws[6U] = u6; + uint64_t u7 = load64_le(b + 56U); + ws[7U] = u7; + uint64_t u8 = load64_le(b + 64U); + ws[8U] = u8; + uint64_t u9 = load64_le(b + 72U); + ws[9U] = u9; + uint64_t u10 = load64_le(b + 80U); + ws[10U] = u10; + uint64_t u11 = load64_le(b + 88U); + ws[11U] = u11; + uint64_t u12 = load64_le(b + 96U); + ws[12U] = u12; + uint64_t u13 = load64_le(b + 104U); + ws[13U] = u13; + uint64_t u14 = load64_le(b + 112U); + ws[14U] = u14; + uint64_t u15 = load64_le(b + 120U); + ws[15U] = u15; + uint64_t u16 = load64_le(b + 128U); + ws[16U] = u16; + uint64_t u17 = load64_le(b + 136U); + ws[17U] = u17; + uint64_t u18 = load64_le(b + 144U); + ws[18U] = u18; + uint64_t u19 = load64_le(b + 152U); + ws[19U] = u19; + uint64_t u20 = load64_le(b + 160U); + ws[20U] = u20; + uint64_t u21 = load64_le(b + 168U); + ws[21U] = u21; + uint64_t u22 = load64_le(b + 176U); + ws[22U] = u22; + uint64_t u23 = load64_le(b + 184U); + ws[23U] = u23; + uint64_t u24 = load64_le(b + 192U); + ws[24U] = u24; + uint64_t u25 = load64_le(b + 200U); + ws[25U] = u25; + uint64_t u26 = load64_le(b + 208U); + ws[26U] = u26; + uint64_t u27 = load64_le(b + 216U); + 
ws[27U] = u27; + uint64_t u28 = load64_le(b + 224U); + ws[28U] = u28; + uint64_t u29 = load64_le(b + 232U); + ws[29U] = u29; + uint64_t u30 = load64_le(b + 240U); + ws[30U] = u30; + uint64_t u31 = load64_le(b + 248U); + ws[31U] = u31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[167U] = 0x80U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b1 = b4; + uint64_t u = load64_le(b1); + ws0[0U] = u; + uint64_t u32 = load64_le(b1 + 8U); + ws0[1U] = u32; + uint64_t u33 = load64_le(b1 + 16U); + ws0[2U] = u33; + uint64_t u34 = load64_le(b1 + 24U); + ws0[3U] = u34; + uint64_t u35 = load64_le(b1 + 32U); + ws0[4U] = u35; + uint64_t u36 = load64_le(b1 + 40U); + ws0[5U] = u36; + uint64_t u37 = load64_le(b1 + 48U); + ws0[6U] = u37; + uint64_t u38 = load64_le(b1 + 56U); + ws0[7U] = u38; + uint64_t u39 = load64_le(b1 + 64U); + ws0[8U] = u39; + uint64_t u40 = load64_le(b1 + 72U); + ws0[9U] = u40; + uint64_t u41 = load64_le(b1 + 80U); + ws0[10U] = u41; + uint64_t u42 = load64_le(b1 + 88U); + ws0[11U] = u42; + uint64_t u43 = load64_le(b1 + 96U); + ws0[12U] = u43; + uint64_t u44 = load64_le(b1 + 104U); + ws0[13U] = u44; + uint64_t u45 = load64_le(b1 + 112U); + ws0[14U] = u45; + uint64_t u46 = load64_le(b1 + 120U); + ws0[15U] = u46; + uint64_t u47 = load64_le(b1 + 128U); + ws0[16U] = u47; + uint64_t u48 = load64_le(b1 + 136U); + ws0[17U] = u48; + uint64_t u49 = load64_le(b1 + 144U); + ws0[18U] = u49; + uint64_t u50 = load64_le(b1 + 152U); + ws0[19U] = u50; + uint64_t u51 = load64_le(b1 + 160U); + ws0[20U] = u51; + uint64_t u52 = load64_le(b1 + 168U); + ws0[21U] = u52; + uint64_t u53 = load64_le(b1 + 176U); + ws0[22U] = u53; + uint64_t u54 = load64_le(b1 + 184U); + ws0[23U] = u54; + uint64_t u55 = load64_le(b1 + 192U); + ws0[24U] = u55; + uint64_t u56 = load64_le(b1 + 200U); + ws0[25U] = u56; + uint64_t u57 = load64_le(b1 + 208U); + ws0[26U] = u57; + uint64_t u58 = load64_le(b1 + 216U); + 
ws0[27U] = u58; + uint64_t u59 = load64_le(b1 + 224U); + ws0[28U] = u59; + uint64_t u60 = load64_le(b1 + 232U); + ws0[29U] = u60; + uint64_t u61 = load64_le(b1 + 240U); + ws0[30U] = u61; + uint64_t u62 = load64_le(b1 + 248U); + ws0[31U] = u62; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws0[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i1 + 5U * i] = state[i1 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + state[0U] = state[0U] ^ c; + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] 
= { 0U }; + memcpy(ws, state, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(output + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + diff --git a/src/msvc/Hacl_Hash_SHA3_Simd256.c b/src/msvc/Hacl_Hash_SHA3_Simd256.c index b9bfcee5..9046f3db 100644 --- a/src/msvc/Hacl_Hash_SHA3_Simd256.c +++ b/src/msvc/Hacl_Hash_SHA3_Simd256.c @@ -26,20 +26,19 @@ #include "Hacl_Hash_SHA3_Simd256.h" #include "internal/Hacl_Hash_SHA3_Scalar.h" -#include "libintvector.h" void 
Hacl_Hash_SHA3_Simd256_shake128( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -438,63 +437,63 @@ Hacl_Hash_SHA3_Simd256_shake128( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - 
b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 
160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -723,57 +722,57 @@ Hacl_Hash_SHA3_Simd256_shake128( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = 
b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); 
+ ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -1295,62 +1294,49 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = 
rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -1645,76 +1631,63 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] 
= ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof 
(uint8_t)); + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_shake256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -2113,63 +2086,63 @@ Hacl_Hash_SHA3_Simd256_shake256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 
0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 
128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -2398,57 +2371,57 @@ Hacl_Hash_SHA3_Simd256_shake256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + 
uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 
160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -2970,62 +2943,49 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + 
ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -3320,75 +3280,62 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; 
Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 
outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_224( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -3787,63 +3734,63 @@ Hacl_Hash_SHA3_Simd256_sha3_224( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - 
rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] 
= Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -4072,57 +4019,57 @@ Hacl_Hash_SHA3_Simd256_sha3_224( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = 
b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 
96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -4644,62 +4591,49 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = 
ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes 
* sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -4994,75 +4928,62 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 28U - 
remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 28U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 28U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 28U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -5461,63 +5382,63 @@ Hacl_Hash_SHA3_Simd256_sha3_256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + 
inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - 
ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -5746,57 +5667,57 @@ Hacl_Hash_SHA3_Simd256_sha3_256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = 
b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 
96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -6318,62 +6239,49 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - 
ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + 
memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -6668,75 +6576,62 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof 
(uint8_t)); - memcpy(b1 + 32U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 32U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 32U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 32U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_384( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -7135,63 +7030,63 @@ Hacl_Hash_SHA3_Simd256_sha3_384( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t 
*bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - 
ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -7420,57 +7315,57 @@ Hacl_Hash_SHA3_Simd256_sha3_384( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t 
b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 
64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -7992,62 +7887,49 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = 
ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - 
memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -8342,75 +8224,62 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i 
< remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 48U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 48U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 48U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_512( - uint32_t inputByteLen, - uint8_t *input0, - uint8_t *input1, - uint8_t *input2, - uint8_t *input3, uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -8809,63 +8678,63 @@ Hacl_Hash_SHA3_Simd256_sha3_512( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = 
ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; 
Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -9094,57 +8963,57 @@ Hacl_Hash_SHA3_Simd256_sha3_512( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -9666,62 +9535,49 @@ 
Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = 
rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -10016,61 +9872,1482 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = 
ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 64U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 64U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 64U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(100U, sizeof (uint64_t)); + return buf; +} + +void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t 
*input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl1, b11 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl2, b21 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl3, b31 + i0 * 168U, 168U * sizeof (uint8_t)); + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b_.snd.snd.snd; + uint8_t *b2 = b_.snd.snd.fst; + uint8_t *b1 = b_.snd.fst; + uint8_t *b0 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws[15U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + 
Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 
= v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, 
v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 
10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, 
state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + uint32_t rem = inputByteLen % 168U; + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + 
uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws[18U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + 
Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws32 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + 
Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = 
ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, 
v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws[0U] = ws00; + ws[1U] = ws110; + ws[2U] = ws210; + ws[3U] = ws32; + ws[4U] = ws40; + ws[5U] = ws50; + ws[6U] = ws60; + ws[7U] = ws70; + ws[8U] = ws80; + ws[9U] = ws90; + ws[10U] = ws100; + ws[11U] = ws111; + ws[12U] = ws120; + ws[13U] = ws130; + ws[14U] = ws140; + ws[15U] = ws150; + ws[16U] = ws160; + ws[17U] = ws170; + ws[18U] = ws180; + ws[19U] = ws190; + ws[20U] = ws200; + ws[21U] = ws211; + ws[22U] = ws220; + ws[23U] = ws230; + ws[24U] = ws240; + ws[25U] = ws250; + ws[26U] = ws260; + ws[27U] = ws270; + ws[28U] = ws280; + ws[29U] = ws290; + ws[30U] = ws300; + ws[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[167U] = 0x80U; + b15[167U] = 0x80U; + b25[167U] = 0x80U; + b35[167U] = 0x80U; + 
KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws33[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b2 = b.snd.snd.fst; + uint8_t *b1 = b.snd.fst; + uint8_t *b0 = b.fst; + ws33[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws33[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws33[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws33[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws33[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws33[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws33[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws33[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws33[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws33[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws33[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws33[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws33[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws33[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws33[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws33[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws33[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws33[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws33[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws33[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws33[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws33[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws33[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws33[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws33[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws33[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws33[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + 
ws33[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws33[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws33[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws33[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws33[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v08 = ws33[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws33[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws33[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws33[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws33[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws33[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws33[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws33[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + 
Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws33[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws33[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws33[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws33[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + 
Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws33[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws33[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws33[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws33[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws33[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws33[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws33[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws33[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); 
+ Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws33[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws33[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws33[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws33[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws33[24U]; + Lib_IntVector_Intrinsics_vec256 
v114 = ws33[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws33[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws33[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws33[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws33[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws33[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws33[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws33[0U] = ws0; + ws33[1U] = ws1; + ws33[2U] = ws2; + ws33[3U] = ws3; + ws33[4U] = ws4; + ws33[5U] = ws5; + ws33[6U] = ws6; + ws33[7U] = ws7; + ws33[8U] = ws8; + ws33[9U] = ws9; + ws33[10U] = ws10; + ws33[11U] = ws11; + ws33[12U] = ws12; + ws33[13U] = ws13; + ws33[14U] = ws14; + ws33[15U] = ws15; + ws33[16U] = ws16; + ws33[17U] = ws17; + ws33[18U] = ws18; + ws33[19U] = ws19; + ws33[20U] = ws20; + ws33[21U] = ws21; + ws33[22U] = ws22; + ws33[23U] = ws23; + ws33[24U] = ws24; + ws33[25U] = ws25; + ws33[26U] = ws26; + ws33[27U] = ws27; + ws33[28U] = ws28; + ws33[29U] = ws29; + ws33[30U] = ws30; + ws33[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws33[i]); + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i1 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i1 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 
+ _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i1 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i1 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + 
Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v015; + state[1U + 5U * i] = v115; + state[2U + 5U * i] = v215; + state[3U + 5U * i] = v315; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, state, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + 
Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + 
Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 
ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b0 = output0; + uint8_t *b1 = 
output1; + uint8_t *b2 = output2; + uint8_t *b3 = output3; + memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + memcpy(b1 + i0 * 168U, hbuf + 256U, 168U * sizeof (uint8_t)); + memcpy(b2 + i0 * 168U, hbuf + 512U, 168U * sizeof (uint8_t)); + memcpy(b3 + i0 * 168U, hbuf + 768U, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, 
+ 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + 
Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } } diff --git a/tests/sha3.cc b/tests/sha3.cc index 858c1542..cc88a2e8 100644 --- a/tests/sha3.cc +++ b/tests/sha3.cc @@ -178,7 +178,7 @@ TEST(ApiSuite, ApiTest) uint8_t digest[HACL_HASH_SHA3_256_DIGEST_LENGTH]; - Hacl_Hash_SHA3_Scalar_sha3_256(message_size, (uint8_t*)message, digest); + Hacl_Hash_SHA3_Scalar_sha3_256(digest, (uint8_t*)message, message_size); // ANCHOR_END(example scalar_sha3_256) bytes expected_digest = from_hex( @@ -204,7 +204,7 @@ TEST(ApiSuite, ApiTest) uint8_t digest[42]; Hacl_Hash_SHA3_Scalar_shake128( - message_size, (uint8_t*)message, digest_size, digest); + digest, digest_size, (uint8_t*)message, message_size); // ANCHOR_END(example scalar_shake128) bytes expected_digest = @@ -232,15 +232,15 @@ TEST(ApiSuite, ApiTest) uint8_t digest2[HACL_HASH_SHA3_256_DIGEST_LENGTH]; uint8_t digest3[HACL_HASH_SHA3_256_DIGEST_LENGTH]; - Hacl_Hash_SHA3_Simd256_sha3_256(message_size, + Hacl_Hash_SHA3_Simd256_sha3_256(digest0, + digest1, + digest2, + digest3, (uint8_t*)message, (uint8_t*)message, (uint8_t*)message, (uint8_t*)message, - digest0, - digest1, - digest2, - digest3); + message_size); // ANCHOR_END(example vec256_sha3_256) bytes expected_digest = from_hex( @@ -283,16 +283,16 @@ TEST(ApiSuite, ApiTest) uint8_t digest2[42]; uint8_t digest3[42]; - Hacl_Hash_SHA3_Simd256_shake128(message_size, + Hacl_Hash_SHA3_Simd256_shake128(digest0, + digest1, + digest2, + digest3, + digest_size, (uint8_t*)message0, (uint8_t*)message1, (uint8_t*)message2, (uint8_t*)message3, - digest_size, - digest0, - digest1, - digest2, - digest3); + message_size); // ANCHOR_END(example vec256_shake128) bytes expected_digest0 = from_hex( @@ -356,16 +356,16 @@ TEST_P(Sha3KAT, TryKAT) bytes digest(test_case.md.size(), 0); if (test_case.md.size() == 224 / 8) { Hacl_Hash_SHA3_Scalar_sha3_224( - test_case.msg.size(), test_case.msg.data(), digest.data()); + digest.data(), test_case.msg.data(), test_case.msg.size()); } else if 
(test_case.md.size() == 256 / 8) { Hacl_Hash_SHA3_Scalar_sha3_256( - test_case.msg.size(), test_case.msg.data(), digest.data()); + digest.data(), test_case.msg.data(), test_case.msg.size()); } else if (test_case.md.size() == 384 / 8) { Hacl_Hash_SHA3_Scalar_sha3_384( - test_case.msg.size(), test_case.msg.data(), digest.data()); + digest.data(), test_case.msg.data(), test_case.msg.size()); } else if (test_case.md.size() == 512 / 8) { Hacl_Hash_SHA3_Scalar_sha3_512( - test_case.msg.size(), test_case.msg.data(), digest.data()); + digest.data(), test_case.msg.data(), test_case.msg.size()); } EXPECT_EQ(test_case.md, digest) << bytes_to_hex(test_case.md) << std::endl @@ -380,45 +380,45 @@ TEST_P(Sha3KAT, TryKAT) bytes digest2(test_case.md.size(), 0); bytes digest3(test_case.md.size(), 0); if (test_case.md.size() == 224 / 8) { - Hacl_Hash_SHA3_Simd256_sha3_224(test_case.msg.size(), + Hacl_Hash_SHA3_Simd256_sha3_224(digest0.data(), + digest1.data(), + digest2.data(), + digest3.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.data(), + test_case.msg.size()); + } else if (test_case.md.size() == 256 / 8) { + Hacl_Hash_SHA3_Simd256_sha3_256(digest0.data(), digest1.data(), digest2.data(), - digest3.data()); - } else if (test_case.md.size() == 256 / 8) { - Hacl_Hash_SHA3_Simd256_sha3_256(test_case.msg.size(), + digest3.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.data(), + test_case.msg.size()); + } else if (test_case.md.size() == 384 / 8) { + Hacl_Hash_SHA3_Simd256_sha3_384(digest0.data(), digest1.data(), digest2.data(), - digest3.data()); - } else if (test_case.md.size() == 384 / 8) { - Hacl_Hash_SHA3_Simd256_sha3_384(test_case.msg.size(), + digest3.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.data(), + test_case.msg.size()); + } else if (test_case.md.size() == 512 / 8) { + 
Hacl_Hash_SHA3_Simd256_sha3_512(digest0.data(), digest1.data(), digest2.data(), - digest3.data()); - } else if (test_case.md.size() == 512 / 8) { - Hacl_Hash_SHA3_Simd256_sha3_512(test_case.msg.size(), + digest3.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.data(), - digest1.data(), - digest2.data(), - digest3.data()); + test_case.msg.size()); } EXPECT_EQ(test_case.md, digest0) << bytes_to_hex(test_case.md) << std::endl @@ -468,20 +468,20 @@ TEST_P(ShakeKAT, TryKAT) if (test_case.md.size() == 128 / 8) { bytes digest(test_case.md.size(), 128 / 8); - Hacl_Hash_SHA3_Scalar_shake128(test_case.msg.size(), - test_case.msg.data(), + Hacl_Hash_SHA3_Scalar_shake128(digest.data(), digest.size(), - digest.data()); + test_case.msg.data(), + test_case.msg.size()); EXPECT_EQ(test_case.md, digest) << bytes_to_hex(test_case.md) << std::endl << bytes_to_hex(digest) << std::endl; } else if (test_case.md.size() == 256 / 8) { bytes digest(test_case.md.size(), 256 / 8); - Hacl_Hash_SHA3_Scalar_shake256(test_case.msg.size(), - test_case.msg.data(), + Hacl_Hash_SHA3_Scalar_shake256(digest.data(), digest.size(), - digest.data()); + test_case.msg.data(), + test_case.msg.size()); EXPECT_EQ(test_case.md, digest) << bytes_to_hex(test_case.md) << std::endl << bytes_to_hex(digest) << std::endl; @@ -497,16 +497,16 @@ TEST_P(ShakeKAT, TryKAT) bytes digest2(test_case.md.size(), 128 / 8); bytes digest3(test_case.md.size(), 128 / 8); - Hacl_Hash_SHA3_Simd256_shake128(test_case.msg.size(), + Hacl_Hash_SHA3_Simd256_shake128(digest0.data(), + digest1.data(), + digest2.data(), + digest3.data(), + digest0.size(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.size(), - digest0.data(), - digest1.data(), - digest2.data(), - digest3.data()); + test_case.msg.size()); EXPECT_EQ(test_case.md, digest0) << bytes_to_hex(test_case.md) << std::endl @@ -526,16 +526,16 @@ TEST_P(ShakeKAT, TryKAT) bytes 
digest2(test_case.md.size(), 256 / 8); bytes digest3(test_case.md.size(), 256 / 8); - Hacl_Hash_SHA3_Simd256_shake256(test_case.msg.size(), + Hacl_Hash_SHA3_Simd256_shake256(digest0.data(), + digest1.data(), + digest2.data(), + digest3.data(), + digest0.size(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), test_case.msg.data(), - digest0.size(), - digest0.data(), - digest1.data(), - digest2.data(), - digest3.data()); + test_case.msg.size()); EXPECT_EQ(test_case.md, digest0) << bytes_to_hex(test_case.md) << std::endl