diff --git a/.gitmodules b/.gitmodules index a9caf6a..79292c9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "subtle"] path = subtle url = https://github.com/itzmeanjan/subtle.git +[submodule "dudect"] + path = dudect + url = https://github.com/oreparaz/dudect.git diff --git a/Makefile b/Makefile index 3088594..2f32eca 100644 --- a/Makefile +++ b/Makefile @@ -8,22 +8,28 @@ UBSAN_FLAGS = -g -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanit SHA3_INC_DIR = ./sha3/include SUBTLE_INC_DIR = ./subtle/include +DUDECT_INC_DIR = ./dudect/src I_FLAGS = -I ./include DEP_IFLAGS = -I $(SHA3_INC_DIR) -I $(SUBTLE_INC_DIR) +DUDECT_DEP_IFLAGS = $(DEP_IFLAGS) -I $(DUDECT_INC_DIR) SRC_DIR = include KYBER_SOURCES := $(wildcard $(SRC_DIR)/*.hpp) BUILD_DIR = build +DUDECT_BUILD_DIR = $(BUILD_DIR)/dudect ASAN_BUILD_DIR = $(BUILD_DIR)/asan UBSAN_BUILD_DIR = $(BUILD_DIR)/ubsan TEST_DIR = tests +DUDECT_TEST_DIR = $(TEST_DIR)/dudect TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp) +DUDECT_TEST_SOURCES := $(wildcard $(DUDECT_TEST_DIR)/*.cpp) TEST_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) ASAN_TEST_OBJECTS := $(addprefix $(ASAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) UBSAN_TEST_OBJECTS := $(addprefix $(UBSAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) TEST_LINK_FLAGS = -lgtest -lgtest_main TEST_BINARY = $(BUILD_DIR)/test.out +DUDECT_TEST_BINARIES := $(addprefix $(DUDECT_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.out,$(DUDECT_TEST_SOURCES)))) ASAN_TEST_BINARY = $(ASAN_BUILD_DIR)/test.out UBSAN_TEST_BINARY = $(UBSAN_BUILD_DIR)/test.out @@ -37,6 +43,9 @@ PERF_BINARY = $(BUILD_DIR)/perf.out all: test +$(DUDECT_BUILD_DIR): + mkdir -p $@ + $(ASAN_BUILD_DIR): mkdir -p $@ @@ -49,6 +58,8 @@ $(BUILD_DIR): $(SHA3_INC_DIR): git submodule update --init +$(DUDECT_INC_DIR): $(SHA3_INC_DIR) + $(SUBTLE_INC_DIR): $(SHA3_INC_DIR) $(BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR) 
$(SUBTLE_INC_DIR) @@ -63,6 +74,9 @@ $(UBSAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(UBSAN_BUILD_DIR) $(SHA3_INC_DIR) $(S $(TEST_BINARY): $(TEST_OBJECTS) $(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ +$(DUDECT_BUILD_DIR)/%.out: $(DUDECT_TEST_DIR)/%.cpp $(DUDECT_BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) $(DUDECT_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DUDECT_DEP_IFLAGS) $(LINK_FLAGS) $< -o $@ + $(ASAN_TEST_BINARY): $(ASAN_TEST_OBJECTS) $(CXX) $(ASAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ @@ -72,6 +86,9 @@ $(UBSAN_TEST_BINARY): $(UBSAN_TEST_OBJECTS) test: $(TEST_BINARY) ./$< --gtest_shuffle --gtest_random_seed=0 +dudect_test: $(DUDECT_TEST_BINARIES) + $(foreach binary,$^,timeout 3.0m ./$(binary);) + asan_test: $(ASAN_TEST_BINARY) ./$< --gtest_shuffle --gtest_random_seed=0 @@ -100,5 +117,5 @@ perf: $(PERF_BINARY) clean: rm -rf $(BUILD_DIR) -format: $(KYBER_SOURCES) $(TEST_SOURCES) $(BENCHMARK_SOURCES) +format: $(KYBER_SOURCES) $(TEST_SOURCES) $(DUDECT_TEST_SOURCES) $(BENCHMARK_SOURCES) clang-format -i $^ diff --git a/README.md b/README.md index f4349fd..884c70c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ > [!CAUTION] -> This Kyber implementation is conformant with Kyber specification https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf and I also *try* to make it constant-time but be informed that it is not yet audited. *If you consider using it in production, be careful !* +> This Kyber implementation is conformant with Kyber specification https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf and I also *try* to make it timing leakage free, using **dudect** (see https://github.com/oreparaz/dudect) but be informed that it is not yet audited. 
*If you consider using it in production, be careful !* # kyber CRYSTALS-Kyber: Post-Quantum Public-key Encryption & Key-establishment Algorithm @@ -10,7 +10,7 @@ Kyber is being standardized by NIST as post-quantum secure key encapsulation mec Kyber offers an *IND-CCA2-secure* Key Encapsulation Mechanism - its security is based on the hardness of solving the learning-with-errors (LWE) problem in module (i.e. structured) lattices. -Kyber Key Encapsulation Mechanism is built on top of *IND-CPA-secure Kyber Public Key Encryption*, where two communicating parties, both generating their key pairs, while publishing their public keys to each other, can encrypt fixed length ( = 32 -bytes ) message using peer's public key. Cipher text can be decrypted by corresponding secret key ( which is private to the keypair owner ) and 32 -bytes message can be recovered back. Then a slightly tweaked Fujisaki–Okamoto (FO) transform is applied on *IND-CPA-secure Kyber PKE* - giving us the *IND-CCA2-secure KEM* construction. In KEM scheme, two parties interested in establishing a secure communication channel over public & insecure channel, can generate a shared secret key ( of arbitrary byte length ) from a key derivation function ( i.e. KDF which is SHAKE256 Xof in this context ) which is obtained by both of these parties as result of seeding SHAKE256 Xof with same secret. This secret is 32 -bytes and that's what is communicated by sender to receiver using underlying Kyber PKE scheme. +Kyber Key Encapsulation Mechanism is built on top of *IND-CPA-secure Kyber Public Key Encryption*, where two communicating parties, both generating their key pairs, while publishing only their public keys to each other, can encrypt fixed length ( = 32 -bytes ) message using peer's public key. Cipher text can be decrypted by corresponding secret key ( which is private to the keypair owner ) and 32 -bytes message can be recovered back. 
Then a slightly tweaked Fujisaki–Okamoto (FO) transform is applied on *IND-CPA-secure Kyber PKE* - giving us the *IND-CCA2-secure KEM* construction. In KEM scheme, two parties interested in establishing a secure communication channel over public & insecure channel, can generate a shared secret key ( of arbitrary byte length ) from a key derivation function ( i.e. KDF which is SHAKE256 Xof in this context ) which is obtained by both of these parties as result of seeding SHAKE256 Xof with same secret. This secret is 32 -bytes and that's what is communicated by sender to receiver using underlying Kyber PKE scheme. Algorithm | Input | Output --- | :-: | --: @@ -18,14 +18,11 @@ KEM KeyGen | - | Public Key and Secret Key Encapsulation | Public Key | Cipher Text and SHAKE256 KDF Decapsulation | Secret Key and Cipher Text | SHAKE256 KDF -Here I'm maintaining `kyber` - a header-only and easy-to-use ( see more in [usage](#usage) ) C++ library implementing Kyber KEM, supporting Kyber-{512, 768, 1024} parameter sets, as defined in table 1 of Kyber specification. `sha3` and `subtle` are two dependencies of this library, which are pinned to specific git commits, using git submodule. +Here I'm maintaining `kyber` - a header-only and easy-to-use ( see more in [usage](#usage) ) C++ library implementing Kyber KEM, supporting Kyber-{512, 768, 1024} parameter sets, as defined in table 1 of Kyber specification. `sha3`, `subtle` and `dudect` (for timing leakage tests) are dependencies of this library, which are pinned to specific git commits, using git submodule. > [!NOTE] > Find Kyber specification https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf - this is the document that I followed when implementing Kyber. I suggest you go through the specification to get an in-depth understanding of Kyber PQC suite. -> [!NOTE] -> Find progress of NIST PQC standardization effort @ https://csrc.nist.gov/projects/post-quantum-cryptography. 
- ## Prerequisites - A C++ compiler with C++20 standard library such as `clang++`/ `g++`. @@ -74,37 +71,99 @@ make ubsan_test -j # Run tests with UndefinedBehaviourSanitizer enabled ``` ```bash -Note: Randomizing tests' orders with a seed of 61247 . +Note: Randomizing tests' orders with a seed of 50193 . [==========] Running 10 tests from 1 test suite. [----------] Global test environment set-up. [----------] 10 tests from KyberKEM [ RUN ] KyberKEM.ArithmeticOverZq -[ OK ] KyberKEM.ArithmeticOverZq (116 ms) -[ RUN ] KyberKEM.NumberTheoreticTransform -[ OK ] KyberKEM.NumberTheoreticTransform (0 ms) +[ OK ] KyberKEM.ArithmeticOverZq (126 ms) +[ RUN ] KyberKEM.Kyber768KeygenEncapsDecaps +[ OK ] KyberKEM.Kyber768KeygenEncapsDecaps (0 ms) +[ RUN ] KyberKEM.Kyber512KeygenEncapsDecaps +[ OK ] KyberKEM.Kyber512KeygenEncapsDecaps (0 ms) [ RUN ] KyberKEM.Kyber768KnownAnswerTests [ OK ] KyberKEM.Kyber768KnownAnswerTests (8 ms) [ RUN ] KyberKEM.Kyber512KnownAnswerTests [ OK ] KyberKEM.Kyber512KnownAnswerTests (5 ms) [ RUN ] KyberKEM.CompressDecompressZq -[ OK ] KyberKEM.CompressDecompressZq (94 ms) +[ OK ] KyberKEM.CompressDecompressZq (98 ms) [ RUN ] KyberKEM.Kyber1024KnownAnswerTests [ OK ] KyberKEM.Kyber1024KnownAnswerTests (13 ms) -[ RUN ] KyberKEM.Kyber768KeygenEncapsDecaps -[ OK ] KyberKEM.Kyber768KeygenEncapsDecaps (0 ms) -[ RUN ] KyberKEM.Kyber512KeygenEncapsDecaps -[ OK ] KyberKEM.Kyber512KeygenEncapsDecaps (0 ms) +[ RUN ] KyberKEM.NumberTheoreticTransform +[ OK ] KyberKEM.NumberTheoreticTransform (0 ms) [ RUN ] KyberKEM.PolynomialSerialization [ OK ] KyberKEM.PolynomialSerialization (0 ms) [ RUN ] KyberKEM.Kyber1024KeygenEncapsDecaps [ OK ] KyberKEM.Kyber1024KeygenEncapsDecaps (0 ms) -[----------] 10 tests from KyberKEM (238 ms total) +[----------] 10 tests from KyberKEM (253 ms total) [----------] Global test environment tear-down -[==========] 10 tests from 1 test suite ran. (238 ms total) +[==========] 10 tests from 1 test suite ran. 
(253 ms total) [ PASSED ] 10 tests. ``` +In case you're interested in running timing leakage tests using `dudect`, execute the following + +> [!NOTE] +> `dudect` is integrated into this library implementation of Kyber KEM to find any sort of timing leakages. It checks for constant-timeness of all *vital* functions, including the Fujisaki-Okamoto transform used in the decapsulation step. It doesn't check constant-timeness of the function which samples the public matrix `A`, because that fails the check anyway, due to the use of uniform rejection sampling. As matrix `A` is public, it's not critical that it must be *strictly* constant-time. + +```bash +make dudect_test -j # Only on x86_64 machine + # Each executable is run for at most 3 minutes. +``` + +> [!TIP] +> `dudect` documentation says if the `t` statistic is < 10, we're *probably* good, yes **probably**. You may want to read the `dudect` documentation @ https://github.com/oreparaz/dudect. Also you might find the original paper @ https://ia.cr/2016/1123 interesting. + +```bash +meas: 0.10 M, max t: +2.35, max tau: 7.27e-03, (5/tau)^2: 4.73e+05. For the moment, maybe constant time. +meas: 0.12 M, max t: +1.89, max tau: 5.57e-03, (5/tau)^2: 8.06e+05. For the moment, maybe constant time. +meas: 3.10 M, max t: +2.48, max tau: 1.41e-03, (5/tau)^2: 1.26e+07. For the moment, maybe constant time. +meas: 2.07 M, max t: +1.72, max tau: 1.20e-03, (5/tau)^2: 1.75e+07. For the moment, maybe constant time. +meas: 2.10 M, max t: +1.66, max tau: 1.14e-03, (5/tau)^2: 1.91e+07. For the moment, maybe constant time. +meas: 6.01 M, max t: +1.67, max tau: 6.82e-04, (5/tau)^2: 5.37e+07. For the moment, maybe constant time. +meas: 7.31 M, max t: +1.67, max tau: 6.18e-04, (5/tau)^2: 6.54e+07. For the moment, maybe constant time. +meas: 7.96 M, max t: +2.04, max tau: 7.22e-04, (5/tau)^2: 4.80e+07. For the moment, maybe constant time. +meas: 9.41 M, max t: +1.70, max tau: 5.54e-04, (5/tau)^2: 8.14e+07. For the moment, maybe constant time. 
+meas: 9.89 M, max t: +1.59, max tau: 5.05e-04, (5/tau)^2: 9.78e+07. For the moment, maybe constant time. +meas: 0.99 M, max t: +2.16, max tau: 2.18e-03, (5/tau)^2: 5.28e+06. For the moment, maybe constant time. +meas: 0.14 M, max t: +2.04, max tau: 5.44e-03, (5/tau)^2: 8.45e+05. For the moment, maybe constant time. +meas: 2.31 M, max t: +2.90, max tau: 1.90e-03, (5/tau)^2: 6.89e+06. For the moment, maybe constant time. +meas: 3.03 M, max t: +3.55, max tau: 2.04e-03, (5/tau)^2: 5.99e+06. For the moment, maybe constant time. +meas: 3.56 M, max t: +3.23, max tau: 1.71e-03, (5/tau)^2: 8.56e+06. For the moment, maybe constant time. +meas: 4.18 M, max t: +2.42, max tau: 1.18e-03, (5/tau)^2: 1.78e+07. For the moment, maybe constant time. +meas: 7.16 M, max t: +2.40, max tau: 8.96e-04, (5/tau)^2: 3.12e+07. For the moment, maybe constant time. +meas: 8.25 M, max t: +2.21, max tau: 7.68e-04, (5/tau)^2: 4.24e+07. For the moment, maybe constant time. +meas: 9.20 M, max t: +2.27, max tau: 7.48e-04, (5/tau)^2: 4.47e+07. For the moment, maybe constant time. +meas: 10.23 M, max t: +2.45, max tau: 7.66e-04, (5/tau)^2: 4.26e+07. For the moment, maybe constant time. +meas: 6.93 M, max t: +2.54, max tau: 9.65e-04, (5/tau)^2: 2.69e+07. For the moment, maybe constant time. +meas: 7.49 M, max t: +2.54, max tau: 9.30e-04, (5/tau)^2: 2.89e+07. For the moment, maybe constant time. +meas: 8.04 M, max t: +2.16, max tau: 7.61e-04, (5/tau)^2: 4.32e+07. For the moment, maybe constant time. +meas: 8.57 M, max t: +2.08, max tau: 7.10e-04, (5/tau)^2: 4.96e+07. For the moment, maybe constant time. +meas: 9.15 M, max t: +2.03, max tau: 6.72e-04, (5/tau)^2: 5.54e+07. For the moment, maybe constant time. +meas: 0.15 M, max t: +1.80, max tau: 4.60e-03, (5/tau)^2: 1.18e+06. For the moment, maybe constant time. +meas: 8.04 M, max t: +1.90, max tau: 6.70e-04, (5/tau)^2: 5.57e+07. For the moment, maybe constant time. +meas: 10.31 M, max t: +2.04, max tau: 6.35e-04, (5/tau)^2: 6.20e+07. 
For the moment, maybe constant time. +meas: 10.38 M, max t: +2.05, max tau: 6.35e-04, (5/tau)^2: 6.19e+07. For the moment, maybe constant time. +meas: 9.19 M, max t: +1.99, max tau: 6.56e-04, (5/tau)^2: 5.80e+07. For the moment, maybe constant time. +meas: 9.24 M, max t: +2.04, max tau: 6.69e-04, (5/tau)^2: 5.58e+07. For the moment, maybe constant time. +meas: 1.02 M, max t: +1.98, max tau: 1.97e-03, (5/tau)^2: 6.47e+06. For the moment, maybe constant time. +meas: 2.10 M, max t: +2.10, max tau: 1.45e-03, (5/tau)^2: 1.19e+07. For the moment, maybe constant time. +meas: 1.40 M, max t: +1.81, max tau: 1.52e-03, (5/tau)^2: 1.08e+07. For the moment, maybe constant time. +meas: 1.41 M, max t: +2.21, max tau: 1.86e-03, (5/tau)^2: 7.22e+06. For the moment, maybe constant time. +meas: 1.81 M, max t: +2.95, max tau: 2.19e-03, (5/tau)^2: 5.20e+06. For the moment, maybe constant time. +meas: 2.54 M, max t: +2.96, max tau: 1.86e-03, (5/tau)^2: 7.26e+06. For the moment, maybe constant time. +meas: 3.15 M, max t: +2.77, max tau: 1.56e-03, (5/tau)^2: 1.02e+07. For the moment, maybe constant time. +meas: 4.94 M, max t: +2.46, max tau: 1.11e-03, (5/tau)^2: 2.04e+07. For the moment, maybe constant time. +meas: 0.91 M, max t: +2.06, max tau: 2.17e-03, (5/tau)^2: 5.32e+06. For the moment, maybe constant time. +meas: 1.21 M, max t: +2.19, max tau: 1.99e-03, (5/tau)^2: 6.32e+06. For the moment, maybe constant time. +meas: 1.44 M, max t: +2.24, max tau: 1.87e-03, (5/tau)^2: 7.17e+06. For the moment, maybe constant time. +meas: 8.74 M, max t: +2.32, max tau: 7.87e-04, (5/tau)^2: 4.04e+07. For the moment, maybe constant time. +meas: 9.65 M, max t: +2.42, max tau: 7.80e-04, (5/tau)^2: 4.11e+07. For the moment, maybe constant time. +meas: 10.57 M, max t: +2.22, max tau: 6.82e-04, (5/tau)^2: 5.38e+07. For the moment, maybe constant time. +meas: 11.71 M, max t: +2.45, max tau: 7.16e-04, (5/tau)^2: 4.88e+07. For the moment, maybe constant time. 
+``` + ## Benchmarking For benchmarking Kyber KEM routines ( i.e. keygen, encaps and decaps ) for various suggested parameter sets, you have to issue. diff --git a/benchmarks/bench_kem.cpp b/benchmarks/bench_kem.cpp index 30f68e2..ce354ed 100644 --- a/benchmarks/bench_kem.cpp +++ b/benchmarks/bench_kem.cpp @@ -8,8 +8,8 @@ void bench_keygen(benchmark::State& state) { constexpr size_t slen = 32; - constexpr size_t pklen = kyber_utils::get_kem_public_key_len(); - constexpr size_t sklen = kyber_utils::get_kem_secret_key_len(); + constexpr size_t pklen = kyber_utils::get_kem_public_key_len(k); + constexpr size_t sklen = kyber_utils::get_kem_secret_key_len(k); std::vector d(slen); std::vector z(slen); @@ -44,9 +44,9 @@ void bench_encapsulate(benchmark::State& state) { constexpr size_t slen = 32; - constexpr size_t pklen = kyber_utils::get_kem_public_key_len(); - constexpr size_t sklen = kyber_utils::get_kem_secret_key_len(); - constexpr size_t ctlen = kyber_utils::get_kem_cipher_len(); + constexpr size_t pklen = kyber_utils::get_kem_public_key_len(k); + constexpr size_t sklen = kyber_utils::get_kem_secret_key_len(k); + constexpr size_t ctlen = kyber_utils::get_kem_cipher_len(k, du, dv); constexpr size_t klen = 32; std::vector d(slen); @@ -94,9 +94,9 @@ void bench_decapsulate(benchmark::State& state) { constexpr size_t slen = 32; - constexpr size_t pklen = kyber_utils::get_kem_public_key_len(); - constexpr size_t sklen = kyber_utils::get_kem_secret_key_len(); - constexpr size_t ctlen = kyber_utils::get_kem_cipher_len(); + constexpr size_t pklen = kyber_utils::get_kem_public_key_len(k); + constexpr size_t sklen = kyber_utils::get_kem_secret_key_len(k); + constexpr size_t ctlen = kyber_utils::get_kem_cipher_len(k, du, dv); constexpr size_t klen = 32; std::vector d(slen); diff --git a/dudect b/dudect new file mode 160000 index 0000000..04235f7 --- /dev/null +++ b/dudect @@ -0,0 +1 @@ +Subproject commit 04235f7f2ba29006c5e6eb3b4c58d7c45ad8dcb1 diff --git 
a/include/compression.hpp b/include/compression.hpp index 8c3a2b0..36f3a19 100644 --- a/include/compression.hpp +++ b/include/compression.hpp @@ -12,20 +12,23 @@ namespace kyber_utils { // // See top of page 5 of Kyber specification // https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf +// +// Following implementation collects inspiration from https://github.com/FiloSottile/mlkem768/blob/cffbfb96c407b3cfc9f6e1749475b673794402c1/mlkem768.go#L395-L425. template static inline constexpr field::zq_t compress(const field::zq_t x) requires(kyber_params::check_d(d)) { - constexpr uint16_t t0 = 1u << d; - constexpr uint32_t t1 = field::Q >> 1; + constexpr uint16_t mask = (1u << d) - 1; - const uint32_t t2 = x.raw() << d; - const uint32_t t3 = t2 + t1; - const uint16_t t4 = static_cast(t3 / field::Q); - const uint16_t t5 = t4 & (t0 - 1); + const auto dividend = x.raw() << d; + const auto quotient0 = static_cast((static_cast(dividend) * field::R) >> (field::RADIX_BIT_WIDTH * 2)); + const auto remainder = dividend - quotient0 * field::Q; + + const auto quotient1 = quotient0 + ((((field::Q / 2) - remainder) >> 31) & 1); + const auto quotient2 = quotient1 + (((field::Q + (field::Q / 2) - remainder) >> 31) & 1); - return field::zq_t(t5); + return field::zq_t(static_cast(quotient2) & mask); } // Given an element x ∈ [0, 2^d) | d < round(log2(q)), this routine decompresses diff --git a/include/field.hpp b/include/field.hpp index fe28f0f..a369a42 100644 --- a/include/field.hpp +++ b/include/field.hpp @@ -47,11 +47,8 @@ struct zq_t // Modulo addition of two Zq elements. inline constexpr zq_t operator+(const zq_t rhs) const { - const uint32_t t0 = this->v + rhs.v; - const auto mask = -static_cast(t0 >= Q); - const uint32_t t1 = t0 - (mask & Q); - - return zq_t(t1); + const uint32_t t = this->v + rhs.v; + return zq_t(reduce_once(t)); } // Compound modulo addition of two Zq elements. 
@@ -132,10 +129,8 @@ struct zq_t // Note, v is always kept in its canonical form i.e. v ∈ [0, Q). uint32_t v = 0u; - // Given a 32 -bit unsigned integer `v` such that `v` ∈ [0, Q*Q), this routine - // can be invoked for reducing `v` modulo Q, using barrett reduction - // technique, following algorithm description @ - // https://www.nayuki.io/page/barrett-reduction-algorithm. + // Given a 32 -bit unsigned integer `v` such that `v` ∈ [0, Q*Q), this routine can be invoked for reducing `v` modulo Q, using + // barrett reduction technique, following algorithm description @ https://www.nayuki.io/page/barrett-reduction-algorithm. static inline constexpr uint32_t barrett_reduce(const uint32_t v) { const uint64_t t0 = static_cast(v) * static_cast(R); @@ -143,10 +138,18 @@ struct zq_t const uint32_t t2 = t1 * Q; const uint32_t t = v - t2; - const auto mask = -static_cast(t >= Q); - const uint32_t t_prime = t - (mask & Q); + return reduce_once(t); + } + + // Given a 32 -bit unsigned integer `v` such that `v` ∈ [0, 2*Q), this routine can be invoked for reducing `v` modulo prime Q. 
+ static inline constexpr uint32_t reduce_once(const uint32_t v) + { + const uint32_t t0 = v - Q; + const uint32_t t1 = -(t0 >> 31); + const uint32_t t2 = Q & t1; + const uint32_t t3 = t0 + t2; - return t_prime; + return t3; } }; diff --git a/include/kem.hpp b/include/kem.hpp index f604f07..bce4cae 100644 --- a/include/kem.hpp +++ b/include/kem.hpp @@ -3,9 +3,9 @@ #include "sha3_256.hpp" #include "sha3_512.hpp" #include "shake256.hpp" -#include "subtle.hpp" #include "utils.hpp" #include +#include // IND-CCA2-secure Key Encapsulation Mechanism namespace kem { @@ -29,8 +29,8 @@ template static inline void keygen(std::span d, // used in CPA-PKE std::span z, // used in CCA-KEM - std::span()> pubkey, - std::span()> seckey) + std::span pubkey, + std::span seckey) requires(kyber_params::check_keygen_params(k, eta1)) { constexpr size_t skoff0 = k * 12 * 32; @@ -76,8 +76,8 @@ keygen(std::span d, // used in CPA-PKE template static inline shake256::shake256_t encapsulate(std::span m, - std::span()> pubkey, - std::span()> cipher) + std::span pubkey, + std::span cipher) requires(kyber_params::check_encap_params(k, eta1, eta2, du, dv)) { std::array g_in{}; @@ -144,7 +144,7 @@ encapsulate(std::span m, // https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf template static inline shake256::shake256_t -decapsulate(std::span()> seckey, std::span()> cipher) +decapsulate(std::span seckey, std::span cipher) requires(kyber_params::check_decap_params(k, eta1, eta2, du, dv)) { constexpr size_t sklen = k * 12 * 32; @@ -189,14 +189,9 @@ decapsulate(std::span()> s pke::encrypt(pubkey, _g_in0, _g_out1, c_prime); // line 7-11 of algorithm 9, in constant-time - uint32_t flg = -1u; - for (size_t i = 0; i < ctlen; i++) { - flg &= subtle::ct_eq(cipher[i], c_prime[i]); - } - - for (size_t i = 0; i < 32; i++) { - kdf_in[i] = subtle::ct_select(flg, g_out[i], z[i]); - } + using kdf_t = std::span; + const uint32_t cond = kyber_utils::ct_memcmp(cipher, std::span(c_prime)); + 
kyber_utils::ct_cond_memcpy(cond, _kdf_in0, kdf_t(_g_out0), kdf_t(z)); sha3_256::sha3_256_t h256; h256.absorb(cipher); diff --git a/include/kyber1024_kem.hpp b/include/kyber1024_kem.hpp index fbd5348..852764e 100644 --- a/include/kyber1024_kem.hpp +++ b/include/kyber1024_kem.hpp @@ -16,13 +16,13 @@ constexpr size_t du = 11; constexpr size_t dv = 5; // = 1568 -bytes Kyber1024 public key -constexpr size_t PKEY_LEN = kyber_utils::get_kem_public_key_len(); +constexpr size_t PKEY_LEN = kyber_utils::get_kem_public_key_len(k); // = 3168 -bytes Kyber1024 secret key -constexpr size_t SKEY_LEN = kyber_utils::get_kem_secret_key_len(); +constexpr size_t SKEY_LEN = kyber_utils::get_kem_secret_key_len(k); // = 1568 -bytes Kyber1024 cipher text length -constexpr size_t CIPHER_LEN = kyber_utils::get_kem_cipher_len(); +constexpr size_t CIPHER_LEN = kyber_utils::get_kem_cipher_len(k, du, dv); // Computes a new Kyber1024 KEM keypair s.t. public key is 1568 -bytes and // secret key is 3168 -bytes, given 32 -bytes seed d ( used in CPA-PKE ) and 32 diff --git a/include/kyber512_kem.hpp b/include/kyber512_kem.hpp index 3cbc1ba..3da9362 100644 --- a/include/kyber512_kem.hpp +++ b/include/kyber512_kem.hpp @@ -16,13 +16,13 @@ constexpr size_t du = 10; constexpr size_t dv = 4; // = 800 -bytes Kyber512 public key -constexpr size_t PKEY_LEN = kyber_utils::get_kem_public_key_len(); +constexpr size_t PKEY_LEN = kyber_utils::get_kem_public_key_len(k); // = 1632 -bytes Kyber512 secret key -constexpr size_t SKEY_LEN = kyber_utils::get_kem_secret_key_len(); +constexpr size_t SKEY_LEN = kyber_utils::get_kem_secret_key_len(k); // = 768 -bytes Kyber512 cipher text length -constexpr size_t CIPHER_LEN = kyber_utils::get_kem_cipher_len(); +constexpr size_t CIPHER_LEN = kyber_utils::get_kem_cipher_len(k, du, dv); // Computes a new Kyber512 KEM keypair s.t. 
public key is 800 -bytes and secret // key is 1632 -bytes, given 32 -bytes seed d ( used in CPA-PKE ) and 32 -bytes diff --git a/include/kyber768_kem.hpp b/include/kyber768_kem.hpp index 92f20d6..9fa09f4 100644 --- a/include/kyber768_kem.hpp +++ b/include/kyber768_kem.hpp @@ -15,13 +15,13 @@ constexpr size_t du = 10; constexpr size_t dv = 4; // = 1184 -bytes Kyber768 public key -constexpr size_t PKEY_LEN = kyber_utils::get_kem_public_key_len(); +constexpr size_t PKEY_LEN = kyber_utils::get_kem_public_key_len(k); // = 2400 -bytes Kyber768 secret key -constexpr size_t SKEY_LEN = kyber_utils::get_kem_secret_key_len(); +constexpr size_t SKEY_LEN = kyber_utils::get_kem_secret_key_len(k); // = 1088 -bytes Kyber768 cipher text length -constexpr size_t CIPHER_LEN = kyber_utils::get_kem_cipher_len(); +constexpr size_t CIPHER_LEN = kyber_utils::get_kem_cipher_len(k, du, dv); // Computes a new Kyber768 KEM keypair s.t. public key is 1184 -bytes and secret // key is 2400 -bytes, given 32 -bytes seed d ( used in CPA-PKE ) and 32 -bytes diff --git a/include/utils.hpp b/include/utils.hpp index ad6766c..0c58bca 100644 --- a/include/utils.hpp +++ b/include/utils.hpp @@ -1,5 +1,6 @@ #pragma once #include "params.hpp" +#include "subtle.hpp" #include #include #include @@ -12,6 +13,32 @@ // IND-CPA-secure Public Key Encryption Scheme Utilities namespace kyber_utils { +// Given two byte arrays of equal length, this routine can be used for comparing them in constant-time, +// producing truth value (0xffffffff) in case of equality, otherwise it returns false value (0x00000000). 
+template +static inline uint32_t +ct_memcmp(std::span bytes0, std::span bytes1) +{ + uint32_t flag = -1u; + for (size_t i = 0; i < n; i++) { + flag &= subtle::ct_eq(bytes0[i], bytes1[i]); + } + + return flag; +} + +// Given a branch value, taking either 0x00000000 (false value) or 0xffffffff (truth value), this routine can be used for conditionally +// copying bytes from either `source0` byte array (in case branch holds truth value) or `source1` byte array (if branch holds false value) +// to `sink` byte array, all in constant-time. Note, all these byte arrays are of equal length. +template +static inline void +ct_cond_memcpy(const uint32_t cond, std::span sink, std::span source0, std::span source1) +{ + for (size_t i = 0; i < n; i++) { + sink[i] = subtle::ct_select(cond, source0[i], source1[i]); + } +} + // Given a bytearray of length N, this function converts it to human readable // hex string of length N << 1 | N >= 0 inline const std::string @@ -54,31 +81,25 @@ from_hex(std::string_view bytes) } // Compile-time compute IND-CCA-secure Kyber KEM public key length ( in bytes ) -template static inline constexpr size_t -get_kem_public_key_len() - requires(kyber_params::check_k(k)) +get_kem_public_key_len(const size_t k) { return k * 12 * 32 + 32; } // Compile-time compute IND-CCA-secure Kyber KEM secret key length ( in bytes ) -template static inline constexpr size_t -get_kem_secret_key_len() - requires(kyber_params::check_k(k)) +get_kem_secret_key_len(const size_t k) { - constexpr size_t t0 = k * 12 * 32; - constexpr size_t t1 = get_kem_public_key_len(); + const size_t t0 = k * 12 * 32; + const size_t t1 = get_kem_public_key_len(k); return t0 + t1 + 32 + 32; } // Compile-time compute IND-CCA-secure Kyber KEM cipher text length ( in bytes ) -template static inline constexpr size_t -get_kem_cipher_len() - requires(kyber_params::check_decrypt_params(k, du, dv)) +get_kem_cipher_len(size_t k, size_t du, size_t dv) { return k * du * 32 + dv * 32; } diff --git 
a/tests/dudect/test_kyber1024_kem.cpp b/tests/dudect/test_kyber1024_kem.cpp new file mode 100644 index 0000000..96864ca --- /dev/null +++ b/tests/dudect/test_kyber1024_kem.cpp @@ -0,0 +1,106 @@ +#include "kyber1024_kem.hpp" + +#define DUDECT_IMPLEMENTATION +#define DUDECT_VISIBLITY_STATIC +#include "dudect.h" + +constexpr size_t SEED_LEN = 32; // Byte length of seed(s) + +uint8_t +do_one_computation(uint8_t* const data) +{ + constexpr size_t doff0 = 0; + constexpr size_t doff1 = doff0 + SEED_LEN; + constexpr size_t doff2 = doff1 + 1; + constexpr size_t doff3 = doff2 + kyber1024_kem::CIPHER_LEN; + constexpr size_t doff4 = doff3 + kyber1024_kem::CIPHER_LEN; + constexpr size_t doff5 = doff4 + SEED_LEN; + constexpr size_t doff6 = doff5 + SEED_LEN; + + std::array poly_vec{}; + std::array byte_arr{}; + + auto sigma = std::span(data + doff0, doff1 - doff0); + const auto nonce = data[doff1]; + + // Generate new secret polynomial vector + kyber_utils::generate_vector(poly_vec, sigma, nonce); + // Apply NTT on that secret vector + kyber_utils::poly_vec_ntt(poly_vec); + // Apply iNTT on bit-reversed NTT form secret polynomial vector + kyber_utils::poly_vec_intt(poly_vec); + // Compress coefficients of polynomial vector + kyber_utils::poly_vec_compress(poly_vec); + // Serialize polynomial vector into byte array + kyber_utils::poly_vec_encode(poly_vec, byte_arr); + // Recover coefficients of polynomial vector from byte array + kyber_utils::poly_vec_decode(byte_arr, poly_vec); + // Decompress coefficients of polynomial vector + kyber_utils::poly_vec_decompress(poly_vec); + + std::array sink{}; + auto _sink = std::span(sink); + + using ctxt_t = std::span; + using seed_t = std::span; + + // Ensure Fujisaki-Okamoto transform, used during decapsulation, is constant-time + const uint32_t cond = kyber_utils::ct_memcmp(ctxt_t(data + doff2, doff3 - doff2), ctxt_t(data + doff3, doff4 - doff3)); + kyber_utils::ct_cond_memcpy(cond, _sink, seed_t(data + doff4, doff5 - doff4), seed_t(data + 
doff5, doff6 - doff5)); + + // Just so that optimizer doesn't remove above function calls ! + return static_cast(poly_vec[0].raw() ^ poly_vec[poly_vec.size() - 1].raw()) & // result of generating vector of polynomials + (byte_arr[0] ^ byte_arr[byte_arr.size() - 1]) & // result of serializing vector of polynomials + (_sink[0] ^ _sink[_sink.size() - 1]) & // result of conditional memcpy + static_cast(cond >> 24); // result of constant-time memcmp +} + +void +prepare_inputs(dudect_config_t* const c, uint8_t* const input_data, uint8_t* const classes) +{ + randombytes(input_data, c->number_measurements * c->chunk_size); + + for (size_t i = 0; i < c->number_measurements; i++) { + classes[i] = randombit(); + if (classes[i] == 0) { + std::memset(input_data + i * c->chunk_size, 0x00, c->chunk_size); + } + } +} + +dudect_state_t +test_kyber1024_kem() +{ + constexpr size_t chunk_size = SEED_LEN + // bytes holding seed `sigma` + 1 + // single byte nonce + kyber1024_kem::CIPHER_LEN + // bytes holding received cipher text + kyber1024_kem::CIPHER_LEN + // bytes for locally computed cipher text + SEED_LEN + // bytes for first source buffer to copy from + SEED_LEN; // bytes for second source buffer to copy from + constexpr size_t number_measurements = 1ul << 20; + + dudect_config_t config = { + chunk_size, + number_measurements, + }; + dudect_ctx_t ctx; + dudect_init(&ctx, &config); + + dudect_state_t state = DUDECT_NO_LEAKAGE_EVIDENCE_YET; + while (state == DUDECT_NO_LEAKAGE_EVIDENCE_YET) { + state = dudect_main(&ctx); + } + + dudect_free(&ctx); + return state; +} + +int +main() +{ + if (test_kyber1024_kem() != DUDECT_NO_LEAKAGE_EVIDENCE_YET) { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tests/dudect/test_kyber512_kem.cpp b/tests/dudect/test_kyber512_kem.cpp new file mode 100644 index 0000000..ffe3634 --- /dev/null +++ b/tests/dudect/test_kyber512_kem.cpp @@ -0,0 +1,106 @@ +#include "kyber512_kem.hpp" + +#define DUDECT_IMPLEMENTATION +#define 
DUDECT_VISIBLITY_STATIC
+#include "dudect.h"
+
+constexpr size_t SEED_LEN = 32; // Byte length of seed(s)
+
+uint8_t // One computation over a single input chunk: derives a polynomial vector, pushes it through NTT/iNTT, compress/encode/decode/decompress, then runs ct_memcmp and ct_cond_memcpy on the rest of the chunk.
+do_one_computation(uint8_t* const data) // data layout: [sigma | nonce | ciphertext A | ciphertext B | seed A | seed B], delimited by doff0..doff6 below
+{
+  constexpr size_t doff0 = 0; // start of seed sigma
+  constexpr size_t doff1 = doff0 + SEED_LEN; // end of sigma; index of the nonce byte
+  constexpr size_t doff2 = doff1 + 1; // start of first ciphertext region
+  constexpr size_t doff3 = doff2 + kyber512_kem::CIPHER_LEN; // start of second ciphertext region
+  constexpr size_t doff4 = doff3 + kyber512_kem::CIPHER_LEN; // start of first seed-sized source buffer
+  constexpr size_t doff5 = doff4 + SEED_LEN; // start of second seed-sized source buffer
+  constexpr size_t doff6 = doff5 + SEED_LEN; // end of chunk; same sum as chunk_size in test_kyber512_kem()
+
+  std::array poly_vec{};
+  std::array byte_arr{};
+
+  auto sigma = std::span(data + doff0, doff1 - doff0);
+  const auto nonce = data[doff1];
+
+  // Generate new secret polynomial vector
+  kyber_utils::generate_vector(poly_vec, sigma, nonce);
+  // Apply NTT on that secret vector
+  kyber_utils::poly_vec_ntt(poly_vec);
+  // Apply iNTT on bit-reversed NTT form secret polynomial vector
+  kyber_utils::poly_vec_intt(poly_vec);
+  // Compress coefficients of polynomial vector
+  kyber_utils::poly_vec_compress(poly_vec);
+  // Serialize polynomial vector into byte array
+  kyber_utils::poly_vec_encode(poly_vec, byte_arr);
+  // Recover coefficients of polynomial vector from byte array
+  kyber_utils::poly_vec_decode(byte_arr, poly_vec);
+  // Decompress coefficients of polynomial vector
+  kyber_utils::poly_vec_decompress(poly_vec);
+
+  std::array sink{};
+  auto _sink = std::span(sink);
+
+  using ctxt_t = std::span;
+  using seed_t = std::span;
+
+  // Ensure Fujisaki-Okamoto transform, used during decapsulation, is constant-time
+  const uint32_t cond = kyber_utils::ct_memcmp(ctxt_t(data + doff2, doff3 - doff2), ctxt_t(data + doff3, doff4 - doff3)); // compares the two CIPHER_LEN-byte regions of the chunk
+  kyber_utils::ct_cond_memcpy(cond, _sink, seed_t(data + doff4, doff5 - doff4), seed_t(data + doff5, doff6 - doff5)); // fills _sink from the two SEED_LEN-byte regions, driven by cond
+
+  // Just so that optimizer doesn't remove above function calls !
+  return static_cast(poly_vec[0].raw() ^ poly_vec[poly_vec.size() - 1].raw()) & // result of generating vector of polynomials
+         (byte_arr[0] ^ byte_arr[byte_arr.size() - 1]) & // result of serializing vector of polynomials
+         (_sink[0] ^ _sink[_sink.size() - 1]) & // result of conditional memcpy
+         static_cast(cond >> 24); // result of constant-time memcmp
+}
+
+void // Builds one batch of inputs: every chunk is randomized, then the chunks labelled class 0 are overwritten with zeros.
+prepare_inputs(dudect_config_t* const c, uint8_t* const input_data, uint8_t* const classes) // input_data: number_measurements chunks of chunk_size bytes each; classes: one label per chunk
+{
+  randombytes(input_data, c->number_measurements * c->chunk_size); // randomize the whole batch first
+
+  for (size_t i = 0; i < c->number_measurements; i++) {
+    classes[i] = randombit(); // label of measurement i, as returned by randombit()
+    if (classes[i] == 0) {
+      std::memset(input_data + i * c->chunk_size, 0x00, c->chunk_size); // class 0: fixed all-zero chunk; any other label keeps its random bytes
+    }
+  }
+}
+
+dudect_state_t // Calls dudect_main() repeatedly and returns the first state that is not DUDECT_NO_LEAKAGE_EVIDENCE_YET.
+test_kyber512_kem() // Does not return while dudect_main() keeps reporting "no evidence yet"; the Makefile's dudect_test target runs each binary under `timeout 3.0m`.
+{
+  constexpr size_t chunk_size = SEED_LEN + // bytes holding seed `sigma`
+                                1 + // single byte nonce
+                                kyber512_kem::CIPHER_LEN + // bytes holding received cipher text
+                                kyber512_kem::CIPHER_LEN + // bytes for locally computed cipher text
+                                SEED_LEN + // bytes for first source buffer to copy from
+                                SEED_LEN; // bytes for second source buffer to copy from
+  constexpr size_t number_measurements = 1ul << 20; // 2^20
+
+  dudect_config_t config = {
+    chunk_size, // bytes per measurement; also the stride prepare_inputs() uses between chunks
+    number_measurements,
+  };
+  dudect_ctx_t ctx;
+  dudect_init(&ctx, &config);
+
+  dudect_state_t state = DUDECT_NO_LEAKAGE_EVIDENCE_YET;
+  while (state == DUDECT_NO_LEAKAGE_EVIDENCE_YET) { // exits only when dudect_main() reports a different state
+    state = dudect_main(&ctx);
+  }
+
+  dudect_free(&ctx); // paired with dudect_init() above
+  return state;
+}
+
+int // Maps the state returned by test_kyber512_kem() to the process exit status.
+main()
+{
+  if (test_kyber512_kem() != DUDECT_NO_LEAKAGE_EVIDENCE_YET) { // NOTE(review): the loop in test_kyber512_kem() only exits on a state other than DUDECT_NO_LEAKAGE_EVIDENCE_YET, so this branch is taken whenever the call returns
+    return EXIT_FAILURE;
+  }
+
+  return EXIT_SUCCESS;
+}
diff --git a/tests/dudect/test_kyber768_kem.cpp b/tests/dudect/test_kyber768_kem.cpp
new file mode 100644
index 0000000..43816be
--- /dev/null
+++ b/tests/dudect/test_kyber768_kem.cpp
@@ -0,0 +1,106 @@
+#include "kyber768_kem.hpp"
+
+#define DUDECT_IMPLEMENTATION
+#define DUDECT_VISIBLITY_STATIC
+#include "dudect.h"
+
+constexpr size_t SEED_LEN = 32; // Byte length of seed(s)
+
+uint8_t // One computation over a single input chunk: derives a polynomial vector, pushes it through NTT/iNTT, compress/encode/decode/decompress, then runs ct_memcmp and ct_cond_memcpy on the rest of the chunk.
+do_one_computation(uint8_t* const data) // data layout: [sigma | nonce | ciphertext A | ciphertext B | seed A | seed B], delimited by doff0..doff6 below
+{
+  constexpr size_t doff0 = 0; // start of seed sigma
+  constexpr size_t doff1 = doff0 + SEED_LEN; // end of sigma; index of the nonce byte
+  constexpr size_t doff2 = doff1 + 1; // start of first ciphertext region
+  constexpr size_t doff3 = doff2 + kyber768_kem::CIPHER_LEN; // start of second ciphertext region
+  constexpr size_t doff4 = doff3 + kyber768_kem::CIPHER_LEN; // start of first seed-sized source buffer
+  constexpr size_t doff5 = doff4 + SEED_LEN; // start of second seed-sized source buffer
+  constexpr size_t doff6 = doff5 + SEED_LEN; // end of chunk; same sum as chunk_size in test_kyber768_kem()
+
+  std::array poly_vec{};
+  std::array byte_arr{};
+
+  auto sigma = std::span(data + doff0, doff1 - doff0);
+  const auto nonce = data[doff1];
+
+  // Generate new secret polynomial vector
+  kyber_utils::generate_vector(poly_vec, sigma, nonce);
+  // Apply NTT on that secret vector
+  kyber_utils::poly_vec_ntt(poly_vec);
+  // Apply iNTT on bit-reversed NTT form secret polynomial vector
+  kyber_utils::poly_vec_intt(poly_vec);
+  // Compress coefficients of polynomial vector
+  kyber_utils::poly_vec_compress(poly_vec);
+  // Serialize polynomial vector into byte array
+  kyber_utils::poly_vec_encode(poly_vec, byte_arr);
+  // Recover coefficients of polynomial vector from byte array
+  kyber_utils::poly_vec_decode(byte_arr, poly_vec);
+  // Decompress coefficients of polynomial vector
+  kyber_utils::poly_vec_decompress(poly_vec);
+
+  std::array sink{};
+  auto _sink = std::span(sink);
+
+  using ctxt_t = std::span;
+  using seed_t = std::span;
+
+  // Ensure Fujisaki-Okamoto transform, used during decapsulation, is constant-time
+  const uint32_t cond = kyber_utils::ct_memcmp(ctxt_t(data + doff2, doff3 - doff2), ctxt_t(data + doff3, doff4 - doff3)); // compares the two CIPHER_LEN-byte regions of the chunk
+  kyber_utils::ct_cond_memcpy(cond, _sink, seed_t(data + doff4, doff5 - doff4), seed_t(data + doff5, doff6 - doff5)); // fills _sink from the two SEED_LEN-byte regions, driven by cond
+
+  // Just so that optimizer doesn't remove above function calls !
+  return static_cast(poly_vec[0].raw() ^ poly_vec[poly_vec.size() - 1].raw()) & // result of generating vector of polynomials
+         (byte_arr[0] ^ byte_arr[byte_arr.size() - 1]) & // result of serializing vector of polynomials
+         (_sink[0] ^ _sink[_sink.size() - 1]) & // result of conditional memcpy
+         static_cast(cond >> 24); // result of constant-time memcmp
+}
+
+void // Builds one batch of inputs: every chunk is randomized, then the chunks labelled class 0 are overwritten with zeros.
+prepare_inputs(dudect_config_t* const c, uint8_t* const input_data, uint8_t* const classes) // input_data: number_measurements chunks of chunk_size bytes each; classes: one label per chunk
+{
+  randombytes(input_data, c->number_measurements * c->chunk_size); // randomize the whole batch first
+
+  for (size_t i = 0; i < c->number_measurements; i++) {
+    classes[i] = randombit(); // label of measurement i, as returned by randombit()
+    if (classes[i] == 0) {
+      std::memset(input_data + i * c->chunk_size, 0x00, c->chunk_size); // class 0: fixed all-zero chunk; any other label keeps its random bytes
+    }
+  }
+}
+
+dudect_state_t // Calls dudect_main() repeatedly and returns the first state that is not DUDECT_NO_LEAKAGE_EVIDENCE_YET.
+test_kyber768_kem() // Does not return while dudect_main() keeps reporting "no evidence yet"; the Makefile's dudect_test target runs each binary under `timeout 3.0m`.
+{
+  constexpr size_t chunk_size = SEED_LEN + // bytes holding seed `sigma`
+                                1 + // single byte nonce
+                                kyber768_kem::CIPHER_LEN + // bytes holding received cipher text
+                                kyber768_kem::CIPHER_LEN + // bytes for locally computed cipher text
+                                SEED_LEN + // bytes for first source buffer to copy from
+                                SEED_LEN; // bytes for second source buffer to copy from
+  constexpr size_t number_measurements = 1ul << 20; // 2^20
+
+  dudect_config_t config = {
+    chunk_size, // bytes per measurement; also the stride prepare_inputs() uses between chunks
+    number_measurements,
+  };
+  dudect_ctx_t ctx;
+  dudect_init(&ctx, &config);
+
+  dudect_state_t state = DUDECT_NO_LEAKAGE_EVIDENCE_YET;
+  while (state == DUDECT_NO_LEAKAGE_EVIDENCE_YET) { // exits only when dudect_main() reports a different state
+    state = dudect_main(&ctx);
+  }
+
+  dudect_free(&ctx); // paired with dudect_init() above
+  return state;
+}
+
+int // Maps the state returned by test_kyber768_kem() to the process exit status.
+main()
+{
+  if (test_kyber768_kem() != DUDECT_NO_LEAKAGE_EVIDENCE_YET) { // NOTE(review): the loop in test_kyber768_kem() only exits on a state other than DUDECT_NO_LEAKAGE_EVIDENCE_YET, so this branch is taken whenever the call returns
+    return EXIT_FAILURE;
+  }
+
+  return EXIT_SUCCESS;
+}
diff --git a/tests/test_kem.cpp b/tests/test_kem.cpp
index 3935c95..4418dad 100644
--- a/tests/test_kem.cpp
+++ b/tests/test_kem.cpp
@@ -21,9 +21,9 @@ test_kyber_kem()
   requires(klen > 0)
 {
   constexpr size_t slen = 32;
-  constexpr size_t pklen = kyber_utils::get_kem_public_key_len();
-  constexpr size_t sklen = kyber_utils::get_kem_secret_key_len();
-  constexpr size_t ctlen =
kyber_utils::get_kem_cipher_len(); + constexpr size_t pklen = kyber_utils::get_kem_public_key_len(k); + constexpr size_t sklen = kyber_utils::get_kem_secret_key_len(k); + constexpr size_t ctlen = kyber_utils::get_kem_cipher_len(k, du, dv); std::vector d(slen); std::vector z(slen);