diff --git a/mlkem/native/aarch64/intt_clean.S b/mlkem/native/aarch64/intt_clean.S index c6a290aba..73d1eb9c2 100644 --- a/mlkem/native/aarch64/intt_clean.S +++ b/mlkem/native/aarch64/intt_clean.S @@ -30,8 +30,9 @@ #include "common.i" // Bounds: -// In: |src| < q * C -// Out: |dst| < q * (0.0508 * C + 1/2) +// If C is chosen so that |src| < q * C, then |dst| < q * (0.0508 * C + 1/2) +// +// See mlken/reduce.c and test/test_bounds.py for more details. .macro mulmodq dst, src, const, idx0, idx1 // Signed barrett multiplication using // round-to-nearest-even-integer approximation. diff --git a/mlkem/native/aarch64/intt_opt.S b/mlkem/native/aarch64/intt_opt.S index 4ff961feb..873790c96 100644 --- a/mlkem/native/aarch64/intt_opt.S +++ b/mlkem/native/aarch64/intt_opt.S @@ -29,6 +29,10 @@ // Needed to provide ASM_LOAD directive #include "common.i" +// Bounds: +// If C is chosen so that |src| < q * C, then |dst| < q * (0.0508 * C + 1/2) +// +// See mlken/reduce.c and test/test_bounds.py for more details. .macro mulmodq dst, src, const, idx0, idx1 sqrdmulh t2.8h, \src\().8h, \const\().h[\idx1\()] mul \dst\().8h, \src\().8h, \const\().h[\idx0\()] diff --git a/mlkem/native/aarch64/ntt_clean.S b/mlkem/native/aarch64/ntt_clean.S index 39298093d..e60713f4d 100644 --- a/mlkem/native/aarch64/ntt_clean.S +++ b/mlkem/native/aarch64/ntt_clean.S @@ -30,8 +30,7 @@ #include "common.i" // Bounds: -// In: |src| < q * C -// Out: |dst| < q * (0.0508 * C + 1/2) +// If C is chosen so that |src| < q * C, then |dst| < q * (0.0508 * C + 1/2) // // See mlken/reduce.c and test/test_bounds.py for more details. .macro mulmodq dst, src, const, idx0, idx1 @@ -218,13 +217,13 @@ _ntt_asm_clean: // Bounds reasoning: // - There are 7 layers // - When passing from layer N to layer N+1, each layer-N value - // is modified through the addition/subtraction of a Montgomery - // product of a twiddle of absolute value < q/2 and a layer-N value. - // - Recalling that |fqmul(a,t)| < q * (0.0254*C + 1/2) for - // |a| < C*q and |t| C * q in the above and estimate // q / 2^17 < 0.0254.