-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge tag 'random-6.12-rc1-for-linus' of git://git.kernel.org/pub/scm…
…/linux/kernel/git/crng/random Pull random number generator updates from Jason Donenfeld: "Originally I'd planned on sending each of the vDSO getrandom() architecture ports to their respective arch trees. But as we started to work on this, we found lots of interesting issues in the shared code and infrastructure, the fixes for which the various archs needed to base their work. So in the end, this turned into a nice collaborative effort fixing up issues and porting to 5 new architectures -- arm64, powerpc64, powerpc32, s390x, and loongarch64 -- with everybody pitching in and commenting on each other's code. It was a fun development cycle. This contains: - Numerous fixups to the vDSO selftest infrastructure, getting it running successfully on more platforms, and fixing bugs in it. - Additions to the vDSO getrandom & chacha selftests. Basically every time manual review unearthed a bug in a revision of an arch patch, or an ambiguity, the tests were augmented. By the time the last arch was submitted for review, s390x, v1 of the series was essentially fine right out of the gate. - Fixes to the generic C implementation of vDSO getrandom, to build and run successfully on all archs, decoupling it from assumptions we had (unintentionally) made on x86_64 that didn't carry through to the other architectures. - Port of vDSO getrandom to LoongArch64, from Xi Ruoyao and acked by Huacai Chen. - Port of vDSO getrandom to ARM64, from Adhemerval Zanella and acked by Will Deacon. - Port of vDSO getrandom to PowerPC, in both 32-bit and 64-bit varieties, from Christophe Leroy and acked by Michael Ellerman. - Port of vDSO getrandom to S390X from Heiko Carstens, the arch maintainer. 
While it'd be natural for there to be things to fix up over the course of the development cycle, these patches got a decent amount of review from a fairly diverse crew of folks on the mailing lists, and, for the most part, they've been cooking in linux-next, which has been helpful for ironing out build issues. In terms of architectures, I think that mostly takes care of the important 64-bit archs with hardware still being produced and running production loads in settings where vDSO getrandom is likely to help. Arguably there's still RISC-V left, and we'll see for 6.13 whether they find it useful and submit a port" * tag 'random-6.12-rc1-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/crng/random: (47 commits) selftests: vDSO: check cpu caps before running chacha test s390/vdso: Wire up getrandom() vdso implementation s390/vdso: Move vdso symbol handling to separate header file s390/vdso: Allow alternatives in vdso code s390/module: Provide find_section() helper s390/facility: Let test_facility() generate static branch if possible s390/alternatives: Remove ALT_FACILITY_EARLY s390/facility: Disable compile time optimization for decompressor code selftests: vDSO: fix vdso_config for s390 selftests: vDSO: fix ELF hash table entry size for s390x powerpc/vdso: Wire up getrandom() vDSO implementation on VDSO64 powerpc/vdso: Wire up getrandom() vDSO implementation on VDSO32 powerpc/vdso: Refactor CFLAGS for CVDSO build powerpc/vdso32: Add crtsavres mm: Define VM_DROPPABLE for powerpc/32 powerpc/vdso: Fix VDSO data access when running in a non-root time namespace selftests: vDSO: don't include generated headers for chacha test arm64: vDSO: Wire up getrandom() vDSO implementation arm64: alternative: make alternative_has_cap_likely() VDSO compatible selftests: vDSO: also test counter in vdso_test_chacha ...
- Loading branch information
Showing
89 changed files
with
1,949 additions
and
239 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* SPDX-License-Identifier: GPL-2.0 */ | ||
|
||
#ifndef __ASM_VDSO_GETRANDOM_H | ||
#define __ASM_VDSO_GETRANDOM_H | ||
|
||
#ifndef __ASSEMBLY__ | ||
|
||
#include <asm/unistd.h> | ||
#include <asm/vdso/vsyscall.h> | ||
#include <vdso/datapage.h> | ||
|
||
/** | ||
* getrandom_syscall - Invoke the getrandom() syscall. | ||
* @_buffer: Destination buffer to fill with random bytes. | ||
* @_len: Size of @_buffer in bytes. | ||
* @_flags: Zero or more GRND_* flags. | ||
* | ||
* The arguments are bound to x0-x2 and __NR_getrandom to x8 through register | ||
* variables before issuing "svc #0"; the result is read back from x0. | ||
* | ||
* Returns: The number of random bytes written to @_buffer, or a negative value indicating an error. | ||
*/ | ||
static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) | ||
{ | ||
register void *buffer asm ("x0") = _buffer; | ||
register size_t len asm ("x1") = _len; | ||
register unsigned int flags asm ("x2") = _flags; | ||
register long ret asm ("x0"); | ||
register long nr asm ("x8") = __NR_getrandom; | ||
|
||
asm volatile( | ||
" svc #0\n" | ||
: "=r" (ret) | ||
: "r" (buffer), "r" (len), "r" (flags), "r" (nr) | ||
: "memory"); | ||
|
||
return ret; | ||
} | ||
|
||
static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) | ||
{ | ||
/* | ||
* Return the address of the RNG data. The RNG data is in the real VVAR data page, but if a | ||
* task belongs to a time namespace then VVAR_DATA_PAGE_OFFSET points to the | ||
* namespace-specific VVAR page and VVAR_TIMENS_PAGE_OFFSET points to the real VVAR page, | ||
* so in that case the address is shifted by VVAR_TIMENS_PAGE_OFFSET pages. | ||
*/ | ||
if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) | ||
return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * (1UL << CONFIG_PAGE_SHIFT); | ||
return &_vdso_rng_data; | ||
} | ||
|
||
#endif /* !__ASSEMBLY__ */ | ||
|
||
#endif /* __ASM_VDSO_GETRANDOM_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
// SPDX-License-Identifier: GPL-2.0 | ||
|
||
#include <linux/linkage.h> | ||
#include <asm/cache.h> | ||
#include <asm/assembler.h> | ||
|
||
.text | ||
|
||
#define state0 v0 | ||
#define state1 v1 | ||
#define state2 v2 | ||
#define state3 v3 | ||
#define copy0 v4 | ||
#define copy0_q q4 | ||
#define copy1 v5 | ||
#define copy2 v6 | ||
#define copy3 v7 | ||
#define copy3_d d7 | ||
#define one_d d16 | ||
#define one_q q16 | ||
#define one_v v16 | ||
#define tmp v17 | ||
#define rot8 v18 | ||
|
||
/* | ||
* ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive | ||
* number of blocks of output with nonce 0, taking an input key and 8-byte | ||
* counter. Importantly does not spill to the stack. | ||
* | ||
* This implementation avoids d8-d15 because they are callee-save in user | ||
* space. | ||
* | ||
* Register aliases defined above: state0-state3 (v0-v3) hold the working | ||
* block, copy0-copy3 (v4-v7) hold the input block that is added back after | ||
* the rounds, one_v (v16) supplies the counter increment and tmp (v17) is | ||
* scratch for the rotates. | ||
* | ||
* void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, | ||
* const uint8_t *key, | ||
* uint32_t *counter, | ||
* size_t nblocks) | ||
* | ||
* x0: output bytes | ||
* x1: 32-byte key input | ||
* x2: 8-byte counter input/output | ||
* x3: number of 64-byte blocks to write to output (must be non-zero: the | ||
* block loop only tests the count after a block has been written) | ||
*/ | ||
SYM_FUNC_START(__arch_chacha20_blocks_nostack) | ||
|
||
/* copy0 = "expand 32-byte k" */ | ||
mov_q x8, 0x3320646e61707865 | ||
mov_q x9, 0x6b20657479622d32 | ||
mov copy0.d[0], x8 | ||
mov copy0.d[1], x9 | ||
|
||
/* copy1,copy2 = key */ | ||
ld1 { copy1.4s, copy2.4s }, [x1] | ||
/* copy3 = counter || zero nonce */ | ||
ld1 { copy3.2s }, [x2] | ||
|
||
movi one_v.2s, #1 | ||
uzp1 one_v.4s, one_v.4s, one_v.4s | ||
|
||
.Lblock: | ||
/* load the working state from copy0-copy3, which are kept for the final add after the permute. */ | ||
mov state0.16b, copy0.16b | ||
mov state1.16b, copy1.16b | ||
mov state2.16b, copy2.16b | ||
mov state3.16b, copy3.16b | ||
|
||
mov w4, 20 | ||
.Lpermute: | ||
/* | ||
* Permute one 64-byte block where the state matrix is stored in the four NEON | ||
* registers state0-state3. It performs matrix operations on four words in parallel, | ||
* but requires shuffling to rearrange the words after each round. | ||
*/ | ||
|
||
.Ldoubleround: | ||
/* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */ | ||
add state0.4s, state0.4s, state1.4s | ||
eor state3.16b, state3.16b, state0.16b | ||
rev32 state3.8h, state3.8h | ||
|
||
/* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */ | ||
add state2.4s, state2.4s, state3.4s | ||
eor tmp.16b, state1.16b, state2.16b | ||
shl state1.4s, tmp.4s, #12 | ||
sri state1.4s, tmp.4s, #20 | ||
|
||
/* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */ | ||
add state0.4s, state0.4s, state1.4s | ||
eor tmp.16b, state3.16b, state0.16b | ||
shl state3.4s, tmp.4s, #8 | ||
sri state3.4s, tmp.4s, #24 | ||
|
||
/* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */ | ||
add state2.4s, state2.4s, state3.4s | ||
eor tmp.16b, state1.16b, state2.16b | ||
shl state1.4s, tmp.4s, #7 | ||
sri state1.4s, tmp.4s, #25 | ||
|
||
/* state1[0,1,2,3] = state1[1,2,3,0] */ | ||
ext state1.16b, state1.16b, state1.16b, #4 | ||
/* state2[0,1,2,3] = state2[2,3,0,1] */ | ||
ext state2.16b, state2.16b, state2.16b, #8 | ||
/* state3[0,1,2,3] = state3[3,0,1,2] */ | ||
ext state3.16b, state3.16b, state3.16b, #12 | ||
|
||
/* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */ | ||
add state0.4s, state0.4s, state1.4s | ||
eor state3.16b, state3.16b, state0.16b | ||
rev32 state3.8h, state3.8h | ||
|
||
/* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */ | ||
add state2.4s, state2.4s, state3.4s | ||
eor tmp.16b, state1.16b, state2.16b | ||
shl state1.4s, tmp.4s, #12 | ||
sri state1.4s, tmp.4s, #20 | ||
|
||
/* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */ | ||
add state0.4s, state0.4s, state1.4s | ||
eor tmp.16b, state3.16b, state0.16b | ||
shl state3.4s, tmp.4s, #8 | ||
sri state3.4s, tmp.4s, #24 | ||
|
||
/* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */ | ||
add state2.4s, state2.4s, state3.4s | ||
eor tmp.16b, state1.16b, state2.16b | ||
shl state1.4s, tmp.4s, #7 | ||
sri state1.4s, tmp.4s, #25 | ||
|
||
/* state1[0,1,2,3] = state1[3,0,1,2] */ | ||
ext state1.16b, state1.16b, state1.16b, #12 | ||
/* state2[0,1,2,3] = state2[2,3,0,1] */ | ||
ext state2.16b, state2.16b, state2.16b, #8 | ||
/* state3[0,1,2,3] = state3[1,2,3,0] */ | ||
ext state3.16b, state3.16b, state3.16b, #4 | ||
|
||
subs w4, w4, #2 | ||
b.ne .Ldoubleround | ||
|
||
/* output0 = state0 + copy0 */ | ||
add state0.4s, state0.4s, copy0.4s | ||
/* output1 = state1 + copy1 */ | ||
add state1.4s, state1.4s, copy1.4s | ||
/* output2 = state2 + copy2 */ | ||
add state2.4s, state2.4s, copy2.4s | ||
/* output3 = state3 + copy3 */ | ||
add state3.4s, state3.4s, copy3.4s | ||
st1 { state0.16b - state3.16b }, [x0] | ||
|
||
/* | ||
* ++copy3.counter, the 'add' clears the upper half of the SIMD register | ||
* which is the expected behaviour here. | ||
*/ | ||
add copy3_d, copy3_d, one_d | ||
|
||
/* output += 64, --nblocks */ | ||
add x0, x0, 64 | ||
subs x3, x3, #1 | ||
b.ne .Lblock | ||
|
||
/* counter = copy3.counter */ | ||
st1 { copy3.2s }, [x2] | ||
|
||
/* Zero out the potentially sensitive regs, in case nothing uses these again. */ | ||
movi state0.16b, #0 | ||
movi state1.16b, #0 | ||
movi state2.16b, #0 | ||
movi state3.16b, #0 | ||
movi copy1.16b, #0 | ||
movi copy2.16b, #0 | ||
ret | ||
SYM_FUNC_END(__arch_chacha20_blocks_nostack) | ||
|
||
emit_aarch64_feature_1_and |
Oops, something went wrong.