From 4c5de1b00ab7a7b4a1cea978ff9b94c738e0c5b2 Mon Sep 17 00:00:00 2001 From: magnum Date: Tue, 10 Dec 2024 15:16:30 +0100 Subject: [PATCH] OpenCL AES formats: Adapt to new shared code Enable local memory for table-based AES. Closes #5594 Bitlocker format is not affected as it has it's own implementation, but AES performance is insignificant for it anyway. --- run/opencl/7z_kernel.cl | 3 ++- run/opencl/agile_kernel.cl | 3 ++- run/opencl/androidbackup_kernel.cl | 7 ++--- run/opencl/axcrypt2_kernel.cl | 3 ++- run/opencl/axcrypt_kernel.cl | 7 ++--- run/opencl/bitcoin_kernel.cl | 3 ++- run/opencl/bitwarden_kernel.cl | 3 ++- run/opencl/blockchain_kernel.cl | 7 ++--- run/opencl/bsd_softraid_kernel.cl | 3 ++- run/opencl/cryptosafe_kernel.cl | 4 ++- run/opencl/diskcryptor_aes_kernel.cl | 4 ++- run/opencl/dmg_kernel.cl | 10 +++---- run/opencl/encfs_kernel.cl | 7 ++--- run/opencl/enpass_kernel.cl | 6 +++-- run/opencl/ethereum_kernel.cl | 3 ++- run/opencl/fvde_kernel.cl | 3 ++- run/opencl/geli_kernel.cl | 3 ++- run/opencl/iwork_kernel.cl | 3 ++- run/opencl/keepass_kernel.cl | 24 ++++++++--------- run/opencl/keyring_kernel.cl | 3 ++- run/opencl/krb5_kernel.cl | 38 +++++++++++++-------------- run/opencl/lastpass_kernel.cl | 6 +++-- run/opencl/o5logon_kernel.cl | 3 ++- run/opencl/odf_kernel.cl | 8 +++--- run/opencl/office_kernel.cl | 18 ++++++++----- run/opencl/opencl_aes_plain.h | 2 +- run/opencl/pbkdf2_ripemd160_kernel.cl | 4 ++- run/opencl/pdf_kernel.cl | 4 ++- run/opencl/pem_kernel.cl | 7 ++--- run/opencl/pgpdisk_kernel.cl | 4 +-- run/opencl/pgpwde_kernel.cl | 7 ++--- run/opencl/rar_kernel.cl | 7 ++--- run/opencl/ssh_kernel.cl | 16 +++++------ run/opencl/strip_kernel.cl | 3 ++- run/opencl/telegram_kernel.cl | 7 ++--- run/opencl/vmx_kernel.cl | 3 ++- run/opencl/wpapsk_kernel.cl | 4 ++- src/opencl_keepass_fmt_plug.c | 9 +++++-- 38 files changed, 151 insertions(+), 108 deletions(-) diff --git a/run/opencl/7z_kernel.cl b/run/opencl/7z_kernel.cl index 985edb4c0b0..d9fdec92f74 100644 --- a/run/opencl/7z_kernel.cl +++ b/run/opencl/7z_kernel.cl @@ -153,6 +153,7 @@ __kernel void sevenzip_final(__global const sevenzip_password *inbuffer, __kernel void sevenzip_aes(__constant sevenzip_salt *salt, __global sevenzip_hash *outbuffer) { + __local aes_local_t lt; uint gid = get_global_id(0); uint i; uint pad; @@ -162,7 +163,7 @@ __kernel void sevenzip_aes(__constant sevenzip_salt *salt, /* Early rejection if possible (only decrypt last 16 bytes) */ if (pad > 0 && salt->length >= 32) { uint8_t buf[16]; - AES_KEY akey; + AES_KEY akey; akey.lt = < unsigned char iv[16]; for (i = 0; i < 16; i++) diff --git a/run/opencl/agile_kernel.cl b/run/opencl/agile_kernel.cl index 6eddeded3ea..41fc9ccb312 100644 --- a/run/opencl/agile_kernel.cl +++ b/run/opencl/agile_kernel.cl @@ -30,8 +30,9 @@ __kernel void dk_decrypt(__global pbkdf2_password *password, __global agile_out *agile_out, __constant agile_salt *salt) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint idx = get_global_id(0); - AES_KEY akey; uchar iv[16]; uchar plaintext[16]; uint i; diff --git a/run/opencl/androidbackup_kernel.cl b/run/opencl/androidbackup_kernel.cl index 9dbf5c4aa7a..2d7315673b8 100644 --- a/run/opencl/androidbackup_kernel.cl +++ b/run/opencl/androidbackup_kernel.cl @@ -21,12 +21,12 @@ typedef struct { uchar masterkey_blob[MAX_MASTERKEYBLOB_LEN]; } ab_salt; -inline int ab_decrypt(__global uchar *key, MAYBE_CONSTANT ab_salt *salt) +inline int ab_decrypt(__global uchar *key, MAYBE_CONSTANT ab_salt *salt, __local aes_local_t *lt) { uchar out[MAX_MASTERKEYBLOB_LEN]; const int length = salt->masterkey_blob_length; uchar aiv[16]; - AES_KEY akey; + AES_KEY akey; akey.lt = lt; int pad_byte; memcpy_macro(aiv, salt->iv, 16); @@ -51,7 +51,8 @@ void ab_final(MAYBE_CONSTANT ab_salt *salt, __global pbkdf2_out *pbkdf2, __global ab_out *out) { + __local aes_local_t lt; uint gid = get_global_id(0); - out[gid].cracked = ab_decrypt((__global uchar*)pbkdf2[gid].dk, salt); + out[gid].cracked = ab_decrypt((__global uchar*)pbkdf2[gid].dk, salt, <); } diff --git a/run/opencl/axcrypt2_kernel.cl b/run/opencl/axcrypt2_kernel.cl index 34bf9d428bc..e50a077e566 100644 --- a/run/opencl/axcrypt2_kernel.cl +++ b/run/opencl/axcrypt2_kernel.cl @@ -35,6 +35,8 @@ __kernel void axcrypt2_final(__global crack_t *pbkdf2, __constant axcrypt2_salt_t *salt, __global out_t *out) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint gid = get_global_id(0); int i, k, j, nb_iterations = salt->key_wrapping_rounds; @@ -49,7 +51,6 @@ __kernel void axcrypt2_final(__global crack_t *pbkdf2, key.u[i] = SWAP64(pbkdf2[gid].hash[i]); uchar KEK[32]; - AES_KEY akey; int halfblocklen = 16 / 2; int wrappedkeylen = 56 - halfblocklen; union { diff --git a/run/opencl/axcrypt_kernel.cl b/run/opencl/axcrypt_kernel.cl index c7947b0c373..b4ad69511f1 100644 --- a/run/opencl/axcrypt_kernel.cl +++ b/run/opencl/axcrypt_kernel.cl @@ -43,7 +43,7 @@ typedef struct { (cp)[2] ^= (uchar)((value) >> 16), \ (cp)[3] ^= (uchar)((value) >> 24 ) ) -inline int axcrypt_decrypt(__global const axcrypt_password *inbuffer, uint gid, __constant axcrypt_salt *cur_salt, __global axcrypt_out *output) +inline int axcrypt_decrypt(__global const axcrypt_password *inbuffer, uint gid, __constant axcrypt_salt *cur_salt, __global axcrypt_out *output, __local aes_local_t *lt) { uchar password[PLAINTEXT_LENGTH]; uchar keyfile[4096]; @@ -59,7 +59,7 @@ inline int axcrypt_decrypt(__global const axcrypt_password *inbuffer, uint gid, uint32_t w[4]; uint64_t l[2]; } cipher; - AES_KEY akey; + AES_KEY akey; akey.lt = lt; SHA_CTX ctx; uint i; int j, nb_iterations = cur_salt->key_wrapping_rounds; @@ -117,7 +117,8 @@ void axcrypt(__global const axcrypt_password *inbuffer, __global axcrypt_out *out, __constant axcrypt_salt *salt) { + __local aes_local_t lt; uint idx = get_global_id(0); - out[idx].cracked = axcrypt_decrypt(inbuffer, idx, salt, out); + out[idx].cracked = axcrypt_decrypt(inbuffer, idx, salt, out, <); } diff --git a/run/opencl/bitcoin_kernel.cl b/run/opencl/bitcoin_kernel.cl index fa9cb1ee67f..595cf240652 100644 --- a/run/opencl/bitcoin_kernel.cl +++ b/run/opencl/bitcoin_kernel.cl @@ -84,6 +84,8 @@ __kernel void loop_sha512(__global hash512_t *state, uint count) __kernel void bitcoin_final(__constant salt_t *salt, __global hash512_t *state, __global uint *cracked) { + __local aes_local_t lt; + AES_KEY aes_key; aes_key.lt = < uint gid = get_global_id(0); uchar iv[16]; // updated IV for the final block memcpy_cp(iv, salt->cry_master + salt->cry_master_length - 32, 16); @@ -93,7 +95,6 @@ __kernel void bitcoin_final(__constant salt_t *salt, __global hash512_t *state, state[gid].W[i] = SWAP64(state[gid].W[i]); uchar output[16]; - AES_KEY aes_key; AES_set_decrypt_key(state[gid].b, 256, &aes_key); AES_cbc_decrypt(salt->cry_master + salt->cry_master_length - 16, output, 16, &aes_key, iv); diff --git a/run/opencl/bitwarden_kernel.cl b/run/opencl/bitwarden_kernel.cl index e46597de158..fe55a245f75 100644 --- a/run/opencl/bitwarden_kernel.cl +++ b/run/opencl/bitwarden_kernel.cl @@ -31,9 +31,10 @@ __kernel void bitwarden_decrypt(MAYBE_CONSTANT bitwarden_salt_t *salt, __global crack_t *out, __global uint32_t *cracked) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint32_t gid = get_global_id(0); int32_t i; - AES_KEY akey; union { uchar c[32]; uint w[32 / 4]; diff --git a/run/opencl/blockchain_kernel.cl b/run/opencl/blockchain_kernel.cl index 695ecdf0bbe..5a6cba695c5 100644 --- a/run/opencl/blockchain_kernel.cl +++ b/run/opencl/blockchain_kernel.cl @@ -34,10 +34,10 @@ typedef struct { } blockchain_salt; inline int blockchain_decrypt(__global uchar *derived_key, - __constant uchar *data) + __constant uchar *data, __local aes_local_t *lt) { + AES_KEY akey; akey.lt = lt; uchar out[SAFETY_FACTOR]; - AES_KEY akey; uchar iv[16]; AES_set_decrypt_key(derived_key, 256, &akey); @@ -68,11 +68,12 @@ __kernel void blockchain(__global const pbkdf2_password *inbuffer, __constant blockchain_salt *salt, __global blockchain_out *out) { + __local aes_local_t lt; uint idx = get_global_id(0); pbkdf2(inbuffer[idx].v, inbuffer[idx].length, salt->pbkdf2.salt, salt->pbkdf2.length, salt->pbkdf2.iterations, dk[idx].v, salt->pbkdf2.outlen, salt->pbkdf2.skip_bytes); - out[idx].cracked = blockchain_decrypt((__global uchar*)dk[idx].v, salt->data); + out[idx].cracked = blockchain_decrypt((__global uchar*)dk[idx].v, salt->data, <); } diff --git a/run/opencl/bsd_softraid_kernel.cl b/run/opencl/bsd_softraid_kernel.cl index cd88a67db02..27249d27bf6 100644 --- a/run/opencl/bsd_softraid_kernel.cl +++ b/run/opencl/bsd_softraid_kernel.cl @@ -29,11 +29,12 @@ __kernel void softraid_final(MAYBE_CONSTANT softraid_salt *salt, __global pbkdf2_out *out) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint gid = get_global_id(0); uint dk[OUTLEN / 4]; uchar unmasked_keys[MASKED_KEY_SIZE]; uchar hashed_mask_key[SHA1_DIGEST_LENGTH]; - AES_KEY akey; SHA_CTX ctx; memcpy_gp(dk, out[gid].dk, OUTLEN); diff --git a/run/opencl/cryptosafe_kernel.cl b/run/opencl/cryptosafe_kernel.cl index d594952a540..6ff9ce49f54 100644 --- a/run/opencl/cryptosafe_kernel.cl +++ b/run/opencl/cryptosafe_kernel.cl @@ -83,6 +83,8 @@ void cryptoSafe(__global const uchar *pwbuf, #define GPU_LOC_3 LOC_3 #endif + __local aes_local_t lt; + /* Prepare password, pad to length 32 with ASCII '0's */ prepare(pwbuf, index, password); @@ -113,7 +115,7 @@ void cryptoSafe(__global const uchar *pwbuf, #endif #endif - AES_KEY aes_decrypt_key; + AES_KEY aes_decrypt_key; aes_decrypt_key.lt = < unsigned char plain[16], iv[16] = { 0 }; AES_set_decrypt_key(password, 256, &aes_decrypt_key); diff --git a/run/opencl/diskcryptor_aes_kernel.cl b/run/opencl/diskcryptor_aes_kernel.cl index 2a5ac908e59..d3ef6da5af8 100644 --- a/run/opencl/diskcryptor_aes_kernel.cl +++ b/run/opencl/diskcryptor_aes_kernel.cl @@ -24,6 +24,8 @@ __kernel void diskcryptor_final(__global crack_t *pbkdf2, __constant diskcryptor_salt_t *salt, __global out_t *out) { + __local aes_local_t lt; + uint gid = get_global_id(0); uchar output[96]; @@ -48,7 +50,7 @@ __kernel void diskcryptor_final(__global crack_t *pbkdf2, for (i = 0; i < 8; i++) key.u[i] = SWAP64(pbkdf2[gid].hash[i]); - AES_256_XTS_DiskCryptor(salt->header, output, key.c, 96); + AES_256_XTS_DiskCryptor(salt->header, output, key.c, 96, <); memcpy_pp(version.c, output + 72, 2); memcpy_pp(algorithm.c, output + 82, 4); if ((!memcmp_pc(output + 64, "DCRP", 4)) && (version.value == 2 || version.value == 1) && (algorithm.value >= 0 && algorithm.value <= 7)) { diff --git a/run/opencl/dmg_kernel.cl b/run/opencl/dmg_kernel.cl index 25312c85fd1..49a727efc16 100644 --- a/run/opencl/dmg_kernel.cl +++ b/run/opencl/dmg_kernel.cl @@ -12,9 +12,6 @@ typedef struct { #define pbkdf2_out dmg_out #include "pbkdf2_hmac_sha1_kernel.cl" -#if __OS_X__ -#define AES_NO_BITSLICE -#endif #define AES_SRC_TYPE MAYBE_CONSTANT #include "opencl_aes.h" #include "opencl_hmac_sha1.h" @@ -94,10 +91,10 @@ inline int check_v1hash(const uchar *derived_key, } inline int check_v2hash(const uchar *derived_key, - MAYBE_CONSTANT dmg_salt *salt) + MAYBE_CONSTANT dmg_salt *salt, __local aes_local_t *lt) { des3_context ks; - AES_KEY aes_decrypt_key; + AES_KEY aes_decrypt_key; aes_decrypt_key.lt = lt; uint buf[8192/4]; uchar *outbuf = (uchar*)buf; uchar iv[20]; @@ -155,10 +152,11 @@ __kernel void dmg_final_v2(MAYBE_CONSTANT dmg_salt *salt, __global dmg_out *out) { + __local aes_local_t lt; uint gid = get_global_id(0); uint dk[OUTLEN / 4]; memcpy_gp(dk, out[gid].dk, OUTLEN); - out[gid].cracked = check_v2hash((uchar*)dk, salt); + out[gid].cracked = check_v2hash((uchar*)dk, salt, <); } diff --git a/run/opencl/encfs_kernel.cl b/run/opencl/encfs_kernel.cl index 35fee425092..de8817986ea 100644 --- a/run/opencl/encfs_kernel.cl +++ b/run/opencl/encfs_kernel.cl @@ -130,10 +130,10 @@ inline uint encfs_common_MAC_32(MAYBE_CONSTANT encfs_salt *salt, uchar *src, inline void encfs_common_streamDecode(MAYBE_CONSTANT encfs_salt *salt, uchar *buf, uint size, uint64_t iv64, - uchar *key) + uchar *key, __local aes_local_t *lt) { uchar ivec[MAX_IVLENGTH]; - AES_KEY akey; + AES_KEY akey; akey.lt = lt; encfs_common_setIVec(salt, ivec, iv64 + 1, key); AES_set_encrypt_key(key, salt->keySize * 8, &akey); @@ -151,6 +151,7 @@ void encfs_final(MAYBE_CONSTANT encfs_salt *salt, __global pbkdf2_out *pbkdf2, __global encfs_out *out) { + __local aes_local_t lt; uint gid = get_global_id(0); uint i; uchar master[MAX_KEYLENGTH + MAX_IVLENGTH]; @@ -165,7 +166,7 @@ void encfs_final(MAYBE_CONSTANT encfs_salt *salt, checksum = (checksum << 8) | salt->data[i]; memcpy_mcp(tmpBuf, salt->data + KEY_CHECKSUM_BYTES, salt->keySize + salt->ivLength); - encfs_common_streamDecode(salt, tmpBuf, salt->keySize + salt->ivLength ,checksum, master); + encfs_common_streamDecode(salt, tmpBuf, salt->keySize + salt->ivLength ,checksum, master, <); checksum2 = encfs_common_MAC_32(salt, tmpBuf, salt->keySize + salt->ivLength, master); out[gid].cracked = (checksum2 == checksum); diff --git a/run/opencl/enpass_kernel.cl b/run/opencl/enpass_kernel.cl index d7b6a3c6cfd..b4763a10289 100644 --- a/run/opencl/enpass_kernel.cl +++ b/run/opencl/enpass_kernel.cl @@ -53,6 +53,8 @@ void enpass5_final(MAYBE_CONSTANT enpass_salt *salt, __global enpass_out *out, __global pbkdf2_state *state) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint gid = get_global_id(0); uint i; uint base = state[gid].pass++ * 5; @@ -80,7 +82,6 @@ void enpass5_final(MAYBE_CONSTANT enpass_salt *salt, #endif } else { uchar data[16]; - AES_KEY akey; union { uchar c[256/8]; uint w[256/8/4]; @@ -155,10 +156,11 @@ void enpass6_final(MAYBE_CONSTANT enpass_salt *salt, __global enpass_out *out, __global crack_t *out512) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint gid = get_global_id(0); uint i; uchar data[16]; - AES_KEY akey; union { uchar c[256/8]; ulong w[256/8/8]; diff --git a/run/opencl/ethereum_kernel.cl b/run/opencl/ethereum_kernel.cl index 61a74ac2b34..ffe7ab825ab 100644 --- a/run/opencl/ethereum_kernel.cl +++ b/run/opencl/ethereum_kernel.cl @@ -60,8 +60,9 @@ __kernel void ethereum_presale_process(__global crack_t *pbkdf2_out, __global state_t *state, __global hash_t *out) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint32_t gid = get_global_id(0); - AES_KEY akey; uchar iv[16]; int i; uchar seed[1024 + 1]; diff --git a/run/opencl/fvde_kernel.cl b/run/opencl/fvde_kernel.cl index 041d9e77d96..433c7400e6b 100644 --- a/run/opencl/fvde_kernel.cl +++ b/run/opencl/fvde_kernel.cl @@ -29,6 +29,8 @@ __kernel void fvde_decrypt(MAYBE_CONSTANT fvde_salt_t *salt, __global crack_t *out, __global uint32_t *cracked) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint32_t gid = get_global_id(0); MAYBE_CONSTANT uint64_t *C = salt->blob.qword; // len(C) == 3 or 5 (AES-256) int32_t n = BLOBLEN / 8 - 1; // len(C) - 1 @@ -38,7 +40,6 @@ __kernel void fvde_decrypt(MAYBE_CONSTANT fvde_salt_t *salt, uint8_t stream[16]; } todecrypt; int32_t i, j; - AES_KEY akey; uint64_t A = C[0]; if (salt->type == 1) { diff --git a/run/opencl/geli_kernel.cl b/run/opencl/geli_kernel.cl index 6e5a73b75cd..46cc9dd78e9 100644 --- a/run/opencl/geli_kernel.cl +++ b/run/opencl/geli_kernel.cl @@ -38,6 +38,8 @@ __kernel void geli_final(__global crack_t *pbkdf2, __constant geli_salt_t *salt, __global out_t *out) { + __local aes_local_t lt; + AES_KEY aes_decrypt_key; aes_decrypt_key.lt = < uint gid = get_global_id(0); __constant uchar *mmkey; const uchar nullstring[1] = { 0 }; @@ -64,7 +66,6 @@ __kernel void geli_final(__global crack_t *pbkdf2, for (nkey = 0; nkey < G_ELI_MAXMKEYS; nkey++, mmkey += G_ELI_MKEYLEN) { int bit = (1 << nkey); uchar iv[16] = { 0 }; - AES_KEY aes_decrypt_key; uchar tmpmkey[G_ELI_MKEYLEN]; const uchar *odhmac; /* On-disk HMAC. */ uchar chmac[SHA512_MDLEN]; /* Calculated HMAC. */ diff --git a/run/opencl/iwork_kernel.cl b/run/opencl/iwork_kernel.cl index 61fa05f6996..8ccd82cc639 100644 --- a/run/opencl/iwork_kernel.cl +++ b/run/opencl/iwork_kernel.cl @@ -39,9 +39,10 @@ void iwork_final(MAYBE_CONSTANT iwork_salt *salt, __global iwork_out *result, __global pbkdf2_state *state) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint gid = get_global_id(0); uint i; - AES_KEY akey; int success = 1; // hash was cracked union { uchar c[BLOBLEN]; diff --git a/run/opencl/keepass_kernel.cl b/run/opencl/keepass_kernel.cl index 8b23c06d83c..63b1e779ab7 100644 --- a/run/opencl/keepass_kernel.cl +++ b/run/opencl/keepass_kernel.cl @@ -80,11 +80,10 @@ typedef struct { } keepass_result; typedef struct { - uint8_t hash[32]; #if KEEPASS_AES uint iterations; - AES_KEY akey; #endif + uint8_t hash[32]; } keepass_state; inline void calc_hmac_base_key(const void *master_seed, const void *final_key, void *result) @@ -143,15 +142,9 @@ __kernel void keepass_init(__global const keepass_password *masterkey, } #if KEEPASS_AES - // Next, encrypt the hash using the random seed as key (only for AES-KDF) if (salt->kdf == 0) { - memcpy_macro(pbuf, salt->transf_randomseed, 32); - AES_KEY akey; - AES_set_encrypt_key(pbuf, 256, &akey); - // Save state for loop kernel. state[gid].iterations = salt->t_cost; - memcpy_pg(&state[gid].akey, &akey, sizeof(AES_KEY)); } #endif @@ -160,16 +153,22 @@ __kernel void keepass_init(__global const keepass_password *masterkey, #if KEEPASS_AES // Here's the heavy part. NOTHING else is significant for performance! -__kernel void keepass_loop_aes(__global keepass_state *state) +// Encrypt the hash using the random seed as key +__kernel void keepass_loop_aes(__global keepass_state *state, MAYBE_CONSTANT keepass_salt_t *salt) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint gid = get_global_id(0); uint i; + uint8_t pbuf[32]; i = MIN(state[gid].iterations, HASH_LOOPS); state[gid].iterations -= i; - AES_KEY akey; - memcpy_gp(&akey, &state[gid].akey, sizeof(AES_KEY)); + memcpy_macro(pbuf, salt->transf_randomseed, 32); + + AES_set_encrypt_key(pbuf, 256, &akey); + uint8_t hash[32]; memcpy_macro(hash, state[gid].hash, 32); @@ -217,8 +216,9 @@ __kernel void keepass_final(__global keepass_state *state, memcpy_macro(hash, state[gid].hash, 32); #if KEEPASS_AES + __local aes_local_t lt; + AES_KEY akey; akey.lt = < SHA256_CTX ctx; - AES_KEY akey; uint8_t pbuf[32]; uint8_t iv[16]; diff --git a/run/opencl/keyring_kernel.cl b/run/opencl/keyring_kernel.cl index db6a8056c6c..86feacb23ab 100644 --- a/run/opencl/keyring_kernel.cl +++ b/run/opencl/keyring_kernel.cl @@ -64,13 +64,14 @@ __kernel void keyring(__global const keyring_password *inbuffer, __global keyring_hash *outbuffer, __constant keyring_salt *salt) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint gid = get_global_id(0); uint W[64/4] = { 0 }; uint o[32/4]; uint i; uint len = inbuffer[gid].length; uint iterations = salt->iterations; - AES_KEY akey; uchar buffer[LINE_BUFFER_SIZE / 2]; union { uchar c[16]; diff --git a/run/opencl/krb5_kernel.cl b/run/opencl/krb5_kernel.cl index ef6b4559750..294a57dc91c 100644 --- a/run/opencl/krb5_kernel.cl +++ b/run/opencl/krb5_kernel.cl @@ -6,9 +6,6 @@ */ #include "pbkdf2_hmac_sha1_kernel.cl" -#if __OS_X__ -#define AES_NO_BITSLICE -#endif #define AES_CTS_SRC_TYPE MAYBE_CONSTANT #define AES_CTS_DST_TYPE __global #include "opencl_aes.h" @@ -45,11 +42,11 @@ typedef struct { * how the CPU code works, I have no idea why. */ inline void dk(uchar *key_out, uchar *key_in, uint key_size, - __constant uchar *ptext, uint ptext_size) + __constant uchar *ptext, uint ptext_size, __local aes_local_t *lt) { uchar iv[16] = { 0 }; uchar plaintext[32] = { 0 }; - AES_KEY ekey; + AES_KEY ekey; ekey.lt = lt; memcpy_macro(plaintext, ptext, 16); @@ -59,10 +56,10 @@ inline void dk(uchar *key_out, uchar *key_in, uint key_size, inline void krb_decrypt(MAYBE_CONSTANT uchar *ciphertext, uint ctext_size, __global uchar *plaintext, const uchar *key, - uint key_size) + uint key_size, __local aes_local_t *lt) { uchar iv[32] = { 0 }; - AES_KEY ekey; + AES_KEY ekey; ekey.lt = lt; AES_set_decrypt_key(key, key_size * 8, &ekey); AES_cts_decrypt(ciphertext, plaintext, ctext_size, &ekey, iv); @@ -94,16 +91,17 @@ __kernel void asrep_final(MAYBE_CONSTANT asrep_salt *salt, uchar Ke[32]; uchar Ki[32]; uchar checksum[20]; + __local aes_local_t lt; plaintext += (salt->edata2len + 31) / 32 * 32 * gid; memcpy_macro(base_key, ((__global uchar*)pbkdf2[gid].dk), key_size); - dk((uchar*)base_key, (uchar*)base_key, key_size, co_input, 16); + dk((uchar*)base_key, (uchar*)base_key, key_size, co_input, 16, <); - dk(Ke, (uchar*)base_key, key_size, ke3input, 16); - krb_decrypt(edata2, salt->edata2len, plaintext, Ke, key_size); + dk(Ke, (uchar*)base_key, key_size, ke3input, 16, <); + krb_decrypt(edata2, salt->edata2len, plaintext, Ke, key_size, <); - dk(Ki, (uchar*)base_key, key_size, ki3input, 16); + dk(Ki, (uchar*)base_key, key_size, ki3input, 16, <); hmac_sha1(Ki, key_size, plaintext, salt->edata2len, checksum, 20); out[gid].cracked = !memcmp_pmc(checksum, salt->edata1, 12); @@ -132,16 +130,17 @@ __kernel void pa_sha1_final(MAYBE_CONSTANT pa_sha1_salt *salt, uchar Ke[32]; uchar Ki[32]; uchar checksum[20]; + __local aes_local_t lt; plaintext += (TIMESTAMP_SIZE + 63) / 64 * 64 * gid; memcpy_macro(base_key, ((__global uchar*)pbkdf2[gid].dk), key_size); - dk((uchar*)base_key, (uchar*)base_key, key_size, co_input, 16); + dk((uchar*)base_key, (uchar*)base_key, key_size, co_input, 16, <); - dk(Ke, (uchar*)base_key, key_size, ke1input, 16); - krb_decrypt(salt->ct, TIMESTAMP_SIZE, plaintext, Ke, key_size); + dk(Ke, (uchar*)base_key, key_size, ke1input, 16, <); + krb_decrypt(salt->ct, TIMESTAMP_SIZE, plaintext, Ke, key_size, <); - dk(Ki, (uchar*)base_key, key_size, ki1input, 16); + dk(Ki, (uchar*)base_key, key_size, ki1input, 16, <); hmac_sha1(Ki, key_size, plaintext, TIMESTAMP_SIZE, checksum, 20); memcpy_pg(out[gid].hash, checksum, BINARY_SIZE); @@ -161,6 +160,7 @@ __kernel void tgsrep_final(MAYBE_CONSTANT tgsrep_salt *salt, __global uchar *plaintext, __global tgsrep_out *out) { + __local aes_local_t lt; uint gid = get_global_id(0); const int key_size = (salt->etype == 17) ? 16 : 32; #if HAVE_LUT3 @@ -178,12 +178,12 @@ __kernel void tgsrep_final(MAYBE_CONSTANT tgsrep_salt *salt, plaintext += (salt->edata2len + 31) / 32 * 32 * gid; memcpy_macro(base_key, ((__global uchar*)pbkdf2[gid].dk), key_size); - dk((uchar*)base_key, (uchar*)base_key, key_size, co_input, 16); + dk((uchar*)base_key, (uchar*)base_key, key_size, co_input, 16, <); - dk(Ke, (uchar*)base_key, key_size, ke2input, 16); - krb_decrypt(edata2, salt->edata2len, plaintext, Ke, key_size); + dk(Ke, (uchar*)base_key, key_size, ke2input, 16, <); + krb_decrypt(edata2, salt->edata2len, plaintext, Ke, key_size, <); - dk(Ki, (uchar*)base_key, key_size, ki2input, 16); + dk(Ki, (uchar*)base_key, key_size, ki2input, 16, <); hmac_sha1(Ki, key_size, plaintext, salt->edata2len, checksum, 20); out[gid].cracked = !memcmp_pmc(checksum, salt->edata1, 12); diff --git a/run/opencl/lastpass_kernel.cl b/run/opencl/lastpass_kernel.cl index 75542520e2e..24ed0103b42 100644 --- a/run/opencl/lastpass_kernel.cl +++ b/run/opencl/lastpass_kernel.cl @@ -14,10 +14,11 @@ __kernel void lastpass_final(__global crack_t *out, MAYBE_CONSTANT salt_t *salt, __global state_t *state) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint idx = get_global_id(0); uint i; uint key[8]; - AES_KEY akey; for (i = 0; i < 8; i++) key[i] = SWAP32(state[idx].hash[i]); @@ -38,11 +39,12 @@ __kernel void lastpass_cli_final(__global crack_t *out, MAYBE_CONSTANT lpcli_salt_t *salt, __global state_t *state) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint idx = get_global_id(0); uint i; uint key[8]; uchar iv[16]; - AES_KEY akey; for (i = 0; i < 8; i++) key[i] = SWAP32(state[idx].hash[i]); diff --git a/run/opencl/o5logon_kernel.cl b/run/opencl/o5logon_kernel.cl index b00e83ca247..49da6bfe006 100644 --- a/run/opencl/o5logon_kernel.cl +++ b/run/opencl/o5logon_kernel.cl @@ -32,6 +32,8 @@ __kernel void o5logon_kernel(__global const uint *keys, __constant salt_t *salt, __global const uint *index, __global uint *result) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint W[16] = { 0 }, salt_s[3], output[5]; uint gid = get_global_id(0); uint base = index[gid]; @@ -51,7 +53,6 @@ o5logon_kernel(__global const uint *keys, __constant salt_t *salt, ulong l[16 / 8]; } pt; uchar iv[16]; - AES_KEY akey; keys += base >> 6; diff --git a/run/opencl/odf_kernel.cl b/run/opencl/odf_kernel.cl index df8a2530020..84449a65f96 100644 --- a/run/opencl/odf_kernel.cl +++ b/run/opencl/odf_kernel.cl @@ -122,9 +122,10 @@ inline void odf_bf(__global const uchar *password, inline void odf_aes(__global const uchar *password, __constant odf_salt *salt, - __global uint *out) + __global uint *out, + __local aes_local_t *lt) { - AES_KEY akey; + AES_KEY akey; akey.lt = lt; uchar iv[16]; uint i, j; uint hash[256/32]; @@ -177,10 +178,11 @@ __kernel void odf(__global odf_password *password, __constant odf_salt *salt, __global odf_out *out) { + __local aes_local_t lt; uint idx = get_global_id(0); if (salt->cipher_type == 0) odf_bf(password[idx].v, salt, out[idx].v); else - odf_aes(password[idx].v, salt, out[idx].v); + odf_aes(password[idx].v, salt, out[idx].v, <); } diff --git a/run/opencl/office_kernel.cl b/run/opencl/office_kernel.cl index 9595bd965a2..a796e705a3c 100644 --- a/run/opencl/office_kernel.cl +++ b/run/opencl/office_kernel.cl @@ -125,6 +125,8 @@ void Final2007(__global ms_office_state *state, __constant ms_office_salt *salt, __constant ms_office_blob *blob) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint i; uint W[16]; union { @@ -144,7 +146,6 @@ void Final2007(__global ms_office_state *state, unsigned char c[16]; uint w[4]; } decryptedVerifierHash; - AES_KEY akey; uint result = 1; #if (50000 % HASH_LOOPS0710) @@ -240,11 +241,12 @@ inline void Decrypt(__constant ms_office_salt *salt, const uchar *verifierInputKey, __constant uchar *encryptedVerifier, uchar *decryptedVerifier, - const int length) + const int length, + __local aes_local_t *lt) { uint i; uchar iv[16]; - AES_KEY akey; + AES_KEY akey; akey.lt = lt; for (i = 0; i < 16; i++) iv[i] = salt->salt.c[i]; @@ -262,6 +264,7 @@ void Generate2010key(__global ms_office_state *state, __constant ms_office_salt *salt, __constant ms_office_blob *blob) { + __local aes_local_t lt; uint i, j, result = 1; uint W[16]; union { @@ -328,10 +331,10 @@ void Generate2010key(__global ms_office_state *state, output[1].w[i] = SWAP32(output[1].w[i]); Decrypt(salt, output[0].c, blob->encryptedVerifier, - decryptedVerifierHashInputBytes.c, 16); + decryptedVerifierHashInputBytes.c, 16, <); Decrypt(salt, output[1].c, blob->encryptedVerifierHash, - decryptedVerifierHashBytes.c, 32); + decryptedVerifierHashBytes.c, 32, <); for (i = 0; i < 4; i++) W[i] = SWAP32(decryptedVerifierHashInputBytes.w[i]); @@ -414,6 +417,7 @@ void Generate2013key(__global ms_office_state *state, __constant ms_office_salt *salt, __constant ms_office_blob *blob) { + __local aes_local_t lt; uint i, j, result = 1; ulong W[4][16]; ulong output[4][64/8]; @@ -468,10 +472,10 @@ void Generate2013key(__global ms_office_state *state, output[2][i] = SWAP64(output[2][i]); Decrypt(salt, (uchar*)output[1], blob->encryptedVerifier, - (uchar*)decryptedVerifierHashInputBytes, 16); + (uchar*)decryptedVerifierHashInputBytes, 16, <); Decrypt(salt, (uchar*)output[2], blob->encryptedVerifierHash, - (uchar*)decryptedVerifierHashBytes, 32); + (uchar*)decryptedVerifierHashBytes, 32, <); for (i = 0; i < 2; i++) W[3][i] = SWAP64(decryptedVerifierHashInputBytes[i]); diff --git a/run/opencl/opencl_aes_plain.h b/run/opencl/opencl_aes_plain.h index 6796251b7fc..7f8606080ec 100644 --- a/run/opencl/opencl_aes_plain.h +++ b/run/opencl/opencl_aes_plain.h @@ -21,7 +21,7 @@ * Copy tables to local memory. */ #ifndef AES_LOCAL_TABLES -//#define AES_LOCAL_TABLES +#define AES_LOCAL_TABLES #endif /* diff --git a/run/opencl/pbkdf2_ripemd160_kernel.cl b/run/opencl/pbkdf2_ripemd160_kernel.cl index 7e768f11307..b7dc9adcc8f 100644 --- a/run/opencl/pbkdf2_ripemd160_kernel.cl +++ b/run/opencl/pbkdf2_ripemd160_kernel.cl @@ -11,6 +11,7 @@ #include "opencl_misc.h" #include "opencl_ripemd.h" +#define AES_BITSLICE // Somehow this kernel bugs out with the table based kernel #define AES_SRC_TYPE __constant #define AES_DST_TYPE __global #include "opencl_aes.h" @@ -147,10 +148,11 @@ __kernel void tc_ripemd_aesxts(__global const pbkdf2_password *inbuffer, __global tc_hash *outbuffer, __constant tc_salt *salt) { + __local aes_local_t lt; uint idx = get_global_id(0); uint key[64 / 4]; pbkdf2(inbuffer[idx].v, inbuffer[idx].length, salt->salt, key); - AES_256_XTS_first_sector(salt->bin, outbuffer[idx].v, (uchar*)key); + AES_256_XTS_first_sector(salt->bin, outbuffer[idx].v, (uchar*)key, <); } diff --git a/run/opencl/pdf_kernel.cl b/run/opencl/pdf_kernel.cl index 6f10edde486..2a107ef18a2 100644 --- a/run/opencl/pdf_kernel.cl +++ b/run/opencl/pdf_kernel.cl @@ -568,6 +568,9 @@ void pdf_r6(__global const uchar *pwbuf, #define GPU_LOC_3 LOC_3 #endif + __local aes_local_t lt; + AES_KEY aes; aes.lt = < + /* Prepare password */ uint pw_len = prepare56(pwbuf, index, password); @@ -604,7 +607,6 @@ void pdf_r6(__global const uchar *pwbuf, uint block_size = 32; uint data_len = 0; uint i, j, sum, magic = 0; - AES_KEY aes; uint start_clean; uint md_len_pos; diff --git a/run/opencl/pem_kernel.cl b/run/opencl/pem_kernel.cl index 0228a6f269e..b2c931c0024 100644 --- a/run/opencl/pem_kernel.cl +++ b/run/opencl/pem_kernel.cl @@ -24,7 +24,7 @@ typedef struct { uchar ciphertext[CTLEN]; } pem_salt; -inline int pem_decrypt(__global uchar *key, MAYBE_CONSTANT pem_salt *salt) +inline int pem_decrypt(__global uchar *key, MAYBE_CONSTANT pem_salt *salt, __local aes_local_t *lt) { uchar out[CTLEN]; struct asn1_hdr hdr; @@ -49,7 +49,7 @@ inline int pem_decrypt(__global uchar *key, MAYBE_CONSTANT pem_salt *salt) } else { const uint aes_sz = salt->cid * 64; uchar aiv[16]; - AES_KEY akey; + AES_KEY akey; akey.lt = lt; block_size = 16; memcpy_macro(aiv, salt->iv, 16); @@ -109,7 +109,8 @@ void pem_final(MAYBE_CONSTANT pem_salt *salt, __global pbkdf2_out *pbkdf2, __global pem_out *out) { + __local aes_local_t lt; uint gid = get_global_id(0); - out[gid].cracked = pem_decrypt((__global uchar*)pbkdf2[gid].dk, salt); + out[gid].cracked = pem_decrypt((__global uchar*)pbkdf2[gid].dk, salt, <); } diff --git a/run/opencl/pgpdisk_kernel.cl b/run/opencl/pgpdisk_kernel.cl index 8d230cf2ee8..0477a9e8503 100644 --- a/run/opencl/pgpdisk_kernel.cl +++ b/run/opencl/pgpdisk_kernel.cl @@ -9,7 +9,6 @@ #include "opencl_misc.h" #include "opencl_sha1_ctx.h" -#define AES_NO_BITSLICE #include "opencl_aes.h" #include "opencl_twofish.h" #include "opencl_cast.h" @@ -82,9 +81,10 @@ __kernel void pgpdisk_aes(__global const pgpdisk_password *inbuffer, __global pgpdisk_hash *outbuffer, __constant pgpdisk_salt *salt) { + __local aes_local_t lt; + AES_KEY aes_key; aes_key.lt = < uint idx = get_global_id(0); uchar key[32]; - AES_KEY aes_key; pgpdisk_kdf(inbuffer[idx].v, inbuffer[idx].length, salt->salt, salt->saltlen, salt->iterations, diff --git a/run/opencl/pgpwde_kernel.cl b/run/opencl/pgpwde_kernel.cl index 196a063000f..0b4dd09766e 100644 --- a/run/opencl/pgpwde_kernel.cl +++ b/run/opencl/pgpwde_kernel.cl @@ -137,9 +137,9 @@ inline int PKCS1oaepMGF1Unpack(uchar *in, uint32_t inlen) return memcmp_pp(nullhash, msg + hashlen / 4, hashlen); } -inline int pgpwde_decrypt_and_verify(uchar *key, __constant uchar *esk) +inline int pgpwde_decrypt_and_verify(uchar *key, __constant uchar *esk, __local aes_local_t *lt) { - AES_KEY aes_key; + AES_KEY aes_key; aes_key.lt = lt; uchar iv[16] = { 8, 0 }; uchar out[128]; @@ -156,11 +156,12 @@ __kernel void pgpwde(__global const pgpwde_password *inbuffer, __global pgpwde_hash *outbuffer, __constant pgpwde_salt *salt) { + __local aes_local_t lt; uint idx = get_global_id(0); uint key[8]; pgpwde_kdf(inbuffer[idx].v, inbuffer[idx].length, salt->salt, salt->bytes, key); - outbuffer[idx].cracked = pgpwde_decrypt_and_verify((uchar*)key, salt->esk); + outbuffer[idx].cracked = pgpwde_decrypt_and_verify((uchar*)key, salt->esk, <); } diff --git a/run/opencl/rar_kernel.cl b/run/opencl/rar_kernel.cl index a5a3746a0ad..b9d1cc49de1 100644 --- a/run/opencl/rar_kernel.cl +++ b/run/opencl/rar_kernel.cl @@ -239,9 +239,9 @@ inline int check_huffman(uchar *next) { /* * Returns 0 for early rejection, 1 if passed */ -inline int check_rar(__global rar_file *cur_file, __global uint *_key, __global uint *_iv) +inline int check_rar(__global rar_file *cur_file, __global uint *_key, __global uint *_iv, __local aes_local_t *lt) { - AES_KEY aes_ctx; + AES_KEY aes_ctx; aes_ctx.lt = lt; uchar iv[16]; uchar plain[16 + 8]; /* Some are safety margin for check_huffman() */ __global uchar *key = (__global uchar*)_key; @@ -341,8 +341,9 @@ __kernel void RarFinal(const __global uint *pw_len, __global rar_out *output) __kernel void RarCheck(__global rar_out *output, __global rar_file *file) { + __local aes_local_t lt; uint gid = get_global_id(0); /* GPU-side early reject */ - output[gid].sha[4] = check_rar(file, output[gid].sha, output[gid].iv); + output[gid].sha[4] = check_rar(file, output[gid].sha, output[gid].iv, <); } diff --git a/run/opencl/ssh_kernel.cl b/run/opencl/ssh_kernel.cl index 2172e12035b..ba424395ed7 100644 --- a/run/opencl/ssh_kernel.cl +++ b/run/opencl/ssh_kernel.cl @@ -194,8 +194,9 @@ inline int check_padding_and_structure(uchar *out, uint length, uint strict_mode return 1; } -inline void common_crypt_code(uchar *password, uint len, __constant ssh_salt *osalt, uchar *out, uint full_decrypt) +inline void common_crypt_code(uchar *password, uint len, __constant ssh_salt *osalt, uchar *out, uint full_decrypt, __local aes_local_t *lt) { + AES_KEY akey; akey.lt = lt; uchar salt[16]; memcpy_macro(salt, osalt->salt, osalt->sl); @@ -217,7 +218,6 @@ inline void common_crypt_code(uchar *password, uint len, __constant ssh_salt *os } } else if (osalt->cipher == 1) { // RSA/DSA keys with AES-128 uchar key[16]; - AES_KEY akey; uchar iv[16]; memcpy_macro(iv, osalt->salt, 16); @@ -236,7 +236,6 @@ inline void common_crypt_code(uchar *password, uint len, __constant ssh_salt *os #endif } else if (osalt->cipher == 3) { // EC keys with AES-128 uchar key[16]; - AES_KEY akey; uchar iv[16]; memcpy_macro(iv, osalt->salt, 16); @@ -246,7 +245,6 @@ inline void common_crypt_code(uchar *password, uint len, __constant ssh_salt *os AES_cbc_decrypt(osalt->ct, out, osalt->ctl, &akey, iv); } else if (osalt->cipher == 4) { // RSA/DSA keys with AES-192 uchar key[24]; - AES_KEY akey; uchar iv[16]; memcpy_macro(iv, osalt->salt, 16); @@ -261,7 +259,6 @@ inline void common_crypt_code(uchar *password, uint len, __constant ssh_salt *os } } else if (osalt->cipher == 5) { // RSA/DSA keys with AES-256 uchar key[32]; - AES_KEY akey; uchar iv[16]; memcpy_macro(iv, osalt->salt, 16); @@ -280,12 +277,12 @@ inline void common_crypt_code(uchar *password, uint len, __constant ssh_salt *os #define QUICK 0 #define FULL 1 -inline int ssh_decrypt(uchar *password, uint len, __constant ssh_salt *osalt, __global ssh_out *output) +inline int ssh_decrypt(uchar *password, uint len, __constant ssh_salt *osalt, __global ssh_out *output, __local aes_local_t *lt) { uchar out[CTLEN]; int block_size = osalt->cipher == 0 ? 8 : 16; - common_crypt_code(password, len, osalt, out, QUICK); + common_crypt_code(password, len, osalt, out, QUICK, lt); if (osalt->cipher == 3) // EC keys with AES-128 return check_padding_and_structure_EC(out, osalt->ctl); @@ -293,7 +290,7 @@ inline int ssh_decrypt(uchar *password, uint len, __constant ssh_salt *osalt, __ if (!check_padding_and_structure(out, osalt->ctl, QUICK, block_size)) return 0; - common_crypt_code(password, len, osalt, out, FULL); + common_crypt_code(password, len, osalt, out, FULL, lt); return check_padding_and_structure(out, osalt->ctl, FULL, block_size); } @@ -302,10 +299,11 @@ __kernel void ssh(__global const ssh_password *inbuffer, __global ssh_out *out, __constant ssh_salt *salt) { + __local aes_local_t lt; uchar password[PLAINTEXT_LENGTH]; uint gid = get_global_id(0); memcpy_gp(password, inbuffer[gid].v, inbuffer[gid].length); - out[gid].cracked = ssh_decrypt(password, inbuffer[gid].length, salt, out); + out[gid].cracked = ssh_decrypt(password, inbuffer[gid].length, salt, out, <); } diff --git a/run/opencl/strip_kernel.cl b/run/opencl/strip_kernel.cl index c4595b6fcc9..fe9e2347921 100644 --- a/run/opencl/strip_kernel.cl +++ b/run/opencl/strip_kernel.cl @@ -58,7 +58,8 @@ __kernel void strip(__global const pbkdf2_password *inbuffer, const int page_sz = 1008; /* 1024 - strlen(SQLITE_FILE_HEADER) */ const int reserve_sz = 16; /* for HMAC off case */ const int size = page_sz - reserve_sz; - AES_KEY akey; + __local aes_local_t lt; + AES_KEY akey; akey.lt = < pbkdf2(inbuffer[idx].v, inbuffer[idx].length, salt->pbkdf2.salt, salt->pbkdf2.length, salt->pbkdf2.iterations, diff --git a/run/opencl/telegram_kernel.cl b/run/opencl/telegram_kernel.cl index ef4df1d6fce..52b1b0b5942 100644 --- a/run/opencl/telegram_kernel.cl +++ b/run/opencl/telegram_kernel.cl @@ -19,7 +19,7 @@ typedef struct { uchar encrypted_blob[ENCRYPTED_BLOB_LEN]; } telegram_salt; -inline int telegram_decrypt(__global uchar *authkey, MAYBE_CONSTANT telegram_salt *salt) +inline int telegram_decrypt(__global uchar *authkey, MAYBE_CONSTANT telegram_salt *salt, __local aes_local_t *lt) { // variables uchar data_a[48]; @@ -39,7 +39,7 @@ inline int telegram_decrypt(__global uchar *authkey, MAYBE_CONSTANT telegram_sal int encrypted_data_length = salt->encrypted_blob_length - 16; SHA_CTX ctx; SHA_CTX fctx; - AES_KEY aeskey; + AES_KEY aeskey; aeskey.lt = lt; int i; // setup buffers @@ -105,7 +105,8 @@ void telegram_final(MAYBE_CONSTANT telegram_salt *salt, __global pbkdf2_out *pbkdf2, __global telegram_out *out) { + __local aes_local_t lt; uint gid = get_global_id(0); - out[gid].cracked = telegram_decrypt((__global uchar*)pbkdf2[gid].dk, salt); + out[gid].cracked = telegram_decrypt((__global uchar*)pbkdf2[gid].dk, salt, <); } diff --git a/run/opencl/vmx_kernel.cl b/run/opencl/vmx_kernel.cl index f02d394acad..9d1e086d28b 100644 --- a/run/opencl/vmx_kernel.cl +++ b/run/opencl/vmx_kernel.cl @@ -41,6 +41,8 @@ void vmx_final(MAYBE_CONSTANT vmx_salt *salt, __global pbkdf2_state *state, __global vmx_state *vstate) { + __local aes_local_t lt; + AES_KEY akey; akey.lt = < uint gid = get_global_id(0); uint i; #if !OUTLEN || OUTLEN > 20 @@ -71,7 +73,6 @@ void vmx_final(MAYBE_CONSTANT vmx_salt *salt, #endif } else { uchar data[16]; - AES_KEY akey; int success = 0; union { uchar c[256 / 8]; diff --git a/run/opencl/wpapsk_kernel.cl b/run/opencl/wpapsk_kernel.cl index 5f6ff50b6f5..b5522a1e6ac 100644 --- a/run/opencl/wpapsk_kernel.cl +++ b/run/opencl/wpapsk_kernel.cl @@ -10,6 +10,7 @@ #include "opencl_md5.h" #include "opencl_sha1.h" #include "opencl_sha2_ctx.h" +#define CMAC_SINGLE_UPDATE #include "opencl_cmac.h" typedef struct { @@ -658,6 +659,7 @@ void wpapsk_final_sha256(__global wpapsk_state *state, MAYBE_CONSTANT wpapsk_data *data, __global mic_t *mic) { + __local aes_local_t lt; uchar ptk[48]; uchar cmic[16]; uint outbuffer[8]; @@ -674,7 +676,7 @@ void wpapsk_final_sha256(__global wpapsk_state *state, sha256_prf_bits((uchar*)outbuffer, 32, (MAYBE_CONSTANT uchar*)data->data, 76, ptk, 48 * 8); /* CMAC is kinda like a HMAC but using AES */ - AES_CMAC_Init(&ctx); + AES_CMAC_Init(&ctx, <); AES_CMAC_SetKey(&ctx, ptk); AES_CMAC_Update(&ctx, (MAYBE_CONSTANT uchar*)data->eapol, data->eapol_size); AES_CMAC_Final(cmic, &ctx); diff --git a/src/opencl_keepass_fmt_plug.c b/src/opencl_keepass_fmt_plug.c index ce6dec67801..3d9f5337124 100644 --- a/src/opencl_keepass_fmt_plug.c +++ b/src/opencl_keepass_fmt_plug.c @@ -53,10 +53,13 @@ typedef struct { uint32_t cracked; } result; +#define AES_MAXNR 14 + typedef struct { - uint8_t hash[32]; +#if KEEPASS_AES uint32_t iterations; - uint8_t akey[724]; /* sizeof(AES_CTX) on GPU side */ +#endif + uint8_t hash[32]; } keepass_state; static int new_keys; @@ -131,6 +134,7 @@ static void create_clobj(size_t gws, struct fmt_main *self) CLKERNELARG(kernel_init, 2, mem_state); CLKERNELARG(kernel_loop_aes, 0, mem_state); + CLKERNELARG(kernel_loop_aes, 1, mem_salt); CLKERNELARG(kernel_final, 0, mem_state); CLKERNELARG(kernel_final, 1, mem_salt); @@ -272,6 +276,7 @@ static void set_salt(void *salt) saltsize = sizeof(keepass_salt_t) + keepass_salt->content_size - 1; CLCREATEBUFFER(mem_salt, CL_RO, saltsize); CLKERNELARG(kernel_init, 1, mem_salt); + CLKERNELARG(kernel_loop_aes, 1, mem_salt); CLKERNELARG(kernel_final, 1, mem_salt); #if KEEPASS_ARGON2 CLKERNELARG(kernel_argon2, 1, mem_salt);