-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor(tools/rand): speed up random string generation #13150
Conversation
A simple performance test:
the result of master: 0.20499992370605 This PR is about 20% faster than the master. |
If the string is not CSRNG at first, we can just use math.random
this one is fastest local ffi = require "ffi"
local C = ffi.C
local ffi_new = ffi.new
local encode_base64 = ngx.encode_base64
local char = string.char
local rand = math.random
ffi.cdef[[
typedef unsigned char u_char;
int RAND_bytes(u_char *buf, int num);
unsigned long ERR_get_error(void);
void ERR_load_crypto_strings(void);
void ERR_free_strings(void);
const char *ERR_reason_error_string(unsigned long e);
int open(const char * filename, int flags, ...);
size_t read(int fd, void *buf, size_t count);
int write(int fd, const void *ptr, int numbytes);
int close(int fd);
char *strerror(int errnum);
]]
local _M = {}
local get_rand_bytes
do
local ngx_log = ngx.log
local WARN = ngx.WARN
local ffi_fill = ffi.fill
local ffi_str = ffi.string
local bytes_buf_t = ffi.typeof "char[?]"
local function urandom_bytes(buf, size)
local fd = C.open("/dev/urandom", O_RDONLY, 0) -- mode is ignored
if fd < 0 then
ngx_log(WARN, "Error opening random fd: ",
ffi_str(C.strerror(ffi.errno())))
return false
end
local res = C.read(fd, buf, size)
if res <= 0 then
ngx_log(WARN, "Error reading from urandom: ",
ffi_str(C.strerror(ffi.errno())))
return false
end
if C.close(fd) ~= 0 then
ngx_log(WARN, "Error closing urandom: ",
ffi_str(C.strerror(ffi.errno())))
end
return true
end
-- try to get n_bytes of CSPRNG data, first via /dev/urandom,
-- and then falling back to OpenSSL if necessary
get_rand_bytes = function(n_bytes, urandom)
local buf = ffi_new(bytes_buf_t, n_bytes)
ffi_fill(buf, n_bytes, 0x0)
-- only read from urandom if we were explicitly asked
if urandom then
local rc = urandom_bytes(buf, n_bytes)
-- if the read of urandom was successful, we returned true
-- and buf is filled with our bytes, so return it as a string
if rc then
return ffi_str(buf, n_bytes)
end
end
if C.RAND_bytes(buf, n_bytes) == 0 then
-- get error code
local err_code = C.ERR_get_error()
if err_code == 0 then
return nil, "could not get SSL error code from the queue"
end
-- get human-readable error string
C.ERR_load_crypto_strings()
local err = C.ERR_reason_error_string(err_code)
C.ERR_free_strings()
return nil, "could not get random bytes (" ..
"reason:" .. ffi_str(err) .. ") "
end
return ffi_str(buf, n_bytes)
end
end
math.randomseed(os.clock() ^ 5)
local sets = {{97, 122}, {65, 90}, {48, 57}} -- a-z, A-Z, 0-9
local function string_random(chars)
local str = ""
for i = 1, chars do
local charset = sets[ math.random(1, #sets) ]
str = str .. string.char(math.random(charset[1], charset[2]))
end
return str
end
abc = {"a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","r","s","t","u","v","z","y","w","q","1","2","3","4","5","6","7","8","9","0"}
local string = table.new(32, 0)
local function GeneratedString()
table.clear(string)
for i=1,32 do
local rdm = math.random(1,35)
string[i] = abc[rdm]
end
return table.concat(string, "")
end
local t = ngx.now()
local function timeit()
ngx.update_time()
local tt = ngx.now()
local elapsed = tt - t
t = tt
return elapsed
end
local ITER = 10000
local random_string = function()
-- get 24 bytes, which will return a 32 char string after encoding
-- this is done in attempt to maintain backwards compatibility as
-- much as possible while improving the strength of this function
local str = encode_base64(get_rand_bytes(24, false))
if str:find("/", 1, true) then
str = str:gsub("/", char(rand(48, 57))) -- 0 - 10
end
if str:find("+", 1, true) then
str = str:gsub("+", char(rand(65, 90))) -- A - Z
end
if str:find("=", 1, true) then
str = str:gsub("=", char(rand(97, 122))) -- a - z
end
return str
end
timeit()
for i = 1, ITER do
random_string()
end
print(timeit())
print(random_string())
timeit()
for i = 1, ITER do
string_random(32)
end
print(timeit())
print(string_random(32))
timeit()
for i = 1, ITER do
GeneratedString()
end
print(timeit())
print(GeneratedString()) |
@fffonion , It may be a big change, we will try it in another PR. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some even faster ways of doing this.
kong/tools/rand.lua
Outdated
local str = encode_base64(get_rand_bytes(24, true)) | ||
|
||
if str:find("/", 1, true) then | ||
str = str:gsub("/", char(rand(48, 57))) -- 0 - 10 | ||
end | ||
|
||
if str:find("+", 1, true) then | ||
str = str:gsub("+", char(rand(65, 90))) -- A - Z | ||
end | ||
|
||
if str:find("=", 1, true) then | ||
str = str:gsub("=", char(rand(97, 122))) -- a - z | ||
end | ||
|
||
return str |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably something like:
return encode_base58(get_rand_bytes(24, true))
would be the fastest.
Or doing:
local byte = string.byte
local OUTPUT = require("string.buffer").new(32)
local SLASH_BYTE = byte("/")
local PLUS_BYTE = byte("+")
local EQ_BYTE = byte("=")
random_string = function()
OUTPUT:reset()
local str = encode_base64(get_rand_bytes(24, true))
for i = 1, 32 do
local b = byte(str, i)
if b == SLASH_BYTE then
OUTPUT:putf("%c", rand(48, 57))
elseif b == PLUS_BYTE then
OUTPUT:putf("%c", rand(65, 90))
elseif b == EQ_BYTE then
OUTPUT:putf("%c", rand(97, 122))
else
OUTPUT:putf("%c", b)
end
end
str = OUTPUT:get()
return str
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually even faster:
local byte = string.byte
local OUTPUT = require("string.buffer").new(32)
local SLASH_BYTE = byte("/")
local PLUS_BYTE = byte("+")
random_string = function()
OUTPUT:reset()
local str = encode_base64(get_rand_bytes(24, true), true)
for i = 1, 32 do
local b = byte(str, i)
if b == SLASH_BYTE then
OUTPUT:putf("%c", rand(48, 57))
elseif b == PLUS_BYTE then
OUTPUT:putf("%c", rand(65, 90))
else
OUTPUT:putf("%c", b)
end
end
str = OUTPUT:get()
return str
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is another alternative, but it is as fast as above:
local byte = string.byte
local OUTPUT = require("string.buffer").new(32)
local ALNUM = {
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
}
local function random_string()
OUTPUT:reset()
local str = get_rand_bytes(32, true)
for i = 1, 32 do
OUTPUT:put(ALNUM[byte(str, i) % 62 + 1])
end
str = OUTPUT:get()
return str
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here is one without string buffer:
local byte = string.byte
local concat = table.concat
local OUTPUT = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "0", "1", "2", "3", "4", "5", }
local ALNUM = {
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
}
local function random_string()
local str = get_rand_bytes(32, true)
for i = 1, 32 do
OUTPUT[i] = ALNUM[byte(str, i) % 62 + 1]
end
str = concat(OUTPUT)
return str
end
It also seems reading from /dev/urandom is slower than using openssl rand bytes itself. It feels Put that apart, doing encode_base64 to generate strings is also seems not worth than just doing |
Here is the original related commit: It is also really interesting that |
These discussion seems to be out of this PR's scope, if we want to change the algorithm of random, should we create a ticket? |
I tried one suggestion (OUTPUT + ALNUM) in the same test, the result is |
Yes, but those are more linear than yours, which is good. So far either this: Looks the best. Yours (my result with the improvement: if str:find("=", 1, true) then
str = str:gsub("=", char(rand(97, 122))) -- a - z
end These microbenchmarks are dominated by get_rand_bytes. |
I'd like to read some reference where does it create |
I removed Perhaps we could keep it and make this |
Why keep it when it does nothing (there will never be padding)? Ah you mean when we generate n-size string. Then it might have padding. But you could always just generate enough bytes so that there is no padding , I think. |
This generates a 32 bytes random and a 44 bytes string. https://github.com/Kong/kong/blob/master/kong/plugins/session/schema.lua#L56 |
If we eliminate the getrandbytes the scores are here:
|
Then we can just generate more random bytes so that padding doesn't happen (padding just reduces entropy). |
I removed |
### Summary This PR optimizes the `kong.tools.rand.random_string`. Results can be seen in PR that this PR replaces: #13150 (comment) KAG-4673 Signed-off-by: Aapo Talvensaari <[email protected]>
@chronolaw, here it is: #13186 So this PR can now be closed? |
### Summary This PR optimizes the `kong.tools.rand.random_string`. Results can be seen in PR that this PR replaces: #13150 (comment) KAG-4673 Signed-off-by: Aapo Talvensaari <[email protected]>
### Summary This PR optimizes the `kong.tools.rand.random_string`. Results can be seen in PR that this PR replaces: #13150 (comment) KAG-4673 Signed-off-by: Aapo Talvensaari <[email protected]>
### Summary This PR optimizes the `kong.tools.rand.random_string`. Results can be seen in PR that this PR replaces: #13150 (comment) KAG-4673 Signed-off-by: Aapo Talvensaari <[email protected]>
I'm not referring the openssl rand_bytes vs. /dev/urandom is securer or weaker, but it depends on our usage.
So I'm just suggesting to have a pure lua math.rand flavor and a openssl rand bytes variant (no encode_base64, just use to_hex) to |
### Summary This PR optimizes the `kong.tools.rand.random_string`. Results can be seen in PR that this PR replaces: #13150 (comment) KAG-4673 Signed-off-by: Aapo Talvensaari <[email protected]>
### Summary This PR optimizes the `kong.tools.rand.random_string`. Results can be seen in PR that this PR replaces: #13150 (comment) KAG-4673 Signed-off-by: Aapo Talvensaari <[email protected]>
Summary
This PR tries
string.find
beforestring.gsub
, avoiding the relatively expensive cost ofgsub
.KAG-4673
Checklist
changelog/unreleased/kong
orskip-changelog
label added on PR if changelog is unnecessary. README.mdIssue reference
Fix #[issue number]