From 582d5acf5953330c6e04b5b8c0c108127abe207b Mon Sep 17 00:00:00 2001 From: Aapo Talvensaari Date: Wed, 12 Jun 2024 11:44:35 +0300 Subject: [PATCH] refactor(tools/string): speed up strip (whitespace) (#13168) ### Summary With a simple microbenchmark: ```lua ngx.update_time() local s = ngx.now() for i = 1, 100000 do local a = strip(" \t \ndogestr \f\t\r ") end ngx.update_time() local e = ngx.now() print("took: ", (e * 1000) - (s * 1000), " ms") ``` I get these results: Current: `took: 57 ms` PR: `took: 7 ms` Signed-off-by: Aapo Talvensaari --- kong/tools/string.lua | 72 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/kong/tools/string.lua b/kong/tools/string.lua index 1920d7e970b7..ef2d844e62d1 100644 --- a/kong/tools/string.lua +++ b/kong/tools/string.lua @@ -1,13 +1,20 @@ local pl_stringx = require "pl.stringx" -local type = type -local ipairs = ipairs -local tostring = tostring -local lower = string.lower -local fmt = string.format -local find = string.find -local gsub = string.gsub +local type = type +local ipairs = ipairs +local tostring = tostring +local lower = string.lower +local sub = string.sub +local fmt = string.format +local find = string.find +local gsub = string.gsub +local byte = string.byte + + +local SPACE_BYTE = byte(" ") +local TAB_BYTE = byte("\t") +local CR_BYTE = byte("\r") local _M = {} @@ -24,16 +31,52 @@ _M.split = pl_stringx.split --- strips whitespace from a string. -- @function strip -_M.strip = function(str) - if str == nil then +_M.strip = function(value) + if value == nil then return "" end - str = tostring(str) - if #str > 200 then - return str:gsub("^%s+", ""):reverse():gsub("^%s+", ""):reverse() - else - return str:match("^%s*(.-)%s*$") + + -- TODO: do we want to operate on non-string values (kept for backward compatibility)?
+ if type(value) ~= "string" then + value = tostring(value) or "" + end + + if value == "" then + return "" end + + local len = #value + local s = 1 -- position of the leftmost non-whitespace char + for i = 1, len do + local b = byte(value, i) + if b == SPACE_BYTE or (b >= TAB_BYTE and b <= CR_BYTE) then + s = s + 1 + else + break + end + end + + if s > len then + return "" + end + + local e = len -- position of the rightmost non-whitespace char + if s < e then + for i = e, 1, -1 do + local b = byte(value, i) + if b == SPACE_BYTE or (b >= TAB_BYTE and b <= CR_BYTE) then + e = e - 1 + else + break + end + end + end + + if s ~= 1 or e ~= len then + value = sub(value, s, e) + end + + return value end @@ -180,4 +223,3 @@ _M.replace_dashes_lower = replace_dashes_lower return _M -