Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip -- do not review] feat: add support for big values in SeederV2 #4222

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions tests/dragonfly/replication_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from . import dfly_args
from .proxy import Proxy
from .seeder import StaticSeeder
from .seeder import SeederBase

ADMIN_PORT = 1211

Expand Down Expand Up @@ -61,6 +62,34 @@ async def wait_for_replicas_state(*clients, state="online", node_role="slave", t
pytest.param(
8, [8, 8], dict(key_target=1_000_000, units=16), 50_000, False, marks=M_STRESS
),
# Quick general test that replication is working
(
1,
3 * [1],
dict(
key_target=1_000,
huge_value_percentage=10,
huge_value_size=4096 * 2,
collection_size=10,
types=SeederBase.BIG_VALUE_TYPES,
),
500,
True,
),
# Big value
(
4,
[4, 4],
dict(
key_target=10_000,
huge_value_percentage=5,
huge_value_size=4096 * 4,
collection_size=50,
types=SeederBase.BIG_VALUE_TYPES,
),
500,
True,
),
],
)
@pytest.mark.parametrize("mode", [({}), ({"cache_mode": "true"})])
Expand Down Expand Up @@ -132,6 +161,12 @@ async def check():
# Check data after stable state stream
await check()

if big_value:
info = await c_master.info()
preemptions = info["big_value_preemptions"]
logging.info(f"Preemptions {preemptions}")
assert preemptions > 0


async def check_replica_finished_exec(c_replica: aioredis.Redis, m_offset):
role = await c_replica.role()
Expand Down
8 changes: 8 additions & 0 deletions tests/dragonfly/seeder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class SeederBase:
UID_COUNTER = 1 # multiple generators should not conflict on keys
CACHED_SCRIPTS = {}
DEFAULT_TYPES = ["STRING", "LIST", "SET", "HASH", "ZSET", "JSON"]
BIG_VALUE_TYPES = ["LIST", "SET", "HASH", "ZSET"]

def __init__(self, types: typing.Optional[typing.List[str]] = None):
self.uid = SeederBase.UID_COUNTER
Expand Down Expand Up @@ -137,6 +138,8 @@ def __init__(
data_size=100,
collection_size=None,
types: typing.Optional[typing.List[str]] = None,
huge_value_percentage=0,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now I will keep a flat probability for each key in the key target to contain a huge value

huge_value_size=0,
):
SeederBase.__init__(self, types)
self.key_target = key_target
Expand All @@ -146,6 +149,9 @@ def __init__(
else:
self.collection_size = collection_size

self.huge_value_percentage = huge_value_percentage
self.huge_value_size = huge_value_size

self.units = [
Seeder.Unit(
prefix=f"k-s{self.uid}u{i}-",
Expand All @@ -166,6 +172,8 @@ async def run(self, client: aioredis.Redis, target_ops=None, target_deviation=No
target_deviation if target_deviation is not None else -1,
self.data_size,
self.collection_size,
self.huge_value_percentage,
self.huge_value_size,
]

sha = await client.script_load(Seeder._load_script("generate"))
Expand Down
6 changes: 5 additions & 1 deletion tests/dragonfly/seeder/script-generate.lua
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@ local total_ops = tonumber(ARGV[6])
local min_dev = tonumber(ARGV[7])
local data_size = tonumber(ARGV[8])
local collection_size = tonumber(ARGV[9])
-- Probability of each key in key_target to be a big value
local huge_value_percentage = tonumber(ARGV[10])
local huge_value_size = tonumber(ARGV[11])

-- collect all keys belonging to this script
-- assumes exclusive ownership
local keys = LU_collect_keys(prefix, type)

LG_funcs.init(data_size, collection_size)
LG_funcs.init(data_size, collection_size, huge_value_percentage, huge_value_size)
local addfunc = LG_funcs['add_' .. string.lower(type)]
local modfunc = LG_funcs['mod_' .. string.lower(type)]

Expand Down Expand Up @@ -85,6 +88,7 @@ while true do
if counter % 10 == 0 then
-- calculate intensity (not normalized probabilities)
-- please see attached plots in PR to understand convergence
-- https://github.com/dragonflydb/dragonfly/pull/2556
Copy link
Contributor Author

@kostasrim kostasrim Nov 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It took me a few seconds to find this, so I thought I should include the link. Now we can jump straight to the PR if needed (and it will be needed by anyone who changes this code).


-- the add intensity is monotonically decreasing with keycount growing,
-- the delete intensity is monotonically increasing with keycount growing,
Expand Down
95 changes: 83 additions & 12 deletions tests/dragonfly/seeder/script-genlib.lua
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
local LG_funcs = {}

-- Configure the generator library. Stores the sizing parameters on LG_funcs
-- so the add_*/mod_* generators can read them.
--
-- dsize          data size; split across the elements of a container
-- csize          number of elements per container
-- large_val_perc optional, percentage (0-100) used by huge_entry() to decide
--                whether a generated entry is huge; defaults to 0 (never)
-- large_val_sz   optional, element size used for huge entries; defaults to 0
function LG_funcs.init(dsize, csize, large_val_perc, large_val_sz)
    LG_funcs.dsize = dsize
    LG_funcs.csize = csize
    -- per-element size so that csize elements add up to roughly dsize
    LG_funcs.esize = math.ceil(dsize / csize)

    -- Default the huge-value settings so the legacy two-argument call
    -- init(dsize, csize) keeps working: huge_entry() divides the percentage
    -- by 100 and would raise on nil.
    LG_funcs.huge_value_percentage = large_val_perc or 0
    LG_funcs.huge_value_size = large_val_sz or 0

    -- NOTE(review): several generators in this file read the size as
    -- LG_funcs.large_val_sz rather than LG_funcs.huge_value_size. Keep both
    -- names populated until those call sites are unified, otherwise they
    -- pass nil to dragonfly.randstr.
    LG_funcs.large_val_sz = LG_funcs.huge_value_size
end

-- Randomly decide whether the next generated entry should be a huge value.
-- Returns true with probability LG_funcs.huge_value_percentage / 100.
local function huge_entry()
    local threshold = LG_funcs.huge_value_percentage / 100
    -- math.random() with no arguments yields a uniform float in [0, 1)
    return threshold > math.random()
end

-- strings
Expand All @@ -27,12 +37,18 @@ end
-- lists
-- store list of random blobs of default container/element sizes

-- Create a list at `key` by pushing LG_funcs.csize random blobs onto it.
-- Each call rolls huge_entry() once: on a hit every blob has the huge value
-- size, otherwise the default element size.
-- `huge_value` is accepted but not consulted by this function.
function LG_funcs.add_list(key, huge_value)
    local blob_size = LG_funcs.esize
    if huge_entry() then
        blob_size = LG_funcs.huge_value_size
    end

    -- presumably randstr(size, count) returns a table of `count` strings,
    -- since the result is unpacked into LPUSH arguments; confirm against
    -- the dragonfly scripting API
    local elements = dragonfly.randstr(blob_size, LG_funcs.csize)
    redis.apcall('LPUSH', key, unpack(elements))
end

function LG_funcs.mod_list(key)
function LG_funcs.mod_list(key, huge_value)
-- equally likely pops and pushes, we rely on the list size being large enough
-- to "highly likely" not get emptied out by consecutive pops
local action = math.random(1, 4)
Expand All @@ -41,9 +57,23 @@ function LG_funcs.mod_list(key)
elseif action == 2 then
redis.apcall('LPOP', key)
elseif action == 3 then
redis.apcall('LPUSH', key, dragonfly.randstr(LG_funcs.esize))
local str
if huge_entry() then
str = dragonfly.randstr(LG_funcs.huge_value_size)
else
str = dragonfly.randstr(LG_funcs.esize)
end

redis.apcall('LPUSH', key, str)
else
redis.apcall('RPUSH', key, dragonfly.randstr(LG_funcs.esize))
local str
if huge_entry() then
str = dragonfly.randstr(LG_funcs.huge_value_size)
else
str = dragonfly.randstr(LG_funcs.esize)
end

redis.apcall('RPUSH', key, str)
end
end

Expand All @@ -62,7 +92,15 @@ function LG_funcs.add_set(key, keys)
end
redis.apcall('SDIFFSTORE', key, keys[i1], keys[i2])
else
local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
local elements
if huge_entry() then
-- Hard coded 10 here, meaning up to 10 huge entries per set
Copy link
Contributor Author

@kostasrim kostasrim Nov 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO (so I don't forget to fix it): replace the hard-coded 10 with LG_funcs.csize.

-- TODO make this configurable
elements = dragonfly.randstr(LG_funcs.large_val_sz, 10)
else
elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
end

redis.apcall('SADD', key, unpack(elements))
end
end
Expand All @@ -72,7 +110,14 @@ function LG_funcs.mod_set(key)
if math.random() < 0.5 then
redis.apcall('SPOP', key)
else
redis.apcall('SADD', key, dragonfly.randstr(LG_funcs.esize))
local rand_str
if huge_entry() then
rand_str = dragonfly.randstr(LG_funcs.huge_value_size)
else
rand_str = dragonfly.randstr(LG_funcs.esize)
end

redis.apcall('SADD', key, rand_str)
end
end

Expand All @@ -82,7 +127,13 @@ end
-- where `value` is a random string for even indices and a number for odd indices

function LG_funcs.add_hash(key)
local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
local blobs
if huge_entry() then
blobs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize / 2)
else
blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
end

local htable = {}
for i = 1, LG_funcs.csize, 2 do
htable[i * 2 - 1] = tostring(i)
Expand All @@ -100,15 +151,28 @@ function LG_funcs.mod_hash(key)
if idx % 2 == 1 then
redis.apcall('HINCRBY', key, tostring(idx), 1)
else
redis.apcall('HSET', key, tostring(idx), dragonfly.randstr(LG_funcs.esize))
local str
if huge_entry() then
str = dragonfly.randstr(LG_funcs.large_val_sz)
else
str = dragonfly.randstr(LG_funcs.esize)
end

redis.apcall('HSET', key, tostring(idx), str)
end
end

-- sorted sets

function LG_funcs.add_zset(key, keys)
-- TODO: We don't support ZDIFFSTORE
local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
local blobs
if huge_entry() then
blobs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize)
else
blobs = dragonfly.randstr(LG_funcs.csize, LG_funcs.csize)
end

local ztable = {}
for i = 1, LG_funcs.csize do
ztable[i * 2 - 1] = tostring(i)
Expand All @@ -120,7 +184,14 @@ end
function LG_funcs.mod_zset(key, dbsize)
local action = math.random(1, 4)
if action <= 2 then
redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), dragonfly.randstr(LG_funcs.esize))
local str
if huge_entry() then
str = dragonfly.randstr(LG_funcs.large_val_sz)
else
str = dragonfly.randstr(LG_funcs.esize)
end

redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), str)
elseif action == 3 then
redis.apcall('ZPOPMAX', key)
else
Expand Down
Loading