Skip to content

Commit

Permalink
feat: support re-opening existing db
Browse files Browse the repository at this point in the history
  • Loading branch information
eiri committed Mar 11, 2024
1 parent 9cd023e commit 384ac0e
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 3 deletions.
31 changes: 29 additions & 2 deletions src/py_bitcask/bitcask.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import uuid
from dataclasses import dataclass
from functools import reduce
from struct import pack
from struct import pack, unpack
from typing import Any, Callable, List, Optional, Union
from zlib import crc32

Expand Down Expand Up @@ -41,6 +41,8 @@ class KeyRec:

class Bitcask(metaclass=Singleton):
DEFAULT_THRESHOLD = 1024
HEADER_FORMAT = ">I16sLL"
header_size = 28 # struct.calcsize(HEADER_FORMAT)

def __init__(self, threshold: Optional[int] = DEFAULT_THRESHOLD) -> None:
"""
Expand Down Expand Up @@ -74,9 +76,34 @@ def open(self, dataDir: str) -> bool:
f"The path '{dataDir}' is not a directory."
)
self.__datadir = dataDir
if self.__datadir != ":memory":
self._open()
self._reactivate()
return True

def _open(self) -> None:
    """
    Rebuild the key directory from the files already in the data directory.

    Every regular file larger than 128 bytes is opened for binary reading
    and registered in the file table under the ``id()`` of its handle; the
    handle is left open. The file is then scanned record by record: the
    fixed-size header is unpacked, the key is read, and the key is mapped
    to a ``KeyRec`` holding the file id, the value size, the offset of the
    value and the record timestamp. Value bytes are skipped, not read, and
    the leading 4-byte header field is not checked here.

    Files are visited in sorted name order, and a later record for a key
    replaces any earlier one.

    A record that is cut short (incomplete header, incomplete key, or a
    value that would extend past the end of the file) ends the scan of
    that file without being indexed, so a partially written tail neither
    raises ``struct.error`` nor leaves an entry pointing past end-of-file.
    """
    for file in sorted(os.listdir(self.__datadir)):
        file_name = os.path.join(self.__datadir, file)
        if not os.path.isfile(file_name):
            continue
        # The size cannot change under a read-only scan; query it once
        # instead of on every loop iteration.
        file_size = os.path.getsize(file_name)
        if file_size <= 128:
            continue
        current = open(file_name, "rb")
        uid = id(current)
        self.__dir[uid] = current
        while current.tell() < file_size:
            data = current.read(self.header_size)
            if len(data) < self.header_size:
                # Truncated header: nothing usable is left in this file.
                break
            _, ts_bytes, key_sz, value_sz = unpack(
                self.HEADER_FORMAT, data
            )
            key = current.read(key_sz)
            value_pos = current.tell()
            if len(key) < key_sz or value_pos + value_sz > file_size:
                # Truncated key or value: do not index a record whose
                # payload is not fully on disk.
                break
            tstamp = uuid.UUID(int=int.from_bytes(ts_bytes, "big"))
            self.__keydir[key] = KeyRec(
                uid,
                value_sz,
                value_pos,
                tstamp,
            )
            # Jump over the value; only its position and size are kept.
            current.seek(value_sz, 1)

def _reactivate(self) -> None:
"""
Reactivates the storage by creating a new active storage file.
Expand Down Expand Up @@ -159,7 +186,7 @@ def _put(self, key: bytes, value: bytes) -> bool:
tstamp = uuid7()
key_sz = len(key)
value_sz = len(value)
head = bytes(tstamp.bytes + pack(">LL", key_sz, value_sz))
head = pack(">16sLL", tstamp.bytes, key_sz, value_sz)
crc = crc32(head)
crc = crc32(key, crc)
crc = pack(">I", crc32(value, crc))
Expand Down
31 changes: 30 additions & 1 deletion tests/test_bitcask.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_open_invalid_dir(self, db):
with pytest.raises(NotADirectoryError):
db.open("missing")

def test_open_again(self, db):
def test_open_opened(self, db):
ok = db.open(TEST_DIR)
assert ok

Expand Down Expand Up @@ -153,10 +153,39 @@ def test_sync(self, db):
ok = db.sync()
assert ok


class TestBitcaskReopen:
    """
    Scenario: data written before ``close`` is still readable after ``open``.

    The methods are steps of a single sequence (open, put, get, close,
    reopen, get again, close) and are written in the order they are meant
    to run.
    NOTE(review): this presumably relies on ``db`` and ``randomized`` being
    fixtures shared by all tests of the class -- confirm in conftest.
    """

    def test_open(self, db):
        """Opening the test directory reports success."""
        assert db.open(TEST_DIR)

    def test_put(self, db, randomized):
        """Each randomized key/value pair is stored successfully."""
        for name, payload in randomized.items():
            assert db.put(name, payload)

    def test_get(self, db, randomized):
        """Each stored key returns the value that was written for it."""
        for name, wanted in randomized.items():
            assert db.get(name) == wanted

    def test_close(self, db):
        """Closing the opened store reports success."""
        assert db.close()

    def test_reopen(self, db):
        """Opening the same directory a second time reports success."""
        assert db.open(TEST_DIR)

    def test_reread(self, db, randomized):
        """After reopening, each key still returns its expected value."""
        for name, wanted in randomized.items():
            assert db.get(name) == wanted

    def test_close_again(self, db):
        """Closing the reopened store reports success."""
        assert db.close()


class TestInMemBitcask:
def test_open(self):
Expand Down

0 comments on commit 384ac0e

Please sign in to comment.