Skip to content

Commit

Permalink
fix: _read_hint to actually work on open after merge
Browse files Browse the repository at this point in the history
  • Loading branch information
eiri committed Mar 27, 2024
1 parent c11e7be commit e6e170e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 24 deletions.
42 changes: 22 additions & 20 deletions src/py_bitcask/bitcask.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import shutil
import uuid
from collections import namedtuple
from dataclasses import dataclass
from functools import reduce
from io import BytesIO
Expand Down Expand Up @@ -136,9 +137,9 @@ def _open_with_hints(self, hint_files) -> None:
Returns:
None
"""
for uid, hints in hint_files.items():
file_id = crc32(uid.encode("utf-8"))
file_name = os.path.join(self.__dirname, uid + ".db")
for file_stem, hints in hint_files.items():
file_id = crc32(file_stem.encode("utf-8"))
file_name = os.path.join(self.__dirname, file_stem + ".db")
current = open(file_name, "rb")
self.__datadir[file_id] = current
for hint in hints:
Expand All @@ -160,20 +161,19 @@ def _read_hints(self) -> Optional[Dict[str, List[Hint]]]:
"""
if self.__dirname == ":memory":
return
hint_files = {}
seen = {}
deleted = {}
KeyState = namedtuple("KeyState", "tstamp deleted file_id hint")
keys = {}
files = os.listdir(self.__dirname)
files.sort()
files.reverse()
for file in files:
file_id, ext = os.path.splitext(file)
# TODO: check if hint file is here and read it instead
if ext != ".db":
continue
file_name = os.path.join(self.__dirname, file)
if (
os.path.isfile(file_name)
and os.path.getsize(file_name) >= self.header_size
):
uid, _ = os.path.splitext(file)
# TODO: check if hint file is here and read it instead
current = open(file_name, "rb")
while current.tell() < os.path.getsize(file_name):
data = current.read(self.header_size)
Expand All @@ -183,16 +183,18 @@ def _read_hints(self) -> Optional[Dict[str, List[Hint]]]:
tstamp = uuid.UUID(int=int.from_bytes(ts_bytes, "big"))
key = current.read(key_sz)
value_pos = current.tell()
if value_sz == 0:
deleted[key] = True
continue
if key not in seen and key not in deleted:
seen[key] = True
if key not in keys or keys[key].tstamp < tstamp:
hint = Hint(tstamp, key_sz, value_sz, value_pos, key)
if uid not in hint_files:
hint_files[uid] = []
hint_files[uid].append(hint)
deleted = value_sz == 0
keys[key] = KeyState(tstamp, deleted, file_id, hint)
current.seek(value_sz, 1)
hint_files = {}
for key_state in keys.values():
if key_state.deleted:
continue
if key_state.file_id not in hint_files:
hint_files[key_state.file_id] = []
hint_files[key_state.file_id].append(key_state.hint)
return hint_files

def _reactivate(self) -> None:
Expand Down Expand Up @@ -394,8 +396,8 @@ def merge(self) -> bool:
merge_cask._reactivate()
# build and store hint files for merged data files
hint_files = merge_cask._read_hints()
for uid, hints in hint_files.items():
hint_file_name = os.path.join(merge_dir, uid + ".hint")
for file_stem, hints in hint_files.items():
hint_file_name = os.path.join(merge_dir, file_stem + ".hint")
hint_file = open(hint_file_name, "a+b")
for hint in hints:
head = pack(
Expand Down
4 changes: 0 additions & 4 deletions tests/test_bitcask.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,16 +309,12 @@ def test_check_merge(self, db, randomized):
i += 1

def test_close(self, db):
keys = db.list_keys()
print(f"close: {len(keys)}")
ok = db.close()
assert ok

def test_reopen(self, db, test_dir):
ok = db.open(test_dir)
assert ok
keys = db.list_keys()
print(f"reopen: {len(keys)}")

def test_check_reopen(self, db, randomized):
keys = db.list_keys()
Expand Down

0 comments on commit e6e170e

Please sign in to comment.