Skip to content

Commit

Permalink
Add null byte literals to decoder
Browse files (browse the repository at this point in the history)
  • Loading branch information
kg583 committed Aug 31, 2024
1 parent ce53f3b commit 9043fbd
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 12 deletions.
25 changes: 19 additions & 6 deletions tivars/tokenizer/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@ def decode(bytestream: bytes, *,
out = []
since = OsVersions.INITIAL

byte_attr = mode == "ti_ascii"

index = 0
curr_bytes = b''
while index < len(bytestream):
curr_bytes += bytestream[index:][:1]
curr_hex = curr_bytes.hex()

if curr_bytes[0]:
if curr_bytes in tokens.bytes:
Expand All @@ -51,25 +54,35 @@ def decode(bytestream: bytes, *,

elif len(curr_bytes) >= 2:
if not any(key.startswith(curr_bytes[:1]) for key in tokens.bytes):
warn(f"Unrecognized byte '{curr_bytes[0]:x}' at position {index}.",
warn(f"Unrecognized byte '0x{curr_hex}' at position {index}.",
BytesWarning)

out.append(b'?' if mode == "ti_ascii" else rf"\x{curr_bytes[0]:x}")
out.append(b'?' if byte_attr else rf"\x{curr_hex}")

else:
warn(f"Unrecognized bytes '0x{curr_bytes[0]:x}{curr_bytes[1]:x}' at position {index}.",
warn(f"Unrecognized bytes '0x{curr_hex}' at position {index}.",
BytesWarning)

out.append(b'?' if mode == "ti_ascii" else rf"\u{curr_bytes[0]:x}{curr_bytes[1]:x}")
out.append(b'?' if byte_attr else rf"\u{curr_hex}")

curr_bytes = b''

elif any(curr_bytes):
raise ValueError(f"unexpected null byte at position {index}")
count = 0
while not curr_bytes[0]:
curr_bytes = curr_bytes[1:]
count += 1
out.append(b'?' if byte_attr else r"\x00")

warn(f"There are {count} unexpected null bytes at position {index}." if count > 1 else
f"There is an unexpected null byte at position {index}.",
BytesWarning)

index -= 1

index += 1

return b''.join(out) if mode == "ti_ascii" else "".join(out), since
return b''.join(out) if byte_attr else "".join(out), since


__all__ = ["decode"]
15 changes: 9 additions & 6 deletions tivars/types/tokenized.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import re

from io import BytesIO
from warnings import warn
from warnings import catch_warnings, simplefilter, warn

from tivars.data import *
from tivars.models import *
Expand Down Expand Up @@ -351,12 +351,15 @@ def string(self) -> str:
return string

def coerce(self):
try:
self.string()
doors = False
with catch_warnings():
simplefilter("error")

try:
self.string()
doors = False

except ValueError:
doors = True
except BytesWarning:
doors = True

doors &= b"\xEF\x68" in self.data and self.data.index(b"\xEF\x68") > 0

Expand Down

0 comments on commit 9043fbd

Please sign in to comment.