From 53eba54b794abd2c385619384b96b23abd83895e Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 18 Jun 2020 11:37:41 -0500 Subject: [PATCH] Fallback to reading streamer and raise better error messages on true failures. (#28) * Fallback to reading streamer and raise better error messages on true failures. * Pass context down to Cursor. * Black and flake8. * Passed context all the way down to Chunk. * This is a good error message. * Fallback for wrong streamers works. --- tests/test_0023-more-interpretations-1.py | 4 +- tests/test_0028-fallback-to-read-streamer.py | 21 +++ uproot4/compression.py | 14 +- uproot4/deserialization.py | 85 ++++++++--- uproot4/model.py | 68 +++++++-- uproot4/models/RNTuple.py | 2 +- uproot4/models/TArray.py | 4 +- uproot4/models/TAtt.py | 10 +- uproot4/models/TBasket.py | 16 +- uproot4/models/TBranch.py | 56 +++---- uproot4/models/TLeaf.py | 20 +-- uproot4/models/TList.py | 6 +- uproot4/models/TNamed.py | 4 +- uproot4/models/TObjArray.py | 8 +- uproot4/models/TObjString.py | 2 +- uproot4/models/TObject.py | 4 +- uproot4/models/TString.py | 2 +- uproot4/models/TTree.py | 20 +-- uproot4/reading.py | 145 ++++++++++++++----- uproot4/source/chunk.py | 31 ++-- uproot4/source/cursor.py | 49 ++++--- uproot4/streamers.py | 57 +++++--- 22 files changed, 413 insertions(+), 215 deletions(-) create mode 100644 tests/test_0028-fallback-to-read-streamer.py diff --git a/tests/test_0023-more-interpretations-1.py b/tests/test_0023-more-interpretations-1.py index 933b7bef3..70e9cd65d 100644 --- a/tests/test_0023-more-interpretations-1.py +++ b/tests/test_0023-more-interpretations-1.py @@ -109,9 +109,7 @@ def test_double32(): del uproot4.classes["TBranch"] del uproot4.classes["TBranchElement"] - with uproot4.open( - skhep_testdata.data_path("uproot-demo-double32.root"), - )["T"] as t: + with uproot4.open(skhep_testdata.data_path("uproot-demo-double32.root"))["T"] as t: print(t["fD64"].interpretation) print(t["fF32"].interpretation) diff --git a/tests/test_0028-fallback-to-read-streamer.py b/tests/test_0028-fallback-to-read-streamer.py new file mode 100644 index 000000000..b89b2b5c4 --- /dev/null +++ b/tests/test_0028-fallback-to-read-streamer.py @@ -0,0 +1,21 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE + +from __future__ import absolute_import + +import numpy +import pytest +import skhep_testdata + +import uproot4 + + +def test_fallback_reading(): + with uproot4.open( + skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") + ) as f: + f["tree:evt/P3/P3.Py"] + assert f.file._streamers is None + + with uproot4.open(skhep_testdata.data_path("uproot-demo-double32.root")) as f: + f["T/fD64"] + assert f.file._streamers is not None diff --git a/uproot4/compression.py b/uproot4/compression.py index b2df5d8fa..7f1203eb6 100644 --- a/uproot4/compression.py +++ b/uproot4/compression.py @@ -156,24 +156,26 @@ def decompress(chunk, cursor, context, compressed_bytes, uncompressed_bytes): # https://github.com/root-project/root/blob/master/core/lzma/src/ZipLZMA.c#L81 # https://github.com/root-project/root/blob/master/core/lz4/src/ZipLZ4.cxx#L38 algo, method, c1, c2, c3, u1, u2, u3 = cursor.fields( - chunk, _decompress_header_format + chunk, _decompress_header_format, context ) block_compressed_bytes = c1 + (c2 << 8) + (c3 << 16) block_uncompressed_bytes = u1 + (u2 << 8) + (u3 << 16) if algo == b"ZL": cls = ZLIB - data = cursor.bytes(chunk, block_compressed_bytes) + data = cursor.bytes(chunk, block_compressed_bytes, context) elif algo == b"XZ": cls = LZMA - data = cursor.bytes(chunk, block_compressed_bytes) + data = cursor.bytes(chunk, block_compressed_bytes, context) elif algo == b"L4": cls = LZ4 block_compressed_bytes -= 8 - expected_checksum = cursor.field(chunk, _decompress_checksum_format) - data = cursor.bytes(chunk, block_compressed_bytes) + expected_checksum = cursor.field( + chunk, _decompress_checksum_format, context + ) + data = cursor.bytes(chunk, block_compressed_bytes, context) try: import xxhash except ImportError: @@ -197,7 +199,7 @@ def decompress(chunk, cursor, context, compressed_bytes, uncompressed_bytes): elif algo == b"ZS": cls = ZSTD - data = cursor.bytes(chunk, block_compressed_bytes) + data = cursor.bytes(chunk, block_compressed_bytes, context) elif algo == b"CS": raise ValueError( diff --git a/uproot4/deserialization.py b/uproot4/deserialization.py index 60eebf2ed..7e18c293a 100644 --- a/uproot4/deserialization.py +++ b/uproot4/deserialization.py @@ -51,12 +51,61 @@ def c(name, version=None): return out +class DeserializationError(Exception): + __slots__ = ["message", "context", "file_path"] + + def __init__(self, message, context, file_path): + self.message = message + self.context = context + self.file_path = file_path + + def __str__(self): + lines = [] + indent = " " + for obj in self.context.get("breadcrumbs", ()): + lines.append( + "{0}{1} version {2} as {3}.{4}".format( + indent, + obj.classname, + obj.instance_version, + type(obj).__module__, + type(obj).__name__, + ) + ) + indent = indent + " " + for v in getattr(obj, "_bases", []): + lines.append("{0}(base): {1}".format(indent, repr(v))) + for k, v in getattr(obj, "_members", {}).items(): + lines.append("{0}{1}: {2}".format(indent, k, repr(v))) + + in_parent = "" + if "TBranch" in self.context: + in_parent = "\nin TBranch {0}".format(self.context["TBranch"].object_path) + elif "TKey" in self.context: + in_parent = "\nin object {0}".format(self.context["TKey"].object_path) + + if len(lines) == 0: + return """{0} +in file {1}{2}""".format( + self.message, self.file_path, in_parent + ) + else: + return """while reading + +{0} + +{1} +in file {2}{3}""".format( + "\n".join(lines), self.message, self.file_path, in_parent + ) + + _numbytes_version_1 = struct.Struct(">IH") _numbytes_version_2 = struct.Struct(">H") -def numbytes_version(chunk, cursor, move=True): - num_bytes, version = cursor.fields(chunk, _numbytes_version_1, move=False) +def numbytes_version(chunk, cursor, context, move=True): + num_bytes, version = cursor.fields(chunk, _numbytes_version_1, context, move=False) num_bytes = numpy.int64(num_bytes) if num_bytes & uproot4.const.kByteCountMask: @@ -66,36 +115,34 @@ def numbytes_version(chunk, cursor, move=True): else: num_bytes = None - version = cursor.field(chunk, _numbytes_version_2, move=move) + version = cursor.field(chunk, _numbytes_version_2, context, move=move) return num_bytes, version -def numbytes_check(start_cursor, stop_cursor, num_bytes, classname, file_path): +def numbytes_check(start_cursor, stop_cursor, num_bytes, classname, context, file_path): if num_bytes is not None: observed = stop_cursor.displacement(start_cursor) if observed != num_bytes: - if file_path is None: - in_file = "" - else: - in_file = "\nin file {0}".format(file_path) - raise ValueError( - """instance of ROOT class {0} has {1} bytes; expected {2}{3}""".format( - classname, observed, num_bytes, in_file - ) + raise uproot4.deserialization.DeserializationError( + """expected {0} bytes but cursor moved by {1} bytes (through {2})""".format( + num_bytes, observed, classname + ), + context, + file_path, ) _map_string_string_format1 = struct.Struct(">I") -def map_string_string(chunk, cursor): +def map_string_string(chunk, cursor, context): cursor.skip(12) - size = cursor.field(chunk, _map_string_string_format1) + size = cursor.field(chunk, _map_string_string_format1, context) cursor.skip(6) - keys = [cursor.string(chunk) for i in range(size)] + keys = [cursor.string(chunk, context) for i in range(size)] cursor.skip(6) - values = [cursor.string(chunk) for i in range(size)] + values = [cursor.string(chunk, context) for i in range(size)] return dict(zip(keys, values)) @@ -111,7 +158,7 @@ def read_object_any(chunk, cursor, context, file, parent, as_class=None): # https://github.com/root-project/root/blob/c4aa801d24d0b1eeb6c1623fd18160ef2397ee54/io/io/src/TBufferFile.cxx#L2404 beg = cursor.displacement() - bcnt = numpy.int64(cursor.field(chunk, _read_object_any_format1)) + bcnt = numpy.int64(cursor.field(chunk, _read_object_any_format1, context)) if (bcnt & uproot4.const.kByteCountMask) == 0 or ( bcnt == uproot4.const.kNewClassTag @@ -123,7 +170,7 @@ def read_object_any(chunk, cursor, context, file, parent, as_class=None): else: vers = 1 start = cursor.displacement() - tag = numpy.int64(cursor.field(chunk, _read_object_any_format1)) + tag = numpy.int64(cursor.field(chunk, _read_object_any_format1, context)) bcnt = int(bcnt) if tag & uproot4.const.kClassMask == 0: @@ -146,7 +193,7 @@ def read_object_any(chunk, cursor, context, file, parent, as_class=None): elif tag == uproot4.const.kNewClassTag: # new class and object - classname = cursor.classname(chunk) + classname = cursor.classname(chunk, context) cls = file.class_named(classname) diff --git a/uproot4/model.py b/uproot4/model.py index b2682b91c..6755b78c3 100644 --- a/uproot4/model.py +++ b/uproot4/model.py @@ -10,6 +10,40 @@ import uproot4._util +bootstrap_classnames = [ + "TStreamerInfo", + "TStreamerElement", + "TStreamerArtificial", + "TStreamerBase", + "TStreamerBasicPointer", + "TStreamerBasicType", + "TStreamerLoop", + "TStreamerObject", + "TStreamerObjectAny", + "TStreamerObjectAnyPointer", + "TStreamerObjectPointer", + "TStreamerSTL", + "TStreamerSTLstring", + "TStreamerString", + "TList", + "TObjArray", + "TObjString", +] + + +def bootstrap_classes(): + import uproot4.streamers + import uproot4.models.TList + import uproot4.models.TObjArray + import uproot4.models.TObjString + + custom_classes = {} + for classname in bootstrap_classnames: + custom_classes[classname] = uproot4.classes[classname] + + return custom_classes + + class Model(object): @classmethod def read(cls, chunk, cursor, context, file, parent): @@ -22,6 +56,9 @@ def read(cls, chunk, cursor, context, file, parent): self._num_bytes = None self._instance_version = None + old_breadcrumbs = context.get("breadcrumbs", ()) + context["breadcrumbs"] = old_breadcrumbs + (self,) + self.hook_before_read(chunk=chunk, cursor=cursor, context=context) self.read_numbytes_version(chunk, cursor, context) @@ -36,7 +73,11 @@ def read(cls, chunk, cursor, context, file, parent): self.hook_before_postprocess(chunk=chunk, cursor=cursor, context=context) - return self.postprocess(chunk, cursor, context) + out = self.postprocess(chunk, cursor, context) + + context["breadcrumbs"] = old_breadcrumbs + + return out def __repr__(self): return "<{0} at 0x{1:012x}>".format( @@ -49,7 +90,7 @@ def read_numbytes_version(self, chunk, cursor, context): ( self._num_bytes, self._instance_version, - ) = uproot4.deserialization.numbytes_version(chunk, cursor) + ) = uproot4.deserialization.numbytes_version(chunk, cursor, context) def read_members(self, chunk, cursor, context): pass @@ -61,7 +102,8 @@ def check_numbytes(self, cursor, context): self._cursor, cursor, self._num_bytes, - classname_pretty(self.classname, self.class_version), + self.classname, + context, getattr(self._file, "file_path"), ) @@ -259,7 +301,7 @@ def read(cls, chunk, cursor, context, file, parent): import uproot4.deserialization num_bytes, version = uproot4.deserialization.numbytes_version( - chunk, cursor, move=False + chunk, cursor, context, move=False ) versioned_cls = cls.known_versions.get(version) @@ -399,11 +441,15 @@ def classname_pretty(classname, version): return "{0} (version {1})".format(classname, version) -def has_class_named(classname, version=None, classes=None): - if classes is None: - classes = uproot4.classes +def maybe_custom_classes(custom_classes): + if custom_classes is None: + return uproot4.classes + else: + return custom_classes - cls = classes.get(classname) + +def has_class_named(classname, version=None, custom_classes=None): + cls = maybe_custom_classes(custom_classes).get(classname) if cls is None: return False @@ -413,10 +459,10 @@ def has_class_named(classname, version=None, classes=None): return True -def class_named(classname, version=None, classes=None): - if classes is None: +def class_named(classname, version=None, custom_classes=None): + if custom_classes is None: classes = uproot4.classes - where = "the given 'classes' dict" + where = "the 'custom_classes' dict" else: where = "uproot4.classes" diff --git a/uproot4/models/RNTuple.py b/uproot4/models/RNTuple.py index a139242c5..81530f4b7 100644 --- a/uproot4/models/RNTuple.py +++ b/uproot4/models/RNTuple.py @@ -24,7 +24,7 @@ def read_members(self, chunk, cursor, context): self._members["fNBytesFooter"], self._members["fLenFooter"], self._members["fReserved"], - ) = cursor.fields(chunk, _rntuple_format1) + ) = cursor.fields(chunk, _rntuple_format1, context) uproot4.classes[ diff --git a/uproot4/models/TArray.py b/uproot4/models/TArray.py index 36f88b40a..15fe0d022 100644 --- a/uproot4/models/TArray.py +++ b/uproot4/models/TArray.py @@ -22,8 +22,8 @@ def read_numbytes_version(self, chunk, cursor, context): pass def read_members(self, chunk, cursor, context): - self._members["fN"] = cursor.field(chunk, _tarray_format1) - self._data = cursor.array(chunk, self._members["fN"], self.dtype) + self._members["fN"] = cursor.field(chunk, _tarray_format1, context) + self._data = cursor.array(chunk, self._members["fN"], self.dtype, context) def __array__(self): return self._data diff --git a/uproot4/models/TAtt.py b/uproot4/models/TAtt.py index b3fc7b85a..37402c8a0 100644 --- a/uproot4/models/TAtt.py +++ b/uproot4/models/TAtt.py @@ -17,7 +17,7 @@ def read_members(self, chunk, cursor, context): self._members["fLineColor"], self._members["fLineStyle"], self._members["fLineWidth"], - ) = cursor.fields(chunk, _tattline1_format1) + ) = cursor.fields(chunk, _tattline1_format1, context) base_names_versions = [] member_names = ["fLineColor", "fLineStyle", "fLineWidth"] @@ -32,7 +32,7 @@ def read_members(self, chunk, cursor, context): self._members["fLineColor"], self._members["fLineStyle"], self._members["fLineWidth"], - ) = cursor.fields(chunk, _tattline2_format1) + ) = cursor.fields(chunk, _tattline2_format1, context) base_names_versions = [] member_names = ["fLineColor", "fLineStyle", "fLineWidth"] @@ -48,7 +48,7 @@ def read_members(self, chunk, cursor, context): class Model_TAttFill_v1(uproot4.model.VersionedModel): def read_members(self, chunk, cursor, context): self._members["fFillColor"], self._members["fFillStyle"] = cursor.fields( - chunk, _tattfill1_format1 + chunk, _tattfill1_format1, context ) base_names_versions = [] @@ -61,7 +61,7 @@ def read_members(self, chunk, cursor, context): class Model_TAttFill_v2(uproot4.model.VersionedModel): def read_members(self, chunk, cursor, context): self._members["fFillColor"], self._members["fFillStyle"] = cursor.fields( - chunk, _tattfill2_format1 + chunk, _tattfill2_format1, context ) base_names_versions = [] @@ -80,7 +80,7 @@ def read_members(self, chunk, cursor, context): self._members["fMarkerColor"], self._members["fMarkerStyle"], self._members["fMarkerSize"], - ) = cursor.fields(chunk, _tattmarker2_format1) + ) = cursor.fields(chunk, _tattmarker2_format1, context) base_names_versions = [] member_names = ["fMarkerColor", "fMarkerStyle", "fMarkserSize"] diff --git a/uproot4/models/TBasket.py b/uproot4/models/TBasket.py index 045a1af8a..25391ffcb 100644 --- a/uproot4/models/TBasket.py +++ b/uproot4/models/TBasket.py @@ -38,7 +38,7 @@ def read_members(self, chunk, cursor, context): self._members["fDatime"], self._members["fKeylen"], self._members["fCycle"], - ) = cursor.fields(chunk, _tbasket_format1) + ) = cursor.fields(chunk, _tbasket_format1, context) # skip the class name, name, and title cursor.move_to( @@ -51,15 +51,15 @@ def read_members(self, chunk, cursor, context): self._members["fNevBufSize"], self._members["fNevBuf"], self._members["fLast"], - ) = cursor.fields(chunk, _tbasket_format2) + ) = cursor.fields(chunk, _tbasket_format2, context) cursor.skip(1) if self.is_embedded: if self._members["fNevBufSize"] > 8: - raw_byte_offsets = cursor.bytes(chunk, 8 + self.num_entries * 4).view( - _tbasket_offsets_dtype - ) + raw_byte_offsets = cursor.bytes( + chunk, 8 + self.num_entries * 4, context + ).view(_tbasket_offsets_dtype) cursor.skip(-4) # subtracting fKeylen makes a new buffer and converts to native endian @@ -74,17 +74,17 @@ def read_members(self, chunk, cursor, context): cursor.skip(self._members["fKeylen"]) self._raw_data = None - self._data = cursor.bytes(chunk, self.border, copy_if_memmap=True) + self._data = cursor.bytes(chunk, self.border, context, copy_if_memmap=True) else: if self.compressed_bytes != self.uncompressed_bytes: uncompressed = uproot4.compression.decompress( chunk, cursor, {}, self.compressed_bytes, self.uncompressed_bytes, ) - self._raw_data = uncompressed.get(0, self.uncompressed_bytes) + self._raw_data = uncompressed.get(0, self.uncompressed_bytes, context) else: self._raw_data = cursor.bytes( - chunk, self.uncompressed_bytes, copy_if_memmap=True + chunk, self.uncompressed_bytes, context, copy_if_memmap=True ) if self.border != self.uncompressed_bytes: diff --git a/uproot4/models/TBranch.py b/uproot4/models/TBranch.py index 676ec2d8c..7043a3044 100644 --- a/uproot4/models/TBranch.py +++ b/uproot4/models/TBranch.py @@ -43,7 +43,7 @@ def read_members(self, chunk, cursor, context): self._members["fEntries"], self._members["fTotBytes"], self._members["fZipBytes"], - ) = cursor.fields(chunk, _tbranch10_format1) + ) = cursor.fields(chunk, _tbranch10_format1, context) self._members["fBranches"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self ) @@ -52,7 +52,7 @@ def read_members(self, chunk, cursor, context): ) self._cursor_baskets = cursor.copy() if self._file.options["minimal_ttree_metadata"]: - cursor.skip_over(chunk) + cursor.skip_over(chunk, context) else: self._members["fBaskets"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self @@ -61,20 +61,20 @@ def read_members(self, chunk, cursor, context): if context.get("speedbump", True): cursor.skip(1) self._members["fBasketBytes"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) tmp = _tbranch10_dtype2 if context.get("speedbump", True): cursor.skip(1) self._members["fBasketEntry"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) tmp = _tbranch10_dtype3 if context.get("speedbump", True): - if cursor.bytes(chunk, 1)[0] == 2: + if cursor.bytes(chunk, 1, context)[0] == 2: tmp = numpy.dtype(">i8") self._members["fBasketSeek"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) if self._file.options["minimal_ttree_metadata"]: cursor.skip_after(self) @@ -149,7 +149,7 @@ def read_members(self, chunk, cursor, context): self._members["fFirstEntry"], self._members["fTotBytes"], self._members["fZipBytes"], - ) = cursor.fields(chunk, _tbranch11_format1) + ) = cursor.fields(chunk, _tbranch11_format1, context) self._members["fBranches"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self ) @@ -158,7 +158,7 @@ def read_members(self, chunk, cursor, context): ) self._cursor_baskets = cursor.copy() if self._file.options["minimal_ttree_metadata"]: - cursor.skip_over(chunk) + cursor.skip_over(chunk, context) else: self._members["fBaskets"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self @@ -167,20 +167,20 @@ def read_members(self, chunk, cursor, context): if context.get("speedbump", True): cursor.skip(1) self._members["fBasketBytes"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) tmp = _tbranch11_dtype2 if context.get("speedbump", True): cursor.skip(1) self._members["fBasketEntry"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) tmp = _tbranch11_dtype3 if context.get("speedbump", True): - if cursor.bytes(chunk, 1)[0] == 2: + if cursor.bytes(chunk, 1, context)[0] == 2: tmp = numpy.dtype(">i8") self._members["fBasketSeek"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) if self._file.options["minimal_ttree_metadata"]: cursor.skip_after(self) @@ -256,7 +256,7 @@ def read_members(self, chunk, cursor, context): self._members["fFirstEntry"], self._members["fTotBytes"], self._members["fZipBytes"], - ) = cursor.fields(chunk, _tbranch12_format1) + ) = cursor.fields(chunk, _tbranch12_format1, context) self._members["fBranches"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self ) @@ -265,7 +265,7 @@ def read_members(self, chunk, cursor, context): ) self._cursor_baskets = cursor.copy() if self._file.options["minimal_ttree_metadata"]: - cursor.skip_over(chunk) + cursor.skip_over(chunk, context) else: self._members["fBaskets"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self @@ -274,20 +274,20 @@ def read_members(self, chunk, cursor, context): if context.get("speedbump", True): cursor.skip(1) self._members["fBasketBytes"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) tmp = _tbranch12_dtype2 if context.get("speedbump", True): cursor.skip(1) self._members["fBasketEntry"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) tmp = _tbranch12_dtype3 if context.get("speedbump", True): - if cursor.bytes(chunk, 1)[0] == 2: + if cursor.bytes(chunk, 1, context)[0] == 2: tmp = numpy.dtype(">i8") self._members["fBasketSeek"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) if self._file.options["minimal_ttree_metadata"]: cursor.skip_after(self) @@ -357,7 +357,7 @@ def read_members(self, chunk, cursor, context): self._members["fEntryOffsetLen"], self._members["fWriteBasket"], self._members["fEntryNumber"], - ) = cursor.fields(chunk, _tbranch13_format1) + ) = cursor.fields(chunk, _tbranch13_format1, context) self._members["fIOFeatures"] = self.class_named("ROOT::TIOFeatures").read( chunk, cursor, context, self._file, self ) @@ -369,7 +369,7 @@ def read_members(self, chunk, cursor, context): self._members["fFirstEntry"], self._members["fTotBytes"], self._members["fZipBytes"], - ) = cursor.fields(chunk, _tbranch13_format2) + ) = cursor.fields(chunk, _tbranch13_format2, context) self._members["fBranches"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self ) @@ -378,7 +378,7 @@ def read_members(self, chunk, cursor, context): ) self._cursor_baskets = cursor.copy() if self._file.options["minimal_ttree_metadata"]: - cursor.skip_over(chunk) + cursor.skip_over(chunk, context) else: self._members["fBaskets"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self @@ -387,20 +387,20 @@ def read_members(self, chunk, cursor, context): if context.get("speedbump", True): cursor.skip(1) self._members["fBasketBytes"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) tmp = _tbranch13_dtype2 if context.get("speedbump", True): cursor.skip(1) self._members["fBasketEntry"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) tmp = _tbranch13_dtype3 if context.get("speedbump", True): - if cursor.bytes(chunk, 1)[0] == 2: + if cursor.bytes(chunk, 1, context)[0] == 2: tmp = numpy.dtype(">i8") self._members["fBasketSeek"] = cursor.array( - chunk, self.member("fMaxBaskets"), tmp + chunk, self.member("fMaxBaskets"), tmp, context ) if self._file.options["minimal_ttree_metadata"]: cursor.skip_after(self) @@ -482,7 +482,7 @@ def read_members(self, chunk, cursor, context): self._members["fType"], self._members["fStreamerType"], self._members["fMaximum"], - ) = cursor.fields(chunk, _tbranchelement8_format1) + ) = cursor.fields(chunk, _tbranchelement8_format1, context) self._members["fBranchCount"] = uproot4.deserialization.read_object_any( chunk, cursor, context, self._file, self._parent ) @@ -538,7 +538,7 @@ def read_members(self, chunk, cursor, context): self._members["fType"], self._members["fStreamerType"], self._members["fMaximum"], - ) = cursor.fields(chunk, _tbranchelement9_format1) + ) = cursor.fields(chunk, _tbranchelement9_format1, context) self._members["fBranchCount"] = uproot4.deserialization.read_object_any( chunk, cursor, context, self._file, self._parent ) @@ -594,7 +594,7 @@ def read_members(self, chunk, cursor, context): self._members["fType"], self._members["fStreamerType"], self._members["fMaximum"], - ) = cursor.fields(chunk, _tbranchelement10_format1) + ) = cursor.fields(chunk, _tbranchelement10_format1, context) self._members["fBranchCount"] = uproot4.deserialization.read_object_any( chunk, cursor, context, self._file, self._parent ) diff --git a/uproot4/models/TLeaf.py b/uproot4/models/TLeaf.py index e51baeb36..509e768e9 100644 --- a/uproot4/models/TLeaf.py +++ b/uproot4/models/TLeaf.py @@ -24,7 +24,7 @@ def read_members(self, chunk, cursor, context): self._members["fOffset"], self._members["fIsRange"], self._members["fIsUnsigned"], - ) = cursor.fields(chunk, _tleaf2_format0) + ) = cursor.fields(chunk, _tleaf2_format0, context) self._members["fLeafCount"] = uproot4.deserialization.read_object_any( chunk, cursor, context, self._file, self._parent ) @@ -58,7 +58,7 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fMinimum"], self._members["fMaximum"] = cursor.fields( - chunk, _tleafb1_format1 + chunk, _tleafb1_format1, context ) base_names_versions = [("TLeaf", 2)] @@ -83,7 +83,7 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fMinimum"], self._members["fMaximum"] = cursor.fields( - chunk, _tleafc1_format1 + chunk, _tleafc1_format1, context ) base_names_versions = [("TLeaf", 2)] @@ -108,7 +108,7 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fMinimum"], self._members["fMaximum"] = cursor.fields( - chunk, _tleafd1_format1 + chunk, _tleafd1_format1, context ) base_names_versions = [("TLeaf", 2)] @@ -133,7 +133,7 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fMinimum"], self._members["fMaximum"] = cursor.fields( - chunk, _tleaff1_format1 + chunk, _tleaff1_format1, context ) base_names_versions = [("TLeaf", 2)] @@ -158,7 +158,7 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fMinimum"], self._members["fMaximum"] = cursor.fields( - chunk, _tleafi1_format1 + chunk, _tleafi1_format1, context ) base_names_versions = [("TLeaf", 2)] @@ -183,7 +183,7 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fMinimum"], self._members["fMaximum"] = cursor.fields( - chunk, _tleafl1_format0 + chunk, _tleafl1_format0, context ) base_names_versions = [("TLeaf", 2)] @@ -208,7 +208,7 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fMinimum"], self._members["fMaximum"] = cursor.fields( - chunk, _tleafO1_format1 + chunk, _tleafO1_format1, context ) base_names_versions = [("TLeaf", 2)] @@ -233,7 +233,7 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fMinimum"], self._members["fMaximum"] = cursor.fields( - chunk, _tleafs1_format1 + chunk, _tleafs1_format1, context ) base_names_versions = [("TLeaf", 2)] @@ -258,7 +258,7 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fID"], self._members["fType"] = cursor.fields( - chunk, _tleafelement1_format1 + chunk, _tleafelement1_format1, context ) base_names_versions = [("TLeaf", 2)] diff --git a/uproot4/models/TList.py b/uproot4/models/TList.py index 2bf10f962..9b56bc615 100644 --- a/uproot4/models/TList.py +++ b/uproot4/models/TList.py @@ -26,8 +26,8 @@ def read_members(self, chunk, cursor, context): ) ) - self._members["fName"] = cursor.string(chunk) - self._members["fSize"] = cursor.field(chunk, _tlist_format1) + self._members["fName"] = cursor.string(chunk, context) + self._members["fSize"] = cursor.field(chunk, _tlist_format1, context) self._data = [] for i in range(self._members["fSize"]): @@ -37,7 +37,7 @@ def read_members(self, chunk, cursor, context): self._data.append(item) # ignore "option" - n = cursor.field(chunk, _tlist_format2) + n = cursor.field(chunk, _tlist_format2, context) cursor.skip(n) def __getitem__(self, where): diff --git a/uproot4/models/TNamed.py b/uproot4/models/TNamed.py index 8435d51de..f0ee259ed 100644 --- a/uproot4/models/TNamed.py +++ b/uproot4/models/TNamed.py @@ -14,8 +14,8 @@ def read_members(self, chunk, cursor, context): ) ) - self._members["fName"] = cursor.string(chunk) - self._members["fTitle"] = cursor.string(chunk) + self._members["fName"] = cursor.string(chunk, context) + self._members["fTitle"] = cursor.string(chunk, context) uproot4.classes["TNamed"] = Model_TNamed diff --git a/uproot4/models/TObjArray.py b/uproot4/models/TObjArray.py index 454cd7e76..1cacc13c4 100644 --- a/uproot4/models/TObjArray.py +++ b/uproot4/models/TObjArray.py @@ -26,9 +26,9 @@ def read_members(self, chunk, cursor, context): ) ) - self._members["fName"] = cursor.string(chunk) + self._members["fName"] = cursor.string(chunk, context) self._members["fSize"], self._members["fLowerBound"] = cursor.fields( - chunk, _tobjarray_format1 + chunk, _tobjarray_format1, context ) self._data = [] @@ -63,9 +63,9 @@ def read_members(self, chunk, cursor, context): ) ) - self._members["fName"] = cursor.string(chunk) + self._members["fName"] = cursor.string(chunk, context) self._members["fSize"], self._members["fLowerBound"] = cursor.fields( - chunk, _tobjarray_format1 + chunk, _tobjarray_format1, context ) self._data = [] diff --git a/uproot4/models/TObjString.py b/uproot4/models/TObjString.py index 77af5dcb2..38a257aba 100644 --- a/uproot4/models/TObjString.py +++ b/uproot4/models/TObjString.py @@ -13,7 +13,7 @@ def read_members(self, chunk, cursor, context): chunk, cursor, context, self._file, self._parent ) ) - self._data = cursor.string(chunk) + self._data = cursor.string(chunk, context) def postprocess(self, chunk, cursor, context): out = Model_TObjString(self._data) diff --git a/uproot4/models/TObject.py b/uproot4/models/TObject.py index 5192d5e7a..fd08da27e 100644 --- a/uproot4/models/TObject.py +++ b/uproot4/models/TObject.py @@ -19,11 +19,11 @@ def read_numbytes_version(self, chunk, cursor, context): pass def read_members(self, chunk, cursor, context): - self._instance_version = cursor.field(chunk, _tobject_format1) + self._instance_version = cursor.field(chunk, _tobject_format1, context) if numpy.int64(self._instance_version) & uproot4.const.kByteCountVMask: cursor.skip(4) self._members["fUniqueID"], self._members["fBits"] = cursor.fields( - chunk, _tobject_format2 + chunk, _tobject_format2, context ) self._members["fBits"] = ( numpy.uint32(self._members["fBits"]) | uproot4.const.kIsOnHeap diff --git a/uproot4/models/TString.py b/uproot4/models/TString.py index 0fecb43fd..d78c0a003 100644 --- a/uproot4/models/TString.py +++ b/uproot4/models/TString.py @@ -10,7 +10,7 @@ def read_numbytes_version(self, chunk, cursor, context): pass def read_members(self, chunk, cursor, context): - self._data = cursor.string(chunk) + self._data = cursor.string(chunk, context) def postprocess(self, chunk, cursor, context): out = Model_TString(self._data) diff --git a/uproot4/models/TTree.py b/uproot4/models/TTree.py index 9b7961b17..9a937af39 100644 --- a/uproot4/models/TTree.py +++ b/uproot4/models/TTree.py @@ -50,7 +50,7 @@ def read_members(self, chunk, cursor, context): self._members["fMaxVirtualSize"], self._members["fAutoSave"], self._members["fEstimate"], - ) = cursor.fields(chunk, _ttree16_format1) + ) = cursor.fields(chunk, _ttree16_format1, context) self._members["fBranches"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self ) @@ -167,7 +167,7 @@ def read_members(self, chunk, cursor, context): self._members["fMaxVirtualSize"], self._members["fAutoSave"], self._members["fEstimate"], - ) = cursor.fields(chunk, _ttree17_format1) + ) = cursor.fields(chunk, _ttree17_format1, context) self._members["fBranches"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self ) @@ -286,7 +286,7 @@ def read_members(self, chunk, cursor, context): self._members["fAutoSave"], self._members["fAutoFlush"], self._members["fEstimate"], - ) = cursor.fields(chunk, _ttree18_format1) + ) = cursor.fields(chunk, _ttree18_format1, context) self._members["fBranches"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self ) @@ -410,18 +410,18 @@ def read_members(self, chunk, cursor, context): self._members["fAutoSave"], self._members["fAutoFlush"], self._members["fEstimate"], - ) = cursor.fields(chunk, _ttree19_format1) + ) = cursor.fields(chunk, _ttree19_format1, context) tmp = _ttree19_dtype1 if context.get("speedbump", True): cursor.skip(1) self._members["fClusterRangeEnd"] = cursor.array( - chunk, self.member("fNClusterRange"), tmp + chunk, self.member("fNClusterRange"), tmp, context ) tmp = _ttree19_dtype2 if context.get("speedbump", True): cursor.skip(1) self._members["fClusterSize"] = cursor.array( - chunk, self.member("fNClusterRange"), tmp + chunk, self.member("fNClusterRange"), tmp, context ) self._members["fBranches"] = self.class_named("TObjArray").read( chunk, cursor, context, self._file, self @@ -549,18 +549,18 @@ def read_members(self, chunk, cursor, context): self._members["fAutoSave"], self._members["fAutoFlush"], self._members["fEstimate"], - ) = cursor.fields(chunk, _ttree20_format1) + ) = cursor.fields(chunk, _ttree20_format1, context) tmp = _ttree20_dtype1 if context.get("speedbump", True): cursor.skip(1) self._members["fClusterRangeEnd"] = cursor.array( - chunk, self.member("fNClusterRange"), tmp + chunk, self.member("fNClusterRange"), tmp, context ) tmp = _ttree20_dtype2 if context.get("speedbump", True): cursor.skip(1) self._members["fClusterSize"] = cursor.array( - chunk, self.member("fNClusterRange"), tmp + chunk, self.member("fNClusterRange"), tmp, context ) self._members["fIOFeatures"] = self.class_named("ROOT::TIOFeatures").read( chunk, cursor, context, self._file, self @@ -663,7 +663,7 @@ class Model_TTree(uproot4.model.DispatchByVersion): class Model_ROOT_3a3a_TIOFeatures(uproot4.model.Model): def read_members(self, chunk, cursor, context): cursor.skip(4) - self._members["fIOBits"] = cursor.field(chunk, _tiofeatures_format1) + self._members["fIOBits"] = cursor.field(chunk, _tiofeatures_format1, context) uproot4.classes["TTree"] = Model_TTree diff --git a/uproot4/reading.py b/uproot4/reading.py index f61c87178..9df473f03 100644 --- a/uproot4/reading.py +++ b/uproot4/reading.py @@ -32,9 +32,7 @@ from uproot4._util import no_filter -def open( - path, object_cache=100, array_cache="100 MB", classes=uproot4.classes, **options -): +def open(path, object_cache=100, array_cache="100 MB", custom_classes=None, **options): """ Args: path (str or Path): Path or URL to open, which may include a colon @@ -47,9 +45,9 @@ def open( array_cache (None, MutableMapping, or memory size): Cache of arrays drawn from TTrees; if None, do not use a cache; if a memory size, create a new cache of this size. - classes (None or MutableMapping): If None, defaults to uproot4.classes; - otherwise, a container of class definitions that is both used to - fill with new classes and search for dependencies. + custom_classes (None or MutableMapping): If None, classes come from + uproot4.classes; otherwise, a container of class definitions that + is both used to fill with new classes and search for dependencies. options: see below. Opens a ROOT file, possibly through a remote protocol. @@ -74,7 +72,7 @@ def open( file_path, object_cache=object_cache, array_cache=array_cache, - classes=classes, + custom_classes=custom_classes, **options ) @@ -108,13 +106,13 @@ def __init__( file_path, object_cache=100, array_cache="100 MB", - classes=uproot4.classes, + custom_classes=None, **options ): self._file_path = file_path self.object_cache = object_cache self.array_cache = array_cache - self.classes = classes + self.custom_classes = custom_classes self._options = dict(open.defaults) self._options.update(options) @@ -163,7 +161,7 @@ def __init__( self._fUUID_version, self._fUUID, ) = uproot4.source.cursor.Cursor(0).fields( - self._begin_chunk, _file_header_fields_small + self._begin_chunk, _file_header_fields_small, {} ) if self.is_64bit: @@ -183,7 +181,7 @@ def __init__( self._fUUID_version, self._fUUID, ) = uproot4.source.cursor.Cursor(0).fields( - self._begin_chunk, _file_header_fields_big + self._begin_chunk, _file_header_fields_big, {} ) self.hook_after_read(magic=magic) @@ -258,17 +256,21 @@ def array_cache(self, value): ) @property - def classes(self): - return self._classes + def custom_classes(self): + return self._custom_classes - @classes.setter - def classes(self, value): - if value is None: - self._classes = uproot4.classes - elif isinstance(value, MutableMapping): - self._classes = value + @custom_classes.setter + def custom_classes(self, value): + if value is None or isinstance(value, MutableMapping): + self._custom_classes = value else: - raise TypeError("classes must be None or a MutableMapping") + raise TypeError("custom_classes must be None or a MutableMapping") + + def remove_class(self, classname): + if self._custom_classes is None: + self._custom_classes = dict(uproot4.classes) + if classname in self._custom_classes: + del self._custom_classes[classname] @property def options(self): @@ -301,10 +303,19 @@ def root_directory(self): return ReadOnlyDirectory( (), uproot4.source.cursor.Cursor(self._fBEGIN + self._fNbytesName), + {}, self, self, ) + def is_custom_class(self, classname): + if self._custom_classes is None: + return False + else: + mine = self._custom_classes.get(classname) + theirs = uproot4.classes.get(classname) + return mine is not None and mine is not theirs + @property def streamers(self): import uproot4.streamers @@ -349,7 +360,8 @@ def streamers(self): streamer_chunk=streamer_chunk, ) - tlist = self._classes["TList"].read( + classes = uproot4.model.maybe_custom_classes(self._custom_classes) + tlist = classes["TList"].read( streamer_chunk, streamer_cursor, {}, self, self ) @@ -440,7 +452,8 @@ def streamers_named(self, classname): return list(self.streamers[classname].values()) def class_named(self, classname, version=None): - cls = self._classes.get(classname) + classes = uproot4.model.maybe_custom_classes(self._custom_classes) + cls = classes.get(classname) if cls is None: streamers = self.streamers_named(classname) @@ -462,7 +475,7 @@ def class_named(self, classname, version=None): (uproot4.model.DispatchByVersion,), {"known_versions": {}}, ) - self._classes[classname] = cls + classes[classname] = cls if version is not None and issubclass(cls, uproot4.model.DispatchByVersion): if not uproot4._util.isint(version): @@ -610,7 +623,7 @@ def __init__(self, chunk, cursor, context, file, parent, read_strings=False): self._fCycle, self._fSeekKey, self._fSeekPdir, - ) = cursor.fields(chunk, self._format_small, move=False) + ) = cursor.fields(chunk, self._format_small, context, move=False) if self.is_64bit: ( @@ -622,7 +635,7 @@ def __init__(self, chunk, cursor, context, file, parent, read_strings=False): self._fCycle, self._fSeekKey, self._fSeekPdir, - ) = cursor.fields(chunk, self._format_big) + ) = cursor.fields(chunk, self._format_big, context) else: cursor.skip(self._format_small.size) @@ -637,9 +650,9 @@ def __init__(self, chunk, cursor, context, file, parent, read_strings=False): read_strings=read_strings, ) - self._fClassName = cursor.string(chunk) - self._fName = cursor.string(chunk) - self._fTitle = cursor.string(chunk) + self._fClassName = cursor.string(chunk, context) + self._fName = cursor.string(chunk, context) + self._fTitle = cursor.string(chunk, context) else: self._fClassName = None @@ -778,7 +791,8 @@ def get_uncompressed_chunk_cursor(self): ) else: uncompressed_chunk = uproot4.source.chunk.Chunk.wrap( - chunk.source, chunk.get(data_start, data_stop) + chunk.source, + chunk.get(data_start, data_stop, {"breadcrumbs": (), "TKey": self}), ) return uncompressed_chunk, cursor @@ -789,7 +803,10 @@ def cache_key(self): @property def object_path(self): - return "" + if isinstance(self._parent, ReadOnlyDirectory): + return self._parent.object_path + self.name(False) + else: + return "(seek pos {0})/{1}".format(self.data_cursor.index, self.name(False)) def get(self): if self._file.object_cache is not None: @@ -805,13 +822,43 @@ def get(self): "TDirectoryFile", ): out = ReadOnlyDirectory( - self._parent.path + (self.fName,), self.data_cursor, self._file, self, + self._parent.path + (self.fName,), + self.data_cursor, + {}, + self._file, + self, ) else: chunk, cursor = self.get_uncompressed_chunk_cursor() + start_cursor = cursor.copy() cls = self._file.class_named(self._fClassName) - out = cls.read(chunk, cursor, {}, self._file, self) + context = {"breadcrumbs": (), "TKey": self} + + try: + out = cls.read(chunk, cursor, context, self._file, self) + + except uproot4.deserialization.DeserializationError: + breadcrumbs = context.get("breadcrumbs") + if breadcrumbs is None or all( + breadcrumb_cls.classname in uproot4.model.bootstrap_classnames + or self._file.is_custom_class(breadcrumb_cls.classname) + for breadcrumb_cls in breadcrumbs + ): + # we're already using the most specialized versions of each class + raise + + for breadcrumb_cls in breadcrumbs: + if ( + breadcrumb_cls.classname + not in uproot4.model.bootstrap_classnames + ): + self._file.remove_class(breadcrumb_cls.classname) + + cursor = start_cursor + cls = self._file.class_named(self._fClassName) + context = {"breadcrumbs": (), "TKey": self} + out = cls.read(chunk, cursor, context, self._file, self) if self._file.object_cache is not None: self._file.object_cache[self.cache_key] = out @@ -823,7 +870,7 @@ class ReadOnlyDirectory(Mapping): _format_big = struct.Struct(">hIIiiqqq") _format_num_keys = struct.Struct(">i") - def __init__(self, path, cursor, file, parent): + def __init__(self, path, cursor, context, file, parent): self._path = path self._cursor = cursor.copy() self._file = file @@ -846,7 +893,7 @@ def __init__(self, path, cursor, file, parent): self._fSeekDir, self._fSeekParent, self._fSeekKeys, - ) = cursor.fields(chunk, self._format_small, move=False) + ) = cursor.fields(chunk, self._format_small, context, move=False) if self.is_64bit: ( @@ -858,7 +905,7 @@ def __init__(self, path, cursor, file, parent): self._fSeekDir, self._fSeekParent, self._fSeekKeys, - ) = cursor.fields(chunk, self._format_big) + ) = cursor.fields(chunk, self._format_big, context) else: cursor.skip(self._format_small.size) @@ -892,7 +939,7 @@ def __init__(self, path, cursor, file, parent): keys_chunk, keys_cursor, {}, file, self, read_strings=True ) - num_keys = keys_cursor.field(keys_chunk, self._format_num_keys) + num_keys = keys_cursor.field(keys_chunk, self._format_num_keys, context) self.hook_before_keys( path=path, @@ -1038,11 +1085,11 @@ def show_streamers(self, classname=None, stream=sys.stdout): @property def cache_key(self): - return self.file.hex_uuid + ":" + "/".join(self.path) + "/" + return self.file.hex_uuid + ":" + self.object_path @property def object_path(self): - return "/".join(self.path) + "/" + return "/".join(("",) + self._path + ("",)).replace("//", "/") @property def object_cache(self): @@ -1240,15 +1287,32 @@ def _ipython_key_completions_(self): return self.iterkeys() def __getitem__(self, where): - if "/" in where: + if "/" in where or ":" in where: items = where.split("/") step = self + for i, item in enumerate(items): if item != "": if isinstance(step, ReadOnlyDirectory): - step = step = step[item] + if ":" in item and item not in step: + index = item.index(":") + head, tail = item[:index], item[index + 1 :] + step = step[head] + if isinstance(step, uproot4.behaviors.TBranch.HasBranches): + return step["/".join([tail] + items[i + 1 :])] + else: + raise uproot4.KeyInFileError( + where, + self._file.file_path, + because=repr(head) + + " is not a TDirectory, TTree, or TBranch", + ) + else: + step = step[item] + elif isinstance(step, uproot4.behaviors.TBranch.HasBranches): return step["/".join(items[i:])] + else: raise uproot4.KeyInFileError( where, @@ -1256,6 +1320,7 @@ def __getitem__(self, where): because=repr(item) + " is not a TDirectory, TTree, or TBranch", ) + return step else: diff --git a/uproot4/source/chunk.py b/uproot4/source/chunk.py index 268e7ff1d..1e5475276 100644 --- a/uproot4/source/chunk.py +++ b/uproot4/source/chunk.py @@ -12,6 +12,7 @@ import numpy +import uproot4.deserialization import uproot4.source.futures import uproot4.source.cursor @@ -339,13 +340,14 @@ def raw_data(self): self.wait() return self._raw_data - def get(self, start, stop): + def get(self, start, stop, context): """ Args: start (int): Starting byte position to extract (inclusive, global in Source). stop (int): Stopping byte position to extract (exclusive, global in Source). + context (dict): Information about the current state of deserialization. Returns a subinterval of the `raw_data` using global coordinates as a NumPy array with dtype uint8. @@ -362,22 +364,24 @@ def get(self, start, stop): return self._raw_data[local_start:local_stop] elif self._exact: - raise OSError( + raise uproot4.deserialization.DeserializationError( """attempting to get bytes {0}:{1} - outside expected range {2}:{3} for this Chunk -of file path {4}""".format( - start, stop, self._start, self._stop, self._source.file_path, - ) +outside expected range {2}:{3} for this Chunk""".format( + start, stop, self._start, self._stop + ), + context, + self._source.file_path, ) else: raise RefineChunk(start, stop, self._start, self._stop) - def remainder(self, start): + def remainder(self, start, context): """ Args: start (int): Starting byte position to extract (inclusive, global in Source). + context (dict): Information about the current state of deserialization. Returns a subinterval of the `raw_data` from `start` to the end of the Chunk as a NumPy array with dtype uint8. @@ -393,10 +397,11 @@ def remainder(self, start): return self._raw_data[local_start:] else: - raise OSError( - """attempting to get byte {0} - outside expected range {1}:{2} for this Chunk -of file path {3}""".format( - start, self._start, self._stop, self._source.file_path, - ) + raise uproot4.deserialization.DeserializationError( + """attempting to get bytes after {0} +outside expected range {1}:{2} for this Chunk""".format( + start, self._start, self._stop + ), + context, + self._source.file_path, ) diff --git a/uproot4/source/cursor.py b/uproot4/source/cursor.py index 5f592dbfd..674134be1 100644 --- a/uproot4/source/cursor.py +++ b/uproot4/source/cursor.py @@ -130,13 +130,13 @@ def skip_after(self, obj): ) self._index = start_cursor.index + num_bytes - def skip_over(self, chunk): + def skip_over(self, chunk, context): """ Move the index after serialized data for an object with numbytes_version. """ num_bytes, version = uproot4.deserialization.numbytes_version( - chunk, self, move=False + chunk, self, context, move=False ) if num_bytes is None: raise TypeError( @@ -145,7 +145,7 @@ def skip_over(self, chunk): ) self._index += num_bytes - def fields(self, chunk, format, move=True): + def fields(self, chunk, format, context, move=True): """ Interpret data at this index of the Chunk with a `struct.Struct` format. Returns a tuple (length determined by `format`). @@ -156,9 +156,9 @@ def fields(self, chunk, format, move=True): stop = start + format.size if move: self._index = stop - return format.unpack(chunk.get(start, stop)) + return format.unpack(chunk.get(start, stop, context)) - def field(self, chunk, format, move=True): + def field(self, chunk, format, context, move=True): """ Interpret data at this index of the Chunk with a `struct.Struct` format, returning a single item instead of a tuple (the first). @@ -169,9 +169,9 @@ def field(self, chunk, format, move=True): stop = start + format.size if move: self._index = stop - return format.unpack(chunk.get(start, stop))[0] + return format.unpack(chunk.get(start, stop, context))[0] - def bytes(self, chunk, length, move=True, copy_if_memmap=False): + def bytes(self, chunk, length, context, move=True, copy_if_memmap=False): """ Interpret data at this index of the Chunk as raw bytes with a given `length`. @@ -184,7 +184,7 @@ def bytes(self, chunk, length, move=True, copy_if_memmap=False): stop = start + length if move: self._index = stop - out = chunk.get(start, stop) + out = chunk.get(start, stop, context) if copy_if_memmap: step = out while getattr(step, "base", None) is not None: @@ -193,7 +193,7 @@ def bytes(self, chunk, length, move=True, copy_if_memmap=False): step = step.base return out - def array(self, chunk, length, dtype, move=True): + def array(self, chunk, length, dtype, context, move=True): """ Interpret data at this index of the Chunk as an array with a given `length` and `dtype`. @@ -204,12 +204,12 @@ def array(self, chunk, length, dtype, move=True): stop = start + length * dtype.itemsize if move: self._index = stop - return numpy.frombuffer(chunk.get(start, stop), dtype=dtype) + return numpy.frombuffer(chunk.get(start, stop, context), dtype=dtype) _u1 = numpy.dtype("u1") _i4 = numpy.dtype(">i4") - def bytestring(self, chunk, move=True): + def bytestring(self, chunk, context, move=True): """ Interpret data at this index of the Chunk as a ROOT bytestring (first 1 or 5 bytes indicate size). @@ -218,20 +218,19 @@ def bytestring(self, chunk, move=True): """ start = self._index stop = start + 1 - length = chunk.get(start, stop)[0] + length = chunk.get(start, stop, context)[0] if length == 255: start = stop stop = start + 4 - length = numpy.frombuffer(chunk.get(start, stop), dtype=self._u1).view( - self._i4 - )[0] + length_data = chunk.get(start, stop, context) + length = numpy.frombuffer(length_data, dtype=self._u1).view(self._i4)[0] start = stop stop = start + length if move: self._index = stop - return chunk.get(start, stop).tostring() + return chunk.get(start, stop, context).tostring() - def string(self, chunk, move=True): + def string(self, chunk, context, move=True): """ Interpret data at this index of the Chunk as a Python str (first 1 or 5 bytes indicate size). @@ -240,13 +239,13 @@ def string(self, chunk, move=True): If `move` is False, only peek: don't update the index. """ - out = self.bytestring(chunk, move=move) + out = self.bytestring(chunk, context, move=move) if uproot4._util.py2: return out else: return out.decode(errors="surrogateescape") - def classname(self, chunk, move=True): + def classname(self, chunk, context, move=True): """ Interpret data at this index of the Chunk as a ROOT class name, which is the only usage of null-terminated strings (rather than @@ -256,7 +255,7 @@ def classname(self, chunk, move=True): If `move` is False, only peek: don't update the index. """ - remainder = chunk.remainder(self._index) + remainder = chunk.remainder(self._index, context) local_stop = 0 char = None while char != 0: @@ -285,7 +284,13 @@ def classname(self, chunk, move=True): ) def debug( - self, chunk, limit_bytes=None, dtype=None, offset=0, stream=sys.stdout, + self, + chunk, + context={}, + limit_bytes=None, + dtype=None, + offset=0, + stream=sys.stdout, ): """ Args: @@ -316,7 +321,7 @@ def debug( --- --- --- C J --- --- C --- --- --- { { 101.0 202.0 303.0 """ - data = chunk.remainder(self._index) + data = chunk.remainder(self._index, context) if limit_bytes is not None: data = data[:limit_bytes] diff --git a/uproot4/streamers.py b/uproot4/streamers.py index 6eaa20577..078f3f24d 100644 --- a/uproot4/streamers.py +++ b/uproot4/streamers.py @@ -135,7 +135,7 @@ def read_members(self, chunk, cursor, context): ) self._members["fCheckSum"], self._members["fClassVersion"] = cursor.fields( - chunk, _tstreamerinfo_format1 + chunk, _tstreamerinfo_format1, context ) self._members["fElements"] = uproot4.deserialization.read_object_any( @@ -190,8 +190,9 @@ def show(self, stream=sys.stdout): def new_class(self, file): class_code = self.class_code() class_name = uproot4.model.classname_encode(self.name, self.class_version) + classes = uproot4.model.maybe_custom_classes(file.custom_classes) return uproot4.deserialization.compile_class( - file, file.classes, class_code, class_name + file, classes, class_code, class_name ) def class_code(self): @@ -289,19 +290,19 @@ def read_members(self, chunk, cursor, context): self._members["fSize"], self._members["fArrayLength"], self._members["fArrayDim"], - ) = cursor.fields(chunk, _tstreamerelement_format1) + ) = cursor.fields(chunk, _tstreamerelement_format1, context) if self._instance_version == 1: - n = cursor.field(chunk, _tstreamerelement_format2) + n = cursor.field(chunk, _tstreamerelement_format2, context) self._members["fMaxIndex"] = cursor.array( - chunk, n, _tstreamerelement_dtype1 + chunk, n, _tstreamerelement_dtype1, context ) else: self._members["fMaxIndex"] = cursor.array( - chunk, 5, _tstreamerelement_dtype1 + chunk, 5, _tstreamerelement_dtype1, context ) - self._members["fTypeName"] = _canonical_typename(cursor.string(chunk)) + self._members["fTypeName"] = _canonical_typename(cursor.string(chunk, context)) if self._members["fType"] == 11 and self._members["fTypeName"] in ( "Bool_t" or "bool" @@ -394,7 +395,9 @@ def read_members(self, chunk, cursor, context): ) ) if self._instance_version >= 2: - self._members["fBaseVersion"] = cursor.field(chunk, _tstreamerbase_format1) + self._members["fBaseVersion"] = cursor.field( + chunk, _tstreamerbase_format1, context + ) @property def base_version(self): @@ -449,10 +452,10 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fCountVersion"] = cursor.field( - chunk, _tstreamerbasicpointer_format1 + chunk, _tstreamerbasicpointer_format1, context ) - self._members["fCountName"] = cursor.string(chunk) - self._members["fCountClass"] = cursor.string(chunk) + self._members["fCountName"] = cursor.string(chunk, context) + self._members["fCountClass"] = cursor.string(chunk, context) @property def count_name(self): @@ -474,13 +477,15 @@ def class_code( read_members.append(" tmp = self._dtype{0}".format(len(dtypes))) if streamerinfo.name == "TBranch" and self.name == "fBasketSeek": read_members.append(" if context.get('speedbump', True):") - read_members.append(" if cursor.bytes(chunk, 1)[0] == 2:") + read_members.append( + " if cursor.bytes(chunk, 1, context)[0] == 2:" + ) read_members.append(" tmp = numpy.dtype('>i8')") else: read_members.append(" if context.get('speedbump', True):") read_members.append(" cursor.skip(1)") read_members.append( - " self._members[{0}] = cursor.array(chunk, self.member({1}), tmp)".format( + " self._members[{0}] = cursor.array(chunk, self.member({1}), tmp, context)".format( repr(self.name), repr(self.count_name) ) ) @@ -592,11 +597,13 @@ def class_code( if len(fields[-1]) == 1: read_members.append( " self._members['{0}'] = cursor.field(chunk, " - "self._format{1})".format(fields[-1][0], len(formats) - 1) + "self._format{1}, context)".format( + fields[-1][0], len(formats) - 1 + ) ) else: read_members.append( - " {0} = cursor.fields(chunk, self._format{1})".format( + " {0} = cursor.fields(chunk, self._format{1}, context)".format( ", ".join( "self._members[{0}]".format(repr(x)) for x in fields[-1] ), @@ -607,7 +614,7 @@ def class_code( else: read_members.append( " self._members[{0}] = cursor.array(chunk, {1}, " - "self._dtype{2})".format( + "self._dtype{2}, context)".format( repr(self.name), self.array_length, len(dtypes) ) ) @@ -626,9 +633,11 @@ def read_members(self, chunk, cursor, context): chunk, cursor, context, self._file, self._parent ) ) - self._members["fCountVersion"] = cursor.field(chunk, _tstreamerloop_format1) - self._members["fCountName"] = cursor.string(chunk) - self._members["fCountClass"] = cursor.string(chunk) + self._members["fCountVersion"] = cursor.field( + chunk, _tstreamerloop_format1, context + ) + self._members["fCountName"] = cursor.string(chunk, context) + self._members["fCountClass"] = cursor.string(chunk, context) @property def count_name(self): @@ -678,7 +687,7 @@ def read_members(self, chunk, cursor, context): ) ) self._members["fSTLtype"], self._members["fCtype"] = cursor.fields( - chunk, _tstreamerstl_format1 + chunk, _tstreamerstl_format1, context ) if self._members["fSTLtype"] in ( @@ -801,7 +810,7 @@ def class_code( if self.is_string: read_members.append(" cursor.skip(6)") read_members.append( - " self._members[{0}] = cursor.string(chunk)".format( + " self._members[{0}] = cursor.string(chunk, context)".format( repr(self.name) ) ) @@ -809,20 +818,20 @@ def class_code( elif self.is_vector_dtype: read_members.append(" cursor.skip(6)") read_members.append( - " tmp = cursor.field(chunk, self._format{0})".format( + " tmp = cursor.field(chunk, self._format{0}, context)".format( len(formats) ) ) read_members.append( " self._members[{0}] = cursor.array(chunk, tmp, " - "self._dtype{1})".format(repr(self.name), len(dtypes)) + "self._dtype{1}, context)".format(repr(self.name), len(dtypes)) ) formats.append(["i"]) dtypes.append(self.vector_dtype) elif self.is_map_string_string: read_members.append( - " self._members[{0}] = map_string_string(chunk, cursor)" + " self._members[{0}] = map_string_string(chunk, cursor, context)" ) else: