From 4bf6fb3c187f118ae8648969c91510243571c679 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 25 Jun 2020 19:56:42 -0500 Subject: [PATCH] Test STL containers. (#31) * Test STL containers. * Start working on interpreting STL containers in TTrees. * Added 'context' to 'basket_array'. * All of the STL collection branches have the right interpretations. * 'context' tricks to read headers outside of TTrees but not inside. * Reading nested vectors, too. Got rid of the 'multiplicity' thing. * AsSTLContainers are explicitly labeled by whether they read headers or not. * Deserialized first std::map in TTree. * All STL collections can now be read. * SAVE WORK; everything will break. * fParentName-based solution to finding streamers. * Remove commented-out bad code and fix filename. * Interpretations produce typenames now. * Another string case works. * All of the STL tests work; only 3 (non-STL) are skip/FIXME. --- tests/test_0018-array-fetching-interface.py | 22 +- tests/test_0023-more-interpretations-1.py | 2 - tests/test_0028-fallback-to-read-streamer.py | 10 +- tests/test_0029-more-string-types.py | 138 +++-- tests/test_0031-test-stl-containers.py | 597 ++++++++++++++++++ uproot4/_util.py | 20 - uproot4/behaviors/TBranch.py | 78 +-- uproot4/deserialization.py | 45 +- uproot4/interpretation/__init__.py | 16 +- uproot4/interpretation/identify.py | 353 ++++++++++- uproot4/interpretation/jagged.py | 37 +- uproot4/interpretation/numerical.py | 87 ++- uproot4/interpretation/objects.py | 194 +++++- uproot4/interpretation/strings.py | 36 +- uproot4/model.py | 26 +- uproot4/models/TBasket.py | 2 +- uproot4/reading.py | 18 +- uproot4/source/cursor.py | 36 +- uproot4/stl_containers.py | 612 ++++++++----------- uproot4/streamers.py | 14 +- 20 files changed, 1796 insertions(+), 547 deletions(-) create mode 100644 tests/test_0031-test-stl-containers.py diff --git a/tests/test_0018-array-fetching-interface.py b/tests/test_0018-array-fetching-interface.py index 
cea50c427..454c72a74 100644 --- a/tests/test_0018-array-fetching-interface.py +++ b/tests/test_0018-array-fetching-interface.py @@ -56,37 +56,37 @@ def test_leaf_interpretation(): assert sample["n"].typename == "int32_t" assert sample["b"].typename == "bool" - assert sample["ab"].typename == "bool[]" + assert sample["ab"].typename == "bool[3]" assert sample["Ab"].typename == "bool[]" assert sample["i1"].typename == "int8_t" - assert sample["ai1"].typename == "int8_t[]" + assert sample["ai1"].typename == "int8_t[3]" assert sample["Ai1"].typename == "int8_t[]" assert sample["u1"].typename == "uint8_t" - assert sample["au1"].typename == "uint8_t[]" + assert sample["au1"].typename == "uint8_t[3]" assert sample["Au1"].typename == "uint8_t[]" assert sample["i2"].typename == "int16_t" - assert sample["ai2"].typename == "int16_t[]" + assert sample["ai2"].typename == "int16_t[3]" assert sample["Ai2"].typename == "int16_t[]" assert sample["u2"].typename == "uint16_t" - assert sample["au2"].typename == "uint16_t[]" + assert sample["au2"].typename == "uint16_t[3]" assert sample["Au2"].typename == "uint16_t[]" assert sample["i4"].typename == "int32_t" - assert sample["ai4"].typename == "int32_t[]" + assert sample["ai4"].typename == "int32_t[3]" assert sample["Ai4"].typename == "int32_t[]" assert sample["u4"].typename == "uint32_t" - assert sample["au4"].typename == "uint32_t[]" + assert sample["au4"].typename == "uint32_t[3]" assert sample["Au4"].typename == "uint32_t[]" assert sample["i8"].typename == "int64_t" - assert sample["ai8"].typename == "int64_t[]" + assert sample["ai8"].typename == "int64_t[3]" assert sample["Ai8"].typename == "int64_t[]" assert sample["u8"].typename == "uint64_t" - assert sample["au8"].typename == "uint64_t[]" + assert sample["au8"].typename == "uint64_t[3]" assert sample["Au8"].typename == "uint64_t[]" assert sample["f4"].typename == "float" - assert sample["af4"].typename == "float[]" + assert sample["af4"].typename == "float[3]" assert 
sample["Af4"].typename == "float[]" assert sample["f8"].typename == "double" - assert sample["af8"].typename == "double[]" + assert sample["af8"].typename == "double[3]" assert sample["Af8"].typename == "double[]" diff --git a/tests/test_0023-more-interpretations-1.py b/tests/test_0023-more-interpretations-1.py index b4dc52084..14307f4dc 100644 --- a/tests/test_0023-more-interpretations-1.py +++ b/tests/test_0023-more-interpretations-1.py @@ -49,7 +49,6 @@ def test_strings1(): assert result.tolist() == ["hey-{0}".format(i) for i in range(30)] -@pytest.mark.skip(reason="FIXME: implement std::vector") def test_strings4(): with uproot4.open( skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") @@ -71,7 +70,6 @@ def test_strings4(): ] -@pytest.mark.skip(reason="FIXME: implement std::vector>") def test_strings4(): with uproot4.open(skhep_testdata.data_path("uproot-vectorVectorDouble.root"))[ "t/x" diff --git a/tests/test_0028-fallback-to-read-streamer.py b/tests/test_0028-fallback-to-read-streamer.py index 1e90368b5..b89b2b5c4 100644 --- a/tests/test_0028-fallback-to-read-streamer.py +++ b/tests/test_0028-fallback-to-read-streamer.py @@ -10,11 +10,11 @@ def test_fallback_reading(): - # with uproot4.open( - # skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") - # ) as f: - # f["tree:evt/P3/P3.Py"] - # assert f.file._streamers is None + with uproot4.open( + skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") + ) as f: + f["tree:evt/P3/P3.Py"] + assert f.file._streamers is None with uproot4.open(skhep_testdata.data_path("uproot-demo-double32.root")) as f: f["T/fD64"] diff --git a/tests/test_0029-more-string-types.py b/tests/test_0029-more-string-types.py index 9223c4916..59646e200 100644 --- a/tests/test_0029-more-string-types.py +++ b/tests/test_0029-more-string-types.py @@ -10,7 +10,7 @@ import skhep_testdata import uproot4 -from uproot4.stl_containers import parse_typename +from uproot4.interpretation.identify import 
parse_typename from uproot4.stl_containers import AsString from uproot4.stl_containers import AsVector from uproot4.stl_containers import AsSet @@ -19,45 +19,59 @@ def test_parse_typename(): assert parse_typename("TTree") is uproot4.classes["TTree"] - assert parse_typename("string") == AsString() - assert parse_typename("std::string") == AsString() - assert parse_typename("std :: string") == AsString() - assert parse_typename("char*") == AsString(is_stl=False) - assert parse_typename("char *") == AsString(is_stl=False) - assert parse_typename("TString") == AsString(is_stl=False) - assert parse_typename("vector") == AsVector(uproot4.classes["TTree"]) - assert parse_typename("vector") == AsVector(">i4") - assert parse_typename("vector") == AsVector("?") - assert parse_typename("vector") == AsVector(AsString()) - assert parse_typename("vector < string >") == AsVector(AsString()) - assert parse_typename("std::vector") == AsVector(AsString()) - assert parse_typename("vector>") == AsVector(AsVector(">i4")) - assert parse_typename("vector>") == AsVector(AsVector(AsString())) + assert parse_typename("string") == AsString(False) + assert parse_typename("std::string") == AsString(False) + assert parse_typename("std :: string") == AsString(False) + assert parse_typename("char*") == AsString(False) + assert parse_typename("char *") == AsString(False) + assert parse_typename("TString") == AsString(False) + assert parse_typename("vector") == AsVector(True, uproot4.classes["TTree"]) + assert parse_typename("vector") == AsVector(True, ">i4") + assert parse_typename("vector") == AsVector(True, "?") + assert parse_typename("vector") == AsVector(True, AsString(False)) + assert parse_typename("vector < string >") == AsVector(True, AsString(False)) + assert parse_typename("std::vector") == AsVector(True, AsString(False)) + assert parse_typename("vector>") == AsVector( + True, AsVector(False, ">i4") + ) + assert parse_typename("vector>") == AsVector( + True, AsVector(False, 
AsString(False)) + ) assert parse_typename("vector>") == AsVector( - AsVector(AsString(is_stl=False)) + True, AsVector(False, AsString(False)) + ) + assert parse_typename("set") == AsSet(True, ">u2") + assert parse_typename("std::set") == AsSet(True, ">u2") + assert parse_typename("set") == AsSet(True, AsString(False)) + assert parse_typename("set>") == AsSet( + True, AsVector(False, AsString(False)) + ) + assert parse_typename("set >") == AsSet( + True, AsVector(False, AsString(False)) + ) + assert parse_typename("map") == AsMap(True, ">i4", ">f8") + assert parse_typename("map") == AsMap(True, AsString(True), ">f8") + assert parse_typename("map") == AsMap(True, ">i4", AsString(True)) + assert parse_typename("map") == AsMap( + True, AsString(True), AsString(True) + ) + assert parse_typename("map") == AsMap( + True, AsString(True), AsString(True) + ) + assert parse_typename("map< string,string >") == AsMap( + True, AsString(True), AsString(True) ) - assert parse_typename("set") == AsSet(">u2") - assert parse_typename("std::set") == AsSet(">u2") - assert parse_typename("set") == AsSet(AsString()) - assert parse_typename("set>") == AsSet(AsVector(AsString())) - assert parse_typename("set >") == AsSet(AsVector(AsString())) - assert parse_typename("map") == AsMap(">i4", ">f8") - assert parse_typename("map") == AsMap(AsString(), ">f8") - assert parse_typename("map") == AsMap(">i4", AsString()) - assert parse_typename("map") == AsMap(AsString(), AsString()) - assert parse_typename("map") == AsMap(AsString(), AsString()) - assert parse_typename("map< string,string >") == AsMap(AsString(), AsString()) assert parse_typename("map>") == AsMap( - AsString(), AsVector(">i4") + True, AsString(True), AsVector(True, ">i4") ) assert parse_typename("map, string>") == AsMap( - AsVector(">i4"), AsString() + True, AsVector(True, ">i4"), AsString(True) ) assert parse_typename("map, set>") == AsMap( - AsVector(">i4"), AsSet(">f4") + True, AsVector(True, ">i4"), AsSet(True, ">f4") ) assert 
parse_typename("map, set>>") == AsMap( - AsVector(">i4"), AsSet(AsSet(">f4")) + True, AsVector(True, ">i4"), AsSet(True, AsSet(False, ">f4")) ) with pytest.raises(ValueError): @@ -125,37 +139,46 @@ def test_map_string_string_in_object(): } -@pytest.mark.skip( - reason="FIXME: test works, but the file is not in scikit-hep-testdata yet" -) def test_map_long_int_in_object(): - with uproot4.open( - "/home/pivarski/irishep/scikit-hep-testdata/src/skhep_testdata/data/uproot-issue283.root" - ) as f: - print(f["config/detector"]) - - # raise Exception - + with uproot4.open(skhep_testdata.data_path("uproot-issue283.root")) as f: + map_long_int = f["config/detector"].member("ChannelIDMap") + assert (map_long_int.keys().min(), map_long_int.keys().max()) == ( + 46612627560, + 281410180683757, + ) + assert (map_long_int.values().min(), map_long_int.values().max()) == (0, 5159) + + +def test_top_level_vectors(): + with uproot4.open(skhep_testdata.data_path("uproot-issue38a.root"))[ + "ntupler/tree" + ] as tree: + assert [x.tolist() for x in tree["v_int16"].array(library="np")] == [[1, 2, 3]] + assert [x.tolist() for x in tree["v_int16"].array(library="np")] == [[1, 2, 3]] + assert [x.tolist() for x in tree["v_int32"].array(library="np")] == [[1, 2, 3]] + assert [x.tolist() for x in tree["v_int64"].array(library="np")] == [[1, 2, 3]] + assert [x.tolist() for x in tree["v_uint16"].array(library="np")] == [[1, 2, 3]] + assert [x.tolist() for x in tree["v_uint32"].array(library="np")] == [[1, 2, 3]] + assert [x.tolist() for x in tree["v_uint64"].array(library="np")] == [[1, 2, 3]] + assert [x.tolist() for x in tree["v_bool"].array(library="np")] == [ + [False, True] + ] + assert [x.tolist() for x in tree["v_float"].array(library="np")] == [ + [999.0, -999.0] + ] + assert [x.tolist() for x in tree["v_double"].array(library="np")] == [ + [999.0, -999.0] + ] -# has STL vectors at top-level: -# -# python -c 'import uproot; t = 
uproot.open("/home/pivarski/irishep/scikit-hep-testdata/src/skhep_testdata/data/uproot-issue38a.root")["ntupler/tree"]; print("\n".join(str((x._fName, getattr(x, "_fStreamerType", None), getattr(x, "_fClassName", None), getattr(x, "_fType", None), x.interpretation)) for x in t.allvalues()))' -# has STL map as described here: -# -# https://github.com/scikit-hep/uproot/issues/468#issuecomment-646325842 -# -# python -c 'import uproot; t = uproot.open("/home/pivarski/irishep/scikit-hep-testdata/src/skhep_testdata/data/uproot-issue468.root")["Geant4Data/Geant4Data./Geant4Data.particles"]; print(t.array(uproot.asdebug)[0][:1000])' - -# def test_strings1(): -# with uproot4.open( -# skhep_testdata.data_path("uproot-issue31.root") -# )["T/name"] as branch: -# result = branch.array(library="np") -# assert result.tolist() == ["one", "two", "three", "four", "five"] +def test_strings1(): + with uproot4.open(skhep_testdata.data_path("uproot-issue31.root"))[ + "T/name" + ] as branch: + result = branch.array(library="np") + assert result.tolist() == ["one", "two", "three", "four", "five"] -@pytest.mark.skip(reason="FIXME: implement strings specified by a TStreamer") def test_strings2(): with uproot4.open( skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") @@ -164,7 +187,6 @@ def test_strings2(): assert result.tolist() == ["evt-{0:03d}".format(i) for i in range(100)] -@pytest.mark.skip(reason="FIXME: implement std::string") def test_strings3(): with uproot4.open( skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") diff --git a/tests/test_0031-test-stl-containers.py b/tests/test_0031-test-stl-containers.py new file mode 100644 index 000000000..67322240d --- /dev/null +++ b/tests/test_0031-test-stl-containers.py @@ -0,0 +1,597 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE + +from __future__ import absolute_import + +import sys +import json + +import numpy +import pytest +import skhep_testdata + +import uproot4 
+from uproot4.interpretation.objects import AsObjects +from uproot4.stl_containers import AsString +from uproot4.stl_containers import AsVector +from uproot4.stl_containers import AsSet +from uproot4.stl_containers import AsMap + + +def test_typename(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert tree["vector_int32"].interpretation == AsObjects( + AsVector(True, numpy.dtype(">i4")) + ) + assert tree["vector_string"].interpretation == AsObjects( + AsVector(True, AsString(False)) + ) + assert tree["vector_vector_int32"].interpretation == AsObjects( + AsVector(True, AsVector(False, numpy.dtype(">i4"))) + ) + assert tree["vector_vector_string"].interpretation == AsObjects( + AsVector(True, AsVector(False, AsString(False))) + ) + assert tree["vector_set_int32"].interpretation == AsObjects( + AsVector(True, AsSet(False, numpy.dtype(">i4"))) + ) + assert tree["vector_set_string"].interpretation == AsObjects( + AsVector(True, AsSet(False, AsString(False))) + ) + assert tree["set_int32"].interpretation == AsObjects( + AsSet(True, numpy.dtype(">i4")) + ) + assert tree["set_string"].interpretation == AsObjects( + AsSet(True, AsString(False)) + ) + assert tree["map_int32_int16"].interpretation == AsObjects( + AsMap(True, numpy.dtype(">i4"), numpy.dtype(">i2")) + ) + assert tree["map_int32_vector_int16"].interpretation == AsObjects( + AsMap(True, numpy.dtype(">i4"), AsVector(True, numpy.dtype(">i2"))) + ) + assert tree["map_int32_vector_string"].interpretation == AsObjects( + AsMap(True, numpy.dtype(">i4"), AsVector(True, AsString(False))) + ) + assert tree["map_int32_set_int16"].interpretation == AsObjects( + AsMap(True, numpy.dtype(">i4"), AsSet(True, numpy.dtype(">i2"))) + ) + assert tree["map_int32_set_string"].interpretation == AsObjects( + AsMap(True, numpy.dtype(">i4"), AsSet(True, AsString(False))) + ) + assert tree["map_string_int16"].interpretation == AsObjects( + AsMap(True, AsString(True), 
numpy.dtype(">i2")) + ) + assert tree["map_string_vector_int16"].interpretation == AsObjects( + AsMap(True, AsString(True), AsVector(True, numpy.dtype(">i2"))) + ) + assert tree["map_string_vector_string"].interpretation == AsObjects( + AsMap(True, AsString(True), AsVector(True, AsString(False))) + ) + assert tree["map_string_set_int16"].interpretation == AsObjects( + AsMap(True, AsString(True), AsSet(True, numpy.dtype(">i2"))) + ) + assert tree["map_string_set_string"].interpretation == AsObjects( + AsMap(True, AsString(True), AsSet(True, AsString(False))) + ) + assert tree["map_int32_vector_vector_int16"].interpretation == AsObjects( + AsMap( + True, + numpy.dtype(">i4"), + AsVector(True, AsVector(False, numpy.dtype(">i2"))), + ) + ) + assert tree["map_int32_vector_set_int16"].interpretation == AsObjects( + AsMap( + True, + numpy.dtype(">i4"), + AsVector(True, AsSet(False, numpy.dtype(">i2"))), + ) + ) + assert tree["map_string_string"].interpretation == AsObjects( + AsMap(True, AsString(True), AsString(True)) + ) + + +def test_string(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert tree["string"].array(library="np").tolist() == [ + "one", + "two", + "three", + "four", + "five", + ] + + +def test_tstring(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert tree["tstring"].array(library="np").tolist() == [ + "one", + "two", + "three", + "four", + "five", + ] + + +def test_vector_int32(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [x.tolist() for x in tree["vector_int32"].array(library="np")] == [ + [1], + [1, 2], + [1, 2, 3], + [1, 2, 3, 4], + [1, 2, 3, 4, 5], + ] + + +def test_vector_string(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [x.tolist() for x in tree["vector_string"].array(library="np")] == [ + ["one"], + 
["one", "two"], + ["one", "two", "three"], + ["one", "two", "three", "four"], + ["one", "two", "three", "four", "five"], + ] + + +def test_vector_tstring(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [x.tolist() for x in tree["vector_tstring"].array(library="np")] == [ + ["one"], + ["one", "two"], + ["one", "two", "three"], + ["one", "two", "three", "four"], + ["one", "two", "three", "four", "five"], + ] + + +def test_vector_vector_int32(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["vector_vector_int32"].array(library="np") + ] == [ + [[1]], + [[1], [1, 2]], + [[1], [1, 2], [1, 2, 3]], + [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4]], + [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5]], + ] + + +def test_vector_vector_string(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["vector_vector_string"].array(library="np") + ] == [ + [["one"]], + [["one"], ["one", "two"]], + [["one"], ["one", "two"], ["one", "two", "three"]], + [ + ["one"], + ["one", "two"], + ["one", "two", "three"], + ["one", "two", "three", "four"], + ], + [ + ["one"], + ["one", "two"], + ["one", "two", "three"], + ["one", "two", "three", "four"], + ["one", "two", "three", "four", "five"], + ], + ] + + +def test_vector_set_int32(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [x.tolist() for x in tree["vector_set_int32"].array(library="np")] == [ + [set([1])], + [set([1]), set([1, 2])], + [set([1]), set([1, 2]), set([1, 2, 3])], + [set([1]), set([1, 2]), set([1, 2, 3]), set([1, 2, 3, 4])], + [ + set([1]), + set([1, 2]), + set([1, 2, 3]), + set([1, 2, 3, 4]), + set([1, 2, 3, 4, 5]), + ], + ] + + +def test_vector_set_string(): + with 
uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [x.tolist() for x in tree["vector_set_string"].array(library="np")] == [ + [set(["one"])], + [set(["one"]), set(["one", "two"])], + [set(["one"]), set(["one", "two"]), set(["one", "two", "three"])], + [ + set(["one"]), + set(["one", "two"]), + set(["one", "two", "three"]), + set(["one", "two", "three", "four"]), + ], + [ + set(["one"]), + set(["one", "two"]), + set(["one", "two", "three"]), + set(["one", "two", "three", "four"]), + set(["one", "two", "three", "four", "five"]), + ], + ] + + +def test_set_int32(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [x.tolist() for x in tree["set_int32"].array(library="np")] == [ + set([1]), + set([1, 2]), + set([1, 2, 3]), + set([1, 2, 3, 4]), + set([1, 2, 3, 4, 5]), + ] + + +def test_set_string(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [x.tolist() for x in tree["set_string"].array(library="np")] == [ + set(["one"]), + set(["one", "two"]), + set(["one", "two", "three"]), + set(["one", "two", "three", "four"]), + set(["one", "two", "three", "four", "five"]), + ] + + +def test_map_int32_int16(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [x.tolist() for x in tree["map_int32_int16"].array(library="np")] == [ + {1: 1}, + {1: 1, 2: 2}, + {1: 1, 2: 2, 3: 3}, + {1: 1, 2: 2, 3: 3, 4: 4}, + {1: 1, 2: 2, 3: 3, 4: 4, 5: 5}, + ] + + +def test_map_int32_vector_int16(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["map_int32_vector_int16"].array(library="np") + ] == [ + {1: [1]}, + {1: [1], 2: [1, 2]}, + {1: [1], 2: [1, 2], 3: [1, 2, 3]}, + {1: [1], 2: [1, 2], 3: [1, 2, 3], 4: [1, 2, 3, 4]}, + {1: [1], 2: [1, 2], 3: [1, 2, 3], 4: [1, 2, 3, 4], 5: [1, 2, 
3, 4, 5]}, + ] + + +def test_map_int32_vector_string(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["map_int32_vector_string"].array(library="np") + ] == [ + {1: ["one"]}, + {1: ["one"], 2: ["one", "two"]}, + {1: ["one"], 2: ["one", "two"], 3: ["one", "two", "three"]}, + { + 1: ["one"], + 2: ["one", "two"], + 3: ["one", "two", "three"], + 4: ["one", "two", "three", "four"], + }, + { + 1: ["one"], + 2: ["one", "two"], + 3: ["one", "two", "three"], + 4: ["one", "two", "three", "four"], + 5: ["one", "two", "three", "four", "five"], + }, + ] + + +def test_map_int32_set_int16(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["map_int32_set_int16"].array(library="np") + ] == [ + {1: set([1])}, + {1: set([1]), 2: set([1, 2])}, + {1: set([1]), 2: set([1, 2]), 3: set([1, 2, 3])}, + {1: set([1]), 2: set([1, 2]), 3: set([1, 2, 3]), 4: set([1, 2, 3, 4])}, + { + 1: set([1]), + 2: set([1, 2]), + 3: set([1, 2, 3]), + 4: set([1, 2, 3, 4]), + 5: set([1, 2, 3, 4, 5]), + }, + ] + + +def test_map_int32_set_string(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["map_int32_set_string"].array(library="np") + ] == [ + {1: set(["one"])}, + {1: set(["one"]), 2: set(["one", "two"])}, + {1: set(["one"]), 2: set(["one", "two"]), 3: set(["one", "two", "three"])}, + { + 1: set(["one"]), + 2: set(["one", "two"]), + 3: set(["one", "two", "three"]), + 4: set(["one", "two", "three", "four"]), + }, + { + 1: set(["one"]), + 2: set(["one", "two"]), + 3: set(["one", "two", "three"]), + 4: set(["one", "two", "three", "four"]), + 5: set(["one", "two", "three", "four", "five"]), + }, + ] + + +def test_map_string_int16(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert 
[x.tolist() for x in tree["map_string_int16"].array(library="np")] == [ + {"one": 1}, + {"one": 1, "two": 2}, + {"one": 1, "two": 2, "three": 3}, + {"one": 1, "two": 2, "three": 3, "four": 4}, + {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}, + ] + + +def test_map_string_vector_int16(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["map_string_vector_int16"].array(library="np") + ] == [ + {"one": [1]}, + {"one": [1], "two": [1, 2]}, + {"one": [1], "two": [1, 2], "three": [1, 2, 3]}, + {"one": [1], "two": [1, 2], "three": [1, 2, 3], "four": [1, 2, 3, 4]}, + { + "one": [1], + "two": [1, 2], + "three": [1, 2, 3], + "four": [1, 2, 3, 4], + "five": [1, 2, 3, 4, 5], + }, + ] + + +def test_map_string_vector_string(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["map_string_vector_string"].array(library="np") + ] == [ + {"one": ["one"]}, + {"one": ["one"], "two": ["one", "two"]}, + {"one": ["one"], "two": ["one", "two"], "three": ["one", "two", "three"]}, + { + "one": ["one"], + "two": ["one", "two"], + "three": ["one", "two", "three"], + "four": ["one", "two", "three", "four"], + }, + { + "one": ["one"], + "two": ["one", "two"], + "three": ["one", "two", "three"], + "four": ["one", "two", "three", "four"], + "five": ["one", "two", "three", "four", "five"], + }, + ] + + +def test_map_string_set_int16(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["map_string_set_int16"].array(library="np") + ] == [ + {"one": set([1])}, + {"one": set([1]), "two": set([1, 2])}, + {"one": set([1]), "two": set([1, 2]), "three": set([1, 2, 3])}, + { + "one": set([1]), + "two": set([1, 2]), + "three": set([1, 2, 3]), + "four": set([1, 2, 3, 4]), + }, + { + "one": set([1]), + "two": set([1, 2]), + "three": set([1, 2, 
3]), + "four": set([1, 2, 3, 4]), + "five": set([1, 2, 3, 4, 5]), + }, + ] + + +def test_map_string_set_string(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["map_string_set_string"].array(library="np") + ] == [ + {"one": set(["one"])}, + {"one": set(["one"]), "two": set(["one", "two"])}, + { + "one": set(["one"]), + "two": set(["one", "two"]), + "three": set(["one", "two", "three"]), + }, + { + "one": set(["one"]), + "two": set(["one", "two"]), + "three": set(["one", "two", "three"]), + "four": set(["one", "two", "three", "four"]), + }, + { + "one": set(["one"]), + "two": set(["one", "two"]), + "three": set(["one", "two", "three"]), + "four": set(["one", "two", "three", "four"]), + "five": set(["one", "two", "three", "four", "five"]), + }, + ] + + +def test_map_int32_vector_vector_int16(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() + for x in tree["map_int32_vector_vector_int16"].array(library="np") + ] == [ + {1: [[1]]}, + {1: [[1]], 2: [[1], [1, 2]]}, + {1: [[1]], 2: [[1], [1, 2]], 3: [[1], [1, 2], [1, 2, 3]]}, + { + 1: [[1]], + 2: [[1], [1, 2]], + 3: [[1], [1, 2], [1, 2, 3]], + 4: [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4]], + }, + { + 1: [[1]], + 2: [[1], [1, 2]], + 3: [[1], [1, 2], [1, 2, 3]], + 4: [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4]], + 5: [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5]], + }, + ] + + +def test_map_int32_vector_set_int16(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [ + x.tolist() for x in tree["map_int32_vector_set_int16"].array(library="np") + ] == [ + {1: [set([1])]}, + {1: [set([1])], 2: [set([1]), set([1, 2])]}, + { + 1: [set([1])], + 2: [set([1]), set([1, 2])], + 3: [set([1]), set([1, 2]), set([1, 2, 3])], + }, + { + 1: [set([1])], + 2: [set([1]), set([1, 2])], + 3: [set([1]), set([1, 2]), set([1, 
2, 3])], + 4: [set([1]), set([1, 2]), set([1, 2, 3]), set([1, 2, 3, 4])], + }, + { + 1: [set([1])], + 2: [set([1]), set([1, 2])], + 3: [set([1]), set([1, 2]), set([1, 2, 3])], + 4: [set([1]), set([1, 2]), set([1, 2, 3]), set([1, 2, 3, 4])], + 5: [ + set([1]), + set([1, 2]), + set([1, 2, 3]), + set([1, 2, 3, 4]), + set([1, 2, 3, 4, 5]), + ], + }, + ] + + +def test_map_string_string(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [x.tolist() for x in tree["map_string_string"].array(library="np")] == [ + {"one": "ONE"}, + {"one": "ONE", "two": "TWO"}, + {"one": "ONE", "two": "TWO", "three": "THREE"}, + {"one": "ONE", "two": "TWO", "three": "THREE", "four": "FOUR"}, + { + "one": "ONE", + "two": "TWO", + "three": "THREE", + "four": "FOUR", + "five": "FIVE", + }, + ] + + +def test_map_string_tstring(): + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert [x.tolist() for x in tree["map_string_tstring"].array(library="np")] == [ + {"one": "ONE"}, + {"one": "ONE", "two": "TWO"}, + {"one": "ONE", "two": "TWO", "three": "THREE"}, + {"one": "ONE", "two": "TWO", "three": "THREE", "four": "FOUR"}, + { + "one": "ONE", + "two": "TWO", + "three": "THREE", + "four": "FOUR", + "five": "FIVE", + }, + ] + + +@pytest.mark.skip(reason="FIXME: implement map") +def test_map_int_struct(): + # as described here: + # + # https://github.com/scikit-hep/uproot/issues/468#issuecomment-646325842 + # + # python -c 'import uproot; t = uproot.open("/home/pivarski/irishep/scikit-hep-testdata/src/skhep_testdata/data/uproot-issue468.root")["Geant4Data/Geant4Data./Geant4Data.particles"]; print(t.array(uproot.asdebug)[0][:1000])' + pass diff --git a/uproot4/_util.py b/uproot4/_util.py index de08b7329..311cb4541 100644 --- a/uproot4/_util.py +++ b/uproot4/_util.py @@ -101,26 +101,6 @@ def no_filter(x): return True -def exact_filter(filter): - if filter is None: - return False - elif 
callable(filter): - return False - if isstr(filter): - m = _regularize_filter_regex.match(filter) - if m is not None: - return False - elif "*" in filter or "?" in filter or "[" in filter: - return False - else: - return True - else: - raise TypeError( - "filter must be callable, a regex string between slashes, or a " - "glob pattern, not {0}".format(repr(filter)) - ) - - def regularize_filter(filter): if filter is None: return no_filter diff --git a/uproot4/behaviors/TBranch.py b/uproot4/behaviors/TBranch.py index 11822a9e7..fcbd71cce 100644 --- a/uproot4/behaviors/TBranch.py +++ b/uproot4/behaviors/TBranch.py @@ -21,6 +21,8 @@ import numpy import uproot4.source.cursor +import uproot4.streamers +import uproot4.stl_containers import uproot4.interpretation import uproot4.interpretation.numerical import uproot4.interpretation.jagged @@ -435,7 +437,7 @@ def basket_to_array(basket): basket_arrays = branchid_arrays[id(branch)] basket_arrays[basket.basket_num] = interpretation.basket_array( - basket.data, basket.byte_offsets, basket, branch + basket.data, basket.byte_offsets, basket, branch, branch.context ) if basket.num_entries != len(basket_arrays[basket.basket_num]): raise ValueError( @@ -576,9 +578,9 @@ def iteritems( ) for branch in self.branches: if ( - filter_name(branch.name) - and filter_typename(branch.typename) - and filter_branch(branch) + (filter_name is no_filter or filter_name(branch.name)) + and (filter_typename is no_filter or filter_typename(branch.typename)) + and (filter_branch is no_filter or filter_branch(branch)) ): yield branch.name, branch @@ -594,7 +596,7 @@ def iteritems( k2 = "{0}/{1}".format(branch.name, k1) else: k2 = k1 - if filter_name(k2): + if filter_name is no_filter or filter_name(k2): yield k2, v def items( @@ -683,7 +685,6 @@ def values( filter_name=filter_name, filter_typename=filter_typename, filter_branch=filter_branch, - full_paths=False, ) ) @@ -845,7 +846,11 @@ def postprocess(self, chunk, cursor, context): 
self._interpretation = None self._count_branch = None self._count_leaf = None + self._typename = None self._streamer = None + self._context = dict(context) + self._context["breadcrumbs"] = () + self._context["in_TBranch"] = True self._num_normal_baskets = 0 for i, x in enumerate(self.member("fBasketSeek")): @@ -881,13 +886,15 @@ def postprocess(self, chunk, cursor, context): @property def tree(self): - import uproot4.behaviors.TTree - out = self while not isinstance(out, uproot4.behaviors.TTree.TTree): out = out.parent return out + @property + def context(self): + return self._context + @property def aliases(self): return self.tree.aliases @@ -959,40 +966,35 @@ def title(self): @property def typename(self): - if self._streamer is not None: - return self._streamer.typename - - def leaf_to_typename(leaf): - dim = leaf.member("fTitle").count("[") - u = "u" if leaf.member("fIsUnsigned") else "" - - if leaf.classname == "TLeafO": - return "bool" + "[]" * dim - elif leaf.classname == "TLeafB": - return u + "int8_t" + "[]" * dim - elif leaf.classname == "TLeafS": - return u + "int16_t" + "[]" * dim - elif leaf.classname == "TLeafI": - return u + "int32_t" + "[]" * dim - elif leaf.classname == "TLeafL": - return u + "int64_t" + "[]" * dim - elif leaf.classname == "TLeafF": - return "float" + "[]" * dim - elif leaf.classname == "TLeafD": - return "double" + "[]" * dim - elif leaf.classname == "TLeafC": - return "char*" + "*" * dim - else: - return "???" 
- - if len(self.member("fLeaves")) == 1: - return leaf_to_typename(self.member("fLeaves")[0]) + if self.interpretation is None: + return "unknown" else: - leaf_list = [leaf_to_typename(leaf) for leaf in self.member("fLeaves")] - return ":".join(leaf_list) + return self.interpretation.typename + + @property + def top_level(self): + return isinstance(self.parent, uproot4.behaviors.TTree.TTree) @property def streamer(self): + if self._streamer is None: + nodotname = self.name.split(".")[-1] + fParentName = self.member("fParentName", none_if_missing=True) + fClassName = self.member("fClassName", none_if_missing=True) + + if fParentName is not None and fParentName != "": + matches = self._file.streamers.get(fParentName) + if matches is not None: + for element in matches[max(matches)].elements: + if element.name == nodotname: + self._streamer = element + break + + elif fClassName is not None and fClassName != "": + matches = self._file.streamers.get(fClassName) + if matches is not None: + self._streamer = matches[max(matches)] + return self._streamer @property diff --git a/uproot4/deserialization.py b/uproot4/deserialization.py index 44c2a988f..efba9f909 100644 --- a/uproot4/deserialization.py +++ b/uproot4/deserialization.py @@ -65,6 +65,7 @@ def __init__(self, message, chunk, cursor, context, file_path): def __str__(self): lines = [] indent = " " + last = None for obj in self.context.get("breadcrumbs", ()): lines.append( "{0}{1} version {2} as {3}.{4} ({5} bytes)".format( @@ -81,6 +82,44 @@ def __str__(self): lines.append("{0}(base): {1}".format(indent, repr(v))) for k, v in getattr(obj, "_members", {}).items(): lines.append("{0}{1}: {2}".format(indent, k, repr(v))) + last = obj + + if last is not None: + base_names_versions = getattr(last, "base_names_versions", None) + bases = getattr(last, "_bases", None) + if base_names_versions is not None and bases is not None: + base_names = [n for n, v in base_names_versions] + for c in bases: + classname = getattr(c, 
"classname", None) + if classname is not None: + if classname in base_names: + base_names[base_names.index(classname)] = ( + "(" + classname + ")" + ) + else: + base_names.append(classname + "?") + if len(base_names) != 0: + lines.append( + "Base classes for {0}: {1}".format( + last.classname, ", ".join(base_names) + ) + ) + + member_names = getattr(last, "member_names", None) + members = getattr(last, "_members", None) + if member_names is not None and members is not None: + member_names = list(member_names) + for n in members: + if n in member_names: + member_names[member_names.index(n)] = "(" + n + ")" + else: + member_names.append(n + "?") + if len(member_names) != 0: + lines.append( + "Members for {0}: {1}".format( + last.classname, ", ".join(member_names) + ) + ) in_parent = "" if "TBranch" in self.context: @@ -152,7 +191,9 @@ def numbytes_version(chunk, cursor, context, move=True): return num_bytes, version -def numbytes_check(start_cursor, stop_cursor, num_bytes, classname, context, file_path): +def numbytes_check( + chunk, start_cursor, stop_cursor, num_bytes, classname, context, file_path +): if num_bytes is not None: observed = stop_cursor.displacement(start_cursor) if observed != num_bytes: @@ -160,6 +201,8 @@ def numbytes_check(start_cursor, stop_cursor, num_bytes, classname, context, fil """expected {0} bytes but cursor moved by {1} bytes (through {2})""".format( num_bytes, observed, classname ), + chunk, + stop_cursor, context, file_path, ) diff --git a/uproot4/interpretation/__init__.py b/uproot4/interpretation/__init__.py index 995f268fe..8953b3461 100644 --- a/uproot4/interpretation/__init__.py +++ b/uproot4/interpretation/__init__.py @@ -10,11 +10,13 @@ class Interpretation(object): * `cache_key`: Used to distinguish the same array read with different interpretations in a cache. + * `typename`: Name of the C++ data type from which this Interpretation + was derived. 
* `numpy_dtype`: Data type (including any shape elements after the first dimension) of the NumPy array that would be created. * `awkward_form`: Form of the Awkward Array that would be created (requires `awkward1`); used by the `ak.type` function. - * `basket_array(data, byte_offsets, basket, branch)`: Create a + * `basket_array(data, byte_offsets, basket, branch, context)`: Create a basket_array from a basket's `data` and `byte_offsets`. * `final_array(basket_arrays, entry_start, entry_stop, entry_offsets, library)`: Combine basket_arrays with basket excess trimmed and in the form @@ -25,6 +27,10 @@ class Interpretation(object): def cache_key(self): raise AssertionError + @property + def typename(self): + raise AssertionError + @property def numpy_dtype(self): raise AssertionError @@ -33,7 +39,7 @@ def numpy_dtype(self): def awkward_form(self): raise AssertionError - def basket_array(self, data, byte_offsets, basket, branch): + def basket_array(self, data, byte_offsets, basket, branch, context): raise AssertionError def final_array( @@ -41,6 +47,12 @@ def final_array( ): raise AssertionError + def __eq__(self, other): + raise AssertionError + + def __ne__(self, other): + raise not self == other + def hook_before_basket_array(self, *args, **kwargs): pass diff --git a/uproot4/interpretation/identify.py b/uproot4/interpretation/identify.py index 2f1f32de1..3e4b255a2 100644 --- a/uproot4/interpretation/identify.py +++ b/uproot4/interpretation/identify.py @@ -9,6 +9,9 @@ import uproot4.const import uproot4.interpretation.numerical +import uproot4.interpretation.strings +import uproot4.interpretation.objects +import uproot4.stl_containers import uproot4.streamers import uproot4._util @@ -33,6 +36,10 @@ def __str__(self): self.reason, self.file_path, self.object_path ) + @property + def typename(self): + return "unknown" + @property def cache_key(self): raise self @@ -170,6 +177,310 @@ def _leaf_to_dtype(leaf): raise NotNumerical() +_tokenize_typename_pattern = 
re.compile( + r"(\b([A-Za-z_][A-Za-z_0-9]*)(\s*::\s*[A-Za-z_][A-Za-z_0-9]*)*\b(\s*\*)*|<|>|,)" +) + +_simplify_token_1 = re.compile(r"\s*\*") +_simplify_token_2 = re.compile(r"\s*::\s*") + + +def _simplify_token(token): + return _simplify_token_2.sub("::", _simplify_token_1.sub("*", token.group(0))) + + +def _parse_error(pos, typename, file): + in_file = "" + if file is not None: + in_file = "\nin file {0}".format(file.file_path) + raise ValueError( + """invalid C++ type name syntax at char {0} + + {1} +{2}{3}""".format( + pos, typename, "-" * (4 + pos) + "^", in_file + ) + ) + + +def _parse_expect(what, tokens, i, typename, file): + if i >= len(tokens): + _parse_error(len(typename), typename, file) + + if what is not None and tokens[i].group(0) != what: + _parse_error(tokens[i].start() + 1, typename, file) + + +def _parse_maybe_quote(quoted, quote): + if quote: + return quoted + else: + return eval(quoted) + + +def _parse_node(tokens, i, typename, file, quote, header, inner_header): + _parse_expect(None, tokens, i, typename, file) + + has2 = i + 1 < len(tokens) + + if tokens[i].group(0) == ",": + _parse_error(tokens[i].start() + 1, typename, file) + + elif tokens[i].group(0) == "Bool_t": + return i + 1, _parse_maybe_quote('numpy.dtype("?")', quote) + elif tokens[i].group(0) == "bool": + return i + 1, _parse_maybe_quote('numpy.dtype("?")', quote) + + elif tokens[i].group(0) == "Char_t": + return i + 1, _parse_maybe_quote('numpy.dtype("i1")', quote) + elif tokens[i].group(0) == "char": + return i + 1, _parse_maybe_quote('numpy.dtype("i1")', quote) + elif tokens[i].group(0) == "UChar_t": + return i + 1, _parse_maybe_quote('numpy.dtype("u1")', quote) + elif has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "char": + return i + 2, _parse_maybe_quote('numpy.dtype("u1")', quote) + + elif tokens[i].group(0) == "Short_t": + return i + 1, _parse_maybe_quote('numpy.dtype(">i2")', quote) + elif tokens[i].group(0) == "short": + return i + 1, 
_parse_maybe_quote('numpy.dtype(">i2")', quote) + elif tokens[i].group(0) == "UShort_t": + return i + 1, _parse_maybe_quote('numpy.dtype(">u2")', quote) + elif ( + has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "short" + ): + return i + 2, _parse_maybe_quote('numpy.dtype(">u2")', quote) + + elif tokens[i].group(0) == "Int_t": + return i + 1, _parse_maybe_quote('numpy.dtype(">i4")', quote) + elif tokens[i].group(0) == "int": + return i + 1, _parse_maybe_quote('numpy.dtype(">i4")', quote) + elif tokens[i].group(0) == "UInt_t": + return i + 1, _parse_maybe_quote('numpy.dtype(">u4")', quote) + elif has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "int": + return i + 2, _parse_maybe_quote('numpy.dtype(">u4")', quote) + + elif tokens[i].group(0) == "Long_t": + return i + 1, _parse_maybe_quote('numpy.dtype(">i8")', quote) + elif tokens[i].group(0) == "Long64_t": + return i + 1, _parse_maybe_quote('numpy.dtype(">i8")', quote) + elif tokens[i].group(0) == "long": + return i + 1, _parse_maybe_quote('numpy.dtype(">i8")', quote) + elif tokens[i].group(0) == "ULong_t": + return i + 1, _parse_maybe_quote('numpy.dtype(">u8")', quote) + elif tokens[i].group(0) == "ULong64_t": + return i + 1, _parse_maybe_quote('numpy.dtype(">u8")', quote) + elif has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "long": + return i + 2, _parse_maybe_quote('numpy.dtype(">u8")', quote) + + elif tokens[i].group(0) == "Float_t": + return i + 1, _parse_maybe_quote('numpy.dtype(">f4")', quote) + elif tokens[i].group(0) == "float": + return i + 1, _parse_maybe_quote('numpy.dtype(">f4")', quote) + + elif tokens[i].group(0) == "Double_t": + return i + 1, _parse_maybe_quote('numpy.dtype(">f8")', quote) + elif tokens[i].group(0) == "double": + return i + 1, _parse_maybe_quote('numpy.dtype(">f8")', quote) + + elif tokens[i].group(0) == "string" or _simplify_token(tokens[i]) == "std::string": + return ( + i + 1, + _parse_maybe_quote( + 
"uproot4.stl_containers.AsString({0})".format(header), quote + ), + ) + elif tokens[i].group(0) == "TString": + return ( + i + 1, + _parse_maybe_quote( + "uproot4.stl_containers.AsString(False, typename='TString')", quote + ), + ) + elif _simplify_token(tokens[i]) == "char*": + return ( + i + 1, + _parse_maybe_quote( + "uproot4.stl_containers.AsString(False, size_1to5_bytes=False, typename='char*')", + quote, + ), + ) + elif ( + has2 + and tokens[i].group(0) == "const" + and _simplify_token(tokens[i + 1]) == "char*" + ): + return ( + i + 2, + _parse_maybe_quote( + "uproot4.stl_containers.AsString(False, size_1to5_bytes=False, typename='char*')", + quote, + ), + ) + + elif tokens[i].group(0) == "vector" or _simplify_token(tokens[i]) == "std::vector": + _parse_expect("<", tokens, i + 1, typename, file) + i, values = _parse_node( + tokens, i + 2, typename, file, quote, inner_header, inner_header + ) + _parse_expect(">", tokens, i, typename, file) + if quote: + return ( + i + 1, + "uproot4.stl_containers.AsVector({0}, {1})".format(header, values), + ) + else: + return i + 1, uproot4.stl_containers.AsVector(header, values) + + elif tokens[i].group(0) == "set" or _simplify_token(tokens[i]) == "std::set": + _parse_expect("<", tokens, i + 1, typename, file) + i, keys = _parse_node( + tokens, i + 2, typename, file, quote, inner_header, inner_header + ) + _parse_expect(">", tokens, i, typename, file) + if quote: + return i + 1, "uproot4.stl_containers.AsSet({0}, {1})".format(header, keys) + else: + return i + 1, uproot4.stl_containers.AsSet(header, keys) + + elif tokens[i].group(0) == "map" or _simplify_token(tokens[i]) == "std::map": + _parse_expect("<", tokens, i + 1, typename, file) + i, keys = _parse_node( + tokens, i + 2, typename, file, quote, header, inner_header + ) + _parse_expect(",", tokens, i, typename, file) + i, values = _parse_node( + tokens, i + 1, typename, file, quote, header, inner_header + ) + _parse_expect(">", tokens, i, typename, file) + if quote: + 
return ( + i + 1, + "uproot4.stl_containers.AsMap({0}, {1}, {2})".format( + header, keys, values + ), + ) + else: + return i + 1, uproot4.stl_containers.AsMap(header, keys, values) + + else: + start, stop = tokens[i].span() + + if has2 and tokens[i + 1].group(0) == "<": + i, keys = _parse_node( + tokens, i + 1, typename, file, quote, inner_header, inner_header + ) + _parse_expect(">", tokens, i + 1, typename, file) + stop = tokens[i + 1].span()[1] + i += 1 + + classname = typename[start:stop] + + if quote: + return "c({0})".format(repr(classname)) + elif file is None: + cls = uproot4.classes[classname] + else: + cls = file.class_named(classname) + + return i + 1, cls + + +def parse_typename( + typename, + file=None, + quote=False, + outer_header=True, + inner_header=False, + string_header=True, +): + tokens = list(_tokenize_typename_pattern.finditer(typename)) + + if ( + string_header + and len(tokens) != 0 + and ( + tokens[0].group(0) == "string" + or _simplify_token(tokens[0]) == "std::string" + ) + ): + i, out = 1, _parse_maybe_quote("uproot4.stl_containers.AsString(False)", quote) + + else: + i, out = _parse_node( + tokens, 0, typename, file, quote, outer_header, inner_header + ) + + if i < len(tokens): + _parse_error(tokens[i].start(), typename, file) + + return out + + +def _parse_node_for_streamer(tokens, i, typename, file): + _parse_expect(None, tokens, i, typename, file) + + has2 = i + 1 < len(tokens) + + if tokens[i].group(0) == ",": + _parse_error(tokens[i].start() + 1, typename, file) + + elif tokens[i].group(0) == "string" or _simplify_token(tokens[i]) == "std::string": + return i + 1, "string" + elif tokens[i].group(0) == "TString": + return i + 1, "TString" + elif _simplify_token(tokens[i]) == "char*": + return i + 1, "char*" + elif ( + has2 + and tokens[i].group(0) == "const" + and _simplify_token(tokens[i + 1]) == "char*" + ): + return i + 2, "char*" + + elif tokens[i].group(0) == "vector" or _simplify_token(tokens[i]) == "std::vector": + 
_parse_expect("<", tokens, i + 1, typename, file) + i, values = _parse_node_for_streamer(tokens, i + 2, typename, file) + _parse_expect(">", tokens, i, typename, file) + return i + 1, values + + elif tokens[i].group(0) == "set" or _simplify_token(tokens[i]) == "std::set": + _parse_expect("<", tokens, i + 1, typename, file) + i, keys = _parse_node_for_streamer(tokens, i + 2, typename, file) + _parse_expect(">", tokens, i, typename, file) + return i + 1, keys + + elif tokens[i].group(0) == "map" or _simplify_token(tokens[i]) == "std::map": + _parse_expect("<", tokens, i + 1, typename, file) + i, keys = _parse_node_for_streamer(tokens, i + 2, typename, file) + _parse_expect(",", tokens, i, typename, file) + i, values = _parse_node_for_streamer(tokens, i + 1, typename, file) + _parse_expect(">", tokens, i, typename, file) + return i + 1, values + + else: + start, stop = tokens[i].span() + + if has2 and tokens[i + 1].group(0) == "<": + i, keys = _parse_node_for_streamer(tokens, i + 1, typename, file) + _parse_expect(">", tokens, i + 1, typename, file) + stop = tokens[i + 1].span()[1] + i += 1 + + return typename[start:stop] + + +def parse_typename_for_streamer(typename, file): + tokens = list(_tokenize_typename_pattern.finditer(typename)) + + i, out = _parse_node_for_streamer(tokens, 0, typename, file) + + if i < len(tokens): + _parse_error(tokens[i].start(), typename, None) + + return out + + _title_has_dims = re.compile(r"^([^\[\]]+)(\[[^\[\]]+\])+") _item_dim_pattern = re.compile(r"\[([1-9][0-9]*)\]") _item_any_pattern = re.compile(r"\[(.*)\]") @@ -350,14 +661,6 @@ def interpretation_of(branch, context): elif len(branch.member("fLeaves")) == 1: leaf = branch.member("fLeaves")[0] - if isinstance( - branch.streamer, uproot4.streamers.Model_TStreamerObjectPointer - ): - typename = branch.streamer.typename - if typename.endswith("*"): - typename = typename[:-1] - raise NotImplementedError("obj_or_genobj") - leaftype = uproot4.const.kBase if leaf.classname == 
"TLeafElement": leaftype = _normalize_ftype(leaf.member("fType")) @@ -411,10 +714,10 @@ def interpretation_of(branch, context): except NotNumerical: if ( - branch.has_member("fStreamerType") - and branch.member("fStreamerType") == uproot4.const.kTString + branch.member("fStreamerType", none_if_missing=True) + == uproot4.const.kTString ): - return uproot4.interpretation.strings.AsStrings(size_1to5_bytes=True) + return uproot4.interpretation.strings.AsStrings(typename="TString") if len(branch.member("fLeaves")) != 1: raise UnknownInterpretation( @@ -428,9 +731,35 @@ def interpretation_of(branch, context): leaf = branch.member("fLeaves")[0] if leaf.classname == "TLeafC": - return uproot4.interpretation.strings.AsStrings(size_1to5_bytes=True) + return uproot4.interpretation.strings.AsStrings() + + if branch.top_level and branch.has_member("fClassName"): + model_cls = parse_typename( + branch.member("fClassName"), + file=branch.file, + outer_header=True, + inner_header=False, + string_header=True, + ) + return uproot4.interpretation.objects.AsObjects(model_cls) + + if branch.streamer is not None: + model_cls = parse_typename( + branch.streamer.typename, + file=branch.file, + outer_header=True, + inner_header=False, + string_header=False, + ) + return uproot4.interpretation.objects.AsObjects(model_cls) if leaf.classname == "TLeafElement": raise NotImplementedError + if isinstance(branch.streamer, uproot4.streamers.Model_TStreamerObjectPointer): + typename = branch.streamer.typename + if typename.endswith("*"): + typename = typename[:-1] + raise NotImplementedError("obj_or_genobj") + raise NotImplementedError diff --git a/uproot4/interpretation/jagged.py b/uproot4/interpretation/jagged.py index 4e959408d..d6e9ce282 100644 --- a/uproot4/interpretation/jagged.py +++ b/uproot4/interpretation/jagged.py @@ -59,11 +59,12 @@ def fast_divide(array, divisor): class AsJagged(uproot4.interpretation.Interpretation): - def __init__(self, content, header_bytes=0): + def __init__(self, 
content, header_bytes=0, typename=None): if not isinstance(content, uproot4.interpretation.numerical.Numerical): raise TypeError("AsJagged content can only be Numerical") self._content = content self._header_bytes = header_bytes + self._typename = typename @property def content(self): @@ -81,6 +82,13 @@ def __repr__(self): repr(self._content), self._header_bytes ) + def __eq__(self, other): + return ( + isinstance(other, AsJagged) + and self._content == other._content + and self._header_bytes == other._header_bytes + ) + @property def numpy_dtype(self): return numpy.dtype(numpy.object) @@ -95,16 +103,32 @@ def cache_key(self): type(self).__name__, self._content.cache_key, self._header_bytes ) - def basket_array(self, data, byte_offsets, basket, branch): + @property + def typename(self): + if self._typename is None: + content = self._content.typename + try: + i = content.index("[") + return content[:i] + "[]" + content[i:] + except ValueError: + return content + "[]" + else: + return self._typename + + def basket_array(self, data, byte_offsets, basket, branch, context): self.hook_before_basket_array( - data=data, byte_offsets=byte_offsets, basket=basket, branch=branch + data=data, + byte_offsets=byte_offsets, + basket=basket, + branch=branch, + context=context, ) assert basket.byte_offsets is not None if self._header_bytes == 0: offsets = fast_divide(basket.byte_offsets, self._content.itemsize) - content = self._content.basket_array(data, None, basket, branch) + content = self._content.basket_array(data, None, basket, branch, context) output = JaggedArray(offsets, content) else: @@ -117,7 +141,7 @@ def basket_array(self, data, byte_offsets, basket, branch): numpy.cumsum(mask, out=mask) data = data[mask.view(numpy.bool_)] - content = self._content.basket_array(data, None, basket, branch) + content = self._content.basket_array(data, None, basket, branch, context) byte_counts = byte_stops - byte_starts counts = fast_divide(byte_counts, self._content.itemsize) @@ 
-128,11 +152,12 @@ def basket_array(self, data, byte_offsets, basket, branch): output = JaggedArray(offsets, content) - self.hook_before_basket_array( + self.hook_after_basket_array( data=data, byte_offsets=byte_offsets, basket=basket, branch=branch, + context=context, output=output, ) diff --git a/uproot4/interpretation/numerical.py b/uproot4/interpretation/numerical.py index d594662c6..dcd681c78 100644 --- a/uproot4/interpretation/numerical.py +++ b/uproot4/interpretation/numerical.py @@ -114,6 +114,29 @@ def final_array( return output +_numpy_byteorder_to_cache_key = { + "!": "B", + ">": "B", + "<": "L", + "|": "L", + "=": "B" if numpy.dtype(">f8").isnative else "L", +} + +_dtype_kind_itemsize_to_typename = { + ("b", 1): "bool", + ("i", 1): "int8_t", + ("u", 1): "uint8_t", + ("i", 2): "int16_t", + ("u", 2): "uint16_t", + ("i", 4): "int32_t", + ("u", 4): "uint32_t", + ("i", 8): "int64_t", + ("u", 8): "uint64_t", + ("f", 4): "float", + ("f", 8): "double", +} + + class AsDtype(Numerical): def __init__(self, from_dtype, to_dtype=None): self._from_dtype = numpy.dtype(from_dtype) @@ -130,6 +153,13 @@ def __repr__(self): repr(str(self._from_dtype)), repr(str(self._to_dtype)) ) + def __eq__(self, other): + return ( + type(other) is AsDtype + and self._from_dtype == other._from_dtype + and self._to_dtype == other._to_dtype + ) + @property def from_dtype(self): return self._from_dtype @@ -138,58 +168,72 @@ def from_dtype(self): def itemsize(self): return self._from_dtype.itemsize - _numpy_byteorder_to_cache_key = { - "!": "B", - ">": "B", - "<": "L", - "|": "L", - "=": "B" if numpy.dtype(">f8").isnative else "L", - } - @property def cache_key(self): def form(dtype, name): d, s = _dtype_shape(dtype) return "{0}{1}{2}({3}{4})".format( - self._numpy_byteorder_to_cache_key[d.byteorder], + _numpy_byteorder_to_cache_key[d.byteorder], d.kind, d.itemsize, ",".join(repr(x) for x in s), name, ) - if self._from_dtype.names is None: - from_dtype = form(self._from_dtype, "") + if 
self.from_dtype.names is None: + from_dtype = form(self.from_dtype, "") else: from_dtype = ( "[" + ",".join( - form(self._from_dtype[n], "," + repr(n)) - for n in self._from_dtype.names + form(self.from_dtype[n], "," + repr(n)) + for n in self.from_dtype.names ) + "]" ) - if self._to_dtype.names is None: - to_dtype = form(self._to_dtype, "") + if self.to_dtype.names is None: + to_dtype = form(self.to_dtype, "") else: to_dtype = ( "[" + ",".join( - form(self._to_dtype[n], "," + repr(n)) for n in self._to_dtype.names + form(self.to_dtype[n], "," + repr(n)) for n in self.to_dtype.names ) + "]" ) return "{0}({1},{2})".format(type(self).__name__, from_dtype, to_dtype) - def basket_array(self, data, byte_offsets, basket, branch): + @property + def typename(self): + def form(dtype): + d, s = _dtype_shape(dtype) + return _dtype_kind_itemsize_to_typename[d.kind, d.itemsize] + "".join( + "[" + str(dim) + "]" for dim in s + ) + + if self.from_dtype.names is None: + return form(self.from_dtype) + else: + return ( + "struct {" + + " ".join( + "{0} {1};".format(form(self.from_dtype[n]), n) + for n in self.from_dtype.names + ) + + "}" + ) + + def basket_array(self, data, byte_offsets, basket, branch, context): self.hook_before_basket_array( - data=data, byte_offsets=byte_offsets, basket=basket, branch=branch, + data=data, + byte_offsets=byte_offsets, + basket=basket, + branch=branch, + context=context, ) - assert byte_offsets is None - dtype, shape = _dtype_shape(self._from_dtype) try: output = data.view(dtype).reshape((-1,) + shape) @@ -206,11 +250,12 @@ def basket_array(self, data, byte_offsets, basket, branch): ) ) - self.hook_before_basket_array( + self.hook_after_basket_array( data=data, byte_offsets=byte_offsets, basket=basket, branch=branch, + context=context, output=output, ) diff --git a/uproot4/interpretation/objects.py b/uproot4/interpretation/objects.py index d911cef22..88557cfe5 100644 --- a/uproot4/interpretation/objects.py +++ b/uproot4/interpretation/objects.py @@ 
-2,8 +2,200 @@ from __future__ import absolute_import +import numpy + import uproot4.interpretation +import uproot4.stl_containers +import uproot4.model +import uproot4.source.chunk +import uproot4.source.cursor +import uproot4._util class ObjectArray(uproot4.interpretation.Interpretation): - pass + def __init__(self, model, branch, context, byte_offsets, byte_content): + self._model = model + self._branch = branch + self._context = context + self._byte_offsets = byte_offsets + self._byte_content = byte_content + + def __repr__(self): + return "ObjectArray({0}, {1}, {2}, {3}, {4})".format( + self._model, + self._branch, + self._context, + self._byte_offsets, + self._byte_content, + ) + + @property + def model(self): + return self._model + + @property + def branch(self): + return self._branch + + @property + def context(self): + return self._context + + @property + def byte_offsets(self): + return self._byte_offsets + + @property + def byte_content(self): + return self._byte_content + + def __len__(self): + return len(self._byte_offsets) - 1 + + def chunk(self, entry_num): + byte_start = self._byte_offsets[entry_num] + byte_stop = self._byte_offsets[entry_num + 1] + data = self._byte_content[byte_start:byte_stop] + return uproot4.source.chunk.Chunk.wrap(self._branch.file.source, data) + + def __getitem__(self, where): + if uproot4._util.isint(where): + chunk = self.chunk(where) + cursor = uproot4.source.cursor.Cursor(0) + return self._model.read( + chunk, cursor, self._context, self._branch.file, self._branch + ) + + elif isinstance(where, slice): + wheres = range(*where.indicies(len(self))) + out = numpy.empty(len(wheres), dtype=numpy.object) + for i in wheres: + out[i] = self[i] + return out + + else: + raise NotImplementedError(repr(where)) + + +class AsObjects(uproot4.interpretation.Interpretation): + def __init__(self, model): + self._model = model + + @property + def model(self): + return self._model + + def __repr__(self): + return 
"AsObjects({0})".format(repr(self._model)) + + def __eq__(self, other): + return isinstance(other, AsObjects) and self._model == other._model + + @property + def numpy_dtype(self): + return numpy.dtype(numpy.object) + + @property + def awkward_form(self): + raise NotImplementedError + + @property + def cache_key(self): + content_key = uproot4.stl_containers._content_cache_key(self._model) + return "{0}({1})".format(type(self).__name__, content_key) + + @property + def typename(self): + if isinstance(self._model, uproot4.stl_containers.AsSTLContainer): + return self._model.typename + else: + return uproot4.model.classname_decode(self._model.__name__)[0] + + def basket_array(self, data, byte_offsets, basket, branch, context): + self.hook_before_basket_array( + data=data, + byte_offsets=byte_offsets, + basket=basket, + branch=branch, + context=context, + ) + + assert basket.byte_offsets is not None + + output = ObjectArray(self._model, branch, context, byte_offsets, data) + + self.hook_after_basket_array( + data=data, + byte_offsets=byte_offsets, + basket=basket, + branch=branch, + context=context, + output=output, + ) + + return output + + def final_array( + self, basket_arrays, entry_start, entry_stop, entry_offsets, library, branch + ): + self.hook_before_final_array( + basket_arrays=basket_arrays, + entry_start=entry_start, + entry_stop=entry_stop, + entry_offsets=entry_offsets, + library=library, + branch=branch, + ) + + output = numpy.empty(entry_stop - entry_start, dtype=numpy.dtype(numpy.object)) + + start = entry_offsets[0] + for basket_num, stop in enumerate(entry_offsets[1:]): + if start <= entry_start and entry_stop <= stop: + basket_array = basket_arrays[basket_num] + for global_i in range(entry_start, entry_stop): + local_i = global_i - start + output[global_i] = basket_array[local_i] + + elif start <= entry_start < stop: + basket_array = basket_arrays[basket_num] + for global_i in range(entry_start, stop): + local_i = global_i - start + output[global_i] 
= basket_array[local_i] + + elif start <= entry_stop <= stop: + basket_array = basket_arrays[basket_num] + for global_i in range(start, entry_stop): + local_i = global_i - start + output[global_i] = basket_array[local_i] + + elif entry_start < stop and start <= entry_stop: + for global_i in range(start, stop): + local_i = global_i - start + output[global_i] = basket_array[local_i] + + start = stop + + self.hook_before_library_finalize( + basket_arrays=basket_arrays, + entry_start=entry_start, + entry_stop=entry_stop, + entry_offsets=entry_offsets, + library=library, + branch=branch, + output=output, + ) + + output = library.finalize(output, branch) + + self.hook_after_final_array( + basket_arrays=basket_arrays, + entry_start=entry_start, + entry_stop=entry_stop, + entry_offsets=entry_offsets, + library=library, + branch=branch, + output=output, + ) + + return output diff --git a/uproot4/interpretation/strings.py b/uproot4/interpretation/strings.py index 49f91ee8e..c583f53dd 100644 --- a/uproot4/interpretation/strings.py +++ b/uproot4/interpretation/strings.py @@ -36,9 +36,10 @@ def __len__(self): class AsStrings(uproot4.interpretation.Interpretation): - def __init__(self, header_bytes=0, size_1to5_bytes=False): + def __init__(self, header_bytes=0, size_1to5_bytes=True, typename=None): self._header_bytes = header_bytes self._size_1to5_bytes = size_1to5_bytes + self._typename = typename @property def header_bytes(self): @@ -52,10 +53,24 @@ def __repr__(self): args = [] if self._header_bytes != 0: args.append("header_bytes={0}".format(self._header_bytes)) - if self._size_1to5_bytes is not False: + if self._size_1to5_bytes is not True: args.append("size_1to5_bytes={0}".format(self._size_1to5_bytes)) return "AsStrings({0})".format(", ".join(args)) + def __eq__(self, other): + return ( + isinstance(other, AsStrings) + and self._header_bytes == other._header_bytes + and self._size_1to5_bytes == other._size_1to5_bytes + ) + + @property + def typename(self): + if 
self._typename is None: + return "char*" + else: + return self._typename + @property def numpy_dtype(self): return numpy.dtype(numpy.object) @@ -70,9 +85,13 @@ def cache_key(self): type(self).__name__, self._header_bytes, self._size_1to5_bytes ) - def basket_array(self, data, byte_offsets, basket, branch): + def basket_array(self, data, byte_offsets, basket, branch, context): self.hook_before_basket_array( - data=data, byte_offsets=byte_offsets, basket=basket, branch=branch + data=data, + byte_offsets=byte_offsets, + basket=basket, + branch=branch, + context=context, ) assert basket.byte_offsets is not None @@ -96,13 +115,18 @@ def basket_array(self, data, byte_offsets, basket, branch): offsets[0] = 0 numpy.cumsum(counts, out=offsets[1:]) - output = StringArray(offsets, uproot4._util.ensure_str(data.tostring())) + if hasattr(data, "tobytes"): + data = data.tobytes() + else: + data = data.tostring() + output = StringArray(offsets, uproot4._util.ensure_str(data)) - self.hook_before_basket_array( + self.hook_after_basket_array( data=data, byte_offsets=byte_offsets, basket=basket, branch=branch, + context=context, output=output, ) diff --git a/uproot4/model.py b/uproot4/model.py index 4cbee096a..ce0c692c0 100644 --- a/uproot4/model.py +++ b/uproot4/model.py @@ -71,7 +71,7 @@ def read(cls, chunk, cursor, context, file, parent): self.hook_after_read_members(chunk=chunk, cursor=cursor, context=context) - self.check_numbytes(cursor, context) + self.check_numbytes(chunk, cursor, context) self.hook_before_postprocess(chunk=chunk, cursor=cursor, context=context) @@ -97,10 +97,11 @@ def read_numbytes_version(self, chunk, cursor, context): def read_members(self, chunk, cursor, context): pass - def check_numbytes(self, cursor, context): + def check_numbytes(self, chunk, cursor, context): import uproot4.deserialization uproot4.deserialization.numbytes_check( + chunk, self._cursor, cursor, self._num_bytes, @@ -187,7 +188,7 @@ def has_member(self, name, bases=True, 
recursive_bases=True): return True return False - def member(self, name, bases=True, recursive_bases=True): + def member(self, name, bases=True, recursive_bases=True, none_if_missing=False): if name in self._members: return self._members[name] if bases: @@ -203,16 +204,21 @@ def member(self, name, bases=True, recursive_bases=True): if name in base._members: return base._members[name] - raise uproot4.KeyInFileError( - name, - """{0}.{1} has only the following members: + if none_if_missing: + return None + else: + raise uproot4.KeyInFileError( + name, + """{0}.{1} has only the following members: {2} """.format( - type(self).__module__, type(self).__name__, "\n ".join(self._members) - ), - file_path=self._file.file_path, - ) + type(self).__module__, + type(self).__name__, + "\n ".join(self.all_members), + ), + file_path=self._file.file_path, + ) def tojson(self): out = {"_typename": self.classname} diff --git a/uproot4/models/TBasket.py b/uproot4/models/TBasket.py index 2ee063bad..b331597ad 100644 --- a/uproot4/models/TBasket.py +++ b/uproot4/models/TBasket.py @@ -163,7 +163,7 @@ def array(self, interpretation=None): if interpretation is None: interpretation = self._parent.interpretation return interpretation.basket_array( - self.data, self.byte_offsets, self, self.parent + self.data, self.byte_offsets, self, self.parent, self.parent.context ) diff --git a/uproot4/reading.py b/uproot4/reading.py index 3ace81a85..ca86807d3 100644 --- a/uproot4/reading.py +++ b/uproot4/reading.py @@ -1130,7 +1130,9 @@ def iterclassnames( filter_name = uproot4._util.regularize_filter(filter_name) filter_classname = uproot4._util.regularize_filter(filter_classname) for key in self._keys: - if filter_name(key.fName) and filter_classname(key.fClassName): + if (filter_name is no_filter or filter_name(key.fName)) and ( + filter_classname is no_filter or filter_classname(key.fClassName) + ): yield key.name(cycle=cycle), key.fClassName if recursive and key.fClassName in ("TDirectory", 
"TDirectoryFile"): @@ -1142,7 +1144,7 @@ def iterclassnames( ): k2 = "{0}/{1}".format(key.name(cycle=False), k1) k3 = k2[: k2.index(";")] if ";" in k2 else k2 - if filter_name(k3): + if filter_name is no_filter or filter_name(k3): yield k2, v def classnames( @@ -1171,7 +1173,9 @@ def iterkeys( filter_name = uproot4._util.regularize_filter(filter_name) filter_classname = uproot4._util.regularize_filter(filter_classname) for key in self._keys: - if filter_name(key.fName) and filter_classname(key.fClassName): + if (filter_name is no_filter or filter_name(key.fName)) and ( + filter_classname is no_filter or filter_classname(key.fClassName) + ): yield key.name(cycle=cycle) if recursive and key.fClassName in ("TDirectory", "TDirectoryFile"): @@ -1183,7 +1187,7 @@ def iterkeys( ): k2 = "{0}/{1}".format(key.name(cycle=False), k1) k3 = k2[: k2.index(";")] if ";" in k2 else k2 - if filter_name(k3): + if filter_name is no_filter or filter_name(k3): yield k2 def keys( @@ -1212,7 +1216,9 @@ def iteritems( filter_name = uproot4._util.regularize_filter(filter_name) filter_classname = uproot4._util.regularize_filter(filter_classname) for key in self._keys: - if filter_name(key.fName) and filter_classname(key.fClassName): + if (filter_name is no_filter or filter_name(key.fName)) and ( + filter_classname is no_filter or filter_classname(key.fClassName) + ): yield key.name(cycle=cycle), key.get() if recursive and key.fClassName in ("TDirectory", "TDirectoryFile"): @@ -1224,7 +1230,7 @@ def iteritems( ): k2 = "{0}/{1}".format(key.name(cycle=False), k1) k3 = k2[: k2.index(";")] if ";" in k2 else k2 - if filter_name(k3): + if filter_name is no_filter or filter_name(k3): yield k2, v def items( diff --git a/uproot4/source/cursor.py b/uproot4/source/cursor.py index 6f2620783..bc98bd22c 100644 --- a/uproot4/source/cursor.py +++ b/uproot4/source/cursor.py @@ -234,7 +234,11 @@ def bytestring(self, chunk, context, move=True): stop = start + length if move: self._index = stop - return 
chunk.get(start, stop, self, context).tostring() + data = chunk.get(start, stop, self, context) + if hasattr(data, "tobytes"): + return data.tobytes() + else: + return data.tostring() def string(self, chunk, context, move=True): """ @@ -251,6 +255,36 @@ def string(self, chunk, context, move=True): else: return out.decode(errors="surrogateescape") + def bytestring_with_length(self, chunk, context, length, move=True): + """ + Interpret data at this index of the Chunk as an unprefixed, unsuffixed + bytestring with a given length. + + If `move` is False, only peek: don't update the index. + """ + start = self._index + stop = start + length + if move: + self._index = stop + data = chunk.get(start, stop, self, context) + if hasattr(data, "tobytes"): + return data.tobytes() + else: + return data.tostring() + + def string_with_length(self, chunk, context, length, move=True): + """ + Interpret data at this index of the Chunk as an unprefixed, unsuffixed + Python str with a given length. + + If `move` is False, only peek: don't update the index. 
+ """ + out = self.bytestring_with_length(chunk, context, length, move=move) + if uproot4._util.py2: + return out + else: + return out.decode(errors="surrogateescape") + def classname(self, chunk, context, move=True): """ Interpret data at this index of the Chunk as a ROOT class diff --git a/uproot4/stl_containers.py b/uproot4/stl_containers.py index aabfe5f0e..4b506ae16 100644 --- a/uproot4/stl_containers.py +++ b/uproot4/stl_containers.py @@ -2,7 +2,6 @@ from __future__ import absolute_import -import re import types import struct @@ -24,233 +23,51 @@ import uproot4._util import uproot4.model +import uproot4.interpretation.numerical import uproot4.deserialization _stl_container_size = struct.Struct(">I") -_stl_primitive_types = { - numpy.dtype("?"): "bool", - numpy.dtype("i1"): "int8_t", - numpy.dtype("u1"): "uint8_t", - numpy.dtype("i2"): "int16_t", - numpy.dtype(">i2"): "int16_t", - numpy.dtype("u2"): "unt16_t", - numpy.dtype(">u2"): "unt16_t", - numpy.dtype("i4"): "int32_t", - numpy.dtype(">i4"): "int32_t", - numpy.dtype("u4"): "unt32_t", - numpy.dtype(">u4"): "unt32_t", - numpy.dtype("i8"): "int64_t", - numpy.dtype(">i8"): "int64_t", - numpy.dtype("u8"): "unt64_t", - numpy.dtype(">u8"): "unt64_t", - numpy.dtype("f4"): "float", - numpy.dtype(">f4"): "float", - numpy.dtype("f8"): "double", - numpy.dtype(">f8"): "double", -} _stl_object_type = numpy.dtype(numpy.object) -_tokenize_typename_pattern = re.compile( - r"(\b([A-Za-z_][A-Za-z_0-9]*)(\s*::\s*[A-Za-z_][A-Za-z_0-9]*)*\b(\s*\*)*|<|>|,)" -) - -_simplify_token_1 = re.compile(r"\s*\*") -_simplify_token_2 = re.compile(r"\s*::\s*") - - -def _simplify_token(token): - return _simplify_token_2.sub("::", _simplify_token_1.sub("*", token.group(0))) - - -def _parse_error(pos, typename, file): - in_file = "" - if file is not None: - in_file = "\nin file {0}".format(file.file_path) - raise ValueError( - """invalid C++ type name syntax at char {0} - - {1} -{2}{3}""".format( - pos, typename, "-" * (4 + pos) + "^", in_file 
- ) - ) - - -def _parse_expect(what, tokens, i, typename, file): - if i >= len(tokens): - _parse_error(len(typename), typename, file) - - if what is not None and tokens[i].group(0) != what: - _parse_error(tokens[i].start() + 1, typename, file) - - -def _parse_maybe_quote(quoted, quote): - if quote: - return quoted +def _content_typename(content): + if isinstance(content, numpy.dtype): + return uproot4.interpretation.numerical._dtype_kind_itemsize_to_typename[ + content.kind, content.itemsize + ] + elif isinstance(content, type): + return content.classname else: - return eval(quoted) - - -def _parse_node(tokens, i, typename, file, quote): - _parse_expect(None, tokens, i, typename, file) - - has2 = i + 1 < len(tokens) - - if tokens[i].group(0) == ",": - _parse_error(tokens[i].start() + 1, typename, file) - - elif tokens[i].group(0) == "Bool_t": - return i + 1, _parse_maybe_quote('numpy.dtype("?")', quote) - elif tokens[i].group(0) == "bool": - return i + 1, _parse_maybe_quote('numpy.dtype("?")', quote) - - elif tokens[i].group(0) == "Char_t": - return i + 1, _parse_maybe_quote('numpy.dtype("i1")', quote) - elif tokens[i].group(0) == "char": - return i + 1, _parse_maybe_quote('numpy.dtype("i1")', quote) - elif tokens[i].group(0) == "UChar_t": - return i + 1, _parse_maybe_quote('numpy.dtype("u1")', quote) - elif has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "char": - return i + 2, _parse_maybe_quote('numpy.dtype("u1")', quote) - - elif tokens[i].group(0) == "Short_t": - return i + 1, _parse_maybe_quote('numpy.dtype(">i2")', quote) - elif tokens[i].group(0) == "short": - return i + 1, _parse_maybe_quote('numpy.dtype(">i2")', quote) - elif tokens[i].group(0) == "UShort_t": - return i + 1, _parse_maybe_quote('numpy.dtype(">u2")', quote) - elif ( - has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "short" - ): - return i + 2, _parse_maybe_quote('numpy.dtype(">u2")', quote) - - elif tokens[i].group(0) == "Int_t": - return i + 
1, _parse_maybe_quote('numpy.dtype(">i4")', quote) - elif tokens[i].group(0) == "int": - return i + 1, _parse_maybe_quote('numpy.dtype(">i4")', quote) - elif tokens[i].group(0) == "UInt_t": - return i + 1, _parse_maybe_quote('numpy.dtype(">u4")', quote) - elif has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "int": - return i + 2, _parse_maybe_quote('numpy.dtype(">u4")', quote) - - elif tokens[i].group(0) == "Long_t": - return i + 1, _parse_maybe_quote('numpy.dtype(">i8")', quote) - elif tokens[i].group(0) == "Long64_t": - return i + 1, _parse_maybe_quote('numpy.dtype(">i8")', quote) - elif tokens[i].group(0) == "long": - return i + 1, _parse_maybe_quote('numpy.dtype(">i8")', quote) - elif tokens[i].group(0) == "ULong_t": - return i + 1, _parse_maybe_quote('numpy.dtype(">u8")', quote) - elif tokens[i].group(0) == "ULong64_t": - return i + 1, _parse_maybe_quote('numpy.dtype(">u8")', quote) - elif has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "long": - return i + 2, _parse_maybe_quote('numpy.dtype(">u8")', quote) - - elif tokens[i].group(0) == "Float_t": - return i + 1, _parse_maybe_quote('numpy.dtype(">f4")', quote) - elif tokens[i].group(0) == "float": - return i + 1, _parse_maybe_quote('numpy.dtype(">f4")', quote) - - elif tokens[i].group(0) == "Double_t": - return i + 1, _parse_maybe_quote('numpy.dtype(">f8")', quote) - elif tokens[i].group(0) == "double": - return i + 1, _parse_maybe_quote('numpy.dtype(">f8")', quote) - - elif tokens[i].group(0) == "string" or _simplify_token(tokens[i]) == "std::string": - return i + 1, _parse_maybe_quote("uproot4.stl_containers.AsString()", quote) - elif tokens[i].group(0) == "TString": - return ( - i + 1, - _parse_maybe_quote("uproot4.stl_containers.AsString(is_stl=False)", quote), - ) - elif _simplify_token(tokens[i]) == "char*": - return ( - i + 1, - _parse_maybe_quote("uproot4.stl_containers.AsString(is_stl=False)", quote), - ) - elif ( - has2 - and tokens[i].group(0) == 
"const" - and _simplify_token(tokens[i + 1]) == "char*" - ): - return ( - i + 2, - _parse_maybe_quote("uproot4.stl_containers.AsString(is_stl=False)", quote), - ) + return content.typename - elif tokens[i].group(0) == "vector" or _simplify_token(tokens[i]) == "std::vector": - _parse_expect("<", tokens, i + 1, typename, file) - i, values = _parse_node(tokens, i + 2, typename, file, quote) - _parse_expect(">", tokens, i, typename, file) - if quote: - return i + 1, "uproot4.stl_containers.AsVector({0})".format(values) - else: - return i + 1, AsVector(values) - - elif tokens[i].group(0) == "set" or _simplify_token(tokens[i]) == "std::set": - _parse_expect("<", tokens, i + 1, typename, file) - i, keys = _parse_node(tokens, i + 2, typename, file, quote) - _parse_expect(">", tokens, i, typename, file) - if quote: - return i + 1, "uproot4.stl_containers.AsSet({0})".format(keys) - else: - return i + 1, AsSet(keys) - - elif tokens[i].group(0) == "map" or _simplify_token(tokens[i]) == "std::map": - _parse_expect("<", tokens, i + 1, typename, file) - i, keys = _parse_node(tokens, i + 2, typename, file, quote) - _parse_expect(",", tokens, i, typename, file) - i, values = _parse_node(tokens, i + 1, typename, file, quote) - _parse_expect(">", tokens, i, typename, file) - if quote: - return i + 1, "uproot4.stl_containers.AsMap({0}, {1})".format(keys, values) - else: - return i + 1, AsMap(keys, values) +def _content_cache_key(content): + if isinstance(content, numpy.dtype): + bo = uproot4.interpretation.numerical._numpy_byteorder_to_cache_key[ + content.byteorder + ] + return "{0}{1}{2}".format(bo, content.kind, content.itemsize) + elif isinstance(content, type): + return content.__name__ else: - start, stop = tokens[i].span() - - if has2 and tokens[i + 1].group(0) == "<": - i, keys = _parse_node(tokens, i + 1, typename, file, quote) - _parse_expect(">", tokens, i + 1, typename, file) - stop = tokens[i + 1].span()[1] - i += 1 - - classname = typename[start:stop] - - if quote: - 
return "c({0})".format(repr(classname)) - elif file is None: - cls = uproot4.classes[classname] - else: - cls = file.class_named(classname) - - return i + 1, cls - + return content.cache_key -def parse_typename(typename, file=None, quote=False): - tokens = list(_tokenize_typename_pattern.finditer(typename)) - i, out = _parse_node(tokens, 0, typename, file, quote) - - if i < len(tokens): - _parse_error(tokens[i].start(), typename, file) +def _nested_context(context): + out = dict(context) + out["read_stl_header"] = False return out -def _read_nested(model, length, chunk, cursor, context, file, parent): +def _read_nested(model, length, chunk, cursor, context, file, parent, header=True): if isinstance(model, numpy.dtype): return cursor.array(chunk, length, model, context) - elif isinstance(model, AsSTLContainer): - return model.read(chunk, cursor, context, file, parent, multiplicity=length) - else: values = numpy.empty(length, dtype=_stl_object_type) for i in range(length): - values[i] = model.read(chunk, cursor, context, file, parent) + values[i] = model.read(chunk, cursor, context, file, parent, header=header) return values @@ -303,10 +120,27 @@ def _str_with_ellipsis(tostring, length, lbracket, rbracket, limit): class AsSTLContainer(object): @property - def classname(self): + def header(self): + return self._header + + @header.setter + def header(self, value): + if value is True or value is False: + self._header = value + else: + raise TypeError( + "{0}.header must be True or False".format(type(self).__name__) + ) + + @property + def cache_key(self): raise AssertionError - def read(self, chunk, cursor, context, file, parent, multiplicity=None): + @property + def typename(self): + raise AssertionError + + def read(self, chunk, cursor, context, file, parent, header=True): raise AssertionError def __eq__(self, other): @@ -315,62 +149,78 @@ def __eq__(self, other): def __ne__(self, other): return not self == other + def tolist(self): + raise AssertionError + class 
STLContainer(object): - pass + def __ne__(self, other): + return not self == other class AsString(AsSTLContainer): - def __init__(self, is_stl=True): - self._is_stl = is_stl - - def __hash__(self): - return hash((AsString, self._is_stl)) + def __init__(self, header, size_1to5_bytes=True, typename=None): + self.header = header + self._typename = typename + self._size_1to5_bytes = size_1to5_bytes @property - def is_stl(self): - return self._is_stl + def size_1to5_bytes(self): + return self._size_1to5_bytes + + def __hash__(self): + return hash((AsString, self._header, self._size_1to5_bytes)) def __repr__(self): - is_stl = "" - if not self._is_stl: - is_stl = "is_stl=False" - return "AsString({0})".format(is_stl) + args = [repr(self._header)] + if self._size_1to5_bytes is not True: + args.append("size_1to5_bytes={0}".format(self._size_1to5_bytes)) + return "AsString({0})".format(", ".join(args)) + + @property + def cache_key(self): + return "AsString({0},{1})".format(self._header, self._size_1to5_bytes) @property - def classname(self): - if self._is_stl: + def typename(self): + if self._typename is None: return "std::string" else: - return "const char*" + return self._typename - def read(self, chunk, cursor, context, file, parent, multiplicity=None): - if self._is_stl: + def read(self, chunk, cursor, context, file, parent, header=True): + if self._header and header: start_cursor = cursor.copy() num_bytes, instance_version = uproot4.deserialization.numbytes_version( chunk, cursor, context ) - if multiplicity is None: + if self._size_1to5_bytes: out = cursor.string(chunk, context) else: - out = numpy.empty(multiplicity, dtype=_stl_object_type) - for i in range(multiplicity): - out[i] = cursor.string(chunk, context) + length = cursor.field(chunk, _stl_container_size, context) + out = cursor.string_with_length(chunk, context, length) - if self._is_stl: + if self._header and header: uproot4.deserialization.numbytes_check( - start_cursor, cursor, num_bytes, self.classname, 
context, file.file_path + chunk, + start_cursor, + cursor, + num_bytes, + self.typename, + context, + file.file_path, ) return out def __eq__(self, other): - return isinstance(other, AsString) and self.is_stl == other.is_stl + return isinstance(other, AsString) and self.header == other.header class AsVector(AsSTLContainer): - def __init__(self, values): + def __init__(self, header, values): + self.header = header if isinstance(values, AsSTLContainer): self._values = values elif isinstance(values, type) and issubclass(values, uproot4.model.Model): @@ -379,52 +229,69 @@ def __init__(self, values): self._values = numpy.dtype(values) def __hash__(self): - return hash((AsVector, self._values)) + return hash((AsVector, self._header, self._values)) @property def values(self): return self._values def __repr__(self): - return "AsVector({0})".format(repr(self._values)) + return "AsVector({0}, {1})".format(self._header, repr(self._values)) @property - def classname(self): - values = _stl_primitive_types.get(self._values) - if values is None: - values = self._values.classname - return "std::vector<{0}>".format(values) - - def read(self, chunk, cursor, context, file, parent, multiplicity=None): - start_cursor = cursor.copy() - num_bytes, instance_version = uproot4.deserialization.numbytes_version( - chunk, cursor, context + def cache_key(self): + return "AsVector({0},{1})".format( + self._header, _content_cache_key(self._values) ) - length = cursor.field(chunk, _stl_container_size, context) + @property + def typename(self): + return "std::vector<{0}>".format(_content_typename(self._values)) - if multiplicity is None: - values = _read_nested( - self._values, length, chunk, cursor, context, file, parent + def read(self, chunk, cursor, context, file, parent, header=True): + if self._header and header: + start_cursor = cursor.copy() + num_bytes, instance_version = uproot4.deserialization.numbytes_version( + chunk, cursor, context ) - out = STLVector(values) - else: - out = 
numpy.empty(multiplicity, dtype=_stl_object_type) - for i in range(multiplicity): - values = _read_nested( - self._values, length, chunk, cursor, context, file, parent - ) - out[i] = STLVector(values) - - uproot4.deserialization.numbytes_check( - start_cursor, cursor, num_bytes, self.classname, context, file.file_path, + length = cursor.field(chunk, _stl_container_size, context) + + values = _read_nested( + self._values, length, chunk, cursor, context, file, parent ) + out = STLVector(values) + + if self._header and header: + uproot4.deserialization.numbytes_check( + chunk, + start_cursor, + cursor, + num_bytes, + self.typename, + context, + file.file_path, + ) return out def __eq__(self, other): - return isinstance(other, AsVector) and self.values == other.values + if not isinstance(other, AsVector): + return False + + if self.header != other.header: + return False + + if isinstance(self.values, numpy.dtype) and isinstance( + other.values, numpy.dtype + ): + return self.values == other.values + elif not isinstance(self.values, numpy.dtype) and not isinstance( + other.values, numpy.dtype + ): + return self.values == other.values + else: + return False class STLVector(STLContainer, Sequence): @@ -472,12 +339,16 @@ def __eq__(self, other): else: return False - def __ne__(self, other): - return not self == other + def tolist(self): + return [ + x.tolist() if isinstance(x, (STLContainer, numpy.ndarray)) else x + for x in self + ] class AsSet(AsSTLContainer): - def __init__(self, keys): + def __init__(self, header, keys): + self.header = header if isinstance(keys, AsSTLContainer): self._keys = keys elif isinstance(keys, type) and issubclass(keys, uproot4.model.Model): @@ -486,52 +357,63 @@ def __init__(self, keys): self._keys = numpy.dtype(keys) def __hash__(self): - return hash((AsSet, self._keys)) + return hash((AsSet, self._header, self._keys)) @property def keys(self): return self._keys def __repr__(self): - return "AsSet({0})".format(repr(self._keys)) + return 
"AsSet({0}, {1})".format(self._header, repr(self._keys)) @property - def classname(self): - keys = _stl_primitive_types.get(self._keys) - if keys is None: - keys = self._keys.classname - return "std::set<{0}>".format(keys) - - def read(self, chunk, cursor, context, file, parent, multiplicity=None): - start_cursor = cursor.copy() - num_bytes, instance_version = uproot4.deserialization.numbytes_version( - chunk, cursor, context - ) + def cache_key(self): + return "AsSet({0},{1})".format(self._header, _content_cache_key(self._keys)) - length = cursor.field(chunk, _stl_container_size, context) + @property + def typename(self): + return "std::set<{0}>".format(_content_typename(self._keys)) - if multiplicity is None: - keys = _read_nested( - self._keys, length, chunk, cursor, context, file, parent + def read(self, chunk, cursor, context, file, parent, header=True): + if self._header and header: + start_cursor = cursor.copy() + num_bytes, instance_version = uproot4.deserialization.numbytes_version( + chunk, cursor, context ) - out = STLSet(keys) - else: - out = numpy.empty(multiplicity, dtype=_stl_object_type) - for i in range(multiplicity): - keys = _read_nested( - self._keys, length, chunk, cursor, context, file, parent - ) - out[i] = STLSet(keys) - - uproot4.deserialization.numbytes_check( - start_cursor, cursor, num_bytes, self.classname, context, file.file_path, - ) + length = cursor.field(chunk, _stl_container_size, context) + + keys = _read_nested(self._keys, length, chunk, cursor, context, file, parent) + out = STLSet(keys) + + if self._header and header: + uproot4.deserialization.numbytes_check( + chunk, + start_cursor, + cursor, + num_bytes, + self.typename, + context, + file.file_path, + ) return out def __eq__(self, other): - return isinstance(other, AsSet) and self.keys == other.keys + if not isinstance(other, AsSet): + return False + + if self.header != other.header: + return False + + if isinstance(self.keys, numpy.dtype) and isinstance(other.keys, 
numpy.dtype): + return self.keys == other.keys + elif not isinstance(self.keys, numpy.dtype) and not isinstance( + other.keys, numpy.dtype + ): + return self.keys == other.keys + else: + return False class STLSet(STLContainer, Set): @@ -591,12 +473,24 @@ def __eq__(self, other): else: return numpy.all(keys_same) - def __ne__(self, other): - return not self == other + def tolist(self): + return set( + x.tolist() if isinstance(x, (STLContainer, numpy.ndarray)) else x + for x in self + ) + + +def _has_nested_header(obj): + if isinstance(obj, AsSTLContainer): + return obj.header + else: + return False class AsMap(AsSTLContainer): - def __init__(self, keys, values): + def __init__(self, header, keys, values): + self.header = header + if isinstance(keys, AsSTLContainer): self._keys = keys else: @@ -610,7 +504,7 @@ def __init__(self, keys, values): self._values = numpy.dtype(values) def __hash__(self): - return hash((AsMap, self._keys, self._values)) + return hash((AsMap, self._header, self._keys, self._values)) @property def keys(self): @@ -621,60 +515,89 @@ def values(self): return self._values def __repr__(self): - return "AsMap({0}, {1})".format(repr(self._keys), repr(self._values)) + return "AsMap({0}, {1}, {2})".format( + self._header, repr(self._keys), repr(self._values) + ) @property - def classname(self): - keys = _stl_primitive_types.get(self._keys) - if keys is None: - keys = self._keys.classname - values = _stl_primitive_types.get(self._values) - if values is None: - values = self._values.classname - return "std::map<{0}, {1}>".format(keys, values) - - def read(self, chunk, cursor, context, file, parent, multiplicity=None): - start_cursor = cursor.copy() - num_bytes, instance_version = uproot4.deserialization.numbytes_version( - chunk, cursor, context + def cache_key(self): + return "AsMap({0},{1},{2})".format( + self._header, + _content_cache_key(self._keys), + _content_cache_key(self._values), ) - cursor.skip(6) + @property + def typename(self): + return 
"std::map<{0}, {1}>".format( + _content_typename(self._keys), _content_typename(self._values) + ) + + def read(self, chunk, cursor, context, file, parent, header=True): + if self._header and header: + start_cursor = cursor.copy() + num_bytes, instance_version = uproot4.deserialization.numbytes_version( + chunk, cursor, context + ) + cursor.skip(6) length = cursor.field(chunk, _stl_container_size, context) - if multiplicity is None: - keys = _read_nested( - self._keys, length, chunk, cursor, context, file, parent - ) - values = _read_nested( - self._values, length, chunk, cursor, context, file, parent - ) - out = STLMap(keys, values) + if _has_nested_header(self._keys) and header: + cursor.skip(6) + keys = _read_nested( + self._keys, length, chunk, cursor, context, file, parent, header=False + ) - else: - out = numpy.empty(multiplicity, dtype=_stl_object_type) - for i in range(multiplicity): - keys = _read_nested( - self._keys, length, chunk, cursor, context, file, parent - ) - values = _read_nested( - self._values, length, chunk, cursor, context, file, parent - ) - out[i] = STLMap(keys, values) - - uproot4.deserialization.numbytes_check( - start_cursor, cursor, num_bytes, self.classname, context, file.file_path, + if _has_nested_header(self._values) and header: + cursor.skip(6) + values = _read_nested( + self._values, length, chunk, cursor, context, file, parent, header=False ) + out = STLMap(keys, values) + + if self._header and header: + uproot4.deserialization.numbytes_check( + chunk, + start_cursor, + cursor, + num_bytes, + self.typename, + context, + file.file_path, + ) + return out def __eq__(self, other): - return ( - isinstance(other, AsMap) - and self.keys == other.keys - and self.values == other.values - ) + if not isinstance(other, AsMap): + return False + + if self.header != other.header: + return False + + if isinstance(self.keys, numpy.dtype) and isinstance(other.keys, numpy.dtype): + if self.keys != other.keys: + return False + elif not 
isinstance(self.keys, numpy.dtype) and not isinstance( + other.keys, numpy.dtype + ): + if self.keys != other.keys: + return False + else: + return False + + if isinstance(self.values, numpy.dtype) and isinstance( + other.values, numpy.dtype + ): + return self.values == other.values + elif not isinstance(self.values, numpy.dtype) and not isinstance( + other.values, numpy.dtype + ): + return self.values == other.values + else: + return False class STLMap(STLContainer, Mapping): @@ -803,5 +726,12 @@ def __eq__(self, other): else: return numpy.logical_and(keys_same, values_same).all() - def __ne__(self, other): - return not self == other + def tolist(self): + out = {} + for i in range(len(self)): + x = self._values[i] + if isinstance(x, (STLContainer, numpy.ndarray)): + out[self._keys[i]] = x.tolist() + else: + out[self._keys[i]] = x + return out diff --git a/uproot4/streamers.py b/uproot4/streamers.py index 2ea1f416d..67e770859 100644 --- a/uproot4/streamers.py +++ b/uproot4/streamers.py @@ -13,6 +13,7 @@ import uproot4.const import uproot4.deserialization import uproot4.models.TNamed +import uproot4.interpretation.identify _canonical_typename_patterns = [ @@ -159,6 +160,10 @@ def __repr__(self): def name(self): return self.member("fName") + @property + def typename(self): + return self.member("fName") + @property def class_version(self): return self._members["fClassVersion"] @@ -195,9 +200,6 @@ def show(self, stream=sys.stdout): def new_class(self, file): class_code = self.class_code() - - print(class_code) - class_name = uproot4.model.classname_encode(self.name, self.class_version) classes = uproot4.model.maybe_custom_classes(file.custom_classes) return uproot4.deserialization.compile_class( @@ -756,10 +758,12 @@ def class_code( member_names, class_flags, ): - stl_container = uproot4.stl_containers.parse_typename(self.typename, quote=True) + stl_container = uproot4.interpretation.identify.parse_typename( + self.typename, quote=True, outer_header=True, 
inner_header=False + ) read_members.append( " self._members[{0}] = self._stl_container{1}.read(" - "chunk, cursor, context, self._file, self._parent, multiplicity=1)" + "chunk, cursor, context, self._file, self._parent)" "".format(repr(self.name), len(stl_containers)) ) stl_containers.append(stl_container)