From a7d71de3bb7f9a28ff3444d32595983c425ed1db Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 17 Jun 2020 18:40:55 -0500 Subject: [PATCH] Develop more interpretations. (#23) * Develop more interpretations. * Fixed some subtleties in filename handling and remote errors. * Replaced the vertical bar syntax 'myfile.root | mytree' with a colon 'myfile.root : mytree' with the appropriate precautions for URIs and Windows drive letters. * Manage some corner cases in computable expressions. * Implemented the string type and ported test_versions from Uproot3. (Testing 10 years of ROOT TTree versions.) * Placeholders for some more Interpretation-tests. * Worked through some bugs pre-testing Double32. (The demo-double32.root file has the wrong TBranch streamer version.) --- tests/test_0016-interpretations.py | 4 +- tests/test_0023-more-interpretations-1.py | 134 +++ tests/test_versions.py | 1169 +++++++++++++++++++++ uproot4/__init__.py | 2 +- uproot4/_util.py | 86 +- uproot4/behaviors/TBranch.py | 90 +- uproot4/behaviors/TBranchElement.py | 9 + uproot4/compute/python.py | 146 ++- uproot4/deserialization.py | 3 +- uproot4/interpretation/identify.py | 84 +- uproot4/interpretation/strings.py | 205 +++- uproot4/reading.py | 27 +- uproot4/source/chunk.py | 6 +- uproot4/source/file.py | 2 +- uproot4/source/http.py | 5 +- uproot4/source/memmap.py | 4 +- 16 files changed, 1869 insertions(+), 107 deletions(-) create mode 100644 tests/test_0023-more-interpretations-1.py create mode 100644 tests/test_versions.py create mode 100644 uproot4/behaviors/TBranchElement.py diff --git a/tests/test_0016-interpretations.py b/tests/test_0016-interpretations.py index cb722140d..967d993a8 100644 --- a/tests/test_0016-interpretations.py +++ b/tests/test_0016-interpretations.py @@ -81,7 +81,7 @@ def test_recovery(mini): # flat array to recover: filename = skhep_testdata.data_path("uproot-issue21.root") with uproot4.open( - "file:" + filename + " | nllscan/mH", minimal_ttree_metadata=mini + "file:" + filename + " : nllscan/mH", minimal_ttree_metadata=mini ) as branch: basket = branch.basket(0) assert basket.data.view(">f8").tolist()[:10] == [ @@ -107,7 +107,7 @@ def test_recovery(mini): # uproot-from-geant4.root Details: numgood, TrackedRays: Event phi filename = skhep_testdata.data_path("uproot-issue327.root") with uproot4.open( - "file:" + filename + " | DstTree/fTracks.fCharge", minimal_ttree_metadata=mini + "file:" + filename + " : DstTree/fTracks.fCharge", minimal_ttree_metadata=mini ) as branch: basket = branch.basket(0) assert basket.data.view("i1")[:10].tolist() == [ diff --git a/tests/test_0023-more-interpretations-1.py b/tests/test_0023-more-interpretations-1.py new file mode 100644 index 000000000..933b7bef3 --- /dev/null +++ b/tests/test_0023-more-interpretations-1.py @@ -0,0 +1,134 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE + +from __future__ import absolute_import + +import sys +import json + +import numpy +import pytest +import skhep_testdata + +import uproot4 +import uproot4.interpretation.library +import uproot4.interpretation.jagged +import uproot4.interpretation.numerical + + +def test_formula_with_dot(): + with uproot4.open( + skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") + )["tree"] as tree: + assert tree.arrays("P3.Py - 50", library="np")["P3.Py - 50"].tolist() == list( + range(-50, 50) + ) + + +def test_formula_with_slash(): + with uproot4.open( + skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") + )["tree"] as tree: + assert tree.arrays("get('evt/P3/P3.Py') - 50", library="np")[ + "get('evt/P3/P3.Py') - 50" + ].tolist() == list(range(-50, 50)) + + +def test_formula_with_missing(): + with uproot4.open( + skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") + )["tree"] as tree: + with pytest.raises(KeyError): + tree.arrays("wonky", library="np") + + +def test_strings1(): + with uproot4.open( + skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root") + )["sample/str"] as branch: + result = branch.array(library="np") + assert result.tolist() == ["hey-{0}".format(i) for i in range(30)] + + +@pytest.mark.skip(reason="FIXME: implement strings specified by a TStreamer") +def test_strings2(): + with uproot4.open( + skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") + )["tree/Str"] as branch: + result = branch.array(library="np") + assert result.tolist() == ["evt-{0:03d}".format(i) for i in range(100)] + + +@pytest.mark.skip(reason="FIXME: implement std::string") +def test_strings3(): + with uproot4.open( + skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") + )["tree/StdStr"] as branch: + result = branch.array(library="np") + assert result.tolist() == ["std-{0:03d}".format(i) for i in range(100)] + + +@pytest.mark.skip(reason="FIXME: implement std::vector") +def test_strings4(): + with uproot4.open( + skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") + )["tree/StlVecStr"] as branch: + result = branch.array(library="np") + assert [result.tolist() for x in result] == [ + ["vec-{0:03d}".format(i)] * (i % 10) for i in range(100) + ] + + +@pytest.mark.skip(reason="FIXME: implement unsplit object") +def test_strings4(): + with uproot4.open(skhep_testdata.data_path("uproot-small-evnt-tree-nosplit.root"))[ + "tree/evt" + ] as branch: + result = branch.array(library="np") + assert [result.member("StlVecStr").tolist() for x in result] == [ + ["vec-{0:03d}".format(i)] * (i % 10) for i in range(100) + ] + + +@pytest.mark.skip(reason="FIXME: implement std::vector>") +def test_strings4(): + with uproot4.open(skhep_testdata.data_path("uproot-vectorVectorDouble.root"))[ + "t/x" + ] as branch: + result = branch.array(library="np") + assert [x.tolist() for x in result] == [ + [], + [[], []], + [[10.0], [], [10.0, 20.0]], + [[20.0, -21.0, -22.0]], + [[200.0], [-201.0], [202.0]], + ] + + +@pytest.mark.skip(reason="FIXME: implement Double32") +def test_double32(): + del uproot4.classes["TBranch"] + del uproot4.classes["TBranchElement"] + + with uproot4.open( + skhep_testdata.data_path("uproot-demo-double32.root"), + )["T"] as t: + + print(t["fD64"].interpretation) + print(t["fF32"].interpretation) + print(t["fI32"].interpretation) + print(t["fI30"].interpretation) + print(t["fI28"].interpretation) + + fD64 = t["fD64"].array(library="np") + fF32 = t["fF32"].array(library="np") + fI32 = t["fI32"].array(library="np") + fI30 = t["fI30"].array(library="np") + fI28 = t["fI28"].array(library="np") + ratio_fF32 = fF32 / fD64 + ratio_fI32 = fI32 / fD64 + ratio_fI30 = fI30 / fD64 + ratio_fI28 = fI28 / fD64 + assert ratio_fF32.min() > 0.9999 and ratio_fF32.max() < 1.0001 + assert ratio_fI32.min() > 0.9999 and ratio_fI32.max() < 1.0001 + assert ratio_fI30.min() > 0.9999 and ratio_fI30.max() < 1.0001 + assert ratio_fI28.min() > 0.9999 and ratio_fI28.max() < 1.0001 diff --git a/tests/test_versions.py b/tests/test_versions.py new file mode 100644 index 000000000..97150b0c7 --- /dev/null +++ b/tests/test_versions.py @@ -0,0 +1,1169 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE + +from __future__ import absolute_import + +import numpy +import pytest +import skhep_testdata + +import uproot4 + +truth = { + "n": [ + 0, + 1, + 2, + 3, + 4, + 0, + 1, + 2, + 3, + 4, + 0, + 1, + 2, + 3, + 4, + 0, + 1, + 2, + 3, + 4, + 0, + 1, + 2, + 3, + 4, + 0, + 1, + 2, + 3, + 4, + ], + "b": [ + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + True, + False, + ], + "ab": [ + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + [False, True, False], + [True, False, True], + ], + "Ab": [ + [], + [True], + [True, True], + [True, True, True], + [True, True, True, True], + [], + [False], + [False, False], + [False, False, False], + [False, False, False, False], + [], + [True], + [True, True], + [True, True, True], + [True, True, True, True], + [], + [False], + [False, False], + [False, False, False], + [False, False, False, False], + [], + [True], + [True, True], + [True, True, True], + [True, True, True, True], + [], + [False], + [False, False], + [False, False, False], + [False, False, False, False], + ], + "i1": [ + -15, + -14, + -13, + -12, + -11, + -10, + -9, + -8, + -7, + -6, + -5, + -4, + -3, + -2, + -1, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + ], + "ai1": [ + [-14, -13, -12], + [-13, -12, -11], + [-12, -11, -10], + [-11, -10, -9], + [-10, -9, -8], + [-9, -8, -7], + [-8, -7, -6], + [-7, -6, -5], + [-6, -5, -4], + [-5, -4, -3], + [-4, -3, -2], + [-3, -2, -1], + [-2, -1, 0], + [-1, 0, 1], + [0, 1, 2], + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + [4, 5, 6], + [5, 6, 7], + [6, 7, 8], + [7, 8, 9], + [8, 9, 10], + [9, 10, 11], + [10, 11, 12], + [11, 12, 13], + [12, 13, 14], + [13, 14, 15], + [14, 15, 16], + [15, 16, 17], + ], + "Ai1": [ + [], + [-15], + [-15, -13], + [-15, -13, -11], + [-15, -13, -11, -9], + [], + [-10], + [-10, -8], + [-10, -8, -6], + [-10, -8, -6, -4], + [], + [-5], + [-5, -3], + [-5, -3, -1], + [-5, -3, -1, 1], + [], + [0], + [0, 2], + [0, 2, 4], + [0, 2, 4, 6], + [], + [5], + [5, 7], + [5, 7, 9], + [5, 7, 9, 11], + [], + [10], + [10, 12], + [10, 12, 14], + [10, 12, 14, 16], + ], + "u1": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + ], + "au1": [ + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + [4, 5, 6], + [5, 6, 7], + [6, 7, 8], + [7, 8, 9], + [8, 9, 10], + [9, 10, 11], + [10, 11, 12], + [11, 12, 13], + [12, 13, 14], + [13, 14, 15], + [14, 15, 16], + [15, 16, 17], + [16, 17, 18], + [17, 18, 19], + [18, 19, 20], + [19, 20, 21], + [20, 21, 22], + [21, 22, 23], + [22, 23, 24], + [23, 24, 25], + [24, 25, 26], + [25, 26, 27], + [26, 27, 28], + [27, 28, 29], + [28, 29, 30], + [29, 30, 31], + [30, 31, 32], + ], + "Au1": [ + [], + [0], + [0, 2], + [0, 2, 4], + [0, 2, 4, 6], + [], + [5], + [5, 7], + [5, 7, 9], + [5, 7, 9, 11], + [], + [10], + [10, 12], + [10, 12, 14], + [10, 12, 14, 16], + [], + [15], + [15, 17], + [15, 17, 19], + [15, 17, 19, 21], + [], + [20], + [20, 22], + [20, 22, 24], + [20, 22, 24, 26], + [], + [25], + [25, 27], + [25, 27, 29], + [25, 27, 29, 31], + ], + "i2": [ + -15, + -14, + -13, + -12, + -11, + -10, + -9, + -8, + -7, + -6, + -5, + -4, + -3, + -2, + -1, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + ], + "ai2": [ + [-14, -13, -12], + [-13, -12, -11], + [-12, -11, -10], + [-11, -10, -9], + [-10, -9, -8], + [-9, -8, -7], + [-8, -7, -6], + [-7, -6, -5], + [-6, -5, -4], + [-5, -4, -3], + [-4, -3, -2], + [-3, -2, -1], + [-2, -1, 0], + [-1, 0, 1], + [0, 1, 2], + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + [4, 5, 6], + [5, 6, 7], + [6, 7, 8], + [7, 8, 9], + [8, 9, 10], + [9, 10, 11], + [10, 11, 12], + [11, 12, 13], + [12, 13, 14], + [13, 14, 15], + [14, 15, 16], + [15, 16, 17], + ], + "Ai2": [ + [], + [-15], + [-15, -13], + [-15, -13, -11], + [-15, -13, -11, -9], + [], + [-10], + [-10, -8], + [-10, -8, -6], + [-10, -8, -6, -4], + [], + [-5], + [-5, -3], + [-5, -3, -1], + [-5, -3, -1, 1], + [], + [0], + [0, 2], + [0, 2, 4], + [0, 2, 4, 6], + [], + [5], + [5, 7], + [5, 7, 9], + [5, 7, 9, 11], + [], + [10], + [10, 12], + [10, 12, 14], + [10, 12, 14, 16], + ], + "u2": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + ], + "au2": [ + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + [4, 5, 6], + [5, 6, 7], + [6, 7, 8], + [7, 8, 9], + [8, 9, 10], + [9, 10, 11], + [10, 11, 12], + [11, 12, 13], + [12, 13, 14], + [13, 14, 15], + [14, 15, 16], + [15, 16, 17], + [16, 17, 18], + [17, 18, 19], + [18, 19, 20], + [19, 20, 21], + [20, 21, 22], + [21, 22, 23], + [22, 23, 24], + [23, 24, 25], + [24, 25, 26], + [25, 26, 27], + [26, 27, 28], + [27, 28, 29], + [28, 29, 30], + [29, 30, 31], + [30, 31, 32], + ], + "Au2": [ + [], + [0], + [0, 2], + [0, 2, 4], + [0, 2, 4, 6], + [], + [5], + [5, 7], + [5, 7, 9], + [5, 7, 9, 11], + [], + [10], + [10, 12], + [10, 12, 14], + [10, 12, 14, 16], + [], + [15], + [15, 17], + [15, 17, 19], + [15, 17, 19, 21], + [], + [20], + [20, 22], + [20, 22, 24], + [20, 22, 24, 26], + [], + [25], + [25, 27], + [25, 27, 29], + [25, 27, 29, 31], + ], + "i4": [ + -15, + -14, + -13, + -12, + -11, + -10, + -9, + -8, + -7, + -6, + -5, + -4, + -3, + -2, + -1, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + ], + "ai4": [ + [-14, -13, -12], + [-13, -12, -11], + [-12, -11, -10], + [-11, -10, -9], + [-10, -9, -8], + [-9, -8, -7], + [-8, -7, -6], + [-7, -6, -5], + [-6, -5, -4], + [-5, -4, -3], + [-4, -3, -2], + [-3, -2, -1], + [-2, -1, 0], + [-1, 0, 1], + [0, 1, 2], + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + [4, 5, 6], + [5, 6, 7], + [6, 7, 8], + [7, 8, 9], + [8, 9, 10], + [9, 10, 11], + [10, 11, 12], + [11, 12, 13], + [12, 13, 14], + [13, 14, 15], + [14, 15, 16], + [15, 16, 17], + ], + "Ai4": [ + [], + [-15], + [-15, -13], + [-15, -13, -11], + [-15, -13, -11, -9], + [], + [-10], + [-10, -8], + [-10, -8, -6], + [-10, -8, -6, -4], + [], + [-5], + [-5, -3], + [-5, -3, -1], + [-5, -3, -1, 1], + [], + [0], + [0, 2], + [0, 2, 4], + [0, 2, 4, 6], + [], + [5], + [5, 7], + [5, 7, 9], + [5, 7, 9, 11], + [], + [10], + [10, 12], + [10, 12, 14], + [10, 12, 14, 16], + ], + "u4": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + ], + "au4": [ + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + [4, 5, 6], + [5, 6, 7], + [6, 7, 8], + [7, 8, 9], + [8, 9, 10], + [9, 10, 11], + [10, 11, 12], + [11, 12, 13], + [12, 13, 14], + [13, 14, 15], + [14, 15, 16], + [15, 16, 17], + [16, 17, 18], + [17, 18, 19], + [18, 19, 20], + [19, 20, 21], + [20, 21, 22], + [21, 22, 23], + [22, 23, 24], + [23, 24, 25], + [24, 25, 26], + [25, 26, 27], + [26, 27, 28], + [27, 28, 29], + [28, 29, 30], + [29, 30, 31], + [30, 31, 32], + ], + "Au4": [ + [], + [0], + [0, 2], + [0, 2, 4], + [0, 2, 4, 6], + [], + [5], + [5, 7], + [5, 7, 9], + [5, 7, 9, 11], + [], + [10], + [10, 12], + [10, 12, 14], + [10, 12, 14, 16], + [], + [15], + [15, 17], + [15, 17, 19], + [15, 17, 19, 21], + [], + [20], + [20, 22], + [20, 22, 24], + [20, 22, 24, 26], + [], + [25], + [25, 27], + [25, 27, 29], + [25, 27, 29, 31], + ], + "i8": [ + -15, + -14, + -13, + -12, + -11, + -10, + -9, + -8, + -7, + -6, + -5, + -4, + -3, + -2, + -1, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + ], + "ai8": [ + [-14, -13, -12], + [-13, -12, -11], + [-12, -11, -10], + [-11, -10, -9], + [-10, -9, -8], + [-9, -8, -7], + [-8, -7, -6], + [-7, -6, -5], + [-6, -5, -4], + [-5, -4, -3], + [-4, -3, -2], + [-3, -2, -1], + [-2, -1, 0], + [-1, 0, 1], + [0, 1, 2], + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + [4, 5, 6], + [5, 6, 7], + [6, 7, 8], + [7, 8, 9], + [8, 9, 10], + [9, 10, 11], + [10, 11, 12], + [11, 12, 13], + [12, 13, 14], + [13, 14, 15], + [14, 15, 16], + [15, 16, 17], + ], + "Ai8": [ + [], + [-15], + [-15, -13], + [-15, -13, -11], + [-15, -13, -11, -9], + [], + [-10], + [-10, -8], + [-10, -8, -6], + [-10, -8, -6, -4], + [], + [-5], + [-5, -3], + [-5, -3, -1], + [-5, -3, -1, 1], + [], + [0], + [0, 2], + [0, 2, 4], + [0, 2, 4, 6], + [], + [5], + [5, 7], + [5, 7, 9], + [5, 7, 9, 11], + [], + [10], + [10, 12], + [10, 12, 14], + [10, 12, 14, 16], + ], + "u8": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + ], + "au8": [ + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + [4, 5, 6], + [5, 6, 7], + [6, 7, 8], + [7, 8, 9], + [8, 9, 10], + [9, 10, 11], + [10, 11, 12], + [11, 12, 13], + [12, 13, 14], + [13, 14, 15], + [14, 15, 16], + [15, 16, 17], + [16, 17, 18], + [17, 18, 19], + [18, 19, 20], + [19, 20, 21], + [20, 21, 22], + [21, 22, 23], + [22, 23, 24], + [23, 24, 25], + [24, 25, 26], + [25, 26, 27], + [26, 27, 28], + [27, 28, 29], + [28, 29, 30], + [29, 30, 31], + [30, 31, 32], + ], + "Au8": [ + [], + [0], + [0, 2], + [0, 2, 4], + [0, 2, 4, 6], + [], + [5], + [5, 7], + [5, 7, 9], + [5, 7, 9, 11], + [], + [10], + [10, 12], + [10, 12, 14], + [10, 12, 14, 16], + [], + [15], + [15, 17], + [15, 17, 19], + [15, 17, 19, 21], + [], + [20], + [20, 22], + [20, 22, 24], + [20, 22, 24, 26], + [], + [25], + [25, 27], + [25, 27, 29], + [25, 27, 29, 31], + ], + "f4": [ + -14.899999618530273, + -13.899999618530273, + -12.899999618530273, + -11.899999618530273, + -10.899999618530273, + -9.899999618530273, + -8.899999618530273, + -7.900000095367432, + -6.900000095367432, + -5.900000095367432, + -4.900000095367432, + -3.9000000953674316, + -2.9000000953674316, + -1.899999976158142, + -0.8999999761581421, + 0.10000000149011612, + 1.100000023841858, + 2.0999999046325684, + 3.0999999046325684, + 4.099999904632568, + 5.099999904632568, + 6.099999904632568, + 7.099999904632568, + 8.100000381469727, + 9.100000381469727, + 10.100000381469727, + 11.100000381469727, + 12.100000381469727, + 13.100000381469727, + 14.100000381469727, + ], + "af4": [ + [-13.899999618530273, -12.899999618530273, -11.899999618530273], + [-12.899999618530273, -11.899999618530273, -10.899999618530273], + [-11.899999618530273, -10.899999618530273, -9.899999618530273], + [-10.899999618530273, -9.899999618530273, -8.899999618530273], + [-9.899999618530273, -8.899999618530273, -7.900000095367432], + [-8.899999618530273, -7.900000095367432, -6.900000095367432], + [-7.900000095367432, -6.900000095367432, -5.900000095367432], + [-6.900000095367432, -5.900000095367432, -4.900000095367432], + [-5.900000095367432, -4.900000095367432, -3.9000000953674316], + [-4.900000095367432, -3.9000000953674316, -2.9000000953674316], + [-3.9000000953674316, -2.9000000953674316, -1.899999976158142], + [-2.9000000953674316, -1.899999976158142, -0.8999999761581421], + [-1.899999976158142, -0.8999999761581421, 0.10000000149011612], + [-0.8999999761581421, 0.10000000149011612, 1.100000023841858], + [0.10000000149011612, 1.100000023841858, 2.0999999046325684], + [1.100000023841858, 2.0999999046325684, 3.0999999046325684], + [2.0999999046325684, 3.0999999046325684, 4.099999904632568], + [3.0999999046325684, 4.099999904632568, 5.099999904632568], + [4.099999904632568, 5.099999904632568, 6.099999904632568], + [5.099999904632568, 6.099999904632568, 7.099999904632568], + [6.099999904632568, 7.099999904632568, 8.100000381469727], + [7.099999904632568, 8.100000381469727, 9.100000381469727], + [8.100000381469727, 9.100000381469727, 10.100000381469727], + [9.100000381469727, 10.100000381469727, 11.100000381469727], + [10.100000381469727, 11.100000381469727, 12.100000381469727], + [11.100000381469727, 12.100000381469727, 13.100000381469727], + [12.100000381469727, 13.100000381469727, 14.100000381469727], + [13.100000381469727, 14.100000381469727, 15.100000381469727], + [14.100000381469727, 15.100000381469727, 16.100000381469727], + [15.100000381469727, 16.100000381469727, 17.100000381469727], + ], + "Af4": [ + [], + [-15.0], + [-15.0, -13.899999618530273], + [-15.0, -13.899999618530273, -12.800000190734863], + [-15.0, -13.899999618530273, -12.800000190734863, -11.699999809265137], + [], + [-10.0], + [-10.0, -8.899999618530273], + [-10.0, -8.899999618530273, -7.800000190734863], + [-10.0, -8.899999618530273, -7.800000190734863, -6.699999809265137], + [], + [-5.0], + [-5.0, -3.9000000953674316], + [-5.0, -3.9000000953674316, -2.799999952316284], + [-5.0, -3.9000000953674316, -2.799999952316284, -1.7000000476837158], + [], + [0.0], + [0.0, 1.100000023841858], + [0.0, 1.100000023841858, 2.200000047683716], + [0.0, 1.100000023841858, 2.200000047683716, 3.299999952316284], + [], + [5.0], + [5.0, 6.099999904632568], + [5.0, 6.099999904632568, 7.199999809265137], + [5.0, 6.099999904632568, 7.199999809265137, 8.300000190734863], + [], + [10.0], + [10.0, 11.100000381469727], + [10.0, 11.100000381469727, 12.199999809265137], + [10.0, 11.100000381469727, 12.199999809265137, 13.300000190734863], + ], + "f8": [ + -14.9, + -13.9, + -12.9, + -11.9, + -10.9, + -9.9, + -8.9, + -7.9, + -6.9, + -5.9, + -4.9, + -3.9000000000000004, + -2.9000000000000004, + -1.9000000000000004, + -0.9000000000000004, + 0.09999999999999964, + 1.0999999999999996, + 2.0999999999999996, + 3.0999999999999996, + 4.1, + 5.1, + 6.1, + 7.1, + 8.1, + 9.1, + 10.1, + 11.1, + 12.1, + 13.1, + 14.1, + ], + "af8": [ + [-13.9, -12.9, -11.9], + [-12.9, -11.9, -10.9], + [-11.9, -10.9, -9.9], + [-10.9, -9.9, -8.9], + [-9.9, -8.9, -7.9], + [-8.9, -7.9, -6.9], + [-7.9, -6.9, -5.9], + [-6.9, -5.9, -4.9], + [-5.9, -4.9, -3.9000000000000004], + [-4.9, -3.9000000000000004, -2.9000000000000004], + [-3.9000000000000004, -2.9000000000000004, -1.9000000000000004], + [-2.9000000000000004, -1.9000000000000004, -0.9000000000000004], + [-1.9000000000000004, -0.9000000000000004, 0.09999999999999964], + [-0.9000000000000004, 0.09999999999999964, 1.0999999999999996], + [0.09999999999999964, 1.0999999999999996, 2.0999999999999996], + [1.0999999999999996, 2.0999999999999996, 3.0999999999999996], + [2.0999999999999996, 3.0999999999999996, 4.1], + [3.0999999999999996, 4.1, 5.1], + [4.1, 5.1, 6.1], + [5.1, 6.1, 7.1], + [6.1, 7.1, 8.1], + [7.1, 8.1, 9.1], + [8.1, 9.1, 10.1], + [9.1, 10.1, 11.1], + [10.1, 11.1, 12.1], + [11.1, 12.1, 13.1], + [12.1, 13.1, 14.1], + [13.1, 14.1, 15.1], + [14.1, 15.1, 16.1], + [15.1, 16.1, 17.1], + ], + "Af8": [ + [], + [-15.0], + [-15.0, -13.9], + [-15.0, -13.9, -12.8], + [-15.0, -13.9, -12.8, -11.7], + [], + [-10.0], + [-10.0, -8.9], + [-10.0, -8.9, -7.8], + [-10.0, -8.9, -7.8, -6.7], + [], + [-5.0], + [-5.0, -3.9], + [-5.0, -3.9, -2.8], + [-5.0, -3.9, -2.8, -1.7], + [], + [0.0], + [0.0, 1.1], + [0.0, 1.1, 2.2], + [0.0, 1.1, 2.2, 3.3], + [], + [5.0], + [5.0, 6.1], + [5.0, 6.1, 7.2], + [5.0, 6.1, 7.2, 8.3], + [], + [10.0], + [10.0, 11.1], + [10.0, 11.1, 12.2], + [10.0, 11.1, 12.2, 13.3], + ], + "str": [ + "hey-0", + "hey-1", + "hey-2", + "hey-3", + "hey-4", + "hey-5", + "hey-6", + "hey-7", + "hey-8", + "hey-9", + "hey-10", + "hey-11", + "hey-12", + "hey-13", + "hey-14", + "hey-15", + "hey-16", + "hey-17", + "hey-18", + "hey-19", + "hey-20", + "hey-21", + "hey-22", + "hey-23", + "hey-24", + "hey-25", + "hey-26", + "hey-27", + "hey-28", + "hey-29", + ], +} + + +@pytest.mark.parametrize( + "version", + [ + "5.23.02", # 2009-02-26, TTree version 16, TBranch version 11 + "5.24.00", # 2009-06-30, TTree version 16, TBranch version 11 + "5.25.02", # 2009-10-01, TTree version 17, TBranch version 12 + "5.26.00", # 2009-12-14, TTree version 18, TBranch version 12 + "5.27.02", # 2010-04-27, TTree version 18, TBranch version 12 + "5.28.00", # 2010-12-15, TTree version 18, TBranch version 12 + "5.29.02", # 2011-04-21, TTree version 18, TBranch version 12 + "5.30.00", # 2011-06-28, TTree version 19, TBranch version 12 + "6.08.04", # 2017-01-13, TTree version 19, TBranch version 12 + "6.10.05", # 2017-07-28, TTree version 19, TBranch version 12 is this 6.10.04? + "6.14.00", # 2018-06-13, TTree version 20, TBranch version 13 + "6.16.00", # 2019-01-23, TTree version 20, TBranch version 13 + "6.18.00", # 2019-06-25, TTree version 20, TBranch version 13 + "6.20.04", # 2020-04-01, TTree version 20, TBranch version 13 + ], +) +def test(version): + with uproot4.open( + skhep_testdata.data_path("uproot-sample-{0}-uncompressed.root".format(version)) + )["sample"] as sample: + arrays = sample.arrays(sample.keys(), library="np") + + assert set(arrays.keys()) == set(truth.keys()) + for key in truth.keys(): + if isinstance( + sample[key].interpretation, uproot4.interpretation.jagged.AsJagged + ): + assert [row.tolist() for row in arrays[key]] == truth[key] + else: + assert arrays[key].tolist() == truth[key] + + assert sample.file._streamers is None diff --git a/uproot4/__init__.py b/uproot4/__init__.py index cf5c6013b..3ef1fe544 100644 --- a/uproot4/__init__.py +++ b/uproot4/__init__.py @@ -111,7 +111,7 @@ def __str__(self): if self.object_path is None: object_path = "" else: - object_path = " at {0}".format(self.object_path) + object_path = "\nin object {0}".format(self.object_path) if self.cycle == "any": return """not found: {0} (with any cycle number){1} diff --git a/uproot4/_util.py b/uproot4/_util.py index 0f8186234..de08b7329 100644 --- a/uproot4/_util.py +++ b/uproot4/_util.py @@ -148,41 +148,87 @@ def regularize_filter(filter): ) +def regularize_path(path): + if isinstance(path, getattr(os, "PathLike", ())): + path = os.fspath(path) + + elif hasattr(path, "__fspath__"): + path = path.__fspath__() + + elif path.__class__.__module__ == "pathlib": + import pathlib + + if isinstance(path, pathlib.Path): + path = str(path) + + return path + + +_windows_drive_letter_ending = re.compile(r".*\b[A-Za-z]$") _windows_absolute_path_pattern = re.compile(r"^[A-Za-z]:\\") +_windows_absolute_path_pattern_slash = re.compile(r"^/[A-Za-z]:\\") + +def file_object_path_split(path): + path = regularize_path(path) -def path_to_source_class(file_path, options): - if isinstance(file_path, getattr(os, "PathLike", ())): - file_path = os.fspath(file_path) + try: + index = path.rindex(":") + except ValueError: + return path, None + else: + file_path, object_path = path[:index], path[index + 1 :] + file_path = file_path.rstrip() + object_path = object_path.lstrip() + + if file_path.upper() in ("FILE", "HTTP", "HTTPS", "ROOT"): + return path, None + elif ( + os.name == "nt" + and _windows_drive_letter_ending.match(file_path) is not None + ): + return path, None + else: + return file_path, object_path - elif hasattr(file_path, "__fspath__"): - file_path = file_path.__fspath__() - elif file_path.__class__.__module__ == "pathlib": - import pathlib +def file_path_to_source_class(file_path, options): + file_path = regularize_path(file_path) - if isinstance(file_path, pathlib.Path): - file_path = str(file_path) + windows_absolute_path = None + + if os.name == "nt": + if _windows_absolute_path_pattern.match(file_path) is not None: + windows_absolute_path = file_path - windows_absolute_path = ( - os.name == "nt" and _windows_absolute_path_pattern.match(file_path) is not None - ) parsed_url = urlparse(file_path) + if os.name == "nt" and windows_absolute_path is None: + if _windows_absolute_path_pattern.match(parsed_url.path) is not None: + windows_absolute_path = parsed_url.path + elif _windows_absolute_path_pattern_slash.match(parsed_url.path) is not None: + windows_absolute_path = parsed_url.path[1:] + if ( - parsed_url.scheme == "file" + parsed_url.scheme.upper() == "FILE" or len(parsed_url.scheme) == 0 or windows_absolute_path ): - if not windows_absolute_path: - file_path = parsed_url.netloc + parsed_url.path - return options["file_handler"] + if windows_absolute_path is None: + if parsed_url.netloc.upper() == "LOCALHOST": + file_path = parsed_url.path + else: + file_path = parsed_url.netloc + parsed_url.path + else: + file_path = windows_absolute_path + + return options["file_handler"], os.path.expanduser(file_path) - elif parsed_url.scheme == "root": - return options["xrootd_handler"] + elif parsed_url.scheme.upper() == "ROOT": + return options["xrootd_handler"], file_path - elif parsed_url.scheme == "http" or parsed_url.scheme == "https": - return options["http_handler"] + elif parsed_url.scheme.upper() == "HTTP" or parsed_url.scheme.upper() == "HTTPS": + return options["http_handler"], file_path else: raise ValueError("URI scheme not recognized: {0}".format(file_path)) diff --git a/uproot4/behaviors/TBranch.py b/uproot4/behaviors/TBranch.py index 5f028de7c..bd2ddb895 100644 --- a/uproot4/behaviors/TBranch.py +++ b/uproot4/behaviors/TBranch.py @@ -160,6 +160,7 @@ def _regularize_branchname( def _regularize_expression( hasbranches, expression, + keys, aliases, compute, get_from_cache, @@ -194,7 +195,11 @@ def _regularize_expression( is_jagged = False for symbol in compute.free_symbols( - to_compute, aliases, hasbranches.file.file_path, hasbranches.object_path, + to_compute, + keys, + aliases, + hasbranches.file.file_path, + hasbranches.object_path, ): if symbol in symbol_path: raise ValueError( @@ -215,6 +220,7 @@ def _regularize_expression( _regularize_expression( hasbranches, symbol, + keys, aliases, compute, get_from_cache, @@ -242,6 +248,7 @@ def _regularize_expressions( filter_name, filter_typename, filter_branch, + keys, aliases, compute, get_from_cache, @@ -256,24 +263,30 @@ def _regularize_expressions( filter_name=filter_name, filter_typename=filter_typename, filter_branch=filter_branch, + full_paths=True, ): - _regularize_branchname( - hasbranches, - branchname, - branch, + if not isinstance( branch.interpretation, - get_from_cache, - arrays, - expression_context, - branchid_interpretation, - True, - False, - ) + uproot4.interpretation.identify.UnknownInterpretation, + ): + _regularize_branchname( + hasbranches, + branchname, + branch, + branch.interpretation, + get_from_cache, + arrays, + expression_context, + branchid_interpretation, + True, + False, + ) elif uproot4._util.isstr(expressions): _regularize_expression( hasbranches, expressions, + keys, aliases, compute, get_from_cache, @@ -306,6 +319,7 @@ def _regularize_expressions( _regularize_expression( hasbranches, expression, + keys, aliases, compute, get_from_cache, @@ -345,6 +359,7 @@ def _regularize_expressions( _regularize_expression( hasbranches, cut, + keys, aliases, compute, get_from_cache, @@ -504,12 +519,14 @@ def __getitem__(self, where): if "/" in where: where = "/".join([x for x in where.split("/") if x != ""]) - for k, v in self.iteritems(recursive=True): + for k, v in self.iteritems(recursive=True, full_paths=True): if where == k: self._lookup[original_where] = v return v else: - raise uproot4.KeyInFileError(original_where, self._file.file_path) + raise uproot4.KeyInFileError( + original_where, self._file.file_path, object_path=self.object_path + ) elif recursive: got = _get_recursive(self, where) @@ -517,7 +534,9 @@ def __getitem__(self, where): self._lookup[original_where] = got return got else: - raise uproot4.KeyInFileError(original_where, self._file.file_path) + raise uproot4.KeyInFileError( + original_where, self._file.file_path, object_path=self.object_path + ) else: for branch in self.branches: @@ -525,7 +544,9 @@ def __getitem__(self, where): self._lookup[original_where] = branch return branch else: - raise uproot4.KeyInFileError(original_where, self._file.file_path) + raise uproot4.KeyInFileError( + original_where, self._file.file_path, object_path=self.object_path + ) def iteritems( self, @@ -533,6 +554,7 @@ def iteritems( filter_name=no_filter, filter_typename=no_filter, filter_branch=no_filter, + full_paths=True, ): filter_name = uproot4._util.regularize_filter(filter_name) filter_typename = uproot4._util.regularize_filter(filter_typename) @@ -560,8 +582,12 @@ def iteritems( filter_name=no_filter, filter_typename=filter_typename, filter_branch=filter_branch, + full_paths=full_paths, ): - k2 = "{0}/{1}".format(branch.name, k1) + if full_paths: + k2 = "{0}/{1}".format(branch.name, k1) + else: + k2 = k1 if filter_name(k2): yield k2, v @@ -571,6 +597,7 @@ def items( filter_name=no_filter, filter_typename=no_filter, filter_branch=no_filter, + full_paths=True, ): return list( self.iteritems( @@ -578,6 +605,7 @@ def items( filter_name=filter_name, filter_typename=filter_typename, filter_branch=filter_branch, + full_paths=full_paths, ) ) @@ -587,12 +615,14 @@ def iterkeys( filter_name=no_filter, filter_typename=no_filter, filter_branch=no_filter, + full_paths=True, ): for k, v in self.iteritems( recursive=recursive, filter_name=filter_name, filter_typename=filter_typename, filter_branch=filter_branch, + full_paths=full_paths, ): yield k @@ -602,6 +632,7 @@ def keys( filter_name=no_filter, filter_typename=no_filter, filter_branch=no_filter, + full_paths=True, ): return list( self.iterkeys( @@ -609,6 +640,7 @@ def keys( filter_name=filter_name, filter_typename=filter_typename, filter_branch=filter_branch, + full_paths=full_paths, ) ) @@ -628,6 +660,7 @@ def itervalues( filter_name=filter_name, filter_typename=filter_typename, filter_branch=filter_branch, + full_paths=False, ): yield v @@ -644,6 +677,7 @@ def values( filter_name=filter_name, filter_typename=filter_typename, filter_branch=filter_branch, + full_paths=False, ) ) @@ -653,12 +687,14 @@ def itertypenames( filter_name=no_filter, filter_typename=no_filter, filter_branch=no_filter, + full_paths=True, ): for k, v in self.iteritems( recursive=recursive, filter_name=filter_name, filter_typename=filter_typename, filter_branch=filter_branch, + full_paths=full_paths, ): yield k, v.typename @@ -668,6 +704,7 @@ def typenames( filter_name=no_filter, filter_typename=no_filter, filter_branch=no_filter, + full_paths=True, ): return dict( self.itertypenames( @@ -675,6 +712,7 @@ def typenames( filter_name=filter_name, filter_typename=filter_typename, filter_branch=filter_branch, + full_paths=full_paths, ) ) @@ -725,6 +763,7 @@ def get_from_cache(branchname, interpretation): else: return None + keys = set(self.keys(recursive=True, full_paths=False)) aliases = _regularize_aliases(self, aliases) arrays, expression_context, branchid_interpretation = _regularize_expressions( self, @@ -733,6 +772,7 @@ def get_from_cache(branchname, interpretation): filter_name, filter_typename, filter_branch, + keys, aliases, compute, get_from_cache, @@ -776,7 +816,12 @@ def get_from_cache(branchname, interpretation): array_cache[cache_key] = arrays[id(branch)] output = compute.compute_expressions( - arrays, expression_context, aliases, self.file.file_path, self.object_path, + arrays, + expression_context, + keys, + aliases, + self.file.file_path, + self.object_path, ) expression_context = [ @@ -947,9 +992,12 @@ def streamer(self): @property def interpretation(self): if self._interpretation is None: - self._interpretation = uproot4.interpretation.identify.interpretation_of( - self, {} - ) + try: + self._interpretation = uproot4.interpretation.identify.interpretation_of( + self, {} + ) + except uproot4.interpretation.identify.UnknownInterpretation as err: + self._interpretation = err return self._interpretation @property diff --git a/uproot4/behaviors/TBranchElement.py b/uproot4/behaviors/TBranchElement.py new file mode 100644 index 000000000..5fc9a9ef6 --- /dev/null +++ b/uproot4/behaviors/TBranchElement.py @@ -0,0 +1,9 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE + +from __future__ import absolute_import + +import uproot4.behaviors.TBranch + + +class TBranchElement(uproot4.behaviors.TBranch.TBranch): + pass diff --git a/uproot4/compute/python.py b/uproot4/compute/python.py index 580abcf59..4451ea3a5 100644 --- a/uproot4/compute/python.py +++ b/uproot4/compute/python.py @@ -12,13 +12,13 @@ def _expression_to_node(expression, file_path, object_path): node = ast.parse(expression) except SyntaxError as err: raise SyntaxError( - err.args[0] + "\nin file {0} at {1}".format(file_path, object_path), + err.args[0] + "\nin file {0}\nin object {1}".format(file_path, object_path), err.args[1], ) if len(node.body) != 1 or not isinstance(node.body[0], ast.Expr): raise SyntaxError( - "expected a single expression\nin file {0} at {1}".format( + "expected a single expression\nin file {0}\nin object {1}".format( file_path, object_path ) ) @@ -41,76 +41,122 @@ def _attribute_to_dotted_name(node): return None -def _walk_ast_yield_symbols(node, aliases, functions): - if isinstance(node, ast.Name): - if node.id not in functions: +def _walk_ast_yield_symbols(node, keys, aliases, functions, getter): + if ( + isinstance(node, ast.Call) + and isinstance(node.func, ast.Name) + and node.func.id == getter + ): + if len(node.args) == 1 and isinstance(node.args[0], ast.Str): + yield node.args[0].s + else: + raise TypeError( + "expected a constant string as the only argument of {0}; " + "found {1}".format(repr(getter), ast.dump(node.args)) + ) + + elif isinstance(node, ast.Name): + if node.id in keys or node.id in aliases: yield node.id + elif node.id in functions or node.id == getter: + pass + else: + raise KeyError(node.id) elif isinstance(node, ast.Attribute): name = _attribute_to_dotted_name(node) if name is None: - for y in _walk_ast_yield_symbols(node.value, aliases, functions): + for y in _walk_ast_yield_symbols( + node.value, keys, aliases, functions, getter + ): yield y - else: + elif name in keys or name in aliases: yield name + else: + # implicitly means functions and getter can't have dots in their names + raise KeyError(name) elif isinstance(node, ast.AST): for field_name in node._fields: x = getattr(node, field_name) - for y in _walk_ast_yield_symbols(x, aliases, functions): + for y in _walk_ast_yield_symbols(x, keys, aliases, functions, getter): yield y elif isinstance(node, list): for x in node: - for y in _walk_ast_yield_symbols(x, aliases, functions): + for y in _walk_ast_yield_symbols(x, keys, aliases, functions, getter): yield y else: pass -def _ast_as_branch_expression(node, aliases, functions): - if isinstance(node, ast.Name): - if node.id in aliases: - return ast.parse("get_alias({0})".format(repr(node.id))).body[0].value +def _ast_as_branch_expression(node, keys, aliases, functions, getter): + if ( + isinstance(node, ast.Call) + and isinstance(node.func, ast.Name) + and node.func.id == getter + and len(node.args) == 1 + and isinstance(node.args[0], ast.Str) + ): + return node + + elif isinstance(node, ast.Name): + if node.id in keys or node.id in aliases: + return ast.parse("get({0})".format(repr(node.id))).body[0].value elif node.id in functions: - return ast.parse("functions[{0}]".format(repr(node.id))).body[0].value + return ast.parse("function[{0}]".format(repr(node.id))).body[0].value else: - return ast.parse("arrays[{0}]".format(repr(node.id))).body[0].value + raise KeyError(node.id) elif isinstance(node, ast.Attribute): name = _attribute_to_dotted_name(node) if name is None: - value = _ast_as_branch_expression(node.value, aliases, functions) + value = _ast_as_branch_expression( + node.value, keys, aliases, functions, getter + ) new_node = ast.Attribute(value, node.attr, node.ctx) new_node.lineno = getattr(node, "lineno", 1) new_node.col_offset = getattr(node, "col_offset", 0) return new_node + elif name in keys or name in aliases: + return ast.parse("get({0})".format(repr(name))).body[0].value else: - return ast.parse("arrays[{0}]".format(repr(name))).body[0].value + # implicitly means functions and getter can't have dots in their names + raise KeyError(name) elif isinstance(node, ast.AST): args = [] for field_name in node._fields: field_value = getattr(node, field_name) - args.append(_ast_as_branch_expression(field_value, aliases, functions)) + args.append( + _ast_as_branch_expression(field_value, keys, aliases, functions, getter) + ) new_node = type(node)(*args) new_node.lineno = getattr(node, "lineno", 1) new_node.col_offset = getattr(node, "col_offset", 0) return new_node elif isinstance(node, list): - return [_ast_as_branch_expression(x, aliases, functions) for x in node] + return [ + _ast_as_branch_expression(x, keys, aliases, functions, getter) for x in node + ] else: return node def _expression_to_function( - expression, aliases, functions, scope, file_path, object_path + expression, keys, aliases, functions, getter, scope, file_path, object_path ): node = _expression_to_node(expression, file_path, object_path) - expr = _ast_as_branch_expression(node.body[0].value, aliases, functions) + try: + expr = _ast_as_branch_expression( + node.body[0].value, keys, aliases, functions, getter + ) + except KeyError as err: + raise uproot4.KeyInFileError(err.args[0], file_path, object_path=object_path) + function = ast.parse("lambda: None").body[0].value function.body = expr expression = ast.Expression(function) @@ -237,55 +283,85 @@ class ComputePython(uproot4.compute.Compute): "where": numpy.where, } - def __init__(self, functions=None): + def __init__(self, functions=None, getter="get"): if functions is None: self._functions = self.default_functions else: self._functions = dict(functions) + self._getter = getter @property def functions(self): return self._functions - def free_symbols(self, expression, aliases, file_path, object_path): + @property + def getter(self): + return self._getter + + def free_symbols(self, expression, keys, aliases, file_path, object_path): node = _expression_to_node(expression, file_path, object_path) - return _walk_ast_yield_symbols(node, aliases, self._functions) + try: + return list( + _walk_ast_yield_symbols( + node, keys, aliases, self._functions, self._getter + ) + ) + except KeyError as err: + raise uproot4.KeyInFileError( + err.args[0], file_path, object_path=object_path + ) def compute_expressions( - self, arrays, expression_context, aliases, file_path, object_path + self, arrays, expression_context, keys, aliases, file_path, object_path ): - alias_values = {} + values = {} - def get_alias(alias_name): - if alias_name not in alias_values: - alias_values[alias_name] = _expression_to_function( - aliases[alias_name], + def getter(name): + if name not in values: + values[name] = _expression_to_function( + aliases[name], + keys, aliases, self._functions, + self._getter, scope, file_path, object_path, )() - return alias_values[alias_name] + return values[name] - scope = {"arrays": {}, "get_alias": get_alias, "functions": self._functions} + scope = {self._getter: getter, "function": self._functions} for expression, context in expression_context: branch = context.get("branch") if branch is not None: - scope["arrays"][expression] = arrays[id(branch)] + values[expression] = arrays[id(branch)] output = {} for expression, context in expression_context: if context["is_primary"] and not context["is_cut"]: output[expression] = _expression_to_function( - expression, aliases, self._functions, scope, file_path, object_path, + expression, + keys, + aliases, + self._functions, + self._getter, + scope, + file_path, + object_path, )() cut = None for expression, context in expression_context: if context["is_primary"] and context["is_cut"]: cut = _expression_to_function( - expression, aliases, self._functions, scope, file_path, object_path, + expression, + keys, + aliases, + self._functions, + self._getter, + scope, + file_path, + object_path, )() break diff --git a/uproot4/deserialization.py b/uproot4/deserialization.py index f69b6331f..60eebf2ed 100644 --- a/uproot4/deserialization.py +++ b/uproot4/deserialization.py @@ -45,7 +45,8 @@ def c(name, version=None): behavior_cls = uproot4.behavior_of(uproot4.model.classname_decode(class_name)[0]) if behavior_cls is not None: - out.__bases__ = (behavior_cls,) + out.__bases__ + out = uproot4._util.new_class(out.__name__, (behavior_cls, out), {}) + out.__module__ = "" return out diff --git a/uproot4/interpretation/identify.py b/uproot4/interpretation/identify.py index 7f7628faf..21902315d 100644 --- a/uproot4/interpretation/identify.py +++ b/uproot4/interpretation/identify.py @@ -28,10 +28,75 @@ def __repr__(self): def __str__(self): return """{0} -in file {1} at {2}""".format( +in file {1} +in object {2}""".format( self.reason, self.file_path, self.object_path ) + @property + def cache_key(self): + raise self + + @property + def numpy_dtype(self): + raise self + + @property + def awkward_form(self): + raise self + + @property + def basket_array(self): + raise self + + @property + def final_array(self): + raise self + + @property + def hook_before_basket_array(self): + raise self + + @property + def hook_after_basket_array(self): + raise self + + @property + def hook_before_final_array(self): + raise self + + @property + def hook_before_library_finalize(self): + raise self + + @property + def hook_after_final_array(self): + raise self + + @property + def itemsize(self): + raise self + + @property + def from_dtype(self): + raise self + + @property + def to_dtype(self): + raise self + + @property + def content(self): + raise self + + @property + def header_bytes(self): + raise self + + @property + def size_1to5_bytes(self): + raise self + def _normalize_ftype(fType): if fType is not None and uproot4.const.kOffsetL < fType < uproot4.const.kOffsetP: @@ -345,4 +410,21 @@ def interpretation_of(branch, context): ) except NotNumerical: + if len(branch.member("fLeaves")) != 1: + raise UnknownInterpretation( + "more or less than one TLeaf ({0}) in a non-numerical TBranch".format( + len(branch.member("fLeaves")) + ), + branch.file.file_path, + branch.object_path, + ) + + leaf = branch.member("fLeaves")[0] + + if leaf.classname == "TLeafC": + return uproot4.interpretation.strings.AsStrings(size_1to5_bytes=True) + + if leaf.classname == "TLeafElement": + raise NotImplementedError + raise NotImplementedError diff --git a/uproot4/interpretation/strings.py b/uproot4/interpretation/strings.py index cde3f03d5..49f91ee8e 100644 --- a/uproot4/interpretation/strings.py +++ b/uproot4/interpretation/strings.py @@ -2,8 +2,211 @@ from __future__ import absolute_import +import numpy + import uproot4.interpretation class StringArray(uproot4.interpretation.Interpretation): - pass + def __init__(self, offsets, content): + self._offsets = offsets + self._content = content + + def __repr__(self): + if len(self._content) > 100: + left, right = self._content[:45], self._content[-45:] + content = repr(left) + " ... " + repr(right) + else: + content = repr(self._content) + return "StringArray({0}, {1})".format(self._offsets, content) + + @property + def offsets(self): + return self._offsets + + @property + def content(self): + return self._content + + def __getitem__(self, where): + return self._content[self._offsets[where] : self._offsets[where + 1]] + + def __len__(self): + return len(self._offsets) - 1 + + +class AsStrings(uproot4.interpretation.Interpretation): + def __init__(self, header_bytes=0, size_1to5_bytes=False): + self._header_bytes = header_bytes + self._size_1to5_bytes = size_1to5_bytes + + @property + def header_bytes(self): + return self._header_bytes + + @property + def size_1to5_bytes(self): + return self._size_1to5_bytes + + def __repr__(self): + args = [] + if self._header_bytes != 0: + args.append("header_bytes={0}".format(self._header_bytes)) + if self._size_1to5_bytes is not False: + args.append("size_1to5_bytes={0}".format(self._size_1to5_bytes)) + return "AsStrings({0})".format(", ".join(args)) + + @property + def numpy_dtype(self): + return numpy.dtype(numpy.object) + + @property + def awkward_form(self): + raise NotImplementedError + + @property + def cache_key(self): + return "{0}({1},{2})".format( + type(self).__name__, self._header_bytes, self._size_1to5_bytes + ) + + def basket_array(self, data, byte_offsets, basket, branch): + self.hook_before_basket_array( + data=data, byte_offsets=byte_offsets, basket=basket, branch=branch + ) + + assert basket.byte_offsets is not None + + byte_starts = byte_offsets[:-1] + self._header_bytes + byte_stops = byte_offsets[1:] + + if self._size_1to5_bytes: + length_header_size = numpy.ones(len(byte_starts), dtype=numpy.int32) + length_header_size[data[byte_starts] == 255] += 4 + byte_starts += length_header_size + + mask = numpy.zeros(len(data), dtype=numpy.int8) + mask[byte_starts[byte_starts < len(data)]] = 1 + numpy.add.at(mask, byte_stops[byte_stops < len(data)], -1) + numpy.cumsum(mask, out=mask) + data = data[mask.view(numpy.bool_)] + + counts = byte_stops - byte_starts + offsets = numpy.empty(len(counts) + 1, dtype=numpy.int32) + offsets[0] = 0 + numpy.cumsum(counts, out=offsets[1:]) + + output = StringArray(offsets, uproot4._util.ensure_str(data.tostring())) + + self.hook_before_basket_array( + data=data, + byte_offsets=byte_offsets, + basket=basket, + branch=branch, + output=output, + ) + + return output + + def final_array( + self, basket_arrays, entry_start, entry_stop, entry_offsets, library, branch + ): + self.hook_before_final_array( + basket_arrays=basket_arrays, + entry_start=entry_start, + entry_stop=entry_stop, + entry_offsets=entry_offsets, + library=library, + branch=branch, + ) + + basket_offsets = {} + basket_content = {} + for k, v in basket_arrays.items(): + basket_offsets[k] = v.offsets + basket_content[k] = v.content + + if entry_start >= entry_stop: + return StringArray(library.zeros((1,), numpy.int64), "") + + else: + length = 0 + start = entry_offsets[0] + for basket_num, stop in enumerate(entry_offsets[1:]): + if start <= entry_start and entry_stop <= stop: + length += entry_stop - entry_start + elif start <= entry_start < stop: + length += stop - entry_start + elif start <= entry_stop <= stop: + length += entry_stop - start + elif entry_start < stop and start <= entry_stop: + length += stop - start + start = stop + + offsets = numpy.empty((length + 1,), numpy.int64) + + before = 0 + start = entry_offsets[0] + contents = [] + for basket_num, stop in enumerate(entry_offsets[1:]): + if start <= entry_start and entry_stop <= stop: + local_start = entry_start - start + local_stop = entry_stop - start + off, cnt = basket_offsets[basket_num], basket_content[basket_num] + offsets[:] = before + off[local_start : local_stop + 1] + before += off[local_stop] - off[local_start] + contents.append(cnt[off[local_start] : off[local_stop]]) + + elif start <= entry_start < stop: + local_start = entry_start - start + local_stop = stop - start + off, cnt = basket_offsets[basket_num], basket_content[basket_num] + offsets[: stop - entry_start + 1] = ( + before + off[local_start : local_stop + 1] + ) + before += off[local_stop] - off[local_start] + contents.append(cnt[off[local_start] : off[local_stop]]) + + elif start <= entry_stop <= stop: + local_start = 0 + local_stop = entry_stop - start + off, cnt = basket_offsets[basket_num], basket_content[basket_num] + offsets[start - entry_start :] = ( + before + off[local_start : local_stop + 1] + ) + before += off[local_stop] - off[local_start] + contents.append(cnt[off[local_start] : off[local_stop]]) + + elif entry_start < stop and start <= entry_stop: + off, cnt = basket_offsets[basket_num], basket_content[basket_num] + offsets[start - entry_start : stop - entry_start + 1] = before + off + before += off[-1] - off[0] + contents.append(cnt[off[0] : off[-1]]) + + start = stop + + output = StringArray(offsets, "".join(contents)) + + self.hook_before_library_finalize( + basket_arrays=basket_arrays, + entry_start=entry_start, + entry_stop=entry_stop, + entry_offsets=entry_offsets, + library=library, + branch=branch, + output=output, + ) + + output = library.finalize(output, branch) + + self.hook_after_final_array( + basket_arrays=basket_arrays, + entry_start=entry_start, + entry_stop=entry_stop, + entry_offsets=entry_offsets, + library=library, + branch=branch, + output=output, + ) + + return output diff --git a/uproot4/reading.py b/uproot4/reading.py index 960a254a7..f61c87178 100644 --- a/uproot4/reading.py +++ b/uproot4/reading.py @@ -33,15 +33,13 @@ def open( - file_path, - object_cache=100, - array_cache="100 MB", - classes=uproot4.classes, - **options + path, object_cache=100, array_cache="100 MB", classes=uproot4.classes, **options ): """ Args: - file_path (str or Path): File path or URL to open. + path (str or Path): Path or URL to open, which may include a colon + separating a file path from an object-within-ROOT path, like + `"root://server/path/to/file.root : internal_directory/my_ttree"`. object_cache (None, MutableMapping, or int): Cache of objects drawn from ROOT directories (e.g histograms, TTrees, other directories); if None, do not use a cache; if an int, create a new cache of this @@ -70,11 +68,7 @@ def open( * minimal_ttree_metadata (bool; True) """ - if "|" in file_path: - i = file_path.index("|") - file_path, object_path = file_path[:i].rstrip(), file_path[i + 1 :].lstrip() - else: - object_path = None + file_path, object_path = uproot4._util.file_object_path_split(path) file = ReadOnlyFile( file_path, @@ -132,11 +126,10 @@ def __init__( self.hook_before_create_source() - Source = uproot4._util.path_to_source_class(file_path, self._options) - if file_path.startswith("file:"): - self._source = Source(file_path[5:], **self._options) - else: - self._source = Source(file_path, **self._options) + Source, file_path = uproot4._util.file_path_to_source_class( + file_path, self._options + ) + self._source = Source(file_path, **self._options) self.hook_before_get_chunks() @@ -667,7 +660,7 @@ def __repr__(self): nameclass = "" else: nameclass = " {0}: {1}".format(self.name(cycle=True), self.classname()) - return "".format( + return "".format( nameclass, self.data_cursor.index, id(self) ) diff --git a/uproot4/source/chunk.py b/uproot4/source/chunk.py index 787cd2ca9..268e7ff1d 100644 --- a/uproot4/source/chunk.py +++ b/uproot4/source/chunk.py @@ -354,8 +354,9 @@ def get(self, start, stop): Calling this function blocks until `raw_data` is filled. """ + self.wait() + if (start, stop) in self: - self.wait() local_start = start - self._start local_stop = stop - self._start return self._raw_data[local_start:local_stop] @@ -385,8 +386,9 @@ def remainder(self, start): Calling this function blocks until `raw_data` is filled. """ + self.wait() + if self._start <= start: - self.wait() local_start = start - self._start return self._raw_data[local_start:] diff --git a/uproot4/source/file.py b/uproot4/source/file.py index 5e62fc3f4..73be6811e 100644 --- a/uproot4/source/file.py +++ b/uproot4/source/file.py @@ -21,7 +21,7 @@ class FileResource(uproot4.source.chunk.Resource): __slots__ = ["_file_path", "_file"] def __init__(self, file_path): - self._file_path = os.path.expanduser(file_path) + self._file_path = file_path self._file = open(self._file_path, "rb") @property diff --git a/uproot4/source/http.py b/uproot4/source/http.py index c918ebc6d..1a0f3eeb5 100644 --- a/uproot4/source/http.py +++ b/uproot4/source/http.py @@ -680,8 +680,9 @@ def chunks(self, ranges, exact=True, notifications=None): @staticmethod def _fix_start_stop(chunk): def fix(future): - chunk._start = future._start - chunk._stop = future._stop + if future._excinfo is None: + chunk._start = future._start + chunk._stop = future._stop return fix diff --git a/uproot4/source/memmap.py b/uproot4/source/memmap.py index 5489753a6..a3ddbf8e8 100644 --- a/uproot4/source/memmap.py +++ b/uproot4/source/memmap.py @@ -6,8 +6,6 @@ from __future__ import absolute_import -import os.path - import numpy import uproot4.source.chunk @@ -37,7 +35,7 @@ def __init__(self, file_path, **options): self._num_requested_chunks = 0 self._num_requested_bytes = 0 - self._file_path = os.path.expanduser(file_path) + self._file_path = file_path try: self._file = numpy.memmap(self._file_path, dtype=self._dtype, mode="r") self._fallback = None