Skip to content

Commit

Permalink
Implement STL containers (#29)
Browse files Browse the repository at this point in the history
* Implement more string types.

* Using issue431 as an example of data that contains an STL object.

* Reorder arguments of KeyInFileError.

* More useful DeserializationError object.

* Black and flake8.

* Done for now. There are lots of STL examples to look at.

* Start developing STL container objects.

* Developed STLVector and STLMap.

* Wrote but haven't tested AsVector.

* Avoid reading a second time if there are no new classes to read.

* Remove (commented out) pass-through dict of custom_classes.

* STL container deserialization has been formalized.

* We have a C++ type name parser.

* General STL type handling has been implemented; tested for map<string,string>.
  • Loading branch information
jpivarski authored Jun 20, 2020
1 parent 53eba54 commit 81a5805
Show file tree
Hide file tree
Showing 17 changed files with 1,208 additions and 228 deletions.
18 changes: 0 additions & 18 deletions tests/test_0023-more-interpretations-1.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,24 +49,6 @@ def test_strings1():
assert result.tolist() == ["hey-{0}".format(i) for i in range(30)]


@pytest.mark.skip(reason="FIXME: implement strings specified by a TStreamer")
def test_strings2():
    """Compare the ``tree/Str`` branch against 100 ``evt-NNN`` strings."""
    filename = skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
    expected = ["evt-{0:03d}".format(i) for i in range(100)]
    with uproot4.open(filename)["tree/Str"] as branch:
        values = branch.array(library="np")
        assert values.tolist() == expected


@pytest.mark.skip(reason="FIXME: implement std::string")
def test_strings3():
    """Compare the ``tree/StdStr`` branch against 100 ``std-NNN`` strings."""
    filename = skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
    expected = ["std-{0:03d}".format(i) for i in range(100)]
    with uproot4.open(filename)["tree/StdStr"] as branch:
        values = branch.array(library="np")
        assert values.tolist() == expected


@pytest.mark.skip(reason="FIXME: implement std::vector<std::string>")
def test_strings4():
with uproot4.open(
Expand Down
10 changes: 5 additions & 5 deletions tests/test_0028-fallback-to-read-streamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@


def test_fallback_reading():
with uproot4.open(
skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
) as f:
f["tree:evt/P3/P3.Py"]
assert f.file._streamers is None
# with uproot4.open(
# skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
# ) as f:
# f["tree:evt/P3/P3.Py"]
# assert f.file._streamers is None

with uproot4.open(skhep_testdata.data_path("uproot-demo-double32.root")) as f:
f["T/fD64"]
Expand Down
173 changes: 173 additions & 0 deletions tests/test_0029-more-string-types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE

from __future__ import absolute_import

import sys
import json

import numpy
import pytest
import skhep_testdata

import uproot4
from uproot4.stl_containers import parse_typename
from uproot4.stl_containers import AsString
from uproot4.stl_containers import AsVector
from uproot4.stl_containers import AsSet
from uproot4.stl_containers import AsMap


def test_parse_typename():
    """Check ``parse_typename`` on valid and malformed C++ type names.

    Valid names must compare equal to the expected ``AsString`` /
    ``AsVector`` / ``AsSet`` / ``AsMap`` description (or, for a plain class
    name, be the registered class object itself); malformed names must raise
    ``ValueError``.
    """
    # A bare class name resolves to the very same object stored in the registry.
    assert parse_typename("TTree") is uproot4.classes["TTree"]

    # (type name, expected result) pairs, checked in order with ``==``.
    expectations = [
        # strings, with and without the std:: prefix and stray whitespace
        ("string", AsString()),
        ("std::string", AsString()),
        ("std :: string", AsString()),
        ("char*", AsString(is_stl=False)),
        ("char *", AsString(is_stl=False)),
        ("TString", AsString(is_stl=False)),
        # vectors
        ("vector<TTree>", AsVector(uproot4.classes["TTree"])),
        ("vector<int>", AsVector(">i4")),
        ("vector<bool>", AsVector("?")),
        ("vector<string>", AsVector(AsString())),
        ("vector < string >", AsVector(AsString())),
        ("std::vector<std::string>", AsVector(AsString())),
        ("vector<vector<int>>", AsVector(AsVector(">i4"))),
        ("vector<vector<string>>", AsVector(AsVector(AsString()))),
        ("vector<vector<char*>>", AsVector(AsVector(AsString(is_stl=False)))),
        # sets
        ("set<unsigned short>", AsSet(">u2")),
        ("std::set<unsigned short>", AsSet(">u2")),
        ("set<string>", AsSet(AsString())),
        ("set<vector<string>>", AsSet(AsVector(AsString()))),
        ("set<vector<string> >", AsSet(AsVector(AsString()))),
        # maps
        ("map<int, double>", AsMap(">i4", ">f8")),
        ("map<string, double>", AsMap(AsString(), ">f8")),
        ("map<int, string>", AsMap(">i4", AsString())),
        ("map<string, string>", AsMap(AsString(), AsString())),
        ("map<string,string>", AsMap(AsString(), AsString())),
        ("map< string,string >", AsMap(AsString(), AsString())),
        ("map<string,vector<int>>", AsMap(AsString(), AsVector(">i4"))),
        ("map<vector<int>, string>", AsMap(AsVector(">i4"), AsString())),
        ("map<vector<int>, set<float>>", AsMap(AsVector(">i4"), AsSet(">f4"))),
        (
            "map<vector<int>, set<set<float>>>",
            AsMap(AsVector(">i4"), AsSet(AsSet(">f4"))),
        ),
    ]
    for typename, expected in expectations:
        assert parse_typename(typename) == expected

    # Unbalanced or misplaced angle brackets are rejected.
    malformed_names = (
        "string <",
        "vector <",
        "map<string<int>>",
        "map<string, int>>",
    )
    for typename in malformed_names:
        with pytest.raises(ValueError):
            parse_typename(typename)


def test_strings1():
    """Read the ``Beg`` and ``End`` branches and compare to their known patterns."""
    filename = skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
    # (branch name, format template of the expected entries), in reading order
    cases = (
        ("Beg", "beg-{0:03d}"),
        ("End", "end-{0:03d}"),
    )
    with uproot4.open(filename)["tree"] as tree:
        for branch_name, template in cases:
            values = tree[branch_name].array(library="np")
            assert values.tolist() == [template.format(i) for i in range(100)]


def test_map_string_string_in_object():
    """Check the ``map<string,string>`` member of the ``Head`` object.

    The member read from ``uproot-issue431.root`` must equal the dict of
    string keys and string values spelled out below.
    """
    filename = skhep_testdata.data_path("uproot-issue431.root")
    with uproot4.open(filename) as f:
        actual = f["Head"].member("map<string,string>")
        expected = {
            "DAQ": "394",
            "PDF": "4 58",
            "XSecFile": "",
            "can": "0 1027 888.4",
            "can_user": "0.00 1027.00 888.40",
            "coord_origin": "0 0 0",
            "cut_in": "0 0 0 0",
            "cut_nu": "100 1e+08 -1 1",
            "cut_primary": "0 0 0 0",
            "cut_seamuon": "0 0 0 0",
            "decay": "doesnt happen",
            "detector": "NOT",
            "drawing": "Volume",
            "end_event": "",
            "genhencut": "2000 0",
            "genvol": "0 1027 888.4 2.649e+09 100000",
            "kcut": "2",
            "livetime": "0 0",
            "model": "1 2 0 1 12",
            "muon_desc_file": "",
            "ngen": "0.1000E+06",
            "norma": "0 0",
            "nuflux": "0 3 0 0.500E+00 0.000E+00 0.100E+01 0.300E+01",
            "physics": "GENHEN 7.2-220514 181116 1138",
            "seed": "GENHEN 3 305765867 0 0",
            "simul": "JSirene 11012 11/17/18 07",
            "sourcemode": "diffuse",
            "spectrum": "-1.4",
            "start_run": "1",
            "target": "isoscalar",
            "usedetfile": "false",
            "xlat_user": "0.63297",
            "xparam": "OFF",
            "zed_user": "0.00 3450.00",
        }
        assert actual == expected


@pytest.mark.skip(
    reason="FIXME: test works, but the file is not in scikit-hep-testdata yet"
)
def test_map_long_int_in_object():
    """Open ``uproot-issue283.root`` and print its ``config/detector`` object.

    The file is located through ``skhep_testdata.data_path`` like every other
    test in this module, rather than through a path inside one developer's
    home directory that cannot exist on any other machine. The test stays
    skipped until the file is published in scikit-hep-testdata.
    """
    with uproot4.open(skhep_testdata.data_path("uproot-issue283.root")) as f:
        print(f["config/detector"])

# raise Exception


# has STL vectors at top-level:
#
# python -c 'import uproot; t = uproot.open("/home/pivarski/irishep/scikit-hep-testdata/src/skhep_testdata/data/uproot-issue38a.root")["ntupler/tree"]; print("\n".join(str((x._fName, getattr(x, "_fStreamerType", None), getattr(x, "_fClassName", None), getattr(x, "_fType", None), x.interpretation)) for x in t.allvalues()))'

# has STL map<int,struct> as described here:
#
# https://github.com/scikit-hep/uproot/issues/468#issuecomment-646325842
#
# python -c 'import uproot; t = uproot.open("/home/pivarski/irishep/scikit-hep-testdata/src/skhep_testdata/data/uproot-issue468.root")["Geant4Data/Geant4Data./Geant4Data.particles"]; print(t.array(uproot.asdebug)[0][:1000])'

# def test_strings1():
# with uproot4.open(
# skhep_testdata.data_path("uproot-issue31.root")
# )["T/name"] as branch:
# result = branch.array(library="np")
# assert result.tolist() == ["one", "two", "three", "four", "five"]


@pytest.mark.skip(reason="FIXME: implement strings specified by a TStreamer")
def test_strings2():
    """Compare the ``tree/Str`` branch against 100 ``evt-NNN`` strings."""
    filename = skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
    expected = ["evt-{0:03d}".format(i) for i in range(100)]
    with uproot4.open(filename)["tree/Str"] as branch:
        values = branch.array(library="np")
        assert values.tolist() == expected


@pytest.mark.skip(reason="FIXME: implement std::string")
def test_strings3():
    """Compare the ``tree/StdStr`` branch against 100 ``std-NNN`` strings."""
    filename = skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
    expected = ["std-{0:03d}".format(i) for i in range(100)]
    with uproot4.open(filename)["tree/StdStr"] as branch:
        values = branch.array(library="np")
        assert values.tolist() == expected
40 changes: 24 additions & 16 deletions uproot4/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
decompression_executor = ThreadPoolExecutor()
interpretation_executor = TrivialExecutor()

from uproot4.deserialization import DeserializationError

from uproot4.reading import open
from uproot4.reading import ReadOnlyFile
from uproot4.reading import ReadOnlyDirectory
Expand All @@ -33,6 +35,10 @@
from uproot4.model import has_class_named
from uproot4.model import class_named

from uproot4.stl_containers import STLVector
from uproot4.stl_containers import STLSet
from uproot4.stl_containers import STLMap

import uproot4.interpretation
import uproot4.interpretation.library

Expand Down Expand Up @@ -94,12 +100,14 @@ def behavior_of(classname):


class KeyInFileError(KeyError):
def __init__(self, key, file_path, cycle=None, because="", object_path=None):
__slots__ = ["key", "because", "cycle", "file_path", "object_path"]

def __init__(self, key, because="", cycle=None, file_path=None, object_path=None):
super(KeyInFileError, self).__init__(key)
self.key = key
self.file_path = file_path
self.cycle = cycle
self.because = because
self.cycle = cycle
self.file_path = file_path
self.object_path = object_path

def __str__(self):
Expand All @@ -108,25 +116,25 @@ def __str__(self):
else:
because = " because " + self.because

if self.object_path is None:
object_path = ""
else:
object_path = "\nin object {0}".format(self.object_path)
in_file = ""
if self.file_path is not None:
in_file = "\nin file {0}".format(self.file_path)

in_object = ""
if self.object_path is not None:
in_object = "\nin object {0}".format(self.object_path)

if self.cycle == "any":
return """not found: {0} (with any cycle number){1}
in file {2}{3}""".format(
repr(self.key), because, self.file_path, object_path
return """not found: {0} (with any cycle number){1}{2}{3}""".format(
repr(self.key), because, in_file, in_object
)
elif self.cycle is None:
return """not found: {0}{1}
in file {2}{3}""".format(
repr(self.key), because, self.file_path, object_path
return """not found: {0}{1}{2}{3}""".format(
repr(self.key), because, in_file, in_object
)
else:
return """not found: {0} with cycle {1}{2}
in file {3}{4}""".format(
repr(self.key), self.cycle, because, self.file_path, object_path
return """not found: {0} with cycle {1}{2}{3}{4}""".format(
repr(self.key), self.cycle, because, in_file, in_object
)


Expand Down
12 changes: 9 additions & 3 deletions uproot4/behaviors/TBranch.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,9 @@ def __getitem__(self, where):
return v
else:
raise uproot4.KeyInFileError(
original_where, self._file.file_path, object_path=self.object_path
original_where,
file_path=self._file.file_path,
object_path=self.object_path,
)

elif recursive:
Expand All @@ -535,7 +537,9 @@ def __getitem__(self, where):
return got
else:
raise uproot4.KeyInFileError(
original_where, self._file.file_path, object_path=self.object_path
original_where,
file_path=self._file.file_path,
object_path=self.object_path,
)

else:
Expand All @@ -545,7 +549,9 @@ def __getitem__(self, where):
return branch
else:
raise uproot4.KeyInFileError(
original_where, self._file.file_path, object_path=self.object_path
original_where,
file_path=self._file.file_path,
object_path=self.object_path,
)

def iteritems(
Expand Down
4 changes: 3 additions & 1 deletion uproot4/compute/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ def _expression_to_function(
node.body[0].value, keys, aliases, functions, getter
)
except KeyError as err:
raise uproot4.KeyInFileError(err.args[0], file_path, object_path=object_path)
raise uproot4.KeyInFileError(
err.args[0], file_path=file_path, object_path=object_path
)

function = ast.parse("lambda: None").body[0].value
function.body = expr
Expand Down
Loading

0 comments on commit 81a5805

Please sign in to comment.