diff --git a/tests/test_0016-interpretations.py b/tests/test_0016-interpretations.py index 967d993a8..e9c419eea 100644 --- a/tests/test_0016-interpretations.py +++ b/tests/test_0016-interpretations.py @@ -81,7 +81,7 @@ def test_recovery(mini): # flat array to recover: filename = skhep_testdata.data_path("uproot-issue21.root") with uproot4.open( - "file:" + filename + " : nllscan/mH", minimal_ttree_metadata=mini + {"file:" + filename: "nllscan/mH"}, minimal_ttree_metadata=mini ) as branch: basket = branch.basket(0) assert basket.data.view(">f8").tolist()[:10] == [ @@ -107,7 +107,7 @@ def test_recovery(mini): # uproot-from-geant4.root Details: numgood, TrackedRays: Event phi filename = skhep_testdata.data_path("uproot-issue327.root") with uproot4.open( - "file:" + filename + " : DstTree/fTracks.fCharge", minimal_ttree_metadata=mini + {"file:" + filename: "DstTree/fTracks.fCharge"}, minimal_ttree_metadata=mini ) as branch: basket = branch.basket(0) assert basket.data.view("i1")[:10].tolist() == [ diff --git a/tests/test_0043-iterate-function.py b/tests/test_0043-iterate-function.py index fe9ee60c3..5d1421e0d 100644 --- a/tests/test_0043-iterate-function.py +++ b/tests/test_0043-iterate-function.py @@ -159,14 +159,13 @@ def test_iterate_report_2(): def test_function_iterate(): - files = ( - skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( - "6.20.04", "*" - ) - + ":sample" + files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( + "6.20.04", "*" ) expect = 0 - for arrays, report in uproot4.iterate(files, "i8", report=True, library="np"): + for arrays, report in uproot4.iterate( + {files: "sample"}, "i8", report=True, library="np" + ): assert arrays["i8"][:5].tolist() == [-15, -14, -13, -12, -11] assert report.global_entry_start == expect assert report.global_entry_stop == expect + len(arrays["i8"]) @@ -175,14 +174,13 @@ def test_function_iterate(): def test_function_iterate_pandas(): pandas = pytest.importorskip("pandas") - files = ( - skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( - "6.20.04", "*" - ) - + ":sample" + files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( + "6.20.04", "*" ) expect = 0 - for arrays, report in uproot4.iterate(files, "i8", report=True, library="pd"): + for arrays, report in uproot4.iterate( + {files: "sample"}, "i8", report=True, library="pd" + ): assert arrays["i8"].values[:5].tolist() == [-15, -14, -13, -12, -11] assert arrays.index.values[0] == expect assert report.global_entry_start == expect @@ -192,13 +190,12 @@ def test_function_iterate_pandas(): def test_function_iterate_pandas_2(): pandas = pytest.importorskip("pandas") - files = ( - skhep_testdata.data_path("uproot-HZZ.root").replace( - "HZZ", "HZZ-{uncompressed,zlib,lz4}" - ) - + ":events" + files = skhep_testdata.data_path("uproot-HZZ.root").replace( + "HZZ", "HZZ-{uncompressed,zlib,lz4}" ) expect = 0 - for arrays, report in uproot4.iterate(files, "Muon_Px", report=True, library="pd"): + for arrays, report in uproot4.iterate( + {files: "events"}, "Muon_Px", report=True, library="pd" + ): assert arrays["Muon_Px"].index.values[0] == (expect, 0) expect += report.tree.num_entries diff --git a/tests/test_0044-concatenate-function.py b/tests/test_0044-concatenate-function.py index b9f6813da..76d71a175 100644 --- a/tests/test_0044-concatenate-function.py +++ b/tests/test_0044-concatenate-function.py @@ -10,26 +10,20 @@ def test_concatenate_numpy(): - files = ( - 
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( - "6.20.04", "*" - ) - + ":sample" + files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( + "6.20.04", "*" ) - arrays = uproot4.concatenate(files, ["i8", "f8"], library="np") + arrays = uproot4.concatenate({files: "sample"}, ["i8", "f8"], library="np") assert len(arrays["i8"]) == 420 assert len(arrays["f8"]) == 420 def test_concatenate_awkward(): awkward1 = pytest.importorskip("awkward1") - files = ( - skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( - "6.20.04", "*" - ) - + ":sample" + files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( + "6.20.04", "*" ) - arrays = uproot4.concatenate(files, ["i8", "f8"], library="ak") + arrays = uproot4.concatenate({files: "sample"}, ["i8", "f8"], library="ak") assert isinstance(arrays, awkward1.Array) assert set(awkward1.keys(arrays)) == set(["i8", "f8"]) assert len(arrays) == 420 @@ -37,13 +31,10 @@ def test_concatenate_awkward(): def test_concatenate_pandas(): pandas = pytest.importorskip("pandas") - files = ( - skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( - "6.20.04", "*" - ) - + ":sample" + files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( + "6.20.04", "*" ) - arrays = uproot4.concatenate(files, ["i8", "f8"], library="pd") + arrays = uproot4.concatenate({files: "sample"}, ["i8", "f8"], library="pd") assert isinstance(arrays, pandas.DataFrame) assert set(arrays.columns.tolist()) == set(["i8", "f8"]) assert len(arrays) == 420 diff --git a/tests/test_0045-lazy-arrays-1.py b/tests/test_0045-lazy-arrays-1.py index 5423979e8..ce22102b4 100644 --- a/tests/test_0045-lazy-arrays-1.py +++ b/tests/test_0045-lazy-arrays-1.py @@ -71,7 +71,7 @@ def test_branch_pluralization(): assert False for i, arrays in enumerate( - uproot4.iterate(skhep_testdata.data_path("uproot-Zmumu.root") + ":events/px1") + uproot4.iterate({skhep_testdata.data_path("uproot-Zmumu.root"): "events/px1"}) ): if i == 0: assert arrays["px1"][:5].tolist() == [ @@ -103,14 +103,11 @@ def test_branch_pluralization(): def test_awkward(): awkward1 = pytest.importorskip("awkward1") - files = ( - skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( - "6.20.04", "*" - ) - + ":sample" + files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( + "6.20.04", "*" ) cache = {} - array = uproot4.lazy(files, array_cache=cache) + array = uproot4.lazy({files: "sample"}, array_cache=cache) assert len(cache) == 0 assert awkward1.to_list(array[:5, "i4"]) == [-15, -14, -13, -12, -11] @@ -146,10 +143,8 @@ def test_awkward(): def test_awkward_pluralization(): awkward1 = pytest.importorskip("awkward1") - files = ( - skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( - "6.20.04", "*" - ) - + ":sample/i4" + files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace( + "6.20.04", "*" ) - assert awkward1.to_list(uproot4.lazy(files)[:5, "i4"]) == [-15, -14, -13, -12, -11] + array = uproot4.lazy({files: "sample"}) + assert awkward1.to_list(array[:5, "i4"]) == [-15, -14, -13, -12, -11] diff --git a/tests/test_0066-fix-http-fallback-freeze.py b/tests/test_0066-fix-http-fallback-freeze.py index ff8a30e48..254de5339 100644 --- a/tests/test_0066-fix-http-fallback-freeze.py +++ b/tests/test_0066-fix-http-fallback-freeze.py @@ -10,6 +10,8 @@ @pytest.mark.network def test(): - with 
uproot4.open("http://scikit-hep.org/uproot/examples/HZZ.root:events") as t: + with uproot4.open( + {"http://scikit-hep.org/uproot/examples/HZZ.root": "events"} + ) as t: t["MET_px"].array() t["MET_py"].array() diff --git a/tests/test_pr0067-common-entry-offsets.py b/tests/test_0067-common-entry-offsets.py similarity index 100% rename from tests/test_pr0067-common-entry-offsets.py rename to tests/test_0067-common-entry-offsets.py diff --git a/tests/test_0081-dont-parse-colons.py b/tests/test_0081-dont-parse-colons.py new file mode 100644 index 000000000..2089ac339 --- /dev/null +++ b/tests/test_0081-dont-parse-colons.py @@ -0,0 +1,140 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE + +from __future__ import absolute_import + +import pytest +import skhep_testdata + +import uproot4 + + +def test_open(): + assert isinstance( + uproot4.open(skhep_testdata.data_path("uproot-issue63.root")), + uproot4.reading.ReadOnlyDirectory, + ) + assert isinstance( + uproot4.open( + {skhep_testdata.data_path("uproot-issue63.root"): "WtLoop_nominal"} + ), + uproot4.behaviors.TTree.TTree, + ) + + with pytest.raises(ValueError): + uproot4.open([skhep_testdata.data_path("uproot-issue63.root")]) + + +def test_lazy(): + with pytest.raises(ValueError): + uproot4.lazy(skhep_testdata.data_path("uproot-issue63.root")) + + with pytest.raises(ValueError): + uproot4.lazy( + {skhep_testdata.data_path("uproot-issue63.root"): "blah"}, + allow_missing=True, + ) + + uproot4.lazy({skhep_testdata.data_path("uproot-issue63.root"): "WtLoop_nominal"}) + uproot4.lazy( + { + skhep_testdata.data_path("uproot-issue63.root"): "WtLoop_nominal", + skhep_testdata.data_path("uproot-issue63.root"): "WtLoop_Fake_nominal", + } + ) + + uproot4.lazy([{skhep_testdata.data_path("uproot-issue63.root"): "WtLoop_nominal"}]) + uproot4.lazy( + {skhep_testdata.data_path("uproot-issue63.root") + "*": "WtLoop_nominal"} + ) + uproot4.lazy( + [{skhep_testdata.data_path("uproot-issue63.root") + "*": "WtLoop_nominal"}] + ) + + +def test_concatenate(): + with pytest.raises(ValueError): + uproot4.concatenate(skhep_testdata.data_path("uproot-issue63.root")) + + assert ( + len( + uproot4.concatenate( + {skhep_testdata.data_path("uproot-issue63.root"): "blah"}, + allow_missing=True, + ) + ) + == 0 + ) + + files = skhep_testdata.data_path("uproot-sample-6.16.00-uncompressed.root").replace( + "6.16.00", "*" + ) + + uproot4.concatenate(files, "Ai8") + uproot4.concatenate({files: "sample"}, "Ai8") + uproot4.concatenate([files], "Ai8") + uproot4.concatenate([{files: "sample"}], "Ai8") + + +def test_iterate(): + with pytest.raises(ValueError): + for arrays in uproot4.iterate(skhep_testdata.data_path("uproot-issue63.root")): + pass + + assert ( + len( + list( + uproot4.iterate( + {skhep_testdata.data_path("uproot-issue63.root"): "blah"}, + allow_missing=True, + ) + ) + ) + == 0 + ) + + files = skhep_testdata.data_path("uproot-sample-6.16.00-uncompressed.root").replace( + "6.16.00", "*" + ) + + for arrays in uproot4.iterate(files, "Ai8"): + pass + for arrays in uproot4.iterate({files: "sample"}, "Ai8"): + pass + for arrays in uproot4.iterate([files], "Ai8"): + pass + for arrays in uproot4.iterate([{files: "sample"}], "Ai8"): + pass + + +pathlib = pytest.importorskip("pathlib") + + +def test_open_colon(): + assert isinstance( + uproot4.open( + skhep_testdata.data_path("uproot-issue63.root") + ":WtLoop_nominal" + ), + uproot4.behaviors.TTree.TTree, + ) + + with pytest.raises(FileNotFoundError): + uproot4.open( + pathlib.Path( + 
skhep_testdata.data_path("uproot-issue63.root") + ":WtLoop_nominal" + ) + ) + + with pytest.raises(FileNotFoundError): + uproot4.open( + {skhep_testdata.data_path("uproot-issue63.root") + ":WtLoop_nominal": None} + ) + + +def test_lazy_colon(): + uproot4.lazy(skhep_testdata.data_path("uproot-issue63.root") + ":WtLoop_nominal") + uproot4.lazy( + [ + skhep_testdata.data_path("uproot-issue63.root") + ":WtLoop_nominal", + skhep_testdata.data_path("uproot-issue63.root") + ":WtLoop_Fake_nominal", + ] + ) diff --git a/uproot4/_util.py b/uproot4/_util.py index 3bcca8995..b76d1b9b2 100644 --- a/uproot4/_util.py +++ b/uproot4/_util.py @@ -226,6 +226,52 @@ def file_path_to_source_class(file_path, options): raise ValueError("URI scheme not recognized: {0}".format(file_path)) +if isinstance(__builtins__, dict): + if "FileNotFoundError" in __builtins__: + _FileNotFoundError = __builtins__["FileNotFoundError"] + else: + _FileNotFoundError = __builtins__["IOError"] +else: + if hasattr(__builtins__, "FileNotFoundError"): + _FileNotFoundError = __builtins__.FileNotFoundError + else: + _FileNotFoundError = __builtins__.IOError + + +def _file_not_found(files, message=None): + if message is None: + message = "" + else: + message = " (" + message + ")" + + return _FileNotFoundError( + """file not found{0} + + {1} + +Files may be specified as: + * str/bytes: relative or absolute filesystem path or URL, without any colons + other than Windows drive letter or URL schema. + Examples: "rel/file.root", "C:\\abs\\file.root", "http://where/what.root" + * str/bytes: same with an object-within-ROOT path, separated by a colon. + Example: "rel/file.root:tdirectory/ttree" + * pathlib.Path: always interpreted as a filesystem path or URL only (no + object-within-ROOT path), regardless of whether there are any colons. + Examples: Path("rel:/file.root"), Path("/abs/path:stuff.root") + +Functions that accept many files (uproot4.iterate, etc.) also allow: + * glob syntax in str/bytes and pathlib.Path. + Examples: Path("rel/*.root"), "/abs/*.root:tdirectory/ttree" + * dict: keys are filesystem paths, values are objects-within-ROOT paths. + Example: {{"/data_v1/*.root": "ttree_v1", "/data_v2/*.root": "ttree_v2"}} + * already-open TTree objects. + * iterables of the above. 
+""".format( + message, repr(files) + ) + ) + + def memory_size(data, error_message=None): """ Regularizes strings like '## kB' and plain integer number of bytes to diff --git a/uproot4/behaviors/TBranch.py b/uproot4/behaviors/TBranch.py index 6cfc0173d..22cf62445 100644 --- a/uproot4/behaviors/TBranch.py +++ b/uproot4/behaviors/TBranch.py @@ -1635,17 +1635,23 @@ def array( _regularize_files_braces = re.compile(r"{([^}]*,)*([^}]*)}") -def _regularize_files(files): - files = uproot4._util.regularize_path(files) +def _regularize_files_inner(files, parse_colon): + files2 = uproot4._util.regularize_path(files) + + if uproot4._util.isstr(files2) and not uproot4._util.isstr(files): + parse_colon = False + files = files2 if uproot4._util.isstr(files): - file_path, object_path = uproot4._util.file_object_path_split(files) + if parse_colon: + file_path, object_path = uproot4._util.file_object_path_split(files) + else: + file_path, object_path = files, None + parsed_url = urlparse(file_path) - count = 0 if parsed_url.scheme.upper() in uproot4._util._remote_schemes: yield file_path, object_path - count += 1 else: expanded = os.path.expanduser(file_path) @@ -1668,47 +1674,93 @@ def _regularize_files(files): if match not in seen: yield match, object_path seen.add(match) - count += 1 - - if count == 0: - if hasattr(__builtins__, "FileNotFoundError"): - errclass = __builtins__.FileNotFoundError - else: - errclass = __builtins__.IOError - raise errclass("{0} did not match any files".format(repr(file_path))) elif isinstance(files, HasBranches): yield files, None + elif isinstance(files, dict): + for key, object_path in files.items(): + for file_path, _ in _regularize_files_inner(key, False): + yield file_path, object_path + elif isinstance(files, Iterable): - count = 0 - seen = set() for file in files: - for file_path, object_path in _regularize_files(file): - if uproot4._util.isstr(file_path): - if file_path not in seen: - yield file_path, object_path - seen.add(file_path) - else: - yield file_path, object_path - count += 1 - - if count == 0: - if hasattr(__builtins__, "FileNotFoundError"): - errclass = __builtins__.FileNotFoundError - else: - errclass = __builtins__.IOError - raise errclass("at least one file path or URL must be provided") + for file_path, object_path in _regularize_files_inner(file, parse_colon): + yield file_path, object_path else: raise TypeError( - "'files' must be a file path/URL (string or Path) with a TTree/TBranch " - "object path (separated by a colon ':'), possibly with glob " - "patterns (for local files), TTree/TBranch objects, or an iterable " - "of such things, not {0}".format(repr(files)) + "'files' must be a file path/URL (string or Path), possibly with " + "a glob pattern (for local files), a dict of " + "{{path/URL: TTree/TBranch name}}, actual TTree/TBranch objects, or " + "an iterable of such things, not {0}".format(repr(files)) ) +def _regularize_files(files): + out = [] + seen = set() + for file_path, object_path in _regularize_files_inner(files, True): + if uproot4._util.isstr(file_path): + if (file_path, object_path) not in seen: + out.append((file_path, object_path)) + seen.add((file_path, object_path)) + else: + out.append((file_path, object_path)) + + if len(out) == 0: + uproot4._util._file_not_found(files) + + return out + + +def _regularize_object_path( + file_path, object_path, custom_classes, allow_missing, options +): + if isinstance(file_path, HasBranches): + return _NoClose(file_path) + + else: + file = uproot4.reading.ReadOnlyFile( + file_path, + 
object_cache=None, + array_cache=None, + custom_classes=custom_classes, + **options, + ).root_directory + if object_path is None: + trees = [k for k, v in file.classnames().items() if v == "TTree"] + if len(trees) == 0: + if allow_missing: + return None + else: + raise ValueError( + """no TTrees found +in file {0}""".format( + file_path + ) + ) + elif len(trees) == 1: + return file[trees[0]] + else: + raise ValueError( + """TTree object paths must be specified in the 'files' """ + """as {{\"filenames*.root\": \"path\"}} if any files have """ + """more than one TTree + + TTrees: {0} + +in file {1}""".format( + ", ".join(repr(x) for x in trees), file_path + ) + ) + + else: + if allow_missing and object_path not in file: + return None + return file[object_path] + + class _NoClose(object): def __init__(self, hasbranches): self.hasbranches = hasbranches @@ -1736,18 +1788,10 @@ def iterate( how=None, report=False, custom_classes=None, + allow_missing=False, **options ): - files = list(_regularize_files(files)) - if any( - uproot4._util.isstr(file_path) and object_path is None - for file_path, object_path in files - ): - raise TypeError( - "'files' must include a TTree/TBranch object path (separated by a " - "colon ':') to each glob pattern (if multiple are given)" - ) - + files = _regularize_files(files) decompression_executor, interpretation_executor = _regularize_executors( decompression_executor, interpretation_executor ) @@ -1755,54 +1799,47 @@ def iterate( global_start = 0 for file_path, object_path in files: - if object_path is None: - hasbranches = _NoClose(file_path) - else: - file = uproot4.reading.ReadOnlyFile( - file_path, - object_cache=None, - array_cache=None, - custom_classes=custom_classes, - **options - ) - try: - hasbranches = file.root_directory[object_path] - except KeyError: - continue + hasbranches = _regularize_object_path( + file_path, object_path, custom_classes, allow_missing, options + ) - with hasbranches: - for item in hasbranches.iterate( - expressions=expressions, - cut=cut, - filter_name=filter_name, - filter_typename=filter_typename, - filter_branch=filter_branch, - aliases=aliases, - compute=compute, - step_size=step_size, - decompression_executor=decompression_executor, - interpretation_executor=interpretation_executor, - library=library, - how=how, - report=report, - ): - if report: - arrays, local_report = item - global_entry_start = local_report.tree_entry_start - global_entry_stop = local_report.tree_entry_stop - global_entry_start += global_start - global_entry_stop += global_start - global_report = type(local_report)( - *((global_entry_start, global_entry_stop) + local_report[2:]) - ) - arrays = library.global_index(arrays, global_start) - yield arrays, global_report + if hasbranches is not None: + with hasbranches: + for item in hasbranches.iterate( + expressions=expressions, + cut=cut, + filter_name=filter_name, + filter_typename=filter_typename, + filter_branch=filter_branch, + aliases=aliases, + compute=compute, + step_size=step_size, + decompression_executor=decompression_executor, + interpretation_executor=interpretation_executor, + library=library, + how=how, + report=report, + ): + if report: + arrays, local_report = item + global_entry_start = local_report.tree_entry_start + global_entry_stop = local_report.tree_entry_stop + global_entry_start += global_start + global_entry_stop += global_start + global_report = type(local_report)( + *( + (global_entry_start, global_entry_stop) + + local_report[2:] + ) + ) + arrays = library.global_index(arrays, 
global_start) + yield arrays, global_report - else: - arrays = library.global_index(item, global_start) - yield arrays + else: + arrays = library.global_index(item, global_start) + yield arrays - global_start += hasbranches.num_entries + global_start += hasbranches.num_entries def concatenate( @@ -1821,18 +1858,10 @@ def concatenate( how=None, report=False, custom_classes=None, + allow_missing=False, **options ): - files = list(_regularize_files(files)) - if any( - uproot4._util.isstr(file_path) and object_path is None - for file_path, object_path in files - ): - raise TypeError( - "'files' must include a TTree/TBranch object path (separated by a " - "colon ':') to each glob pattern (if multiple are given)" - ) - + files = _regularize_files(files) decompression_executor, interpretation_executor = _regularize_executors( decompression_executor, interpretation_executor ) @@ -1841,39 +1870,30 @@ def concatenate( all_arrays = [] global_start = 0 for file_path, object_path in files: - if object_path is None: - hasbranches = _NoClose(file_path) - else: - file = uproot4.reading.ReadOnlyFile( - file_path, - object_cache=None, - array_cache=None, - custom_classes=custom_classes, - **options - ) - try: - hasbranches = file.root_directory[object_path] - except KeyError: - continue + hasbranches = _regularize_object_path( + file_path, object_path, custom_classes, allow_missing, options + ) - with hasbranches: - arrays = hasbranches.arrays( - expressions=expressions, - cut=cut, - filter_name=filter_name, - filter_typename=filter_typename, - filter_branch=filter_branch, - aliases=aliases, - compute=compute, - decompression_executor=decompression_executor, - interpretation_executor=interpretation_executor, - array_cache=array_cache, - library=library, - how=how, - ) - arrays = library.global_index(arrays, global_start) - all_arrays.append(arrays) - global_start += hasbranches.num_entries + if hasbranches is not None: + with hasbranches: + arrays = hasbranches.arrays( + expressions=expressions, + cut=cut, + filter_name=filter_name, + filter_typename=filter_typename, + filter_branch=filter_branch, + aliases=aliases, + compute=compute, + decompression_executor=decompression_executor, + interpretation_executor=interpretation_executor, + array_cache=array_cache, + library=library, + how=how, + ) + arrays = library.global_index(arrays, global_start) + all_arrays.append(arrays) + + global_start += hasbranches.num_entries return library.concatenate(all_arrays) @@ -1892,18 +1912,10 @@ def lazy( library="ak", report=False, custom_classes=None, + allow_missing=False, **options ): - files = list(_regularize_files(files)) - if any( - uproot4._util.isstr(file_path) and object_path is None - for file_path, object_path in files - ): - raise TypeError( - "'files' must include a TTree/TBranch object path (separated by a " - "colon ':') to each glob pattern (if multiple are given)" - ) - + files = _regularize_files(files) decompression_executor, interpretation_executor = _regularize_executors( decompression_executor, interpretation_executor ) @@ -1926,54 +1938,68 @@ def lazy( common_keys = None is_self = [] + count = 0 for file_path, object_path in files: - if object_path is None: - obj = file_path - else: - obj = uproot4.reading.open( - file_path, - object_cache=None, - array_cache=None, - custom_classes=custom_classes, - **real_options - )[object_path] - - if isinstance(obj, TBranch) and len(obj.keys(recursive=True)) == 0: - original = obj - obj = obj.parent - is_self.append(True) - - def real_filter_branch(branch): - 
return branch is original and filter_branch(branch) + obj = _regularize_object_path( + file_path, object_path, custom_classes, allow_missing, real_options + ) - else: - is_self.append(False) - real_filter_branch = filter_branch + if obj is not None: + count += 1 - hasbranches.append(obj) + if isinstance(obj, TBranch) and len(obj.keys(recursive=True)) == 0: + original = obj + obj = obj.parent + is_self.append(True) - new_keys = obj.keys( - recursive=recursive, - filter_name=filter_name, - filter_typename=filter_typename, - filter_branch=real_filter_branch, - full_paths=full_paths, - ) + def real_filter_branch(branch): + return branch is original and filter_branch(branch) - if common_keys is None: - common_keys = new_keys - else: - new_keys = set(new_keys) - common_keys = [key for key in common_keys if key in new_keys] + else: + is_self.append(False) + real_filter_branch = filter_branch + + hasbranches.append(obj) + + new_keys = obj.keys( + recursive=recursive, + filter_name=filter_name, + filter_typename=filter_typename, + filter_branch=real_filter_branch, + full_paths=full_paths, + ) + + if common_keys is None: + common_keys = new_keys + else: + new_keys = set(new_keys) + common_keys = [key for key in common_keys if key in new_keys] + + if count == 0: + raise ValueError( + "allow_missing=True and no TTrees found in\n\n {0}".format( + "\n ".join( + "{" + + "{0}: {1}".format( + repr(f.file_path if isinstance(f, HasBranches) else f), + repr(f.object_path if isinstance(f, HasBranches) else o), + ) + + "}" + for f, o in files + ) + ) + ) if len(common_keys) == 0 or not (all(is_self) or not any(is_self)): raise ValueError( "TTrees in\n\n {0}\n\nhave no TBranches in common".format( "\n ".join( - "{0}:{1}".format( - f.file_path if o is None else f, - f.object_path if o is None else o, + "{" + + "{0}: {1}".format( + repr(f.file_path if isinstance(f, HasBranches) else f), + repr(f.object_path if isinstance(f, HasBranches) else o), ) + + "}" for f, o in files ) ) diff --git a/uproot4/reading.py b/uproot4/reading.py index 6f13c9ac6..11355aa41 100644 --- a/uproot4/reading.py +++ b/uproot4/reading.py @@ -41,9 +41,13 @@ def open( ): """ Args: - path (str or Path): Path or URL to open, which may include a colon - separating a file path from an object-within-ROOT path, like - `"root://server/path/to/file.root : internal_directory/my_ttree"`. + path (str or Path): The filesystem path or remote URL of the file to open. + If a string, it may be followed by a colon (`:`) and an object path + within the ROOT file, to return an object, rather than a file. + Path objects are interpreted strictly as filesystem paths or URLs. + Examples: "rel/file.root", "C:\abs\file.root", "http://where/what.root", + "rel/file.root:tdirectory/ttree", + Path("rel:/file.root"), Path("/abs/path:stuff.root") parse_object (bool): If False, interpret the `path` purely as a file path (no colon-delimited object path). 
object_cache (None, MutableMapping, or int): Cache of objects drawn @@ -73,7 +77,23 @@ def open( * minimal_ttree_metadata (bool; True) """ - file_path, object_path = uproot4._util.file_object_path_split(path) + if isinstance(path, dict) and len(path) == 1: + ((file_path, object_path),) = path.items() + + elif uproot4._util.isstr(path): + file_path, object_path = uproot4._util.file_object_path_split(path) + + else: + file_path = path + object_path = None + + file_path = uproot4._util.regularize_path(file_path) + + if not uproot4._util.isstr(file_path): + raise ValueError( + "'path' must be a string, Path, or a length-1 dict of " + "{{file_path: object_path}}, not {0}".format(repr(path)) + ) file = ReadOnlyFile( file_path, @@ -91,7 +111,7 @@ def open( open.defaults = { "file_handler": uproot4.source.file.MemmapSource, - # "xrootd_handler": uproot4.source.xrootd.XRootDSource, + "xrootd_handler": uproot4.source.xrootd.XRootDSource, "http_handler": uproot4.source.http.HTTPSource, "timeout": 30, "max_num_elements": None, diff --git a/uproot4/source/file.py b/uproot4/source/file.py index 293c912bb..09851b944 100644 --- a/uproot4/source/file.py +++ b/uproot4/source/file.py @@ -14,7 +14,10 @@ class FileResource(uproot4.source.chunk.Resource): def __init__(self, file_path): self._file_path = file_path - self._file = open(self._file_path, "rb") + try: + self._file = open(self._file_path, "rb") + except uproot4._util._FileNotFoundError: + raise uproot4._util._file_not_found(file_path) @property def file(self): @@ -52,11 +55,10 @@ def __init__(self, file_path, **options): self._num_requested_bytes = 0 self._file_path = file_path - self._num_bytes = os.path.getsize(self._file_path) - self._executor = uproot4.source.futures.ResourceThreadPoolExecutor( [FileResource(file_path) for x in range(num_workers)] ) + self._num_bytes = os.path.getsize(self._file_path) class MemmapSource(uproot4.source.chunk.Source): diff --git a/uproot4/source/http.py b/uproot4/source/http.py index afa545153..a5037ede1 100644 --- a/uproot4/source/http.py +++ b/uproot4/source/http.py @@ -57,6 +57,10 @@ def get_num_bytes(file_path, parsed_url, timeout): connection.request("HEAD", parsed_url.path) response = connection.getresponse() + if response.status == 404: + connection.close() + raise uproot4._util._file_not_found(file_path, "HTTP(S) returned 404") + if response.status != 200: connection.close() raise OSError( @@ -116,7 +120,13 @@ def task(resource): def get(self, connection, start, stop): response = connection.getresponse() + + if response.status == 404: + connection.close() + raise uproot4._util._file_not_found(self.file_path, "HTTP(S) returned 404") + if response.status != 206: + connection.close() raise OSError( """remote server does not support HTTP range requests for URL {0}""".format( diff --git a/uproot4/source/xrootd.py b/uproot4/source/xrootd.py index b9a0b65fc..908a1a8e9 100644 --- a/uproot4/source/xrootd.py +++ b/uproot4/source/xrootd.py @@ -66,7 +66,7 @@ def __init__(self, file_path, timeout): status, dummy = self._file.open(self._file_path, timeout=self._xrd_timeout()) if status.error: - self._xrd_error(status.message) + self._xrd_error(status) def _xrd_timeout(self): if self._timeout is None: @@ -74,14 +74,21 @@ def _xrd_timeout(self): else: return int(self._timeout) - def _xrd_error(self, message): + def _xrd_error(self, status): self._file.close(timeout=self._xrd_timeout()) - raise OSError( - """XRootD error: {0} + + # 
https://github.com/xrootd/xrootd/blob/8e91462e76ab969720b40fc324714b84e0b4bd42/src/XrdCl/XrdClStatus.hh#L47-L103 + # https://github.com/xrootd/xrootd/blob/250eced4d3787c2ac5be2c8c922134153bbf7f08/src/XrdCl/XrdClStatus.cc#L34-L74 + if status.code == 101 or status.code == 304 or status.code == 400: + raise uproot4._util._file_not_found(self._file_path, status.message) + + else: + raise OSError( + """XRootD error: {0} in file {1}""".format( - message, self._file_path + status.message, self._file_path + ) ) - ) @property def timeout(self): @@ -95,7 +102,7 @@ def file(self): def num_bytes(self): status, info = self._file.stat(self._xrd_timeout()) if status.error: - self._xrd_error(status.message) + self._xrd_error(status) return info.size def __enter__(self): @@ -107,7 +114,7 @@ def __exit__(self, exception_type, exception_value, traceback): def get(self, start, stop): status, data = self._file.read(start, stop - start, timeout=self._xrd_timeout()) if status.error: - self._xrd_error(status.message) + self._xrd_error(status) return data @property @@ -268,6 +275,6 @@ def chunks(self, ranges, notifications): chunks=request_ranges, callback=callback ) if status.error: - self._resource._xrd_error(status.message) + self._resource._xrd_error(status) return chunks
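
Usage sketch (not part of the patch): the calls below are lifted from the updated tests and assume skhep_testdata and its sample files are available. They illustrate the file-specification forms this change supports: a bare path or a length-1 dict of {file_path: object_path} for uproot4.open, and dicts whose keys may be glob patterns for the multi-file functions.

    import skhep_testdata
    import uproot4

    # A bare path (str or pathlib.Path) opens the file and returns a directory.
    directory = uproot4.open(skhep_testdata.data_path("uproot-issue63.root"))

    # A str may still carry a colon-separated object path, but pathlib.Path objects
    # are never parsed for colons; the dict form makes the split explicit.
    tree = uproot4.open(
        {skhep_testdata.data_path("uproot-issue63.root"): "WtLoop_nominal"}
    )

    # Multi-file functions take the same dict form, with glob patterns as keys.
    files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
        "6.20.04", "*"
    )
    arrays = uproot4.concatenate({files: "sample"}, ["i8", "f8"], library="np")
    assert len(arrays["i8"]) == 420

    for chunk, report in uproot4.iterate(
        {files: "sample"}, "i8", report=True, library="np"
    ):
        print(report.global_entry_start, report.global_entry_stop)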
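
Also a sketch, not part of the patch: the error-path behavior exercised by tests/test_0081-dont-parse-colons.py, assuming Python 3 (on Python 2 the missing-file case falls back to IOError via uproot4._util._FileNotFoundError) and awkward1 installed for the default library.

    import skhep_testdata
    import uproot4

    path = skhep_testdata.data_path("uproot-issue63.root")

    # allow_missing=True turns a missing TTree name into a skip, so concatenate
    # over a nonexistent name returns an empty result instead of raising.
    assert len(uproot4.concatenate({path: "blah"}, allow_missing=True)) == 0

    # lazy still raises if, after skipping, no TTrees are left to build an array from.
    try:
        uproot4.lazy({path: "blah"}, allow_missing=True)
    except ValueError:
        pass

    # Dict keys are taken literally as filesystem paths, so a colon-suffixed name
    # points at a file that does not exist and raises FileNotFoundError (see the
    # _file_not_found helper added in uproot4/_util.py).
    try:
        uproot4.open({path + ":WtLoop_nominal": None})
    except FileNotFoundError:
        pass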