From d6956b64f31e42a2bcbed8f24be59245aaab9372 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 4 Jun 2020 13:25:02 -0500 Subject: [PATCH] Fixing bugs for HATS. (#19) * Add Python id to ReadOnlyObjects and fix fixed-width array reading. * Ensure that Pandas arrays are native-endian. * Try three platforms (just saw @henryiii show it in a meeting). * Needs 'runs-on'. * Put 'goanpeca/setup-miniconda@v1' back in. * That's a separate step. * Drop the 'evals'. * Need 'conda init bash' again (basically putting it back the way it was). * Use it in every step. * No, the old version had a 'shell' parameter, rather than doing 'conda init' in every step. * No XRootD for Windows. * Trim the number of jobs. I'm leaving fast-fast on because I like it. * Fix trimming syntax. * How is there a segfault on Windows? This is all pure Python. * Undo break apart test. I want to see that segfault again. * Break it apart again. I want to see that segfault\! * Run more cases for the test that segfaulted in Windows (memmap, normal file). Accessing a closed memmap is one of the few ways Python can segfault. * Can't reproduce the segfault, so going back to normal mode of operations. --- .github/workflows/build-test.yml | 43 +++++++++++++------ ...st_0017-multi-basket-multi-branch-fetch.py | 35 ++++++++++++++- uproot4/interpretation/library.py | 6 ++- uproot4/interpretation/numerical.py | 2 +- uproot4/reading.py | 12 ++++-- uproot4/source/futures.py | 4 +- 6 files changed, 81 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 9ce4ffdf2..4e039c9d4 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -2,31 +2,48 @@ name: "Test build" on: push: - branches: [ master ] + branches: [ "master" ] pull_request: - branches: [ master ] + branches: [ "master" ] jobs: build: - - runs-on: "ubuntu-latest" strategy: matrix: - python-version: [3.5, 3.6, 3.7, 3.8] + platform: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.5", "3.6", "3.7", "3.8"] + exclude: + - platform: "macos-latest" + python-version: "3.6" + - platform: "macos-latest" + python-version: "3.7" + - platform: "windows-latest" + python-version: "3.6" + - platform: "windows-latest" + python-version: "3.7" + # platform: ["windows-latest"] + # python-version: ["3.5", "3.6", "3.7", "3.8"] + + runs-on: "${{ matrix.platform }}" steps: - uses: "actions/checkout@v2" - - name: "environment for ${{ matrix.python-version }}" + - name: "get conda" + uses: "goanpeca/setup-miniconda@v1" + with: + auto-update-conda: true + + - name: "environment for Python ${{ matrix.python-version }}" run: | - eval "$(conda shell.bash hook)" conda create -n testing python=${{ matrix.python-version }} + conda init bash conda config --add channels conda-forge conda info - name: "install most dependencies" + shell: "bash -l {0}" run: | - eval "$(conda shell.bash hook)" conda activate testing conda env list conda install numpy pandas pytest flake8 @@ -34,32 +51,32 @@ jobs: conda list - name: "install Awkward" + shell: "bash -l {0}" run: | - eval "$(conda shell.bash hook)" conda activate testing conda env list pip install awkward1 conda list - name: "install XRootD" - if: "${{ matrix.python-version != 3.5 }}" + if: "${{ matrix.python-version != 3.5 && !contains(matrix.platform, 'windows') }}" + shell: "bash -l {0}" run: | - eval "$(conda shell.bash hook)" conda activate testing conda env list conda install xrootd conda list - name: "flake8" + shell: "bash -l {0}" run: | - eval "$(conda shell.bash hook)" conda activate testing conda env list python -m flake8 - name: "pytest" + shell: "bash -l {0}" run: | - eval "$(conda shell.bash hook)" conda activate testing conda env list python -m pytest -vv tests diff --git a/tests/test_0017-multi-basket-multi-branch-fetch.py b/tests/test_0017-multi-basket-multi-branch-fetch.py index c5e32cdd9..89d469f26 100644 --- a/tests/test_0017-multi-basket-multi-branch-fetch.py +++ b/tests/test_0017-multi-basket-multi-branch-fetch.py @@ -171,9 +171,13 @@ def test_ranges_or_baskets_to_arrays(): ] -def test_branch_array(): +@pytest.mark.parametrize( + "file_handler", [uproot4.source.file.FileSource, uproot4.source.memmap.MemmapSource] +) +def test_branch_array_1(file_handler): with uproot4.open( - skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root") + skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"), + file_handler=file_handler, )["sample/i4"] as branch: assert branch.array( uproot4.interpretation.numerical.AsDtype(">i4"), library="np" @@ -210,6 +214,15 @@ def test_branch_array(): 14, ] + +@pytest.mark.parametrize( + "file_handler", [uproot4.source.file.FileSource, uproot4.source.memmap.MemmapSource] +) +def test_branch_array_2(file_handler): + with uproot4.open( + skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"), + file_handler=file_handler + )["sample/i4"] as branch: assert branch.array( uproot4.interpretation.numerical.AsDtype(">i4"), entry_start=3, @@ -240,6 +253,15 @@ def test_branch_array(): 9, ] + +@pytest.mark.parametrize( + "file_handler", [uproot4.source.file.FileSource, uproot4.source.memmap.MemmapSource] +) +def test_branch_array_3(file_handler): + with uproot4.open( + skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"), + file_handler=file_handler + )["sample/i4"] as branch: assert branch.array( uproot4.interpretation.numerical.AsDtype(">i4"), entry_start=3, @@ -271,6 +293,15 @@ def test_branch_array(): 9, ] + +@pytest.mark.parametrize( + "file_handler", [uproot4.source.file.FileSource, uproot4.source.memmap.MemmapSource] +) +def test_branch_array_4(file_handler): + with uproot4.open( + skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"), + file_handler=file_handler + )["sample/i4"] as branch: with pytest.raises(ValueError): branch.array(uproot4.interpretation.numerical.AsDtype(">i8"), library="np") diff --git a/uproot4/interpretation/library.py b/uproot4/interpretation/library.py index a68eaab3b..97bdbdf60 100644 --- a/uproot4/interpretation/library.py +++ b/uproot4/interpretation/library.py @@ -223,7 +223,10 @@ def finalize(self, array, branch): index = pandas.MultiIndex.from_arrays( array.parents_localindex(), names=["entry", "subentry"] ) - return pandas.Series(array.content, index=index) + content = array.content.astype( + array.content.dtype.newbyteorder("="), copy=False + ) + return pandas.Series(content, index=index) elif isinstance(array, uproot4.interpretation.objects.ObjectArray): out = numpy.zeros(len(array), dtype=numpy.object) @@ -232,6 +235,7 @@ def finalize(self, array, branch): return pandas.Series(out) else: + array = array.astype(array.dtype.newbyteorder("="), copy=False) return pandas.Series(array) def group(self, arrays, original_jagged, how): diff --git a/uproot4/interpretation/numerical.py b/uproot4/interpretation/numerical.py index c2394529f..4b58a8c84 100644 --- a/uproot4/interpretation/numerical.py +++ b/uproot4/interpretation/numerical.py @@ -192,7 +192,7 @@ def basket_array(self, data, byte_offsets, basket, branch): dtype, shape = _dtype_shape(self._from_dtype) try: - output = data.view(self._from_dtype).reshape((-1,) + shape) + output = data.view(dtype).reshape((-1,) + shape) except ValueError: raise ValueError( """basket {0} in branch {1} has the wrong number of bytes ({2}) """ diff --git a/uproot4/reading.py b/uproot4/reading.py index 5c506f918..960a254a7 100644 --- a/uproot4/reading.py +++ b/uproot4/reading.py @@ -204,7 +204,9 @@ def __init__( ) def __repr__(self): - return "".format(repr(self._file_path)) + return "".format( + repr(self._file_path), id(self) + ) def hook_before_create_source(self, **kwargs): pass @@ -665,7 +667,9 @@ def __repr__(self): nameclass = "" else: nameclass = " {0}: {1}".format(self.name(cycle=True), self.classname()) - return "".format(nameclass, self.data_cursor.index) + return "".format( + nameclass, self.data_cursor.index, id(self) + ) def hook_before_read(self, **kwargs): pass @@ -927,7 +931,9 @@ def __init__(self, path, cursor, file, parent): ) def __repr__(self): - return "".format(repr("/" + "/".join(self._path))) + return "".format( + repr("/" + "/".join(self._path)), id(self) + ) def hook_before_read(self, **kwargs): pass diff --git a/uproot4/source/futures.py b/uproot4/source/futures.py index 0c8897a41..297c82b83 100644 --- a/uproot4/source/futures.py +++ b/uproot4/source/futures.py @@ -419,6 +419,7 @@ def __init__(self, resources): resources (iterable of Resource): Resources, such as file handles, to manage; spawns one Thread per Resource. """ + self._closed = False self._work_queue = queue.Queue() self._workers = [ThreadResourceWorker(x, self._work_queue) for x in resources] for thread in self._workers: @@ -461,7 +462,7 @@ def closed(self): True if the associated file/connection/thread pool is closed; False otherwise. """ - return any(thread.is_alive() for thread in self._workers) + return self._closed def _prepare(self, fn, *args, **kwargs): if len(args) != 0 or len(kwargs) != 0: @@ -497,6 +498,7 @@ def shutdown(self, wait=True): Puts None on the `work_queue` until all Threads get the message and shut down. """ + self._closed = True while any(thread.is_alive() for thread in self._workers): for x in range(len(self._workers)): self._work_queue.put(None)