Skip to content

Commit

Permalink
Multi example (#210)
Browse files Browse the repository at this point in the history
* added multimanager example and benchmark

* minor edits for multi mgr examples
  • Loading branch information
jreadey authored Jul 10, 2024
1 parent f060ed6 commit 673169a
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 118 deletions.
8 changes: 6 additions & 2 deletions examples/multi_mgr_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import h5pyd
import logging
import numpy as np
import random
import time
Expand Down Expand Up @@ -34,8 +35,8 @@ def benchmark_multimanager(h5file, num=10):
using the MultiManager.
"""
ds_names = list(h5file.keys())
datsets = [h5file[name] for name in ds_names]
mm = h5pyd.MultiManager(datsets)
datasets = [h5file[name] for name in ds_names]
mm = h5pyd.MultiManager(datasets)

# prepare queries to exclude from runtime
queries = []
Expand Down Expand Up @@ -99,6 +100,9 @@ def run_benchmark(f):
# main
#

loglevel = logging.WARNING
logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel)

# create domain if it does not exist already
with h5pyd.File(DOMAIN_PATH, "a") as f:
run_benchmark(f)
188 changes: 72 additions & 116 deletions examples/notebooks/multi_manager_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -28,7 +28,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -46,21 +46,33 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"data_in[0]: [0 1 2 3 4 5 6 7 8 9]\n",
"data_in[1]: [100 101 102 103 104 105 106 107 108 109]\n",
"data_in[2]: [200 201 202 203 204 205 206 207 208 209]\n",
"data_in[3]: [300 301 302 303 304 305 306 307 308 309]\n"
]
}
],
"source": [
"# initialize some data to write\n",
"data_in = []\n",
"for n in range(DSET_COUNT):\n",
" arr = np.zeros(DSET_SHAPE, dtype=DSET_DTYPE)\n",
" arr[...] = list(range(n*100, n*100+DSET_SHAPE[0]))\n",
" data_in.append(arr)\n"
" data_in.append(arr)\n",
" print(f\"data_in[{n}]: {arr}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -71,51 +83,31 @@
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# verify what get saved to the first dataset\n",
"dset = f[\"dset_0\"]\n",
"dset[...]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109], dtype=int32)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"dset_0: [0 1 2 3 4 5 6 7 8 9]\n",
"dset_1: [100 101 102 103 104 105 106 107 108 109]\n",
"dset_2: [200 201 202 203 204 205 206 207 208 209]\n",
"dset_3: [300 301 302 303 304 305 306 307 308 309]\n"
]
}
],
"source": [
"# and the second dataset\n",
"dset = f[\"dset_1\"]\n",
"dset[...]"
"# verify what gets saved to each dataset\n",
"for n in range(DSET_COUNT):\n",
" dset_name = f\"dset_{n}\"\n",
" dset = f[dset_name]\n",
" print(f\"{dset_name}: {dset[...]}\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand All @@ -124,7 +116,7 @@
"4"
]
},
"execution_count": 21,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -137,71 +129,52 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"data_out[0]: [0 1 2 3 4 5 6 7 8 9]\n",
"data_out[1]: [100 101 102 103 104 105 106 107 108 109]\n",
"data_out[2]: [200 201 202 203 204 205 206 207 208 209]\n",
"data_out[3]: [300 301 302 303 304 305 306 307 308 309]\n"
]
}
],
"source": [
"# get the first item from the returned list\n",
"data_out[0]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109], dtype=int32)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# and the second item\n",
"data_out[1]"
"# dump the data that was returned\n",
"for n in range(DSET_COUNT):\n",
" print(f\"data_out[{n}]: {data_out[n]}\")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 1, 2, 3], dtype=int32)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"data_out[0]: [0 1 2 3]\n",
"data_out[1]: [100 101 102 103]\n",
"data_out[2]: [200 201 202 203]\n",
"data_out[3]: [300 301 302 303]\n"
]
}
],
"source": [
"# rather than reading all the data for a dataset, you can read a given selection\n",
"data_out = mm[0:4]\n",
"data_out[0]"
"for n in range(DSET_COUNT):\n",
" print(f\"data_out[{n}]: {data_out[n]}\")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -217,42 +190,25 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 1], dtype=int32)"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_out[0]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([101, 102], dtype=int32)"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"data_out[0]: [0 1]\n",
"data_out[1]: [101 102]\n",
"data_out[2]: [202 203]\n",
"data_out[3]: [303 304]\n"
]
}
],
"source": [
"data_out[1]"
"# dump the data that was returned\n",
"for n in range(DSET_COUNT):\n",
" print(f\"data_out[{n}]: {data_out[n]}\")"

]
}
],
Expand Down

0 comments on commit 673169a

Please sign in to comment.