
support queries with alphanumeric field names
jreadey committed Jan 21, 2025
1 parent 43e1961 commit 009d765
Showing 12 changed files with 54 additions and 30 deletions.
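In short: query expressions on compound-type datasets can now reference field names that contain digits, e.g. tgt123. A minimal sketch of the effect, mirroring the new unit test in tests/unit/chunk_util_test.py (the import path is assumed from the repo layout; the expected output string comes from the new test case):

from hsds.util.chunkUtil import _getEvalStr

fields = ["date", "wind", "temp", "tgt123"]
# Before this commit the tokenizer treated the digits in "tgt123" as the
# start of a numeric literal, so the query raised ValueError; now it maps:
print(_getEvalStr("tgt123 == 456", "rows", fields))  # rows['tgt123'] == 456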
9 changes: 6 additions & 3 deletions hsds/attr_sn.py
@@ -433,11 +433,11 @@ def _getValueFromRequest(body, data_type, data_shape):

# check to see if this works with our shape and type
try:
-     arr = bytesToArray(data, arr_dtype, np_dims)
- except ValueError as e:
log.debug(f"data: {data}")
log.debug(f"type: {arr_dtype}")
log.debug(f"np_dims: {np_dims}")
+     arr = bytesToArray(data, arr_dtype, np_dims)
+ except ValueError as e:
msg = f"Bad Request: encoded input data doesn't match shape and type: {e}"
log.warn(msg)
raise HTTPBadRequest(reason=msg)
@@ -1405,7 +1405,10 @@ async def DELETE_Attributes(request):
# the query string
attr_names = attr_names_query_string.split(separator)
log.info(f"delete {len(attr_names)} attributes for {obj_id}")
log.debug(f"attr_names: {attr_names}")
if len(attr_names) < 10:
log.debug(f"attr_names: {attr_names}")
else:
log.debug(f"attr_names: deleting {len(attr_names)} attributes")

username, pswd = getUserPasswordFromRequest(request)
await validateUserPassword(app, username, pswd)
3 changes: 1 addition & 2 deletions hsds/chunk_crawl.py
@@ -393,7 +393,6 @@ async def read_chunk_hyperslab(
else:
# convert binary data to numpy array
try:
log.debug(f"np_arr.dtype: {np_arr.dtype}")
log.debug(f"chunk_shape: {chunk_shape}")
chunk_arr = bytesToArray(array_data, np_arr.dtype, chunk_shape)
except ValueError as ve:
@@ -408,7 +407,7 @@
raise HTTPInternalServerError()
chunk_arr = chunk_arr.reshape(chunk_shape)

log.debug(f"chunk_arr shape: {chunk_arr.shape}, dtype: {chunk_arr.dtype}")
log.debug(f"chunk_arr shape: {chunk_arr.shape}")
log.debug(f"data_sel: {data_sel}")
log.debug(f"np_arr shape: {np_arr.shape}")

1 change: 1 addition & 0 deletions hsds/chunk_dn.py
@@ -546,6 +546,7 @@ async def GET_Chunk(request):
msg = f"chunk {chunk_id} no results for query: {query}"
log.debug(msg)
raise HTTPNotFound()
log.debug(f"test - got output_arr: {output_arr}")
else:
# read selected data from chunk
output_arr = chunkReadSelection(chunk_arr, slices=selection, select_dt=select_dt)
12 changes: 8 additions & 4 deletions hsds/chunk_sn.py
@@ -263,7 +263,11 @@ def _getSelectDtype(params, dset_dtype, body=None):
msg = f"invalid fields selection: {te}"
log.warn(msg)
raise HTTPBadRequest(reason=msg)
log.debug(f"using select dtype: {select_dtype}")
if select_dtype:
if len(select_dtype) < 10:
log.debug(f"using select dtype: {select_dtype}")
else:
log.debug(f"using select_dtype with {len(select_dtype)} fields")
else:
select_dtype = dset_dtype # return all fields
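The guard above — dump the dtype in full only when it has fewer than 10 fields — recurs at many call sites in this commit. A hedged sketch of the pattern as a helper (the helper name is hypothetical; HSDS inlines the check at each site):

def dtype_summary(dt):
    # Compound dtypes with many fields produce huge log lines; summarize
    # them by field count instead. len() of a non-structured numpy dtype
    # is 0, so plain dtypes are always printed in full.
    if dt is None or len(dt) < 10:
        return str(dt)
    return f"<{len(dt)} fields>"

# usage sketch: log.debug(f"using select dtype: {dtype_summary(select_dtype)}")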

@@ -873,7 +877,7 @@ async def PUT_Value(request):
log.warn(msg)
raise HTTPBadRequest(reason=msg)

msg = f"PUT value - numpy array shape: {arr.shape} dtype: {arr.dtype}"
msg = f"PUT value - numpy array shape: {arr.shape}"
log.debug(msg)

elif request_type == "json":
@@ -1012,7 +1016,8 @@ async def GET_Value(request):
select_dtype = _getSelectDtype(params, dset_dtype)

log.debug(f"GET Value selection: {slices}")
log.debug(f"dset_dtype: {dset_dtype}, select_dtype: {select_dtype}")
if len(dset_dtype) < 10:
log.debug(f"dset_dtype: {dset_dtype}, select_dtype: {select_dtype}")

limit = _getLimit(params)

@@ -1317,7 +1322,6 @@ async def POST_Value(request):

slices = _getSelect(params, dset_json, body=body)
select_dtype = _getSelectDtype(params, dset_dtype, body=body)
log.debug(f"got select_dtype: {select_dtype}")

if request_type == "json":
if "points" in body:
1 change: 0 additions & 1 deletion hsds/datanode_lib.py
@@ -1022,7 +1022,6 @@ async def get_chunk(
filters = getFilters(dset_json)
dset_id = dset_json["id"]
filter_ops = getFilterOps(app, dset_id, filters, dtype=dt, chunk_shape=chunk_dims)
log.debug(f"filter_ops: {filter_ops}")

if s3path:
if s3path.startswith("s3://"):
4 changes: 1 addition & 3 deletions hsds/domain_crawl.py
@@ -290,7 +290,6 @@ async def get_obj_json(self, obj_id):
self._obj_dict[obj_id] = {"status": status}

log.debug(f"DomainCrawler - got json for {obj_id}")
log.debug(f"obj_json: {obj_json}")

log.debug("store obj json")
self._obj_dict[obj_id] = obj_json # store the obj_json
@@ -432,7 +431,6 @@ def get_status(self):
status = None
for obj_id in self._obj_dict:
item = self._obj_dict[obj_id]
log.debug(f"item: {item}")
if "status" in item:
item_status = item["status"]
if status is None or item_status > status:
@@ -564,4 +562,4 @@ async def fetch(self, obj_id):
msg = f"DomainCrawler - fetch complete obj_id: {obj_id}, "
msg += f"{len(self._obj_dict)} objects found"
log.debug(msg)
log.debug(f"obj_dict: {self._obj_dict}")
log.debug(f"obj_dict: {len(self._obj_dict)} items")
12 changes: 10 additions & 2 deletions hsds/dset_lib.py
@@ -532,7 +532,11 @@ async def doReadSelection(
log.debug(f"chunk_ids: {chunk_ids}")
else:
log.debug(f"chunk_ids: {chunk_ids[:10]} ...")
log.debug(f"doReadSelection - select_dtype: {select_dtype}")
if select_dtype:
if len(select_dtype) < 10:
log.debug(f"doReadSelection - select_dtype: {select_dtype}")
else:
log.debug(f"doReadSelection - select_dtype: {len(select_dtype)} fields")

type_json = dset_json["type"]
item_size = getItemSize(type_json)
@@ -545,7 +549,11 @@ async def doReadSelection(
else:
log.debug(f"query: {query} limit: {limit}")
query_dtype = getQueryDtype(select_dtype)
log.debug(f"query_dtype: {query_dtype}")
if query_dtype:
if len(query_dtype) < 10:
log.debug(f"query_dtype: {query_dtype}")
else:
log.debug(f"query_dtype {len(query_dtype)}")

# create array to hold response data
arr = None
5 changes: 2 additions & 3 deletions hsds/servicenode_lib.py
@@ -343,7 +343,7 @@ async def getObjectJson(app,
continue
cache_obj[k] = obj_json[k]
meta_cache[obj_id] = cache_obj
log.debug(f"stored {cache_obj} in meta_cache")
log.debug(f"stored {obj_id} in meta_cache")

return obj_json

@@ -1199,7 +1199,6 @@ async def createObjectByPath(app,
# create a link to the new object
await putHardLink(app, parent_id, link_title, tgt_id=obj_id, bucket=bucket)
parent_id = obj_id # new parent
log.info(f"createObjectByPath {h5path} done")
log.debug(f" returning obj_json: {obj_json}")
log.info(f"createObjectByPath {h5path} done, returning obj_json")

return obj_json
23 changes: 14 additions & 9 deletions hsds/util/chunkUtil.py
@@ -891,14 +891,17 @@ def chunkReadSelection(chunk_arr, slices=None, select_dt=None):
raise ValueError(msg)

dt = chunk_arr.dtype
log.debug(f"dtype: {dt}")

# get requested data
output_arr = chunk_arr[slices]

if len(select_dt) < len(dt):
# do a field selection
log.debug(f"select_dtype: {select_dt}")
if select_dt:
if len(select_dt) < 10:
log.debug(f"select_dtype: {select_dt}")
else:
log.debug(f"select_dtype: {len(select_dt)} from {len(dt)} fields")
# create an array with just the given fields
arr = np.zeros(output_arr.shape, select_dt)
# slot in each of the given fields
@@ -1069,7 +1072,8 @@ def chunkWritePoints(chunk_id=None,
dset_dtype = chunk_arr.dtype
if select_dt is None:
select_dt = dset_dtype # no field selection
log.debug(f"dtype: {dset_dtype}")
else:
log.debug(f"select dtype: {dset_dtype}")

# point_arr should have the following type:
# (coord1, coord2, ...) | select_dtype
@@ -1255,7 +1259,7 @@ def _getEvalStr(query, arr_name, field_names):
for item in black_list:
if item in field_names:
msg = "invalid field name"
log.warn("Bad query: " + msg)
log.warn(f"Bad query: {msg}")
raise ValueError(msg)

if query.startswith("where "):
@@ -1283,7 +1287,6 @@ def _getEvalStr(query, arr_name, field_names):
if var_name not in field_names:
# invalid
msg = f"query variable: {var_name}"
log.debug(f"field_names: {field_names}")
log.warn("Bad query: " + msg)
raise ValueError(msg)
eval_str += arr_name + "['" + var_name + "']"
@@ -1298,11 +1301,14 @@ def _getEvalStr(query, arr_name, field_names):
elif ch in ("'", '"'):
end_quote_char = ch
eval_str += ch
- elif ch.isalpha() or ch == "_":
+ elif ch.isalnum() or ch == "_":
if ch == "b" and ch_next in ("'", '"'):
eval_str += "b" # start of a byte string literal
elif var_name is None:
- var_name = ch # start of a variable
+ if ch.isalpha():
+     var_name = ch # start of a variable
+ else:
+     eval_str += ch # assume a numeric value
else:
var_name += ch
elif ch == "(" and end_quote_char is None:
@@ -1366,8 +1372,7 @@ def chunkQuery(
"""
Run query on chunk and selection
"""
msg = f"chunkQuery - chunk_id: {chunk_id} query: [{query}] slices: {slices}, "
msg += f"limit: {limit} select_dt: {select_dt}"
msg = f"chunkQuery - chunk_id: {chunk_id} query: [{query}] slices: {slices}, limit: {limit}"
log.debug(msg)

if not isinstance(chunk_arr, np.ndarray):
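The _getEvalStr change above is the core of the commit: a token that starts with a letter is a variable name and may now contain digits, while a token that starts with a digit is passed through as a numeric literal. A standalone sketch of just that rule (simplified; the real tokenizer also handles quoting, byte-string prefixes, and parentheses):

def to_eval_str(query, field_names):
    eval_str = ""
    var_name = None
    for ch in query + " ":  # trailing space flushes the final token
        if ch.isalnum() or ch == "_":
            if var_name is None:
                if ch.isalpha():
                    var_name = ch   # a letter starts a variable name
                else:
                    eval_str += ch  # a leading digit is a numeric literal
            else:
                var_name += ch      # digits are now legal inside a name
        else:
            if var_name is not None:
                if var_name not in field_names:
                    raise ValueError(f"query variable: {var_name}")
                eval_str += f"rows['{var_name}']"
                var_name = None
            eval_str += ch
    return eval_str.strip()

print(to_eval_str("tgt123 == 456", ["tgt123"]))  # rows['tgt123'] == 456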
4 changes: 2 additions & 2 deletions hsds/util/dsetUtil.py
@@ -162,7 +162,7 @@ def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None):

try:
if dset_id in filter_map:
log.debug(f"returning filter from filter_map {filter_map[dset_id]}")
log.debug(f"returning filter from filter_map for dset: {dset_id}")
return filter_map[dset_id]
except TypeError:
log.error(f"getFilterOps TypeError - dset_id: {dset_id} filter_map: {filter_map}")
@@ -204,7 +204,7 @@ def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None):
# save the chunk shape and dtype
filter_ops["chunk_shape"] = chunk_shape
filter_ops["dtype"] = dtype
log.debug(f"save filter ops: {filter_ops} for {dset_id}")
log.debug(f"save filter ops for {dset_id}")
filter_map[dset_id] = filter_ops # save

return filter_ops
7 changes: 7 additions & 0 deletions tests/unit/bool_parser_test.py
@@ -22,6 +22,13 @@ def __init__(self, *args, **kwargs):
# main

def testExpressions(self):
+ p = BooleanParser("x1 < 42")
+ variables = p.getVariables()
+ self.assertEqual(len(variables), 1)
+ self.assertTrue("x1" in variables)
+ self.assertTrue(p.evaluate({"x1": 24}))
+ eval_str = p.getEvalStr()
+ self.assertEqual(eval_str, "x1 < 42.0")

p = BooleanParser("x1 < 42")
variables = p.getVariables()
3 changes: 2 additions & 1 deletion tests/unit/chunk_util_test.py
@@ -1408,6 +1408,7 @@ def testChunkIterator3d(self):
def testGetEvalStr(self):
queries = {}
queries["date == 23"] = "rows['date'] == 23"
queries["tgt123 == 456"] = "rows['tgt123'] == 456"
queries["wind == b'W 5'"] = "rows['wind'] == b'W 5'"
queries["temp > 61"] = "rows['temp'] > 61"
queries["(date >= 22) & (date <= 24)"] = "(rows['date'] >= 22) & (rows['date'] <= 24)"
@@ -1419,7 +1420,7 @@ def testGetEvalStr(self):
queries["date >= 22 where 'temp' in (61, 68, 72)"] = "rows['date'] >= 22"
queries["date >= 22 where 'temp F' in (61, 68, 72)"] = "rows['date'] >= 22"

fields = ["date", "wind", "temp"]
fields = ["date", "wind", "temp", "tgt123"]

for query in queries.keys():
eval_str = _getEvalStr(query, "rows", fields)
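End to end, a client can now filter on such fields through the dataset value endpoint. A hedged example using requests (the server address, domain path, and dataset id are placeholders for your deployment; the query parameter is HSDS's existing value-query interface that this commit extends):

import requests

rsp = requests.get(
    "http://localhost:5101/datasets/<dset_id>/value",
    params={"domain": "/home/test/data.h5", "query": "tgt123 == 456"},
)
print(rsp.json()["value"])  # rows where the tgt123 field equals 456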

