Skip to content

Commit

Permalink
Fix missing index column in /filter (#1935)
Browse files (browse the repository at this point in the history)
* Fix missing index column in /filter

* Fix test

* Fix e2e test
  • Loading branch information
albertvillanova authored Oct 5, 2023
1 parent 41ea211 commit 6e19231
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 7 deletions.
8 changes: 3 additions & 5 deletions e2e/tests/test_53_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_filter_endpoint(
headers = auth_headers[auth]
offset = 1
length = 2
- where = "col_4 = 'B'"
+ where = "col_4='B'"
filter_response = poll_until_ready_and_assert(
relative_url=(
f"/filter?dataset={dataset}&config={config}&split={split}&offset={offset}&length={length}&where={where}"
Expand Down Expand Up @@ -47,7 +47,7 @@ def test_filter_endpoint(
"truncated_cells": [],
}, rows[0]
assert rows[1] == {
- "row_idx": 2,
+ "row_idx": 3,
"row": {
"col_1": "The wingman spots the pirateship coming at him and warns the Dark Lord",
"col_2": 3,
Expand Down Expand Up @@ -77,15 +77,13 @@ def test_filter_endpoint(
("col_2<3 OR col_4='B'", 4),
],
)
- def test_where_parameter_in_filter_endpoint(
+ def test_filter_endpoint_parameter_where(
where: str, expected_num_rows: int, hf_public_dataset_repo_csv_data: str
) -> None:
dataset = hf_public_dataset_repo_csv_data
config, split = get_default_config_split()
response = poll_until_ready_and_assert(
relative_url=f"/filter?dataset={dataset}&config={config}&split={split}&where={where}",
expected_status_code=200,
expected_error_code=None,
check_x_revision=True,
)
content = response.json()
Expand Down
5 changes: 4 additions & 1 deletion services/search/src/search/routes/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,10 @@ def execute_filter_query(
) -> tuple[int, pa.Table]:
with duckdb_connect(database=index_file_location) as con:
filter_query = FILTER_QUERY.format(
- columns=",".join([f'"{column}"' for column in columns]), where=where, limit=limit, offset=offset
+ columns=",".join([f'"{column}"' for column in [ROW_IDX_COLUMN] + columns]),
+ where=where,
+ limit=limit,
+ offset=offset,
)
filter_count_query = FILTER_COUNT_QUERY.format(where=where)
try:
Expand Down
2 changes: 1 addition & 1 deletion services/search/tests/routes/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_execute_filter_query(index_file_location: str) -> None:
index_file_location=index_file_location, columns=columns, where=where, limit=limit, offset=offset
)
assert num_rows_total == 2
- assert pa_table == pa.Table.from_pydict({"name": ["Simone"], "age": [30]})
+ assert pa_table == pa.Table.from_pydict({"__hf_index_id": [3], "name": ["Simone"], "age": [30]})


@pytest.mark.parametrize("where", ["non-existing-column=30", "name=30", "name>30"])
Expand Down

0 comments on commit 6e19231

Please sign in to comment.