Skip to content

Commit

Permalink
Refactor of get chart data endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
dogversioning committed Nov 4, 2024
1 parent 042c129 commit c6670a8
Show file tree
Hide file tree
Showing 7 changed files with 363 additions and 51 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dependencies= [
"arrow >=1.2.3",
"awswrangler >=3.5, <4",
"boto3",
"Jinja2 >=3.1.4, <4",
"pandas >=2, <3",
"requests", # scripts only
"rich",
Expand Down
56 changes: 24 additions & 32 deletions src/dashboard/get_chart_data/get_chart_data.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""Lambda for performing joins of site count data.
This is intended to provide an implementation of the logic described in docs/api.md
"""

import logging
import os
import pathlib

import awswrangler
import boto3
import filter_config
import jinja2
import pandas
from shared import decorators, enums, errors, functions

Expand Down Expand Up @@ -45,43 +45,35 @@ def _build_query(query_params: dict, filters: list, path_params: dict) -> str:
"""Creates a query from the dashboard API spec"""
dp_id = path_params["data_package_id"]
columns = _get_table_cols(dp_id)
filter_str = filter_config.get_filter_string(filters)
if filter_str != "":
filter_str = f"AND {filter_str} "
filter_configs = []
for config in filters:
config = config.split(':')
# slightly overkill, but this makes accessing params via jinja filters easier
filter_configs.append(
{'data' : config[0], 'filter_type': config[1], 'bound': config[2] or None }
)
count_col = next(c for c in columns if c.startswith("cnt"))
columns.remove(count_col)
select_str = f"{query_params['column']}, sum({count_col}) as {count_col}"
strat_str = ""
group_str = f"{query_params['column']}"
# the 'if in' check is meant to handle the case where the selected column is also
# present in the filter logic and has already been removed
if query_params["column"] in columns:
columns.remove(query_params["column"])
if "stratifier" in query_params.keys():
select_str = f"{query_params['stratifier']}, {select_str}"
group_str = f"{query_params['stratifier']}, {group_str}"
columns.remove(query_params["stratifier"])
strat_str = f'AND {query_params["stratifier"]} IS NOT NULL '
if len(columns) > 0:
coalesce_str = (
f"WHERE COALESCE (cast({' AS VARCHAR), cast('.join(columns)} AS VARCHAR)) "
"IS NOT NULL AND "
with open(pathlib.Path(__file__).parent / "templates/get_chart_data.sql.jinja") as file:
template = file.read()
loader = jinja2.FileSystemLoader(pathlib.Path(__file__).parent / "templates/")
env = jinja2.Environment(loader=loader).from_string(template) # noqa: S701
query_str = env.render(
data_column=query_params["column"],
stratifier_column= query_params.get("stratifier", None),
count_columns=[count_col],
schema = os.environ.get('GLUE_DB_NAME'),
data_package_id = path_params["data_package_id"],
coalesce_columns = columns,
filter_configs = filter_configs


)
else:
coalesce_str = "WHERE "
query_str = (
f"SELECT {select_str} " # nosec # noqa: S608
f"FROM \"{os.environ.get('GLUE_DB_NAME')}\".\"{dp_id}\" "
f"{coalesce_str}"
f"{query_params['column']} IS NOT NULL "
f"{filter_str}"
f"{strat_str}"
f"GROUP BY {group_str} "
)
if "stratifier" in query_params.keys():
query_str += f"ORDER BY {query_params['stratifier']}, {query_params['column']}"
else:
query_str += f"ORDER BY {query_params['column']}"
print(query_str)
return query_str, count_col


Expand Down
1 change: 1 addition & 0 deletions src/dashboard/get_chart_data/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Jinja2 >= 3.1.4, <4
48 changes: 48 additions & 0 deletions src/dashboard/get_chart_data/templates/filter_numeric.sql.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{%- import 'syntax.sql.jinja' as syntax -%}

{#- Module-level state shared by the macros in this file.
    ns.matches : scratch list; is_filter_in_list appends the configs whose
                 filter_type is handled here, and get_filters / has_filter
                 reset it to [] once they have consumed it.
    filters    : the filter_type values this template knows how to render
                 (see render_filter).
-#}
{%- set ns =namespace(matches=[]) -%}
{%- set filters=['eq', 'ne', 'gt', 'gte', 'lt', 'lte'] -%}

{#- render_filter(data, filter_type, bound): emits a single SQL comparison of the
    form `<data> <operator> <bound>` for one of the numeric filter types. An
    unrecognised filter_type renders the text `not found <filter_type>`.
-#}
{%- macro render_filter( data, filter_type, bound) -%}
    {%- set operators = {
        'eq': '=',
        'ne': '!=',
        'gt': '>',
        'gte': '>=',
        'lt': '<',
        'lte': '<='
    } -%}
    {%- if filter_type in operators -%}
        {{ data }} {{ operators[filter_type] }} {{ bound }}
    {%- else -%}
        not found {{ filter_type }}
    {%- endif -%}
{%- endmacro -%}

{#- is_filter_in_list(filter_configs): appends to ns.matches every config whose
    filter_type is one of the module-level `filters`. Renders no text; callers
    read ns.matches afterwards and are responsible for resetting it.
-#}
{%- macro is_filter_in_list(filter_configs) -%}
    {%- set ns.matches = ns.matches + (filter_configs | selectattr('filter_type', 'in', filters) | list) -%}
{%- endmacro -%}

{#- get_filters(filter_configs): renders one comparison per supported filter
    config, each on its own line, with every line after the first prefixed by
    `AND `. ns.matches is cleared before returning.
    The line inside the loop is deliberately left unindented so the rendered
    text carries no extra leading whitespace.
-#}
{%- macro get_filters(filter_configs) -%}
    {{- is_filter_in_list(filter_configs) -}}
    {%- for entry in ns.matches %}
{{ syntax.and_delineate(loop) ~ render_filter(entry['data'], entry['filter_type'], entry['bound']) }}
    {%- endfor -%}
    {%- set ns.matches = [] -%}
{%- endmacro -%}

{#- has_filter(filter_configs): renders the literal text `True` when at least one
    config uses a filter type supported by this file, otherwise `False`.
    Macros return rendered text, so callers compare against the string 'True'.
    ns.matches is cleared before returning.
-#}
{%- macro has_filter(filter_configs) -%}
    {{- is_filter_in_list(filter_configs) -}}
    {{- 'True' if ns.matches | count > 0 else 'False' -}}
    {%- set ns.matches = [] -%}
{%- endmacro -%}
65 changes: 65 additions & 0 deletions src/dashboard/get_chart_data/templates/get_chart_data.sql.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{%- import 'syntax.sql.jinja' as syntax -%}
{%- import 'filter_numeric.sql.jinja' as numeric -%}
{#- select_data(cumulus__none): renders one aggregate SELECT over
    "schema"."data_package_id".

    Reads these variables from the render context: stratifier_column (optional),
    data_column, count_columns, schema, data_package_id, coalesce_columns.

    cumulus__none:
      False -> keep rows where data_column is not the 'cumulus__none' marker
      True  -> keep only rows where data_column equals 'cumulus__none'

    Every identifier is double-quoted, including the data column in the
    'cumulus__none' comparisons and the SUM alias, so that column names which
    need quoting behave the same in every clause of the query.

    NOTE(review): filter_str is not among the variables passed by the render
    call in get_chart_data.py, so that branch appears to render nothing unless
    another caller supplies it - confirm before relying on it.
-#}
{%- macro select_data(cumulus__none) -%}
SELECT
    {%- if stratifier_column %}
    "{{ stratifier_column }}",
    {%- endif %}
    "{{ data_column }}"
    {%- if count_columns %},{%- endif %}
    {%- for column in count_columns %}
    SUM("{{ column }}") AS "{{ column }}"{{ syntax.comma_delineate(loop) }}
    {%- endfor %}
FROM "{{ schema }}"."{{ data_package_id }}"
WHERE
    {%- if coalesce_columns %} COALESCE (
        {%- for column in coalesce_columns %}
        cast("{{ column }}" AS VARCHAR){{ syntax.comma_delineate(loop) }}
        {%- endfor %}
    ) IS NOT NULL
    AND{%- endif %} "{{ data_column }}" IS NOT NULL
    {%- if not cumulus__none %}
    AND "{{ data_column }}" != 'cumulus__none'
        {%- if filter_str %}
    AND {{ filter_str }}
        {%- endif %}
    {%- else %}
    AND "{{ data_column }}" = 'cumulus__none'
    {%- endif %}
    {%- if stratifier_column %}
    AND "{{ stratifier_column }}" IS NOT NULL
    {%- endif %}
GROUP BY
    {%- if stratifier_column %}
    "{{ stratifier_column }}", "{{ data_column }}"
    {%- else %}
    "{{ data_column }}"
    {%- endif %}
ORDER BY
    {%- if stratifier_column %}
    "{{ stratifier_column }}", "{{ data_column }}"
    {%- else %}
    "{{ data_column }}"
    {%- endif %}
{%- endmacro -%}
{#- Final query body.
    Two CTEs are rendered with select_data: non_null (select_data(False)) and
    has_null (select_data(True)). Their rows are combined with UNION.
    When numeric.has_filter renders the text 'True' for filter_configs, each
    side of the UNION gets a WHERE clause built by numeric.get_filters.
    The combined result is ordered by the stratifier column (when one is set)
    and then the data column.
-#}
WITH non_null AS (
{{ select_data(False) }}
),
has_null AS (
{{ select_data(True) }}
)
SELECT * FROM non_null
{%- if numeric.has_filter(filter_configs) == 'True' %}
WHERE {{ numeric.get_filters(filter_configs) }}
{%- endif %}
UNION
SELECT * from has_null
{%- if numeric.has_filter(filter_configs) == 'True' %}
WHERE {{ numeric.get_filters(filter_configs) }}
{%- endif %}
ORDER BY
{%- if stratifier_column %}
"{{ stratifier_column }}", "{{ data_column }}"
{%- else %}
"{{ data_column }}"
{%- endif %}
24 changes: 24 additions & 0 deletions src/dashboard/get_chart_data/templates/syntax.sql.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{#- comma_delineate(loop): renders `,` after every loop item except the last,
    and nothing for the last item.
-#}
{%- macro comma_delineate(loop) -%}
    {{- '' if loop.last else ',' -}}
{%- endmacro -%}


{# Note that the following two delimiters are meant to be emitted at the front of
each line inside a loop - this enables formatting a WHERE clause like this:
---
WHERE
b.bar = a.foo
AND b.baz != a.foo
---
This is slightly easier to work with when debugging queries (and also
conforms better to the Mozilla SQL style guide).
#}
{#- and_delineate(loop): renders `AND ` (with a trailing space) in front of every
    loop item except the first, and nothing for the first item.
-#}
{%- macro and_delineate(loop) -%}
    {{- '' if loop.first else 'AND ' -}}
{%- endmacro -%}

{#- or_delineate(loop): renders `OR ` (with a trailing space) in front of every
    loop item except the first, and nothing for the first item.
-#}
{%- macro or_delineate(loop) -%}
    {{- '' if loop.first else 'OR ' -}}
{%- endmacro -%}
Loading

0 comments on commit c6670a8

Please sign in to comment.