diff --git a/apps/wizard/app_pages/anomalist/app.py b/apps/wizard/app_pages/anomalist/app.py
new file mode 100644
index 00000000000..73db97711f9
--- /dev/null
+++ b/apps/wizard/app_pages/anomalist/app.py
@@ -0,0 +1,220 @@
+import pandas as pd
+import streamlit as st
+
+from apps.wizard.utils import cached
+from apps.wizard.utils.components import grapher_chart
+
+# PAGE CONFIG
+st.set_page_config(
+    page_title="Wizard: Anomalist",
+    page_icon="🪄",
+    layout="wide",
+)
+# OTHER CONFIG
+ANOMALY_TYPES = [
+    {
+        "title": "Time change",
+        "color": "orange",
+        "icon": ":material/timeline",
+    },
+    {
+        "title": "Version change",
+        "color": "blue",
+        "icon": ":material/upgrade",
+    },
+    {
+        "title": "Missing point",
+        "color": "red",
+        "icon": ":material/hide_source",
+    },
+    {
+        "title": "AI",
+        "color": "rainbow",
+        "icon": ":material/lightbulb",
+    },
+]
+ANOMALY_TYPE_NAMES = [a["title"] for a in ANOMALY_TYPES]
+ANOMALY_TYPE_DISPLAY = {a["title"]: f":{a['color']}-background[{a['icon']}: {a['title']}]" for a in ANOMALY_TYPES}
+#
+# SESSION STATE
+st.session_state.datasets_selected = st.session_state.get("datasets_selected", [])
+st.session_state.indicators = st.session_state.get("indicators", [])
+
+st.session_state.anomalist_filter_entities = st.session_state.get("anomalist_filter_entities", [])
+st.session_state.anomalist_filter_indicators = st.session_state.get("anomalist_filter_indicators", [])
+
+# DEBUGGING
+ENTITIES = [
+    "Afghanistan",
+    "Albania",
+    "Algeria",
+]
+YEAR_MIN = 1950
+YEAR_MAX = 2021
+ANOMALIES = [
+    {
+        "title": "Coal consumption - Malaysia - 1983",
+        "description": "There are 12 missing points that used to be informed in the previous version",
+        "category": "Missing point",
+        "country": "Malaysia",
+        "year": 1983,
+    },
+    {
+        "title": "Gas production - Ireland - 2000",
+        "description": "There are 2 abrupt changes in the time series.",
+        "category": "Time change",
+        "country": "Ireland",
+        "year": 2000,
+    },
+    {
+        "title": "Nuclear production - France - 2010",
+        "description": "There is 1 abrupt changes in the time series.",
+        "category": "AI",
+        "country": "France",
+        "year": 2010,
+    },
+]
+ANOMALIES = ANOMALIES + ANOMALIES + ANOMALIES + ANOMALIES
+DATASETS_DEBUG = ["grapher/energy/2024-06-20/energy_mix"]
+
+# PAGE TITLE
+st.title(":material/planner_review: Anomalist")
+
+
+# DATASET SEARCH
+st.markdown(
+    """
+    """,
+    unsafe_allow_html=True,
+)
+with st.form(key="dataset_search"):
+    st.session_state.datasets_selected = st.multiselect(
+        "Select datasets",
+        options=cached.load_dataset_uris(),
+        max_selections=1,
+        default=[uri for uri in DATASETS_DEBUG if uri in cached.load_dataset_uris()],
+    )
+
+    st.form_submit_button("Detect anomalies", type="primary")
+
+
+# st.session_state.datasets_selected = DATASETS_DEBUG
+
+# FILTER PARAMS
+with st.container(border=True):
+    st.markdown("##### Select filters")
+    indicator_uris = []
+    if len(st.session_state.datasets_selected) > 0:
+        st.session_state.indicators = cached.load_variables_in_dataset(
+            st.session_state.datasets_selected,
+        )
+        indicator_uris = cached.get_variable_uris(st.session_state.indicators, True)
+
+    col1, col2 = st.columns([10, 2])
+    # Indicator
+    with col1:
+        st.session_state.anomalist_filter_indicators = st.multiselect(
+            label="Indicators",
+            options=indicator_uris,
+            help="Show anomalies affecting only a selection of indicators.",
+        )
+
+    with col2:
+        # Entity
+        st.session_state.anomalist_filter_entities = st.multiselect(
+            label="Entities",
+            options=ENTITIES,
+            help="Show anomalies affecting only a selection of entities.",
+        )
+
+    # Year range (left), anomaly type and sorting (right)
+    col1, col2 = st.columns([10, 3])
+    with col1:
+        st.slider(
+            label="Years",
+            min_value=YEAR_MIN,
+            max_value=YEAR_MAX,
+            value=(YEAR_MIN, YEAR_MAX),
+            help="Show anomalies occurring in a particular time range.",
+        )
+    with col2:
+        col21, col22 = st.columns(2)
+        with col21:
+            # Anomaly type
+            st.multiselect(
+                label="Anomaly type",
+                options=ANOMALY_TYPE_NAMES,
+                # default=ANOMALY_TYPES,
+            )
+        with col22:
+            # Anomaly sorting
+            st.multiselect(
+                label="Sort by",
+                options=[
+                    "Anomaly score",
+                    "Population",
+                    "Chart views",
+                ],
+            )
+
+    # st.multiselect("Anomaly type", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
+    # st.number_input("Minimum score", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
+
+# SHOW ANOMALIES
+data = {
+    "anomaly": ["Anomaly 1", "Anomaly 2", "Anomaly 3"],
+    "description": ["Description 1", "Description 2", "Description 3"],
+}
+
+
+# Render one anomaly card: type badge, title and description (left), grapher chart (right)
+def show_anomaly(anomaly, indicator):
+    with st.container(border=True):
+        col1, col2 = st.columns(2)
+        with col1:
+            st.markdown(ANOMALY_TYPE_DISPLAY[anomaly["category"]])
+            st.markdown(f"##### {anomaly['title']}")
+            st.markdown(f"{anomaly['description']}")
+        with col2:
+            # st.write(indicator.id)
+            grapher_chart(variable_id=indicator.id, selected_entities=[anomaly["country"]])
+
+
+def trigger_dialog_for_df_selection(df: pd.DataFrame):
+    if len(st.session_state.anomalies["selection"]["rows"]) > 0:
+        # Get selected row number
+        row_num = st.session_state.anomalies["selection"]["rows"][0]
+        # Get indicator id
+        indicator_id = df.index[row_num]
+        action(indicator_id)
+
+
+@st.dialog("Show anomaly", width="large")
+def action(indicator_id):
+    grapher_chart(variable_id=indicator_id)
+
+
+# If any indicator is given, show the anomalies
+if len(st.session_state.indicators) > 0:
+    for index, anomaly in enumerate(ANOMALIES):
+        # Pick every third indicator (debug placeholder); wrap around so small datasets don't raise IndexError
+        indicator = st.session_state.indicators[(index * 3) % len(st.session_state.indicators)]
+        show_anomaly(anomaly, indicator)
+    # df = pd.DataFrame(
+    #     {
+    #         "indicator_id": [i.id for i in st.session_state.indicators],
+    #         "reviewed": [False for i in st.session_state.indicators],
+    #     },
+    # ).set_index("indicator_id")
+
+    # st.dataframe(
+    #     df,
+    #     key="anomalies",
+    #     selection_mode="single-row",
+    #     on_select=lambda df=df: trigger_dialog_for_df_selection(df),
+    #     use_container_width=True,
+    # )
diff --git a/apps/wizard/app_pages/anomalist.py b/apps/wizard/app_pages/anomalist/old.py
similarity index 100%
rename from apps/wizard/app_pages/anomalist.py
rename to apps/wizard/app_pages/anomalist/old.py
diff --git a/apps/wizard/app_pages/anomalist_2.py b/apps/wizard/app_pages/anomalist_2.py
deleted file mode 100644
index 61369b2880a..00000000000
--- a/apps/wizard/app_pages/anomalist_2.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import pandas as pd
-import streamlit as st
-
-from apps.wizard.utils import cached
-from apps.wizard.utils.components import grapher_chart, st_horizontal
-
-# PAGE CONFIG
-st.set_page_config(
-    page_title="Wizard: Anomalist",
-    page_icon="🪄",
-)
-# OTHER CONFIG
-ANOMALY_TYPES = [
-    "Upgrade",
-    "Abrupt change",
-    "Context change",
-]
-
-# SESSION STATE
-st.session_state.datasets_selected = st.session_state.get("datasets_selected", [])
-st.session_state.filter_indicators = st.session_state.get("filter_indicators", [])
-st.session_state.indicators = st.session_state.get("indicators", [])
-
-# PAGE TITLE
-st.title(":material/planner_review: Anomalist")
-
-
-# DATASET SEARCH
-st.markdown(
-    """
-    """,
-    unsafe_allow_html=True,
-)
-with st.form(key="dataset_search"):
-    st.session_state.datasets_selected = st.multiselect(
-        "Select datasets",
-        options=cached.load_dataset_uris(),
-        max_selections=1,
-    )
-
-    st.form_submit_button("Detect anomalies", type="primary")
-
-
-# FILTER PARAMS
-with st.container(border=True):
-    st.markdown("##### Filter Parameters")
-    options = []
-    if len(st.session_state.datasets_selected) > 0:
-        st.session_state.indicators = cached.load_variables_in_dataset(st.session_state.datasets_selected)
-        options = [o.catalogPath for o in st.session_state.indicators]
-
-    st.session_state.filter_indicators = st.multiselect(
-        label="Indicator",
-        options=options,
-    )
-
-    with st_horizontal():
-        st.session_state.filter_indicators = st.multiselect(
-            label="Indicator type",
-            options=["New indicator", "Indicator upgrade"],
-        )
-        st.session_state.filter_indicators = st.multiselect(
-            label="Anomaly type",
-            options=ANOMALY_TYPES,
-        )
-
-    # st.multiselect("Anomaly type", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
-    st.number_input("Minimum score", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
-
-# SHOW ANOMALIES
-data = {
-    "anomaly": ["Anomaly 1", "Anomaly 2", "Anomaly 3"],
-    "description": ["Description 1", "Description 2", "Description 3"],
-}
-
-
-# SHOW ANOMALIES
-def show_anomaly(df: pd.DataFrame):
-    if len(st.session_state.anomalies["selection"]["rows"]) > 0:
-        # Get selected row number
-        row_num = st.session_state.anomalies["selection"]["rows"][0]
-        # Get indicator id
-        indicator_id = df.index[row_num]
-        action(indicator_id)
-
-
-@st.dialog("Show anomaly", width="large")
-def action(indicator_id):
-    grapher_chart(variable_id=indicator_id)
-
-
-if len(st.session_state.indicators) > 0:
-    df = pd.DataFrame(
-        {
-            "indicator_id": [i.id for i in st.session_state.indicators],
-            "reviewed": [False for i in st.session_state.indicators],
-        },
-    ).set_index("indicator_id")
-
-    st.dataframe(
-        df,
-        key="anomalies",
-        selection_mode="single-row",
-        on_select=lambda df=df: show_anomaly(df),
-        use_container_width=True,
-    )
diff --git a/apps/wizard/config/config.yml b/apps/wizard/config/config.yml
index f445f1dd50f..0ec6f7414fd 100644
--- a/apps/wizard/config/config.yml
+++ b/apps/wizard/config/config.yml
@@ -99,6 +99,15 @@ sections:
         image_url: "https://superheroetc.wordpress.com/wp-content/uploads/2017/05/bulbasaur-line.jpg"
         disable:
           production: True
+      - title: "Anomalist"
+        alias: anomalist
+        entrypoint: app_pages/anomalist/app.py
+        description: List anomalies in data
+        maintainer: "@lucas"
+        icon: ":material/planner_review:"
+        image_url: "https://i0.pickpik.com/photos/87/645/315/halloween-ghosts-happy-halloween-ghost-preview.jpg"
+        disable:
+          production: True
       - title: "Chart Diff"
         alias: chart-diff
         entrypoint: app_pages/chart_diff/app.py
@@ -108,15 +117,6 @@ sections:
         image_url: "https://static.wikia.nocookie.net/dragonball/images/6/60/FusionDanceFinaleGotenTrunksBuuSaga.png"
         disable:
           production: True
-      - title: "Anomalist"
-        alias: anomalist
-        entrypoint: app_pages/anomalist_2.py
-        description: List anomalies in data
-        maintainer: "@lucas"
-        icon: ":material/planner_review:"
-        image_url: "https://superheroetc.wordpress.com/wp-content/uploads/2017/05/bulbasaur-line.jpg"
-        disable:
-          production: True
       - title: "Harmonizer"
         alias: harmonizer
         description: "Harmonize a column of a table"
@@ -124,20 +124,6 @@ sections:
         entrypoint: app_pages/harmonizer.py
         icon: ":material/music_note:"
         image_url: "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c1/C_triad.svg/2560px-C_triad.svg.png"
-      - title: "Map Bracketer"
-        alias: map_brackets
-        entrypoint: app_pages/map_brackets.py
-        description: Create optimal map brackets
-        maintainer: "@pablo"
-        icon: ":material/map:"
-        image_url: "https://upload.wikimedia.org/wikipedia/en/8/8c/Human_Language_Families_Map_%28Wikipedia_Colors_.PNG"
-      - title: "Explorer editor"
-        alias: explorer_editor
-        entrypoint: app_pages/explorer_edit.py
-        description: Edit explorer config
-        maintainer: "@lucas"
-        icon: ":material/explore:"
-        image_url: "https://upload.wikimedia.org/wikipedia/en/1/18/Dora_the_Explorer_2004_album_cover.jpg"
 
   - title: "Monitoring"
     description: |-
@@ -158,6 +144,25 @@ sections:
         icon: ":material/search:"
         image_url: "https://upload.wikimedia.org/wikipedia/commons/c/c3/NGC_4414_%28NASA-med%29.jpg"
 
+  - title: "Explorers"
+    description: |-
+      Explorer tools.
+    apps:
+      - title: "Map Bracketer"
+        alias: map_brackets
+        entrypoint: app_pages/map_brackets.py
+        description: Create optimal map brackets
+        maintainer: "@pablo"
+        icon: ":material/map:"
+        image_url: "https://upload.wikimedia.org/wikipedia/en/8/8c/Human_Language_Families_Map_%28Wikipedia_Colors_.PNG"
+      - title: "ID to Path"
+        alias: explorer_editor
+        entrypoint: app_pages/explorer_edit.py
+        description: Migrate id-based explorers
+        maintainer: "@lucas"
+        icon: ":material/explore:"
+        image_url: "https://upload.wikimedia.org/wikipedia/en/1/18/Dora_the_Explorer_2004_album_cover.jpg"
+
   - title: "Research"
     description: |-
       Research tools.
diff --git a/apps/wizard/utils/cached.py b/apps/wizard/utils/cached.py
index 58d0ffbeee5..d3a461a5d73 100644
--- a/apps/wizard/utils/cached.py
+++ b/apps/wizard/utils/cached.py
@@ -3,14 +3,28 @@
 import pandas as pd
 import streamlit as st
 
-from etl import grapher_io as io
+from etl import grapher_io as gio
 from etl.config import OWID_ENV, OWIDEnv
 from etl.grapher_model import Variable
 
 
+@st.cache_data
+def get_variable_uris(indicators: List[Variable], only_slug: Optional[bool] = False) -> List[str]:
+    options = [o.catalogPath for o in indicators]
+    if only_slug:
+        options = [o.rsplit("/", 1)[-1] if isinstance(o, str) else "" for o in options]
+    return options  # type: ignore
+
+
+@st.cache_data
+def load_dataset_uris_new_in_server() -> List[str]:
+    """Load URIs of datasets that are new in staging server. TODO: no filtering yet, same as load_dataset_uris."""
+    return gio.load_dataset_uris()
+
+
 @st.cache_data
 def load_dataset_uris() -> List[str]:
-    return io.load_dataset_uris()
+    return gio.load_dataset_uris()
 
 
 @st.cache_data
@@ -19,7 +33,7 @@ def load_variables_in_dataset(
     _owid_env: OWIDEnv = OWID_ENV,
 ) -> List[Variable]:
     """Load Variable objects that belong to a dataset with URI `dataset_uri`."""
-    return io.load_variables_in_dataset(dataset_uri, _owid_env)
+    return gio.load_variables_in_dataset(dataset_uri, _owid_env)
 
 
 @st.cache_data
@@ -29,7 +43,7 @@ def load_variable_metadata(
     variable: Optional[Variable] = None,
     _owid_env: OWIDEnv = OWID_ENV,
 ) -> Dict[str, Any]:
-    return io.load_variable_metadata(
+    return gio.load_variable_metadata(
         catalog_path=catalog_path,
         variable_id=variable_id,
         variable=variable,
@@ -44,7 +58,7 @@ def load_variable_data(
     variable: Optional[Variable] = None,
     _owid_env: OWIDEnv = OWID_ENV,
 ) -> pd.DataFrame:
-    return io.load_variable_data(
+    return gio.load_variable_data(
         catalog_path=catalog_path,
         variable_id=variable_id,
         variable=variable,
diff --git a/apps/wizard/utils/components.py b/apps/wizard/utils/components.py
index 52b5cc445c0..82c3ff1156b 100644
--- a/apps/wizard/utils/components.py
+++ b/apps/wizard/utils/components.py
@@ -9,7 +9,7 @@
 import streamlit.components.v1 as components
 
 from etl.config import OWID_ENV, OWIDEnv
-from etl.grapher_io import load_variable_data
+from etl.grapher_io import ensure_load_variable, load_variable_data
 from etl.grapher_model import Variable
 
 HORIZONTAL_STYLE = """