diff --git a/apps/wizard/app_pages/anomalist/app.py b/apps/wizard/app_pages/anomalist/app.py
new file mode 100644
index 00000000000..73db97711f9
--- /dev/null
+++ b/apps/wizard/app_pages/anomalist/app.py
@@ -0,0 +1,220 @@
+import pandas as pd
+import streamlit as st
+
+from apps.wizard.utils import cached
+from apps.wizard.utils.components import grapher_chart
+
+# PAGE CONFIG
+st.set_page_config(
+ page_title="Wizard: Anomalist",
+ page_icon="🪄",
+ layout="wide",
+)
+# OTHER CONFIG
+# Anomaly categories known to this page. Each entry carries the label ("title"), the colour
+# used for its badge background and a Material icon shortcode. The icon strings omit the
+# closing colon; it is supplied by the f-string that builds ANOMALY_TYPE_DISPLAY below.
+ANOMALY_TYPES = [
+ {
+ "title": "Time change",
+ "color": "orange",
+ "icon": ":material/timeline",
+ },
+ {
+ "title": "Version change",
+ "color": "blue",
+ "icon": ":material/upgrade",
+ },
+ {
+ "title": "Missing point",
+ "color": "red",
+ "icon": ":material/hide_source",
+ },
+ {
+ "title": "AI",
+ "color": "rainbow",
+ "icon": ":material/lightbulb",
+ },
+]
+# Category labels, in declaration order.
+ANOMALY_TYPE_NAMES = [a["title"] for a in ANOMALY_TYPES]
+# Maps each label to a markdown badge string, e.g.
+# "Time change" -> ":orange-background[:material/timeline: Time change]".
+ANOMALY_TYPE_DISPLAY = {a["title"]: f":{a['color']}-background[{a['icon']}: {a['title']}]" for a in ANOMALY_TYPES}
+#
+# SESSION STATE
+# Make sure every key this page reads exists; a value already stored by a previous
+# rerun is kept, otherwise the key starts out as an empty list.
+for _state_key in (
+    "datasets_selected",
+    "indicators",
+    "anomalist_filter_entities",
+    "anomalist_filter_indicators",
+):
+    st.session_state[_state_key] = st.session_state.get(_state_key, [])
+
+# DEBUGGING
+# Hard-coded mock values used while the real anomaly detection is not wired in.
+ENTITIES = ["Afghanistan", "Albania", "Algeria"]
+YEAR_MIN, YEAR_MAX = 1950, 2021
+_ANOMALY_SAMPLES = [
+    {
+        "title": "Coal consumption - Malaysia - 1983",
+        "description": "There are 12 missing points that used to be informed in the previous version",
+        "category": "Missing point",
+        "country": "Malaysia",
+        "year": 1983,
+    },
+    {
+        "title": "Gas production - Ireland - 2000",
+        "description": "There are 2 abrupt changes in the time series.",
+        "category": "Time change",
+        "country": "Ireland",
+        "year": 2000,
+    },
+    {
+        "title": "Nuclear production - France - 2010",
+        "description": "There is 1 abrupt changes in the time series.",
+        "category": "AI",
+        "country": "France",
+        "year": 2010,
+    },
+]
+# Repeat the three samples four times so the page has twelve cards to lay out.
+ANOMALIES = _ANOMALY_SAMPLES * 4
+DATASETS_DEBUG = ["grapher/energy/2024-06-20/energy_mix"]
+
+# PAGE TITLE
+st.title(":material/planner_review: Anomalist")
+
+
+# DATASET SEARCH
+# NOTE(review): the markdown body below contains only whitespace, so this call adds no visible
+# content; it looks like a leftover slot for custom HTML/CSS -- confirm whether it is still needed.
+st.markdown(
+ """
+ """,
+ unsafe_allow_html=True,
+)
+with st.form(key="dataset_search"):
+    # Load the selectable dataset URIs once so the default can be validated against them.
+    dataset_uris = cached.load_dataset_uris()
+    st.session_state.datasets_selected = st.multiselect(
+        "Select datasets",
+        options=dataset_uris,
+        max_selections=1,
+        # st.multiselect raises when a default value is not among `options`, so the debug
+        # dataset is only pre-selected in environments where it actually exists.
+        default=[uri for uri in DATASETS_DEBUG if uri in dataset_uris],
+    )
+
+    st.form_submit_button("Detect anomalies", type="primary")
+
+
+# st.session_state.datasets_selected = DATASETS_DEBUG
+
+# FILTER PARAMS
+with st.container(border=True):
+    st.markdown("##### Select filters")
+    # Indicator options stay empty until a dataset has been selected in the form above.
+    indicator_uris = []
+    if st.session_state.datasets_selected:
+        st.session_state.indicators = cached.load_variables_in_dataset(
+            st.session_state.datasets_selected,
+        )
+        indicator_uris = cached.get_variable_uris(st.session_state.indicators, only_slug=True)
+
+    # First row: indicator and entity filters
+    col1, col2 = st.columns([10, 2])
+    with col1:
+        # Indicator
+        st.session_state.anomalist_filter_indicators = st.multiselect(
+            label="Indicators",
+            options=indicator_uris,
+            help="Show anomalies affecting only a selection of indicators.",
+        )
+
+    with col2:
+        # Entity
+        st.session_state.anomalist_filter_entities = st.multiselect(
+            label="Entities",
+            options=ENTITIES,
+            help="Show anomalies affecting only a selection of entities.",
+        )
+
+    # Second row: year range, anomaly type and sorting.
+    # The values returned by these three widgets are not stored or used yet.
+    col1, col2 = st.columns([10, 3])
+    with col1:
+        # Year range
+        st.slider(
+            label="Years",
+            min_value=YEAR_MIN,
+            max_value=YEAR_MAX,
+            value=(YEAR_MIN, YEAR_MAX),
+            help="Show anomalies occurring in a particular time range.",
+        )
+    with col2:
+        col21, col22 = st.columns(2)
+        with col21:
+            # Anomaly type
+            st.multiselect(
+                label="Anomaly type",
+                options=ANOMALY_TYPE_NAMES,
+                # default=ANOMALY_TYPES,
+            )
+        with col22:
+            # Anomaly sorting
+            st.multiselect(
+                label="Sort by",
+                options=[
+                    "Anomaly score",
+                    "Population",
+                    "Chart views",
+                ],
+            )
+
+    # st.multiselect("Anomaly type", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
+    # st.number_input("Minimum score", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
+
+# SHOW ANOMALIES
+# NOTE(review): placeholder table contents; `data` is not read anywhere in the visible part of
+# this file -- confirm whether it can be dropped.
+data = {
+ "anomaly": ["Anomaly 1", "Anomaly 2", "Anomaly 3"],
+ "description": ["Description 1", "Description 2", "Description 3"],
+}
+
+
+# SHOW ANOMALIES
+def show_anomaly(anomaly, indicator):
+    """Render one anomaly as a bordered two-column card.
+
+    The left column shows the category badge, the title and the description of
+    `anomaly`; the right column embeds the grapher chart of `indicator`
+    restricted to the anomaly's country.
+
+    Args:
+        anomaly: Mapping with the keys "category", "title", "description" and "country".
+        indicator: Object exposing the variable id as `.id`.
+    """
+    card = st.container(border=True)
+    with card:
+        summary_col, chart_col = st.columns(2)
+        with summary_col:
+            category_badge = ANOMALY_TYPE_DISPLAY[anomaly["category"]]
+            st.markdown(category_badge)
+            st.markdown(f"##### {anomaly['title']}")
+            st.markdown(f"{anomaly['description']}")
+        with chart_col:
+            country = anomaly["country"]
+            grapher_chart(variable_id=indicator.id, selected_entities=[country])
+
+
+def trigger_dialog_for_df_selection(df: pd.DataFrame):
+    """Open the anomaly dialog for the row currently selected in the "anomalies" widget.
+
+    Reads the selection stored under `st.session_state.anomalies`; when at least one
+    row is selected, the index label of the first selected row in `df` is passed to
+    `action`. Does nothing when no row is selected.
+    """
+    selected_rows = st.session_state.anomalies["selection"]["rows"]
+    if len(selected_rows) == 0:
+        return
+    # Only the first selected row is considered.
+    first_row = selected_rows[0]
+    action(df.index[first_row])
+
+
+@st.dialog("Show anomaly", width="large")
+def action(indicator_id):
+ """Show the grapher chart of the variable `indicator_id` inside the "Show anomaly" dialog."""
+ grapher_chart(variable_id=indicator_id)
+
+
+# If any indicator is given, show the anomalies
+_loaded_indicators = st.session_state.indicators
+if len(_loaded_indicators) > 0:
+    for index, anomaly in enumerate(ANOMALIES):
+        # Pick a pseudo-random indicator for the mock-up. The stride of 3 is wrapped with a
+        # modulo so datasets with fewer indicators than 3 * len(ANOMALIES) do not raise
+        # IndexError.
+        indicator = _loaded_indicators[(index * 3) % len(_loaded_indicators)]
+        show_anomaly(anomaly, indicator)
+ # df = pd.DataFrame(
+ # {
+ # "indicator_id": [i.id for i in st.session_state.indicators],
+ # "reviewed": [False for i in st.session_state.indicators],
+ # },
+ # ).set_index("indicator_id")
+
+ # st.dataframe(
+ # df,
+ # key="anomalies",
+ # selection_mode="single-row",
+ # on_select=lambda df=df: trigger_dialog_for_df_selection(df),
+ # use_container_width=True,
+ # )
diff --git a/apps/wizard/app_pages/anomalist.py b/apps/wizard/app_pages/anomalist/old.py
similarity index 100%
rename from apps/wizard/app_pages/anomalist.py
rename to apps/wizard/app_pages/anomalist/old.py
diff --git a/apps/wizard/app_pages/anomalist_2.py b/apps/wizard/app_pages/anomalist_2.py
deleted file mode 100644
index 61369b2880a..00000000000
--- a/apps/wizard/app_pages/anomalist_2.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import pandas as pd
-import streamlit as st
-
-from apps.wizard.utils import cached
-from apps.wizard.utils.components import grapher_chart, st_horizontal
-
-# PAGE CONFIG
-st.set_page_config(
- page_title="Wizard: Anomalist",
- page_icon="🪄",
-)
-# OTHER CONFIG
-ANOMALY_TYPES = [
- "Upgrade",
- "Abrupt change",
- "Context change",
-]
-
-# SESSION STATE
-st.session_state.datasets_selected = st.session_state.get("datasets_selected", [])
-st.session_state.filter_indicators = st.session_state.get("filter_indicators", [])
-st.session_state.indicators = st.session_state.get("indicators", [])
-
-# PAGE TITLE
-st.title(":material/planner_review: Anomalist")
-
-
-# DATASET SEARCH
-st.markdown(
- """
- """,
- unsafe_allow_html=True,
-)
-with st.form(key="dataset_search"):
- st.session_state.datasets_selected = st.multiselect(
- "Select datasets",
- options=cached.load_dataset_uris(),
- max_selections=1,
- )
-
- st.form_submit_button("Detect anomalies", type="primary")
-
-
-# FILTER PARAMS
-with st.container(border=True):
- st.markdown("##### Filter Parameters")
- options = []
- if len(st.session_state.datasets_selected) > 0:
- st.session_state.indicators = cached.load_variables_in_dataset(st.session_state.datasets_selected)
- options = [o.catalogPath for o in st.session_state.indicators]
-
- st.session_state.filter_indicators = st.multiselect(
- label="Indicator",
- options=options,
- )
-
- with st_horizontal():
- st.session_state.filter_indicators = st.multiselect(
- label="Indicator type",
- options=["New indicator", "Indicator upgrade"],
- )
- st.session_state.filter_indicators = st.multiselect(
- label="Anomaly type",
- options=ANOMALY_TYPES,
- )
-
- # st.multiselect("Anomaly type", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
- st.number_input("Minimum score", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
-
-# SHOW ANOMALIES
-data = {
- "anomaly": ["Anomaly 1", "Anomaly 2", "Anomaly 3"],
- "description": ["Description 1", "Description 2", "Description 3"],
-}
-
-
-# SHOW ANOMALIES
-def show_anomaly(df: pd.DataFrame):
- if len(st.session_state.anomalies["selection"]["rows"]) > 0:
- # Get selected row number
- row_num = st.session_state.anomalies["selection"]["rows"][0]
- # Get indicator id
- indicator_id = df.index[row_num]
- action(indicator_id)
-
-
-@st.dialog("Show anomaly", width="large")
-def action(indicator_id):
- grapher_chart(variable_id=indicator_id)
-
-
-if len(st.session_state.indicators) > 0:
- df = pd.DataFrame(
- {
- "indicator_id": [i.id for i in st.session_state.indicators],
- "reviewed": [False for i in st.session_state.indicators],
- },
- ).set_index("indicator_id")
-
- st.dataframe(
- df,
- key="anomalies",
- selection_mode="single-row",
- on_select=lambda df=df: show_anomaly(df),
- use_container_width=True,
- )
diff --git a/apps/wizard/config/config.yml b/apps/wizard/config/config.yml
index f445f1dd50f..0ec6f7414fd 100644
--- a/apps/wizard/config/config.yml
+++ b/apps/wizard/config/config.yml
@@ -99,6 +99,15 @@ sections:
image_url: "https://superheroetc.wordpress.com/wp-content/uploads/2017/05/bulbasaur-line.jpg"
disable:
production: True
+ - title: "Anomalist"
+ alias: anomalist
+ entrypoint: app_pages/anomalist/app.py
+ description: List anomalies in data
+ maintainer: "@lucas"
+ icon: ":material/planner_review:"
+ image_url: "https://i0.pickpik.com/photos/87/645/315/halloween-ghosts-happy-halloween-ghost-preview.jpg"
+ disable:
+ production: True
- title: "Chart Diff"
alias: chart-diff
entrypoint: app_pages/chart_diff/app.py
@@ -108,15 +117,6 @@ sections:
image_url: "https://static.wikia.nocookie.net/dragonball/images/6/60/FusionDanceFinaleGotenTrunksBuuSaga.png"
disable:
production: True
- - title: "Anomalist"
- alias: anomalist
- entrypoint: app_pages/anomalist_2.py
- description: List anomalies in data
- maintainer: "@lucas"
- icon: ":material/planner_review:"
- image_url: "https://superheroetc.wordpress.com/wp-content/uploads/2017/05/bulbasaur-line.jpg"
- disable:
- production: True
- title: "Harmonizer"
alias: harmonizer
description: "Harmonize a column of a table"
@@ -124,20 +124,6 @@ sections:
entrypoint: app_pages/harmonizer.py
icon: ":material/music_note:"
image_url: "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c1/C_triad.svg/2560px-C_triad.svg.png"
- - title: "Map Bracketer"
- alias: map_brackets
- entrypoint: app_pages/map_brackets.py
- description: Create optimal map brackets
- maintainer: "@pablo"
- icon: ":material/map:"
- image_url: "https://upload.wikimedia.org/wikipedia/en/8/8c/Human_Language_Families_Map_%28Wikipedia_Colors_.PNG"
- - title: "Explorer editor"
- alias: explorer_editor
- entrypoint: app_pages/explorer_edit.py
- description: Edit explorer config
- maintainer: "@lucas"
- icon: ":material/explore:"
- image_url: "https://upload.wikimedia.org/wikipedia/en/1/18/Dora_the_Explorer_2004_album_cover.jpg"
- title: "Monitoring"
description: |-
@@ -158,6 +144,25 @@ sections:
icon: ":material/search:"
image_url: "https://upload.wikimedia.org/wikipedia/commons/c/c3/NGC_4414_%28NASA-med%29.jpg"
+ - title: "Explorers"
+ description: |-
+ Explorer tools.
+ apps:
+ - title: "Map Bracketer"
+ alias: map_brackets
+ entrypoint: app_pages/map_brackets.py
+ description: Create optimal map brackets
+ maintainer: "@pablo"
+ icon: ":material/map:"
+ image_url: "https://upload.wikimedia.org/wikipedia/en/8/8c/Human_Language_Families_Map_%28Wikipedia_Colors_.PNG"
+ - title: "ID to Path"
+ alias: explorer_editor
+ entrypoint: app_pages/explorer_edit.py
+ description: Migrate id-based explorers
+ maintainer: "@lucas"
+ icon: ":material/explore:"
+ image_url: "https://upload.wikimedia.org/wikipedia/en/1/18/Dora_the_Explorer_2004_album_cover.jpg"
+
- title: "Research"
description: |-
Research tools.
diff --git a/apps/wizard/utils/cached.py b/apps/wizard/utils/cached.py
index 58d0ffbeee5..d3a461a5d73 100644
--- a/apps/wizard/utils/cached.py
+++ b/apps/wizard/utils/cached.py
@@ -3,14 +3,28 @@
import pandas as pd
import streamlit as st
-from etl import grapher_io as io
+from etl import grapher_io as gio
from etl.config import OWID_ENV, OWIDEnv
from etl.grapher_model import Variable
+@st.cache_data
+def get_variable_uris(indicators: List[Variable], only_slug: Optional[bool] = False) -> List[str]:
+    """Return the `catalogPath` of each indicator, in input order.
+
+    With `only_slug` set, each path is reduced to the part after its last "/";
+    entries whose `catalogPath` is not a string are returned as "".
+    """
+    catalog_paths = [indicator.catalogPath for indicator in indicators]
+    if not only_slug:
+        return catalog_paths  # type: ignore
+
+    slugs = []
+    for path in catalog_paths:
+        if isinstance(path, str):
+            # Text after the last "/" (the whole string when there is no "/").
+            slugs.append(path.rpartition("/")[2])
+        else:
+            slugs.append("")
+    return slugs
+
+
+@st.cache_data
+def load_dataset_uris_new_in_server() -> List[str]:
+ """Load URIs of datasets that are new in staging server.
+
+ NOTE(review): the body returns whatever `gio.load_dataset_uris()` returns and applies no
+ staging-specific filtering here -- confirm whether that filtering is still to be implemented.
+ """
+ return gio.load_dataset_uris()
+
+
@st.cache_data
def load_dataset_uris() -> List[str]:
- return io.load_dataset_uris()
+ return gio.load_dataset_uris()
@st.cache_data
@@ -19,7 +33,7 @@ def load_variables_in_dataset(
_owid_env: OWIDEnv = OWID_ENV,
) -> List[Variable]:
"""Load Variable objects that belong to a dataset with URI `dataset_uri`."""
- return io.load_variables_in_dataset(dataset_uri, _owid_env)
+ return gio.load_variables_in_dataset(dataset_uri, _owid_env)
@st.cache_data
@@ -29,7 +43,7 @@ def load_variable_metadata(
variable: Optional[Variable] = None,
_owid_env: OWIDEnv = OWID_ENV,
) -> Dict[str, Any]:
- return io.load_variable_metadata(
+ return gio.load_variable_metadata(
catalog_path=catalog_path,
variable_id=variable_id,
variable=variable,
@@ -44,7 +58,7 @@ def load_variable_data(
variable: Optional[Variable] = None,
_owid_env: OWIDEnv = OWID_ENV,
) -> pd.DataFrame:
- return io.load_variable_data(
+ return gio.load_variable_data(
catalog_path=catalog_path,
variable_id=variable_id,
variable=variable,
diff --git a/apps/wizard/utils/components.py b/apps/wizard/utils/components.py
index 52b5cc445c0..82c3ff1156b 100644
--- a/apps/wizard/utils/components.py
+++ b/apps/wizard/utils/components.py
@@ -9,7 +9,7 @@
import streamlit.components.v1 as components
from etl.config import OWID_ENV, OWIDEnv
-from etl.grapher_io import load_variable_data
+from etl.grapher_io import ensure_load_variable, load_variable_data
from etl.grapher_model import Variable
HORIZONTAL_STYLE = """