diff --git a/apps/wizard/app_pages/indicator_upgrade/app.py b/apps/wizard/app_pages/indicator_upgrade/app.py index 73344caef7f..b7bd5f43b21 100644 --- a/apps/wizard/app_pages/indicator_upgrade/app.py +++ b/apps/wizard/app_pages/indicator_upgrade/app.py @@ -26,7 +26,12 @@ from structlog import get_logger from apps.wizard import utils -from apps.wizard.app_pages.indicator_upgrade.charts_update import get_affected_charts_and_preview, push_new_charts +from apps.wizard.app_pages.indicator_upgrade.charts_update import ( + get_affected_charts_and_preview, + push_new_charts, + save_variable_mapping, + undo_upgrade_dialog, +) from apps.wizard.app_pages.indicator_upgrade.dataset_selection import build_dataset_form from apps.wizard.app_pages.indicator_upgrade.indicator_mapping import render_indicator_mapping from apps.wizard.app_pages.indicator_upgrade.utils import get_datasets @@ -40,7 +45,7 @@ page_title="Wizard: Indicator Upgrader", layout="wide", page_icon="🪄", - initial_sidebar_state="collapsed", + # initial_sidebar_state="collapsed", menu_items={ "Report a bug": "https://github.com/owid/etl/issues/new?assignees=marigold%2Clucasrodes&labels=wizard&projects=&template=wizard-issue---.md&title=wizard%3A+meaningful+title+for+the+issue", "About": """ @@ -148,14 +153,40 @@ if st.session_state.submitted_charts: if isinstance(charts, list) and len(charts) > 0: st.toast("Updating charts...") + + # Push charts push_new_charts(charts) + # Save variable mapping + save_variable_mapping( + indicator_mapping, + int(search_form.dataset_old_id), + int(search_form.dataset_new_id), + comments="Done with indicator-upgrader", + ) + + # Undo upgrade + st.markdown("Do you want to undo the indicator upgrade?") + st.button( + "Undo upgrade", + on_click=undo_upgrade_dialog, + icon=":material/undo:", + help="Undo all indicator upgrades", + key="btn_undo_upgrade_end", + ) + + ########################################################################################## -# 4 UPDATE CHARTS +# 5 UNDO UPGRADE # -# TODO: add description +# You may have accidentally upgraded the wrong indicators. Here you can undo the upgrade. ########################################################################################## -# if st.session_state.submitted_datasets and st.session_state.submitted_charts and st.session_state.submitted_indicators: -# if isinstance(charts, list) and len(charts) > 0: -# st.toast("Updating charts...") -# push_new_charts(charts, SCHEMA_CHART_CONFIG) +with st.sidebar: + st.markdown("### Advanced tools") + st.button( + "Undo upgrade", + on_click=undo_upgrade_dialog, + icon=":material/undo:", + help="Undo all indicator upgrades", + key="btn_undo_upgrade_sidebar", + ) diff --git a/apps/wizard/app_pages/indicator_upgrade/charts_update.py b/apps/wizard/app_pages/indicator_upgrade/charts_update.py index 5fd210f578d..b88800f4649 100644 --- a/apps/wizard/app_pages/indicator_upgrade/charts_update.py +++ b/apps/wizard/app_pages/indicator_upgrade/charts_update.py @@ -10,6 +10,7 @@ import etl.grapher_model as gm from apps.chart_sync.admin_api import AdminAPI from apps.wizard.utils import set_states, st_page_link, st_toast_error +from apps.wizard.utils.db import WizardDB from etl.config import OWID_ENV from etl.helpers import get_schema_from_url from etl.indicator_upgrade.indicator_update import find_charts_from_variable_ids, update_chart_config @@ -130,3 +131,59 @@ def push_new_charts(charts: List[gm.Chart]) -> None: "The charts were successfully updated! If indicators from other datasets also need to be upgraded, simply refresh this page, otherwise move on to `chart diff` to review all changes." ) st_page_link("chart-diff") + + +def save_variable_mapping( + indicator_mapping: Dict[int, int], dataset_id_new: int, dataset_id_old: int, comments: str = "" +) -> None: + WizardDB.add_variable_mapping( + mapping=indicator_mapping, + dataset_id_new=dataset_id_new, + dataset_id_old=dataset_id_old, + comments=comments, + ) + + +def undo_indicator_upgrade(indicator_mapping): + mapping_inverted = {v: k for k, v in indicator_mapping.items()} + with st.spinner("Undoing upgrade..."): + # Get affected charts + charts = get_affected_charts_and_preview( + mapping_inverted, + ) + + # TODO: instead of pushing new charts, we should revert the changes! + # To do this, we should have kept a copy or reference to the original revision. + # Idea: when 'push_new_charts' is called, store in a table the original revision of the chart. + push_new_charts(charts) + + # Reset variable mapping + WizardDB.delete_variable_mapping() + + +@st.dialog("Undo upgrade", width="large") +def undo_upgrade_dialog(): + mapping = WizardDB.get_variable_mapping() + + if mapping != {}: + st.markdown( + "The following table shows the indicator mapping that has been applied to the charts. Undoing means inverting this mapping." + ) + data = { + "id_old": list(mapping.keys()), + "id_new": list(mapping.values()), + } + st.dataframe(data) + st.button( + "Undo upgrade", + on_click=lambda m=mapping: undo_indicator_upgrade(m), + icon=":material/undo:", + help="Undo all indicator upgrades", + type="primary", + key="btn_undo_upgrade_2", + ) + st.warning( + "Charts will still appear in chart-diff. This is because the chart configs have actually changed (their version has beem bumped). In the future, we do not want to show these charts in chart-diff. For the time being, you should reject these chart diffs." + ) + else: + st.markdown("No indicator mapping found. Nothing to undo.") diff --git a/apps/wizard/app_pages/indicator_upgrade/indicator_mapping.py b/apps/wizard/app_pages/indicator_upgrade/indicator_mapping.py index b11324505e9..868727b6249 100644 --- a/apps/wizard/app_pages/indicator_upgrade/indicator_mapping.py +++ b/apps/wizard/app_pages/indicator_upgrade/indicator_mapping.py @@ -361,7 +361,7 @@ def __init__(self, indicator_upgrade: "IndicatorUpgrade"): @st.fragment def render(self, indicator_id_to_display, df_data=None): with st.container(border=True): - cols = [100, 10, 10] + cols = [100, 5, 10] cols = st.columns(cols, vertical_alignment="bottom") # Indicators (old, new) @@ -450,7 +450,7 @@ def _set_states_checkbox(): st.session_state[k][self.iu.key] = not st.session_state[k][self.iu.key] st.checkbox( - label="Ignore", + label="Skip", key=self.iu.key_ignore, # label_visibility="collapsed", value=self.iu.skip, diff --git a/apps/wizard/utils/cached.py b/apps/wizard/utils/cached.py index 9ae59fa6817..58d0ffbeee5 100644 --- a/apps/wizard/utils/cached.py +++ b/apps/wizard/utils/cached.py @@ -10,7 +10,7 @@ @st.cache_data def load_dataset_uris() -> List[str]: - return load_dataset_uris() + return io.load_dataset_uris() @st.cache_data @@ -19,7 +19,7 @@ def load_variables_in_dataset( _owid_env: OWIDEnv = OWID_ENV, ) -> List[Variable]: """Load Variable objects that belong to a dataset with URI `dataset_uri`.""" - return load_variables_in_dataset(dataset_uri, _owid_env) + return io.load_variables_in_dataset(dataset_uri, _owid_env) @st.cache_data diff --git a/apps/wizard/utils/db.py b/apps/wizard/utils/db.py index a10901d07b5..42167c7ce32 100644 --- a/apps/wizard/utils/db.py +++ b/apps/wizard/utils/db.py @@ -1,10 +1,16 @@ -"""Interact with database.""" +"""Interact with custom interim database. + +NOTE: We are currently migrating some of the logic to our actual MySQL database. The goal is to have a tracking of user interaction with Wizard in our main database. For now, we will be storing new tables with names 'wizard__*' in the main database. In the future, we might want to have these tables in another database in staging. + +Some of the tools here rely on a local temporary sqlite database. This database is a custom and temporary database used to store data in a server. Not intended for production use. + +""" import datetime as dt import hashlib import os import time from contextlib import contextmanager -from typing import Any, Generator, Literal, Optional, Tuple +from typing import Any, Dict, Generator, Literal, Optional, Tuple import pandas as pd import streamlit as st @@ -12,6 +18,7 @@ from sqlalchemy.orm import Session from apps.wizard.utils.paths import STREAMLIT_SECRETS, WIZARD_DB +from etl.db import get_engine, read_sql, to_sql # DB is set up DB_IS_SET_UP = STREAMLIT_SECRETS.exists() & WIZARD_DB.exists() @@ -20,6 +27,7 @@ TB_USAGE = "expert_usage" TB_PR = "pull_requests" TB_NS = "etl_news" +TB_VARMAP = "wiz__variable_mapping" # Window accepted values WND_LITERAL = Literal["1d", "7d"] @@ -152,6 +160,84 @@ def get_news_summary(cls, window_type: WND_LITERAL) -> Tuple[str, str, str] | No data[0][2], ) + @classmethod + def delete_variable_mapping(cls) -> None: + """Delete variable mapping.""" + if cls.table_exists(TB_VARMAP): + query = f"DELETE FROM {TB_VARMAP};" + engine = get_engine() + with Session(engine) as s: + s.execute(text(query)) + s.commit() + + @classmethod + def add_variable_mapping( + cls, mapping: Dict[int, int], dataset_id_old: int, dataset_id_new: int, comments: str = "" + ) -> None: + """Add a mapping to TB_VARMAP. + + This table should have columns 'id_old' (key), 'id_new' (value), 'timestamp', and 'dataset_id_old' and 'dataset_id_new'. + """ + timestamp = dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + + # Build dataframe + query_params = [ + { + "id_old": id_old, + "id_new": id_new, + "timestamp": timestamp, + "dataset_id_old": dataset_id_old, + "dataset_id_new": dataset_id_new, + "comments": comments, + } + for id_old, id_new in mapping.items() + ] + df = pd.DataFrame(query_params) + + # Insert in table + to_sql(df, TB_VARMAP, if_exists="append", index=False) + + @classmethod + def get_variable_mapping_raw(cls) -> pd.DataFrame: + """Get the mapping from TB_VARMAP.""" + if cls.table_exists(TB_VARMAP): + return read_sql(f"SELECT * FROM {TB_VARMAP};") + return pd.DataFrame() + + @classmethod + def get_variable_mapping(cls) -> Dict[int, int]: + """Get variable mapping. + + This mapping can be the result of multiple mappings. + + Example: you upgrade indicators twice, the mapping will be the result of the two mappings. + + First mapping is: 1 -> 4 and 2 -> 5 + Second mapping is: 4 -> 2 + + Then, the resulting mapping is 1 -> 2, 2 -> 5, and 4 -> 2. + + """ + df = cls.get_variable_mapping_raw() + + if df.empty: + return {} + + mapping = simplify_varmap(df) + + return mapping + + @classmethod + def table_exists(cls, tb_name: str): + """Check if table exists in the database.""" + query = """ + SELECT * + FROM information_schema.tables + WHERE table_schema = 'owid'; + """ + df = read_sql(query) + return tb_name in set(df["TABLE_NAME"]) + @contextmanager def create_session(db_name: str) -> Generator[Session, None, None]: @@ -165,3 +251,37 @@ def _prepare_query_insert(tb_name: str, fields: Tuple[Any]) -> str: values = ":" + ", :".join(fields) query = f"INSERT INTO {tb_name} {fields} VALUES ({values});" return query + + +def simplify_varmap(df): + groups = df.groupby("timestamp") + + mapping = {} + # Iterate over each 'submitted mapping' + for group in groups: + # Get mapping for a certain timestamp + mapping_ = group[1][["id_old", "id_new"]].set_index("id_old")["id_new"].to_dict() + + # Initialize the mapping + if mapping == {}: + mapping = mapping_ + continue + + # Sanity check that: there is no key in mapping_ already present in mapping + if any(k in mapping for k in mapping_): + raise ValueError( + "The variable mapping has an unexpected format. An indicator is being upgraded multiple times." + ) + + # Update the mapping sequentially + for k, v in mapping.items(): + if v in mapping_: + mapping[k] = mapping_[v] + + # Update with new mappings + mapping = mapping | mapping_ + + # Remove self-mappings + mapping_no_identical = {k: v for k, v in mapping.items() if k != v} + + return mapping_no_identical diff --git a/apps/wizard/utils/paths.py b/apps/wizard/utils/paths.py index 62cdeaa5dcc..aa4ed2d0b1e 100644 --- a/apps/wizard/utils/paths.py +++ b/apps/wizard/utils/paths.py @@ -1,14 +1,17 @@ from etl.paths import BASE_DIR +WIZARD_CFG_NAME = ".wizardcfg" +WIZARD_DB_NAME = "wizard.db" + # PATH TO WIZARD CONFIG FOLDER -WIZARD_CFG = BASE_DIR / ".wizardcfg" +WIZARD_CFG = BASE_DIR / WIZARD_CFG_NAME # PATH WIZARD DEFAULTS (old) WIZARD_VARIABLES_DEFAULTS_OLD = BASE_DIR / ".wizard" # PATH WIZARD DEFAULTS (new) WIZARD_VARIABLES_DEFAULTS = WIZARD_CFG / "defaults.json" -WIZARD_DB = WIZARD_CFG / "wizard.db" +WIZARD_DB = WIZARD_CFG / WIZARD_DB_NAME # STREAMLIT SECRETS STREAMLIT_SECRETS = BASE_DIR / ".streamlit" / "secrets.toml" diff --git a/etl/db.py b/etl/db.py index 690e0ce7524..2541f8f8472 100644 --- a/etl/db.py +++ b/etl/db.py @@ -79,3 +79,19 @@ def read_sql(sql: str, engine: Optional[Engine | Session] = None, *args, **kwarg return pd.read_sql(sql, engine.bind, *args, **kwargs) else: raise ValueError(f"Unsupported engine type {type(engine)}") + + +def to_sql(df: pd.DataFrame, name: str, engine: Optional[Engine | Session] = None, *args, **kwargs): + """Wrapper around pd.to_sql that creates a connection and closes it after reading the data. + This adds overhead, so if you need performance, reuse the same connection and cursor. + """ + engine = engine or get_engine() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + if isinstance(engine, Engine): + with engine.connect() as con: + return df.to_sql(name, con, *args, **kwargs) + elif isinstance(engine, Session): + return df.to_sql(name, engine.bind, *args, **kwargs) + else: + raise ValueError(f"Unsupported engine type {type(engine)}") diff --git a/etl/grapher_io.py b/etl/grapher_io.py index 23189454ab1..2a246f65fb8 100644 --- a/etl/grapher_io.py +++ b/etl/grapher_io.py @@ -56,7 +56,16 @@ def load_variable( id_or_path: str | int, owid_env: OWIDEnv = OWID_ENV, ) -> Variable: - """Load variable""" + """Load variable. + + If id_or_path is str, it'll be used as catalog path. + """ + if not isinstance(id_or_path, str): + try: + id_or_path = int(id_or_path) + except Exception: + pass + with Session(owid_env.engine) as session: variable = Variable.from_id_or_path( session=session,