✨ data workflow: various enhancements (#2706)

* init staging * deprecate meta_expert * update models, cost estimates * integrate oracle into expert * reorder options * make page link wrapper more flexible * make link to expert less verbose * align wizard theme with site * only ignore secrets * remove warning * wip * fix enum * three-state review * minor fix * minor tweak * remove unused var * error if same indicator old=new * fix bug * remove exception catch, so we can debug better * logo placeholder * add confusion matrix tab * confusion matrix if categorical * lint * add reject to owidbot and chart-sync * fix * fix --------- Co-authored-by: Marigold <[email protected]>
owid · Jun 3, 2024 · bccfd00 · bccfd00
1 parent e5d860d
commit bccfd00
Show file tree

Hide file tree

Showing 27 changed files with 835 additions and 1,504 deletions.
diff --git a/.gitignore b/.gitignore
@@ -47,7 +47,7 @@ site/
 
 .wizard
 .wizardcfg/*
-.streamlit/*
+.streamlit/secrets.toml
 .ipynb_lock
 .execution_time.json
 

diff --git a/.streamlit/config.toml b/.streamlit/config.toml
@@ -0,0 +1,5 @@
+[theme]
+base="light"
+primaryColor="#ce261e"
+textColor="#333333"
+secondaryBackgroundColor="#f0f4fa"
diff --git a/apps/chart_sync/cli.py b/apps/chart_sync/cli.py
@@ -100,7 +100,7 @@ def cli(
     - Both published charts and drafts from staging are synced.
     - Existing charts (with the same slug) are added as chart revisions in target. (Revisions could be pre-approved with `--approve-revisions` flag)
     - You get a warning if the chart **_has been modified on live_** after staging server was created.
-    - If the chart is unapproved in chart-diff, you'll get a warning and Slack notification
+    - If the chart is pending in chart-diff, you'll get a warning and Slack notification
     - Deleted charts are **_not synced_**.
 
     **Considerations on chart revisions:**
@@ -196,24 +196,23 @@ def cli(
                     ### New chart-diff workflow ###
                     if chartdiff:
                         # Change has been approved, update the chart
-                        if diff.approved:
+                        if diff.is_approved:
                             log.info("chart_sync.chart_update", slug=chart_slug, chart_id=chart_id)
                             charts_synced += 1
                             if not dry_run:
                                 target_api.update_chart(chart_id, diff.source_chart.config)
 
-                        # TODO: should we add rejected state?
                         # Rejected chart diff
-                        # elif diff.rejected:
-                        #     log.info(
-                        #         "chart_sync.rejected",
-                        #         slug=chart_slug,
-                        #         chart_id=chart_id,
-                        #     )
-                        #     continue
-
-                        # Not approved, notify us about it
-                        elif diff.unapproved:
+                        elif diff.is_rejected:
+                            log.info(
+                                "chart_sync.is_rejected",
+                                slug=chart_slug,
+                                chart_id=chart_id,
+                            )
+                            continue
+
+                        # Pending chart, notify us about it
+                        elif diff.is_pending:
                             log.warning(
                                 "chart_sync.pending_chart",
                                 slug=chart_slug,
@@ -309,7 +308,7 @@ def cli(
 
                     if chartdiff:
                         # New chart has been approved
-                        if diff.approved:
+                        if diff.is_approved:
                             charts_synced += 1
                             if not dry_run:
                                 resp = target_api.create_chart(diff.source_chart.config)
@@ -322,18 +321,17 @@ def cli(
                                 slug=chart_slug,
                                 new_chart_id=resp["chartId"],
                             )
-                        # TODO: should we add rejected state?
                         # Rejected chart diff
-                        # elif diff.rejected:
-                        #     log.info(
-                        #         "chart_sync.rejected",
-                        #         slug=chart_slug,
-                        #         chart_id=chart_id,
-                        #     )
-                        #     continue
+                        elif diff.is_rejected:
+                            log.info(
+                                "chart_sync.is_rejected",
+                                slug=chart_slug,
+                                chart_id=chart_id,
+                            )
+                            continue
 
                         # Not approved, create the chart, but notify us about it
-                        elif diff.unapproved:
+                        elif diff.is_pending:
                             log.warning(
                                 "chart_sync.new_unapproved_chart",
                                 slug=chart_slug,

diff --git a/apps/owidbot/chart_diff.py b/apps/owidbot/chart_diff.py
@@ -22,13 +22,13 @@ def create_check_run(repo_name: str, branch: str, charts_df: pd.DataFrame, dry_r
 
     if charts_df.empty:
         conclusion = "neutral"
-        title = "No new or modified charts"
-    elif charts_df.approved.all():
+        title = "No charts for review"
+    elif charts_df.is_reviewed.all():
         conclusion = "success"
-        title = "All charts are approved"
+        title = "All charts are reviewed"
     else:
         conclusion = "failure"
-        title = "Some charts are not approved"
+        title = "Some charts are not reviewed"
 
     if not dry_run:
         # Create the check run and complete it in a single command
@@ -49,7 +49,7 @@ def run(branch: str, charts_df: pd.DataFrame) -> str:
 
     chart_diff = format_chart_diff(charts_df)
 
-    if charts_df.empty or charts_df.approved.all():
+    if charts_df.empty or charts_df.is_reviewed.all():
         status = "✅"
     else:
         status = "❌"
@@ -83,7 +83,10 @@ def call_chart_diff(branch: str) -> pd.DataFrame:
                 df.append(
                     {
                         "chart_id": diff.chart_id,
-                        "approved": diff.approved,
+                        "is_approved": diff.is_approved,
+                        "is_pending": diff.is_pending,
+                        "is_rejected": diff.is_rejected,
+                        "is_reviewed": diff.is_reviewed,
                         "is_new": diff.is_new,
                     }
                 )
@@ -93,14 +96,14 @@ def call_chart_diff(branch: str) -> pd.DataFrame:
 
 def format_chart_diff(df: pd.DataFrame) -> str:
     if df.empty:
-        return "No new or modified charts."
+        return "No charts for review."
 
     new = df[df.is_new]
     modified = df[~df.is_new]
 
     return f"""
 <ul>
-    <li>{len(new)} new charts ({new.approved.sum()} approved)</li>
-    <li>{len(modified)} modified charts ({modified.approved.sum()} approved)</li>
+    <li>{len(new)} new charts ({new.is_reviewed.sum()} reviewed)</li>
+    <li>{len(modified)} modified charts ({modified.is_reviewed.sum()} reviewed)</li>
 </ul>
     """.strip()
diff --git a/apps/wizard/app.py b/apps/wizard/app.py
@@ -10,6 +10,9 @@
 from apps.wizard import utils
 from apps.wizard.config import WIZARD_CONFIG
 
+# Logo
+# st.logo("docs/assets/logo.png")
+
 # Get current directory
 CURRENT_DIR = Path(__file__).parent
 # Page config

diff --git a/apps/wizard/config/config.yml b/apps/wizard/config/config.yml
@@ -32,15 +32,11 @@ main:
   expert:
       title: "Expert"
       description: "Ask the expert ETL questions!"
-      maintainer: "@lucas"
+      maintainer:
+        - "@lucas"
+        - "@daniel"
       entrypoint: pages/expert/app.py
       emoji: "🧙"
-  oracle:
-      title: "OWID Datasette Oracle"
-      description: "Get help writing SQL queries for datasette!"
-      maintainer: "@daniel"
-      entrypoint: pages/owid_datasette_oracle.py
-      emoji: "🔮"
 
 # ETL steps
 etl:

diff --git a/apps/wizard/etl_steps/express.py b/apps/wizard/etl_steps/express.py
@@ -372,7 +372,7 @@ def remove_notebook(dataset_dir):
 # TITLE
 st.title("Create step 🐆 **:gray[Express]**")
 
-st.info("Use this step to create Meadow, Garden and Grapher step for a _single dataset_! **Requires ETL expertise**.")
+st.info("Use this step to create Meadow, Garden and Grapher step for a _single dataset_!.")
 
 # SIDEBAR
 with st.sidebar:

diff --git a/apps/wizard/home.py b/apps/wizard/home.py
@@ -20,9 +20,14 @@
 """
 )
 
-with st.container(border=True):
-    st.markdown("Questions about the documentation? Ask the expert!")
-    st_page_link("expert", help="Ask the expert any documentation question!", use_container_width=True)
+
+st_page_link(
+    "expert",
+    label="Questions about ETL or Grapher? Ask the expert!",
+    help="Ask the expert any documentation question!",
+    use_container_width=True,
+    border=True,
+)
 
 # Generic tools
 ## Default styling for the cards (Wizard apps are presented as cards)

diff --git a/apps/wizard/pages/chart_diff/app.py b/apps/wizard/pages/chart_diff/app.py
@@ -8,6 +8,7 @@
 from st_pages import add_indentation
 from structlog import get_logger
 
+import etl.grapher_model as gm
 from apps.chart_sync.cli import _modified_chart_ids_by_admin
 from apps.wizard.pages.chart_diff.chart_diff import ChartDiffModified
 from apps.wizard.pages.chart_diff.config_diff import st_show_diff
@@ -36,27 +37,35 @@
     initial_sidebar_state="collapsed",
 )
 add_indentation()
-st.session_state.hide_approved_charts = st.session_state.get("hide_approved_charts", False)
+st.session_state.hide_reviewed_charts = st.session_state.get("hide_reviewed_charts", False)
 
 
 ########################################
 # LOAD ENVS
 ########################################
+warn_msg = []
+
 SOURCE = OWID_ENV
 assert OWID_ENV.env_type_id != "production", "Your .env points to production DB, please use a staging environment."
 
 # Try to compare against production DB if possible, otherwise compare against staging-site-master
 if config.ENV_FILE_PROD:
     TARGET = OWIDEnv.from_env_file(config.ENV_FILE_PROD)
 else:
-    warning_msg = "ENV file doesn't connect to production DB, comparing against staging-site-master"
+    warning_msg = "ENV file doesn't connect to production DB, comparing against `staging-site-master`."
     log.warning(warning_msg)
-    st.warning(warning_msg)
+    warn_msg.append(warning_msg)
     TARGET = OWIDEnv.from_staging("master")
 
 CHART_PER_PAGE = 10
 
 
+########################################
+# WARNING MSG
+########################################
+warn_msg += ["This tool is being developed! Please report any issues you encounter in `#proj-new-data-workflow`"]
+st.warning("- " + "\n\n- ".join(warn_msg))
+
 ########################################
 # FUNCTIONS
 ########################################
@@ -94,13 +103,16 @@ def get_chart_diffs(
 def st_show(diff: ChartDiffModified, source_session, target_session=None) -> None:
     """Show the chart diff in Streamlit."""
     # Define label
-    emoji = "✅" if diff.approved else "⏳"
+    print("Showing diff, state:", diff.is_approved, diff.is_rejected, diff.is_pending)
+    emoji = "✅" if diff.is_approved else ("❌" if diff.is_rejected else "⏳")
     label = f"{emoji} {diff.source_chart.config['slug']}"
 
     # Define action for Toggle on change
-    def tgl_on_change(diff, session) -> None:
+    def chart_state_change(diff, session) -> None:
+        # print(st.session_state.chart_diffs[diff.chart_id].approval_status)
         with st.spinner():
-            diff.switch_state(session=session)
+            status = st.session_state[f"radio-{diff.chart_id}"]
+            diff.set_status(session=session, status=status)
 
     # Define action for Refresh on click
     def refresh_on_click(source_session=source_session, target_session=None):
@@ -133,7 +145,7 @@ def refresh_on_click(source_session=source_session, target_session=None):
         raise ValueError("chart_diff show have flags `is_modified = not is_new`.")
 
     # Actually show stuff
-    with st.expander(label, not diff.approved):
+    with st.expander(label, not diff.is_reviewed):
         col1, col2 = st.columns([1, 3])
 
         # Refresh
@@ -145,11 +157,20 @@ def refresh_on_click(source_session=source_session, target_session=None):
                 help="Get the latest version of the chart from the staging server.",
             )
 
-        options = ["Pending", "Approve"]
+        # Actions on chart diff: approve, pending, reject
         options = {
-            "Approve": "green",
-            "Pending": "orange",
-            # "Reject": "red",
+            gm.ChartStatus.APPROVED.value: {
+                "label": "Approve",
+                "color": "green",
+            },
+            gm.ChartStatus.REJECTED.value: {
+                "label": "Reject",
+                "color": "red",
+            },
+            gm.ChartStatus.PENDING.value: {
+                "label": "Pending",
+                "color": "gray",
+            },
         }
         option_names = list(options.keys())
         with col1:
@@ -158,12 +179,13 @@ def refresh_on_click(source_session=source_session, target_session=None):
                 key=f"radio-{diff.chart_id}",
                 options=option_names,
                 horizontal=True,
-                format_func=lambda x: f":{options.get(x)}-background[{x}]",
-                index=option_names.index("Approve") if diff.approved else option_names.index("Pending"),
-                on_change=lambda diff=diff, session=source_session: tgl_on_change(diff, session),
+                format_func=lambda x: f":{options[x]['color']}-background[{options[x]['label']}]",
+                index=option_names.index(diff.approval_status),  # type: ignore
+                on_change=lambda diff=diff, session=source_session: chart_state_change(diff, session),
                 # label_visibility="collapsed",
             )
 
+        # Show diff
         if diff.is_modified:
             tab1, tab2 = st.tabs(["Charts", "Config diff"])
             with tab1:
@@ -230,14 +252,13 @@ def get_engines() -> tuple[Engine, Engine]:
 
 def show_help_text():
     with st.popover("How does this work?"):
-        staging_name = OWID_ENV.name.upper()
         st.markdown(
             f"""
-        **Chart diff** is a living page that compares all ongoing charts between `PRODUCTION` and your `{staging_name}` environment.
+        **Chart diff** is a living page that compares all ongoing charts between [`production`](http://owid.cloud) and your [`{OWID_ENV.name}`]({OWID_ENV.admin_site}) environment.
 
-        It lists all those charts that have been modified in the `{staging_name}` environment.
+        It lists all those charts that have been modified in the `{OWID_ENV.name}` environment.
 
-        If you want any of the modified charts in `{staging_name}` to be migrated to `PRODUCTION`, you can approve them by clicking on the toggle button.
+        If you want any of the modified charts in `{OWID_ENV.name}` to be migrated to `production`, you can approve them by clicking on the toggle button.
         """
         )
 
@@ -254,7 +275,6 @@ def reject_chart_diffs(engine):
 ########################################
 def main():
     st.title("Chart ⚡ **:gray[Diff]**")
-    st.warning("This tool is being developed! Please report any issues you encounter in #proj-new-data-workflow")
     show_help_text()
 
     # Get stuff from DB
@@ -278,22 +298,22 @@ def main():
             on_click=lambda e=source_engine: reject_chart_diffs(e),
         )
 
-        def hide_approved():
+        def hide_reviewed():
             set_states(
                 {
-                    "hide_approved_charts": not st.session_state.hide_approved_charts,
+                    "hide_reviewed_charts": not st.session_state.hide_reviewed_charts,
                 }
             )
 
             # Hide approved (if option selected)
-            if st.session_state.hide_approved_charts:
+            if st.session_state.hide_reviewed_charts:
                 st.session_state.chart_diffs_filtered = {
-                    k: v for k, v in st.session_state.chart_diffs.items() if not v.approved
+                    k: v for k, v in st.session_state.chart_diffs.items() if not v.is_reviewed
                 }
             else:
                 st.session_state.chart_diffs_filtered = st.session_state.chart_diffs
 
-        st.toggle("Hide appoved charts", key="hide-approved-charts", on_change=hide_approved)
+        st.toggle("Hide reviewed charts", key="hide-reviewed-charts", on_change=hide_reviewed)
 
     # Get actual charts
     if st.session_state.chart_diffs == {}: