From 370948f8e3bd9476637d00e0cf4e4121bc27ef8c Mon Sep 17 00:00:00 2001 From: Krishna Gopal Date: Tue, 19 Dec 2023 12:39:18 -0600 Subject: [PATCH 1/2] feat: update column stats ui, add sort by usage to columns (#1389) * feat: update column stats ui, add sort by usage to columns --- querybook/server/datasources/metastore.py | 42 +++++----- querybook/server/logic/metastore.py | 33 ++++++++ .../DataTableStats/DataTableColumnStats.tsx | 31 ++++--- .../DataTableView/DataTableView.tsx | 1 - .../DataTableColumnCard.tsx | 35 ++++---- .../DataTableViewColumn.tsx | 81 +++++++++++++++---- querybook/webapp/resource/table.ts | 2 + 7 files changed, 153 insertions(+), 72 deletions(-) diff --git a/querybook/server/datasources/metastore.py b/querybook/server/datasources/metastore.py index 706ee396d..7cde405c4 100644 --- a/querybook/server/datasources/metastore.py +++ b/querybook/server/datasources/metastore.py @@ -1,35 +1,29 @@ from typing import Tuple, Union -from flask_login import current_user from app.auth.permission import ( - verify_query_engine_environment_permission, - verify_environment_permission, - verify_metastore_permission, + verify_data_column_permission, verify_data_schema_permission, verify_data_table_permission, - verify_data_column_permission, + verify_environment_permission, + verify_metastore_permission, + verify_query_engine_environment_permission, ) +from app.datasource import admin_only, api_assert, register, with_impression from app.db import DBSession -from app.datasource import register, api_assert, with_impression, admin_only from app.flask_app import cache, limiter +from const.datasources import RESOURCE_NOT_FOUND_STATUS_CODE from const.impression import ImpressionItemType from const.metastore import DataTableWarningSeverity, MetadataType from const.time import seconds_in_a_day -from const.datasources import RESOURCE_NOT_FOUND_STATUS_CODE +from flask_login import current_user from lib.lineage.utils import lineage -from lib.metastore.utils import DataTableFinder from lib.metastore import get_metastore_loader +from lib.metastore.utils import DataTableFinder from lib.query_analysis.samples import make_samples_query from lib.utils import mysql_cache -from logic import metastore as logic from logic import admin as admin_logic -from logic import data_element as data_element_logic -from logic import tag as tag_logic -from models.metastore import ( - DataTableWarning, - DataTableStatistics, - DataTableColumnStatistics, -) +from logic import metastore as logic +from models.metastore import DataTableStatistics, DataTableWarning from tasks.run_sample_query import run_sample_query @@ -269,6 +263,13 @@ def get_columns_from_table(table_id): return logic.get_column_by_table_id(table_id, session=session) +@register("/table//detailed_column/", methods=["GET"]) +def get_detailed_columns_from_table(table_id): + with DBSession() as session: + verify_data_table_permission(table_id, session=session) + return logic.get_detailed_columns_dict_by_table_id(table_id, session=session) + + @register("/table//raw_samples_query/", methods=["GET"]) def get_table_samples_raw_query( table_id, @@ -380,14 +381,7 @@ def get_column_by_table(table_id, column_name, with_table=False): def get_column(column_id, with_table=False): column = logic.get_column_by_id(column_id) verify_data_table_permission(column.table_id) - column_dict = column.to_dict(with_table) - - column_dict["stats"] = DataTableColumnStatistics.get_all(column_id=column_id) - column_dict["tags"] = tag_logic.get_tags_by_column_id(column_id=column_id) - column_dict[ - "data_element_association" - ] = data_element_logic.get_data_element_association_by_column_id(column_id) - return column_dict + return logic.get_detailed_column_dict(column, with_table=with_table) @register("/column//", methods=["PUT"]) diff --git a/querybook/server/logic/metastore.py b/querybook/server/logic/metastore.py index 4f1546422..8f787f2be 100644 --- a/querybook/server/logic/metastore.py +++ b/querybook/server/logic/metastore.py @@ -5,6 +5,7 @@ from const.metastore import DataOwner, DataTableWarningSeverity from lib.logger import get_logger from lib.sqlalchemy import update_model_fields +from logic import data_element as data_element_logic from logic.user import get_user_by_name from models.admin import QueryEngineEnvironment from models.metastore import ( @@ -500,6 +501,38 @@ def get_column_by_table_id(table_id, session=None): ) +@with_session +def get_detailed_column_dict(column: DataTableColumn, with_table=False, session=None): + from logic import tag as tag_logic + + column_dict = column.to_dict(with_table) + column_dict["stats"] = DataTableColumnStatistics.get_all( + column_id=column.id, session=session + ) + column_dict["tags"] = tag_logic.get_tags_by_column_id( + column_id=column.id, session=session + ) + column_dict[ + "data_element_association" + ] = data_element_logic.get_data_element_association_by_column_id( + column.id, session=session + ) + return column_dict + + +@with_session +def get_detailed_columns_dict_by_table_id(table_id, session=None): + data_table_columns = ( + session.query(DataTableColumn) + .filter(DataTableColumn.table_id == table_id) + .all() + ) + columns_info = [] + for col in data_table_columns: + columns_info.append(get_detailed_column_dict(col, session=session)) + return columns_info + + @with_session def get_all_column_name_by_table_id(table_id, session=None): return ( diff --git a/querybook/webapp/components/DataTableStats/DataTableColumnStats.tsx b/querybook/webapp/components/DataTableStats/DataTableColumnStats.tsx index 63cbeab79..9fd5739db 100644 --- a/querybook/webapp/components/DataTableStats/DataTableColumnStats.tsx +++ b/querybook/webapp/components/DataTableStats/DataTableColumnStats.tsx @@ -1,10 +1,9 @@ import * as React from 'react'; import { ITableColumnStats } from 'const/metastore'; -import { isNumeric } from 'lib/utils/number'; -import { KeyContentDisplay } from 'ui/KeyContentDisplay/KeyContentDisplay'; -import { TableStats } from './DataTableStatsCommon'; +import { Tag, TagGroup } from 'ui/Tag/Tag'; +import { KeyContentDisplay } from 'ui/KeyContentDisplay/KeyContentDisplay'; interface IProps { stats: ITableColumnStats[]; @@ -13,15 +12,21 @@ interface IProps { export const DataTableColumnStats: React.FunctionComponent = ({ stats, }) => { - const statsDOM = (stats || []).map((tableColumnStat) => ( - - - - )); + const statsDOM = (stats || []).map((stat, i) => { + const formattedValue = Array.isArray(stat.value) + ? stat.value.join(', ') + : stat.value; + return ( + + {stat.key} + {formattedValue} + + ); + }); - return
{statsDOM}
; + return ( +
+ {statsDOM} +
+ ); }; diff --git a/querybook/webapp/components/DataTableView/DataTableView.tsx b/querybook/webapp/components/DataTableView/DataTableView.tsx index 0d471fd5b..40d33d810 100644 --- a/querybook/webapp/components/DataTableView/DataTableView.tsx +++ b/querybook/webapp/components/DataTableView/DataTableView.tsx @@ -281,7 +281,6 @@ export const DataTableView: React.FC = ({ tableId }) => { const makeColumnsDOM = (numberOfRows = null) => ( Promise>; updateDataColumnDescription: ( columnId: number, @@ -33,27 +32,25 @@ export const DataTableColumnCard: React.FunctionComponent = ({ onEditColumnDescriptionRedirect, updateDataColumnDescription, }) => { - const { data: detailedColumn } = useResource( - React.useCallback(() => TableColumnResource.get(column.id), [column.id]) - ); const parsedType = useMemo(() => parseType('', column.type), [column.type]); - const tagsDOM = (detailedColumn?.tags || []).map((tag) => ( + const tagsDOM = (column?.tags || []).map((tag) => ( )); + const columnDescription = convertRawToContentState( + column.description as string + ); const descriptionContent = (
- {detailedColumn?.data_element_association && - !(column.description as ContentState).hasText() && ( + {column?.data_element_association && + !columnDescription.hasText() && ( )} = ({
)} - {detailedColumn?.data_element_association && ( + {column?.data_element_association && ( )} @@ -103,7 +98,9 @@ export const DataTableColumnCard: React.FunctionComponent = ({ {descriptionContent} - + {!!column?.stats?.length && ( + + )} diff --git a/querybook/webapp/components/DataTableViewColumn/DataTableViewColumn.tsx b/querybook/webapp/components/DataTableViewColumn/DataTableViewColumn.tsx index 202b0f679..2c77c7132 100644 --- a/querybook/webapp/components/DataTableViewColumn/DataTableViewColumn.tsx +++ b/querybook/webapp/components/DataTableViewColumn/DataTableViewColumn.tsx @@ -1,18 +1,18 @@ import { ContentState } from 'draft-js'; import React from 'react'; -import { IDataColumn, IDataTable } from 'const/metastore'; +import { IDataTable } from 'const/metastore'; +import { useResource } from 'hooks/useResource'; import { Nullable } from 'lib/typescript'; +import { TableResource } from 'resource/table'; import { Loading } from 'ui/Loading/Loading'; import { SearchBar } from 'ui/SearchBar/SearchBar'; import { OrderByButton } from 'ui/OrderByButton/OrderByButton'; - import { DataTableColumnCard } from './DataTableColumnCard'; import './DataTableViewColumn.scss'; export interface IDataTableViewColumnProps { table: IDataTable; - tableColumns: IDataColumn[]; numberOfRows: number; updateDataColumnDescription: ( columnId: number, @@ -21,20 +21,50 @@ export interface IDataTableViewColumnProps { onEditColumnDescriptionRedirect?: Nullable<() => Promise>; } +type ColumnOrderBy = 'Default' | 'Aa' | 'Usage'; + +const COLUMN_STATS_USAGE_KEY = 'usage'; + export const DataTableViewColumn: React.FunctionComponent< IDataTableViewColumnProps > = ({ updateDataColumnDescription, table = null, - tableColumns = [], numberOfRows = null, onEditColumnDescriptionRedirect, }) => { const [filterString, setFilterString] = React.useState(''); - const [orderColumnsBy, setOrdeColumnsBy] = React.useState(true); - const [orderBoardBy, setOrderBoardBy] = React.useState(false); + const [orderColumnsBy, setOrderColumnsBy] = + React.useState('Aa'); + const [orderColumnsByAsc, setOrderColumnsByAsc] = React.useState(false); + const { data: tableColumns } = useResource( + React.useCallback( + () => TableResource.getColumnDetails(table.id), + [table.id] + ) + ); + const usageByColumnId = React.useMemo( + () => + tableColumns?.reduce((acc, column) => { + acc[column.id] = + column.stats?.find( + (stat) => stat.key === COLUMN_STATS_USAGE_KEY + )?.value ?? 0; + return acc; + }, {}), + [tableColumns] + ); + const canSortByUsage = React.useMemo( + () => + !!usageByColumnId && + Object.values(usageByColumnId).some((usage) => usage !== 0), + [usageByColumnId] + ); const filteredColumns = React.useMemo(() => { + if (!tableColumns) { + return []; + } const filteredCols = tableColumns.filter((column) => !!filterString ? column.name.toLowerCase().includes(filterString.toLowerCase()) @@ -43,34 +73,55 @@ export const DataTableViewColumn: React.FunctionComponent< if (numberOfRows != null) { filteredCols.splice(numberOfRows); } - if (orderBoardBy) { + if (orderColumnsBy === 'Aa') { filteredCols.sort( (a, b) => (a.name.toLowerCase() > b.name.toLowerCase() ? 1 : -1) * - (orderColumnsBy ? 1 : -1) + (orderColumnsByAsc ? 1 : -1) + ); + } else if (orderColumnsBy === 'Usage') { + filteredCols.sort( + (a, b) => + (usageByColumnId[a.id] > usageByColumnId[b.id] ? 1 : -1) * + (orderColumnsByAsc ? 1 : -1) ); } return filteredCols; }, [ tableColumns, - filterString, numberOfRows, orderColumnsBy, - orderBoardBy, + filterString, + orderColumnsByAsc, + usageByColumnId, ]); + const onOrderByFieldToggle = React.useCallback(() => { + setOrderColumnsBy((v) => { + if (v === 'Aa') { + v = canSortByUsage ? 'Usage' : 'Default'; + } else if (v === 'Usage') { + v = 'Default'; + } else { + // v = 'Default' + v = 'Aa'; + } + return v; + }); + }, [canSortByUsage]); + if (!table || !tableColumns) { return ; } const sortButton = ( setOrdeColumnsBy((v) => !v)} + asc={orderColumnsByAsc} + hideAscToggle={orderColumnsBy === 'Default'} + onAscToggle={() => setOrderColumnsByAsc((v) => !v)} orderByField="name" - orderByFieldSymbol={orderBoardBy ? 'Aa' : 'Default'} - onOrderByFieldToggle={() => setOrderBoardBy((v) => !v)} + orderByFieldSymbol={orderColumnsBy} + onOrderByFieldToggle={onOrderByFieldToggle} /> ); diff --git a/querybook/webapp/resource/table.ts b/querybook/webapp/resource/table.ts index 8d10bcd75..89073496e 100644 --- a/querybook/webapp/resource/table.ts +++ b/querybook/webapp/resource/table.ts @@ -131,6 +131,8 @@ export const TableResource = { ds.fetch(`/table_name/${schemaName}/${tableName}/`, { metastore_id: metastoreId, }), + getColumnDetails: (tableId: number) => + ds.fetch(`/table/${tableId}/detailed_column/`), getMetastoreLink: (tableId: number, metadataType: MetadataType) => ds.fetch(`/table/${tableId}/metastore_link/`, { From 900d2d671e4b358954587db0a7b6e509c5198183 Mon Sep 17 00:00:00 2001 From: Julian Rojas Valvo Date: Wed, 3 Jan 2024 14:34:02 -0500 Subject: [PATCH 2/2] feat: fixed up clean_up_archived_data_doc and added the option to pass in -1 for run_all_db_clean_up_jobs (#1387) --- querybook/server/tasks/db_clean_up_jobs.py | 31 +++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/querybook/server/tasks/db_clean_up_jobs.py b/querybook/server/tasks/db_clean_up_jobs.py index a700f9d22..b646d9465 100644 --- a/querybook/server/tasks/db_clean_up_jobs.py +++ b/querybook/server/tasks/db_clean_up_jobs.py @@ -24,17 +24,24 @@ def run_all_db_clean_up_jobs( days_to_keep_event_logs=7, ): with DBSession() as session: - clean_up_task_run_record(days_to_keep=days_to_keep_task_record, session=session) - clean_up_query_execution( - days_to_keep_done=days_to_keep_query_exec_done, - days_to_keep_else=days_to_keep_query_exec_else, - session=session, - ) - clean_up_impression(days_to_keep=days_to_keep_impression, session=session) - clean_up_archived_data_doc( - days_to_keep=days_to_keep_archived_data_doc, session=session - ) - clean_up_event_logs(days_to_keep=days_to_keep_event_logs, session=session) + if days_to_keep_task_record != -1: + clean_up_task_run_record( + days_to_keep=days_to_keep_task_record, session=session + ) + if days_to_keep_query_exec_done != -1 and days_to_keep_query_exec_else != -1: + clean_up_query_execution( + days_to_keep_done=days_to_keep_query_exec_done, + days_to_keep_else=days_to_keep_query_exec_else, + session=session, + ) + if days_to_keep_impression != -1: + clean_up_impression(days_to_keep=days_to_keep_impression, session=session) + if days_to_keep_archived_data_doc != -1: + clean_up_archived_data_doc( + days_to_keep=days_to_keep_archived_data_doc, session=session + ) + if days_to_keep_event_logs != -1: + clean_up_event_logs(days_to_keep=days_to_keep_event_logs, session=session) @with_session @@ -90,7 +97,7 @@ def clean_up_impression(days_to_keep=30, session=None): def clean_up_archived_data_doc(days_to_keep=60, session=None): last_day = datetime.now() - timedelta(days_to_keep) - session.query(DataDoc).filter(archived=False).filter( + session.query(DataDoc).filter(DataDoc.archived).filter( DataDoc.updated_at < last_day ).delete(synchronize_session=False) session.commit()