Skip to content

Commit

Permalink
Merge pull request #1373 from Sage-Bionetworks/develop-FDS-258-mypy-visualization
Browse files Browse the repository at this point in the history

FDS-258 mypy visualization module
  • Loading branch information
andrewelamb authored Feb 23, 2024
2 parents f802ce6 + 8720383 commit b4a6525
Show file tree
Hide file tree
Showing 9 changed files with 343 additions and 260 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ jobs:
# add here when checked
# poetry run mypy --install-types --non-interactive
# add here when enforced
poetry run mypy --disallow-untyped-defs --install-types --non-interactive schematic/configuration/*.py schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py
poetry run mypy --disallow-untyped-defs --install-types --non-interactive schematic/configuration/*.py schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py schematic/visualization
#----------------------------------------------
# linting
Expand Down
66 changes: 34 additions & 32 deletions schematic/help.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,33 @@
# pylint: disable=line-too-long
#!/usr/bin/env python3

from typing import get_args
from schematic.utils.schema_utils import DisplayLabelType
from schematic.visualization.tangled_tree import FigureType, TextType


# Human-readable description for each supported data-model label type.
DATA_MODEL_LABELS_DICT = {
    "display_label": "use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label.",
    "class_label": "default, use standard class or property label.",
}
# Sanity check: every DisplayLabelType literal must have a matching description.
assert sorted(DATA_MODEL_LABELS_DICT.keys()) == sorted(get_args(DisplayLabelType))

# One "<label>, <description>" string per label type.
DATA_MODEL_LABELS_LIST = [", ".join(entry) for entry in DATA_MODEL_LABELS_DICT.items()]

# Full help text for the --data_model_labels CLI option, shared across commands.
DATA_MODEL_LABELS_HELP = " ".join(
    ["Choose how to set the label in the data model."]
    + DATA_MODEL_LABELS_LIST
    + [
        "Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
    ]
)


# Quoted, "or"-separated alternatives for the tangled-tree CLI help strings.
FIGURE_TYPES = " or ".join(f"'{item}'" for item in get_args(FigureType))
TEXT_TYPES = " or ".join(f"'{item}'" for item in get_args(TextType))

# `schematic manifest` related sub-commands description
manifest_commands = {
"manifest": {
Expand Down Expand Up @@ -54,12 +81,7 @@
"Specify to alphabetize valid attribute values either ascending (a) or descending (d)."
"Optional"
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
"data_model_labels": DATA_MODEL_LABELS_HELP,
},
"migrate": {
"short_help": (
Expand Down Expand Up @@ -139,12 +161,7 @@
"class_label, display_label, display_name, default, class_label. When true annotations and table columns will be uploaded with the display name formatting with blacklisted characters removed. "
"To use for tables, use in conjunction with the use_schema_label flag."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
"data_model_labels": DATA_MODEL_LABELS_HELP,
},
"validate": {
"short_help": ("Validation of manifest files."),
Expand All @@ -170,12 +187,7 @@
"project_scope": (
"Specify a comma-separated list of projects to search through for cross manifest validation."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
"data_model_labels": DATA_MODEL_LABELS_HELP,
},
}
}
Expand All @@ -191,12 +203,7 @@
"output_jsonld": (
"Path to where the generated JSON-LD file needs to be outputted."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
"data_model_labels": DATA_MODEL_LABELS_HELP,
}
}
}
Expand All @@ -219,17 +226,12 @@
),
"tangled_tree": {
"figure_type": (
"Specify the type of schema visualization to make. Either 'dependency' or 'component'."
f"Specify the type of schema visualization to make. Either {FIGURE_TYPES}."
),
"text_format": (
"Specify the type of text to gather for tangled tree visualization, either 'plain' or 'highlighted'."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
f"Specify the type of text to gather for tangled tree visualization, either {TEXT_TYPES}."
),
"data_model_labels": DATA_MODEL_LABELS_HELP,
},
}
}
5 changes: 3 additions & 2 deletions schematic/manifest/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
import logging
from pathlib import Path
import sys
from typing import List
from typing import get_args, List
import click
import click_log

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.manifest.generator import ManifestGenerator

from schematic.utils.schema_utils import DisplayLabelType
from schematic.utils.cli_utils import log_value_from_config, query_dict, parse_syn_ids
from schematic.utils.google_api_utils import export_manifest_csv
from schematic.help import manifest_commands
Expand Down Expand Up @@ -109,7 +110,7 @@ def manifest(ctx, config): # use as `schematic manifest ...`
"--data_model_labels",
"-dml",
default="class_label",
type=click.Choice(["display_label", "class_label"], case_sensitive=True),
type=click.Choice(list(get_args(DisplayLabelType)), case_sensitive=True),
help=query_dict(manifest_commands, ("manifest", "get", "data_model_labels")),
)
@click.pass_obj
Expand Down
4 changes: 3 additions & 1 deletion schematic/models/commands.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python3

from typing import get_args
from gc import callbacks
import logging
import sys
Expand All @@ -20,6 +21,7 @@
from schematic.help import model_commands
from schematic.exceptions import MissingConfigValueError
from schematic.configuration.configuration import CONFIG
from schematic.utils.schema_utils import DisplayLabelType

logger = logging.getLogger("schematic")
click_log.basic_config(logger)
Expand Down Expand Up @@ -112,7 +114,7 @@ def model(ctx, config): # use as `schematic model ...`
"--data_model_labels",
"-dml",
default="class_label",
type=click.Choice(["display_label", "class_label"], case_sensitive=True),
type=click.Choice(list(get_args(DisplayLabelType)), case_sensitive=True),
help=query_dict(model_commands, ("model", "submit", "data_model_labels")),
)
@click.option(
Expand Down
4 changes: 3 additions & 1 deletion schematic/schemas/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
import sys
import time
import re
from typing import get_args

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.schemas.data_model_validator import DataModelValidator
from schematic.schemas.data_model_jsonld import DataModelJsonLD, convert_graph_to_jsonld

from schematic.utils.schema_utils import DisplayLabelType
from schematic.utils.cli_utils import query_dict
from schematic.utils.schema_utils import export_schema
from schematic.help import schema_commands
Expand Down Expand Up @@ -45,7 +47,7 @@ def schema(): # use as `schematic model ...`
"--data_model_labels",
"-dml",
default="class_label",
type=click.Choice(["display_label", "class_label"], case_sensitive=True),
type=click.Choice(list(get_args(DisplayLabelType)), case_sensitive=True),
help=query_dict(schema_commands, ("schema", "convert", "data_model_labels")),
)
@click.option(
Expand Down
71 changes: 40 additions & 31 deletions schematic/visualization/attributes_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
import json
import logging
import os
from typing import Optional, no_type_check

import numpy as np
import pandas as pd

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.schemas.data_model_json_schema import DataModelJSONSchema
from schematic.utils.schema_utils import DisplayLabelType
from schematic.utils.io_utils import load_json

logger = logging.getLogger(__name__)
Expand All @@ -20,7 +22,7 @@ class AttributesExplorer:
def __init__(
self,
path_to_jsonld: str,
data_model_labels: str,
data_model_labels: DisplayLabelType,
) -> None:
self.path_to_jsonld = path_to_jsonld

Expand Down Expand Up @@ -66,7 +68,7 @@ def create_output_path(self, terminal_folder: str) -> str:
os.makedirs(output_path)
return output_path

def convert_string_cols_to_json(
def _convert_string_cols_to_json(
self, dataframe: pd.DataFrame, cols_to_modify: list[str]
) -> pd.DataFrame:
"""Converts values in a column from strings to JSON list
Expand All @@ -81,17 +83,16 @@ def convert_string_cols_to_json(
)
return dataframe

def parse_attributes(self, save_file: bool = True) -> pd.DataFrame:
def parse_attributes(self, save_file: bool = True) -> Optional[str]:
"""
Args: save_file (bool):
True: merged_df is saved locally to output_path.
False: merged_df is returned.
Args:
save_file (bool, optional):
True: merged_df is saved locally to output_path.
False: merged_df is returned as a string
Defaults to True.
Returns:
merged_df (pd.DataFrame): dataframe containing data relating to attributes
for the provided data model for all components in the data model.
Dataframe is saved locally as a csv if save_file == True, or returned if
save_file == False.
Optional[str]: if save_file=False, the dataframe as a string, otherwise None
"""
# get all components
Expand All @@ -102,44 +103,51 @@ def parse_attributes(self, save_file: bool = True) -> pd.DataFrame:
# have to provide.
return self._parse_attributes(components, save_file)

def parse_component_attributes(
self, component=None, save_file: bool = True, include_index: bool = True
) -> pd.DataFrame:
def _parse_component_attributes(
self,
component: Optional[str] = None,
save_file: bool = True,
include_index: bool = True,
) -> Optional[str]:
"""
Args: save_file (bool):
True: merged_df is saved locally to output_path.
False: merged_df is returned.
include_index (bool):
Whether to include the index in the returned dataframe (True) or not (False)
Args:
component (Optional[str], optional): A component. Defaults to None.
save_file (bool, optional):
True: merged_df is saved locally to output_path.
False: merged_df is returned as a string
Defaults to True.
include_index (bool, optional):
Whether to include the index in the returned dataframe (True) or not (False)
Defaults to True.
Raises:
ValueError: If Component is None
Returns:
merged_df (pd.DataFrame): dataframe containing data relating to attributes
for the provided data model for the specified component in the data model.
Dataframe is saved locally as a csv if save_file == True, or returned if
save_file == False.
Optional[str]: if save_file=False, the dataframe as a string, otherwise None
"""

if not component:
raise ValueError("You must provide a component to visualize.")
return self._parse_attributes([component], save_file, include_index)

@no_type_check
def _parse_attributes(
self, components: list, save_file=True, include_index=True
) -> pd.DataFrame:
self, components: list[str], save_file: bool = True, include_index: bool = True
) -> Optional[str]:
"""
Args: save_file (bool):
True: merged_df is saved locally to output_path.
False: merged_df is returned.
components (list):
components (list[str]):
list of components to parse attributes for
include_index (bool):
Whether to include the index in the returned dataframe (True) or not (False)
Returns:
merged_df (pd.DataFrame): dataframe containing data relating to attributes
for the provided data model for specified components in the data model.
Dataframe is saved locally as a csv if save_file == True, or returned if
save_file == False.
Optional[str]:
if save_file=False, the dataframe as a string, otherwise None
Raises:
ValueError:
If unable hits an error while attempting to get conditional requirements.
Expand All @@ -150,6 +158,7 @@ def _parse_attributes(
# pylint: disable=too-many-nested-blocks
# pylint: disable=too-many-branches
# pylint: disable=too-many-statements
# type

# For each data type to be loaded gather all attributes the user would
# have to provide.
Expand Down Expand Up @@ -264,7 +273,7 @@ def _parse_attributes(
]
cols = [col for col in cols if col in data_dict_df.columns]
data_dict_df = data_dict_df[cols]
data_dict_df = self.convert_string_cols_to_json(
data_dict_df = self._convert_string_cols_to_json(
data_dict_df, ["Valid Values"]
)
df_store.append(data_dict_df)
Expand Down
Loading

0 comments on commit b4a6525

Please sign in to comment.