Skip to content

Commit

Permalink
Merge pull request #1373 from Sage-Bionetworks/develop-FDS-258-mypy-visualization
Browse files Browse the repository at this point in the history

FDS-258 mypy visualization module
  • Loading branch information
andrewelamb authored Feb 23, 2024
2 parents f802ce6 + 8720383 commit b4a6525
Show file tree
Hide file tree
Showing 9 changed files with 343 additions and 260 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ jobs:
# add here when checked
# poetry run mypy --install-types --non-interactive
# add here when enforced
poetry run mypy --disallow-untyped-defs --install-types --non-interactive schematic/configuration/*.py schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py
poetry run mypy --disallow-untyped-defs --install-types --non-interactive schematic/configuration/*.py schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py schematic/visualization
#----------------------------------------------
# linting
Expand Down
66 changes: 34 additions & 32 deletions schematic/help.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,33 @@
# pylint: disable=line-too-long
#!/usr/bin/env python3

from typing import get_args
from schematic.utils.schema_utils import DisplayLabelType
from schematic.visualization.tangled_tree import FigureType, TextType


# Human-readable description for each supported data-model label type.
DATA_MODEL_LABELS_DICT = {
    "display_label": "use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label.",
    "class_label": "default, use standard class or property label.",
}
# Sanity check: every DisplayLabelType literal must have a matching description.
assert sorted(DATA_MODEL_LABELS_DICT.keys()) == sorted(get_args(DisplayLabelType))

# One "<label>, <description>" string per label type.
DATA_MODEL_LABELS_LIST = [", ".join(entry) for entry in DATA_MODEL_LABELS_DICT.items()]

# Full help text for the --data_model_labels CLI option, shared across commands.
DATA_MODEL_LABELS_HELP = " ".join(
    ["Choose how to set the label in the data model."]
    + DATA_MODEL_LABELS_LIST
    + [
        "Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
    ]
)


# Quoted, "or"-separated alternatives for the tangled-tree CLI help strings.
FIGURE_TYPES = " or ".join(f"'{item}'" for item in get_args(FigureType))
TEXT_TYPES = " or ".join(f"'{item}'" for item in get_args(TextType))

# `schematic manifest` related sub-commands description
manifest_commands = {
"manifest": {
Expand Down Expand Up @@ -54,12 +81,7 @@
"Specify to alphabetize valid attribute values either ascending (a) or descending (d)."
"Optional"
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
"data_model_labels": DATA_MODEL_LABELS_HELP,
},
"migrate": {
"short_help": (
Expand Down Expand Up @@ -139,12 +161,7 @@
"class_label, display_label, display_name, default, class_label. When true annotations and table columns will be uploaded with the display name formatting with blacklisted characters removed. "
"To use for tables, use in conjunction with the use_schema_label flag."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
"data_model_labels": DATA_MODEL_LABELS_HELP,
},
"validate": {
"short_help": ("Validation of manifest files."),
Expand All @@ -170,12 +187,7 @@
"project_scope": (
"Specify a comma-separated list of projects to search through for cross manifest validation."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
"data_model_labels": DATA_MODEL_LABELS_HELP,
},
}
}
Expand All @@ -191,12 +203,7 @@
"output_jsonld": (
"Path to where the generated JSON-LD file needs to be outputted."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
"data_model_labels": DATA_MODEL_LABELS_HELP,
}
}
}
Expand All @@ -219,17 +226,12 @@
),
"tangled_tree": {
"figure_type": (
"Specify the type of schema visualization to make. Either 'dependency' or 'component'."
f"Specify the type of schema visualization to make. Either {FIGURE_TYPES}."
),
"text_format": (
"Specify the type of text to gather for tangled tree visualization, either 'plain' or 'highlighted'."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
f"Specify the type of text to gather for tangled tree visualization, either {TEXT_TYPES}."
),
"data_model_labels": DATA_MODEL_LABELS_HELP,
},
}
}
5 changes: 3 additions & 2 deletions schematic/manifest/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
import logging
from pathlib import Path
import sys
from typing import List
from typing import get_args, List
import click
import click_log

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.manifest.generator import ManifestGenerator

from schematic.utils.schema_utils import DisplayLabelType
from schematic.utils.cli_utils import log_value_from_config, query_dict, parse_syn_ids
from schematic.utils.google_api_utils import export_manifest_csv
from schematic.help import manifest_commands
Expand Down Expand Up @@ -109,7 +110,7 @@ def manifest(ctx, config): # use as `schematic manifest ...`
"--data_model_labels",
"-dml",
default="class_label",
type=click.Choice(["display_label", "class_label"], case_sensitive=True),
type=click.Choice(list(get_args(DisplayLabelType)), case_sensitive=True),
help=query_dict(manifest_commands, ("manifest", "get", "data_model_labels")),
)
@click.pass_obj
Expand Down
4 changes: 3 additions & 1 deletion schematic/models/commands.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python3

from typing import get_args
from gc import callbacks
import logging
import sys
Expand All @@ -20,6 +21,7 @@
from schematic.help import model_commands
from schematic.exceptions import MissingConfigValueError
from schematic.configuration.configuration import CONFIG
from schematic.utils.schema_utils import DisplayLabelType

logger = logging.getLogger("schematic")
click_log.basic_config(logger)
Expand Down Expand Up @@ -112,7 +114,7 @@ def model(ctx, config): # use as `schematic model ...`
"--data_model_labels",
"-dml",
default="class_label",
type=click.Choice(["display_label", "class_label"], case_sensitive=True),
type=click.Choice(list(get_args(DisplayLabelType)), case_sensitive=True),
help=query_dict(model_commands, ("model", "submit", "data_model_labels")),
)
@click.option(
Expand Down
4 changes: 3 additions & 1 deletion schematic/schemas/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
import sys
import time
import re
from typing import get_args

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.schemas.data_model_validator import DataModelValidator
from schematic.schemas.data_model_jsonld import DataModelJsonLD, convert_graph_to_jsonld

from schematic.utils.schema_utils import DisplayLabelType
from schematic.utils.cli_utils import query_dict
from schematic.utils.schema_utils import export_schema
from schematic.help import schema_commands
Expand Down Expand Up @@ -45,7 +47,7 @@ def schema(): # use as `schematic model ...`
"--data_model_labels",
"-dml",
default="class_label",
type=click.Choice(["display_label", "class_label"], case_sensitive=True),
type=click.Choice(list(get_args(DisplayLabelType)), case_sensitive=True),
help=query_dict(schema_commands, ("schema", "convert", "data_model_labels")),
)
@click.option(
Expand Down
71 changes: 40 additions & 31 deletions schematic/visualization/attributes_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
import json
import logging
import os
from typing import Optional, no_type_check

import numpy as np
import pandas as pd

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.schemas.data_model_json_schema import DataModelJSONSchema
from schematic.utils.schema_utils import DisplayLabelType
from schematic.utils.io_utils import load_json

logger = logging.getLogger(__name__)
Expand All @@ -20,7 +22,7 @@ class AttributesExplorer:
def __init__(
self,
path_to_jsonld: str,
data_model_labels: str,
data_model_labels: DisplayLabelType,
) -> None:
self.path_to_jsonld = path_to_jsonld

Expand Down Expand Up @@ -66,7 +68,7 @@ def create_output_path(self, terminal_folder: str) -> str:
os.makedirs(output_path)
return output_path

def convert_string_cols_to_json(
def _convert_string_cols_to_json(
self, dataframe: pd.DataFrame, cols_to_modify: list[str]
) -> pd.DataFrame:
"""Converts values in a column from strings to JSON list
Expand All @@ -81,17 +83,16 @@ def convert_string_cols_to_json(
)
return dataframe

def parse_attributes(self, save_file: bool = True) -> pd.DataFrame:
def parse_attributes(self, save_file: bool = True) -> Optional[str]:
"""
Args: save_file (bool):
True: merged_df is saved locally to output_path.
False: merged_df is returned.
Args:
save_file (bool, optional):
True: merged_df is saved locally to output_path.
False: merged_df is returned as a string
Defaults to True.
Returns:
merged_df (pd.DataFrame): dataframe containing data relating to attributes
for the provided data model for all components in the data model.
Dataframe is saved locally as a csv if save_file == True, or returned if
save_file == False.
Optional[str]: if save_file=False, the dataframe as a string, otherwise None
"""
# get all components
Expand All @@ -102,44 +103,51 @@ def parse_attributes(self, save_file: bool = True) -> pd.DataFrame:
# have to provide.
return self._parse_attributes(components, save_file)

def parse_component_attributes(
self, component=None, save_file: bool = True, include_index: bool = True
) -> pd.DataFrame:
def _parse_component_attributes(
self,
component: Optional[str] = None,
save_file: bool = True,
include_index: bool = True,
) -> Optional[str]:
"""
Args: save_file (bool):
True: merged_df is saved locally to output_path.
False: merged_df is returned.
include_index (bool):
Whether to include the index in the returned dataframe (True) or not (False)
Args:
component (Optional[str], optional): A component. Defaults to None.
save_file (bool, optional):
True: merged_df is saved locally to output_path.
False: merged_df is returned as a string
Defaults to True.
include_index (bool, optional):
Whether to include the index in the returned dataframe (True) or not (False)
Defaults to True.
Raises:
ValueError: If Component is None
Returns:
merged_df (pd.DataFrame): dataframe containing data relating to attributes
for the provided data model for the specified component in the data model.
Dataframe is saved locally as a csv if save_file == True, or returned if
save_file == False.
Optional[str]: if save_file=False, the dataframe as a string, otherwise None
"""

if not component:
raise ValueError("You must provide a component to visualize.")
return self._parse_attributes([component], save_file, include_index)

@no_type_check
def _parse_attributes(
self, components: list, save_file=True, include_index=True
) -> pd.DataFrame:
self, components: list[str], save_file: bool = True, include_index: bool = True
) -> Optional[str]:
"""
Args: save_file (bool):
True: merged_df is saved locally to output_path.
False: merged_df is returned.
components (list):
components (list[str]):
list of components to parse attributes for
include_index (bool):
Whether to include the index in the returned dataframe (True) or not (False)
Returns:
merged_df (pd.DataFrame): dataframe containing data relating to attributes
for the provided data model for specified components in the data model.
Dataframe is saved locally as a csv if save_file == True, or returned if
save_file == False.
Optional[str]:
if save_file=False, the dataframe as a string, otherwise None
Raises:
ValueError:
If unable hits an error while attempting to get conditional requirements.
Expand All @@ -150,6 +158,7 @@ def _parse_attributes(
# pylint: disable=too-many-nested-blocks
# pylint: disable=too-many-branches
# pylint: disable=too-many-statements
# type

# For each data type to be loaded gather all attributes the user would
# have to provide.
Expand Down Expand Up @@ -264,7 +273,7 @@ def _parse_attributes(
]
cols = [col for col in cols if col in data_dict_df.columns]
data_dict_df = data_dict_df[cols]
data_dict_df = self.convert_string_cols_to_json(
data_dict_df = self._convert_string_cols_to_json(
data_dict_df, ["Valid Values"]
)
df_store.append(data_dict_df)
Expand Down
Loading

0 comments on commit b4a6525

Please sign in to comment.