From 7d12c30b9c3402f87883cfa3f9ef1b60082446c8 Mon Sep 17 00:00:00 2001
From: Alejandro Jaramillo <alejojaramillo96@gmail.com>
Date: Mon, 23 Oct 2023 11:03:38 +0200
Subject: [PATCH] Revert "Revert "Fix environment variables documentation
 generation (#362)""

This reverts commit 7fa4683cd04812bd9aa4dc495ae9efb0eddc6a9c.
---
 docs/docs/resources/variables/cli_env_vars.md |   1 -
 .../resources/variables/config_env_vars.md    |   1 -
 hooks/gen_docs/__init__.py                    |  17 +-
 hooks/gen_docs/gen_docs_cli_usage.py          |   2 +-
 hooks/gen_docs/gen_docs_components.py         |   4 +-
 hooks/gen_docs/gen_docs_env_vars.py           |  69 +++--
 tests/utils/resources/__init__.py             |   0
 tests/utils/resources/nested_base_settings.py |  14 +
 tests/utils/test_doc_gen.py                   | 252 ++++++++++++++++++
 9 files changed, 318 insertions(+), 42 deletions(-)
 create mode 100644 tests/utils/resources/__init__.py
 create mode 100644 tests/utils/resources/nested_base_settings.py
 create mode 100644 tests/utils/test_doc_gen.py

diff --git a/docs/docs/resources/variables/cli_env_vars.md b/docs/docs/resources/variables/cli_env_vars.md
index 66615a185..763cb936e 100644
--- a/docs/docs/resources/variables/cli_env_vars.md
+++ b/docs/docs/resources/variables/cli_env_vars.md
@@ -1,4 +1,3 @@
-
 These variables are a lower priority alternative to the commands' flags. If a variable is set, the corresponding flag does not have to be specified in commands. Variables marked as required can instead be set as flags.
 
 |         Name          |Default Value|Required|                             Description                              |
diff --git a/docs/docs/resources/variables/config_env_vars.md b/docs/docs/resources/variables/config_env_vars.md
index 889985257..2928f2ccd 100644
--- a/docs/docs/resources/variables/config_env_vars.md
+++ b/docs/docs/resources/variables/config_env_vars.md
@@ -1,4 +1,3 @@
-
 These variables are a lower priority alternative to the settings in `config.yaml`. Variables marked as required can instead be set in the pipeline config.
 
 |          Name           |Default Value|Required|                                                                                Description                                                                                 |   Setting name    |
diff --git a/hooks/gen_docs/__init__.py b/hooks/gen_docs/__init__.py
index 63b0a9366..5052e8077 100644
--- a/hooks/gen_docs/__init__.py
+++ b/hooks/gen_docs/__init__.py
@@ -1,30 +1,27 @@
 """Documentation generation."""
 
-from collections.abc import Generator
+from collections.abc import Iterator
 from enum import Enum
-from typing import Any, Generic, TypeVar
 
-_T = TypeVar("_T")
 
+class IterableStrEnum(str, Enum):
+    """Polyfill that also introduces dict-like behavior
 
-class SuperEnum(Generic[_T], Enum):
-    """Adds constructors that return all items in a ``Generator``.
-
-    Introduces constructors that return a ``Generator`` object
+    Introduces constructors that return a ``Iterator`` object
     either containing all items, only their names or their values.
     """
 
     @classmethod
-    def items(cls) -> Generator[tuple[_T, Any], None, None]:
+    def items(cls) -> Iterator[tuple[str, str]]:
         """Return all item names and values in tuples."""
         return ((e.name, e.value) for e in cls)
 
     @classmethod
-    def keys(cls) -> Generator[_T, None, None]:
+    def keys(cls) -> Iterator[str]:
         """Return all item names."""
         return (e.name for e in cls)
 
     @classmethod
-    def values(cls) -> Generator[Any, None, None]:
+    def values(cls) -> Iterator[str]:
         """Return all item values."""
         return (e.value for e in cls)
diff --git a/hooks/gen_docs/gen_docs_cli_usage.py b/hooks/gen_docs/gen_docs_cli_usage.py
index f03c4fd62..469274745 100644
--- a/hooks/gen_docs/gen_docs_cli_usage.py
+++ b/hooks/gen_docs/gen_docs_cli_usage.py
@@ -21,7 +21,7 @@
         "--output",
         str(PATH_CLI_COMMANDS_DOC),
     ]
-    subprocess.run(typer_args)
+    subprocess.run(typer_args, check=True, capture_output=True)
 
     # Replace wrong title in CLI Usage doc
     with PATH_CLI_COMMANDS_DOC.open("r") as f:
diff --git a/hooks/gen_docs/gen_docs_components.py b/hooks/gen_docs/gen_docs_components.py
index 6c0c28693..45ca61ae1 100644
--- a/hooks/gen_docs/gen_docs_components.py
+++ b/hooks/gen_docs/gen_docs_components.py
@@ -212,9 +212,9 @@ def get_sections(component_name: str, *, exist_changes: bool) -> KpopsComponent:
         ]
         component_sections_not_inherited: list[
             str
-        ] = DEFAULTS_PIPELINE_COMPONENT_DEPENDENCIES[  # type: ignore [reportGeneralTypeIssues]
+        ] = DEFAULTS_PIPELINE_COMPONENT_DEPENDENCIES[
             component_file_name
-        ]
+        ]  # type: ignore [reportGeneralTypeIssues]
     return KpopsComponent(component_sections, component_sections_not_inherited)
 
 
diff --git a/hooks/gen_docs/gen_docs_env_vars.py b/hooks/gen_docs/gen_docs_env_vars.py
index 436ba19de..ac88b82b6 100644
--- a/hooks/gen_docs/gen_docs_env_vars.py
+++ b/hooks/gen_docs/gen_docs_env_vars.py
@@ -2,12 +2,14 @@
 
 import csv
 import shutil
-from collections.abc import Callable
+from collections.abc import Callable, Iterator
 from dataclasses import dataclass
 from pathlib import Path
 from textwrap import fill
 from typing import Any
 
+from pydantic import BaseSettings
+from pydantic.fields import ModelField
 from pytablewriter import MarkdownTableWriter
 from typer.models import ArgumentInfo, OptionInfo
 
@@ -17,7 +19,7 @@
     from typing_extensions import Self
 
 from hooks import PATH_ROOT
-from hooks.gen_docs import SuperEnum
+from hooks.gen_docs import IterableStrEnum
 from kpops.cli import main
 from kpops.cli.pipeline_config import PipelineConfig
 
@@ -90,7 +92,7 @@ def from_record(cls, record: dict[str, Any]) -> Self:
         )
 
 
-class EnvVarAttrs(str, SuperEnum):
+class EnvVarAttrs(IterableStrEnum):
     """The attr names are used as columns for the markdown tables."""
 
     NAME = "Name"
@@ -103,7 +105,7 @@ class EnvVarAttrs(str, SuperEnum):
 def csv_append_env_var(
     file: Path,
     name: str,
-    default_value,
+    default_value: Any,
     description: str | list[str] | None,
     *args,
 ) -> None:
@@ -218,7 +220,7 @@ def write_csv_to_md_file(
     target: Path,
     title: str | None,
     description: str | None = None,
-    heading: str = "###",
+    heading: str | None = "###",
 ) -> None:
     """Write csv data from a file into a markdown file.
 
@@ -227,11 +229,15 @@ def write_csv_to_md_file(
     :param title: Title for the table, optional
 
     """
+    if heading:
+        heading += " "
+    else:
+        heading = ""
     with target.open("w+") as f:
         if title:
-            f.write(f"{heading} {title}\n")
+            f.write(f"{heading}{title}\n\n")
         if description:
-            f.write(f"\n{description}\n\n")
+            f.write(f"{description}\n\n")
         writer = MarkdownTableWriter()
         with source.open("r", newline="") as source_contents:
             writer.from_csv(source_contents.read())
@@ -239,7 +245,7 @@ def write_csv_to_md_file(
         writer.dump(output=f)
 
 
-def __fill_csv_pipeline_config(target: Path) -> None:
+def fill_csv_pipeline_config(target: Path) -> None:
     """Append all ``PipelineConfig``-related env vars to a ``.csv`` file.
 
     Finds all ``PipelineConfig``-related env vars and appends them to
@@ -248,29 +254,38 @@ def __fill_csv_pipeline_config(target: Path) -> None:
     :param target: The path to the `.csv` file. Note that it must already
         contain the column names
     """
-    # NOTE: This does not see nested fields, hence if there are env vars in a class like
-    # TopicConfig(), they wil not be listed. Possible fix with recursion.
-    config_fields = PipelineConfig.__fields__
-    for config_field in config_fields.values():
-        config_field_info = PipelineConfig.Config.get_field_info(config_field.name)
-        config_field_description: str = (
-            config_field.field_info.description
+    for field in collect_fields(PipelineConfig):
+        field_info = PipelineConfig.Config.get_field_info(field.name)
+        field_description: str = (
+            field.field_info.description
             or "No description available, please refer to the pipeline config documentation."
         )
-        config_field_default = None or config_field.field_info.default
-        if config_env_var := config_field_info.get(
+        field_default = field.field_info.default
+        if config_env_var := field_info.get(
             "env",
-        ) or config_field.field_info.extra.get("env"):
+        ) or field.field_info.extra.get("env"):
             csv_append_env_var(
                 target,
                 config_env_var,
-                config_field_default,
-                config_field_description,
-                config_field.name,
+                field_default,
+                field_description,
+                field.name,
             )
 
 
-def __fill_csv_cli(target: Path) -> None:
+def collect_fields(settings: type[BaseSettings]) -> Iterator[ModelField]:
+    """Collect and yield all fields in a settings class.
+
+    :param model: settings class
+    :yield: all settings including nested ones in settings classes
+    """
+    for field in settings.__fields__.values():
+        if issubclass(field_type := field.type_, BaseSettings):
+            yield from collect_fields(field_type)
+        yield field
+
+
+def fill_csv_cli(target: Path) -> None:
     """Append all CLI-commands-related env vars to a ``.csv`` file.
 
     Finds all CLI-commands-related env vars and appends them to a ``.csv``
@@ -282,7 +297,7 @@ def __fill_csv_cli(target: Path) -> None:
         var_in_main = getattr(main, var_in_main_name)
         if (
             not var_in_main_name.startswith("__")
-            and isinstance(var_in_main, (OptionInfo, ArgumentInfo))
+            and isinstance(var_in_main, OptionInfo | ArgumentInfo)
             and var_in_main.envvar
         ):
             cli_env_var_description: list[str] = [
@@ -315,7 +330,7 @@ def gen_vars(
     csv_file: Path,
     title_dotenv_file: str,
     description_dotenv_file: str,
-    columns: list,
+    columns: list[str],
     description_md_file: str,
     variable_extraction_function: Callable[[Path], None],
 ) -> None:
@@ -330,7 +345,7 @@ def gen_vars(
     :param description_dotenv_file: The description to be written in the dotenv file
     :param columns: The column names in the table
     :param description_md_file: The description to be written in the markdown file
-    :param variable_extraction_function: Function that ooks for variables and appends
+    :param variable_extraction_function: Function that looks for variables and appends
         them to the temp csv file.
     """
     # Overwrite/create the temp csv file
@@ -373,7 +388,7 @@ def gen_vars(
         + DESCRIPTION_CONFIG_ENV_VARS,
         columns=list(EnvVarAttrs.values()),
         description_md_file=DESCRIPTION_CONFIG_ENV_VARS,
-        variable_extraction_function=__fill_csv_pipeline_config,
+        variable_extraction_function=fill_csv_pipeline_config,
     )
     # Find all cli-related env variables, write them into a file
     gen_vars(
@@ -385,5 +400,5 @@ def gen_vars(
         + DESCRIPTION_CLI_ENV_VARS,
         columns=list(EnvVarAttrs.values())[:-1],
         description_md_file=DESCRIPTION_CLI_ENV_VARS,
-        variable_extraction_function=__fill_csv_cli,
+        variable_extraction_function=fill_csv_cli,
     )
diff --git a/tests/utils/resources/__init__.py b/tests/utils/resources/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/utils/resources/nested_base_settings.py b/tests/utils/resources/nested_base_settings.py
new file mode 100644
index 000000000..f7f92358a
--- /dev/null
+++ b/tests/utils/resources/nested_base_settings.py
@@ -0,0 +1,14 @@
+from pydantic import BaseSettings, Field
+
+
+class NestedSettings(BaseSettings):
+    attr: str = Field("attr")
+
+
+class ParentSettings(BaseSettings):
+    not_nested_field: str = Field("not_nested_field")
+    nested_field: NestedSettings = Field(...)
+    field_with_env_defined: str = Field(
+        default=...,
+        env="FIELD_WITH_ENV_DEFINED",
+    )
diff --git a/tests/utils/test_doc_gen.py b/tests/utils/test_doc_gen.py
new file mode 100644
index 000000000..d234bd79d
--- /dev/null
+++ b/tests/utils/test_doc_gen.py
@@ -0,0 +1,252 @@
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from hooks.gen_docs.gen_docs_env_vars import (
+    EnvVarAttrs,
+    append_csv_to_dotenv_file,
+    collect_fields,
+    csv_append_env_var,
+    write_csv_to_md_file,
+    write_title_to_dotenv_file,
+)
+from tests.utils.resources.nested_base_settings import ParentSettings
+
+
+class TestEnvDocGen:
+    def test_collect_fields(self):
+        expected: list[Any] = [
+            "not_nested_field",
+            "attr",
+            Ellipsis,
+            Ellipsis,
+        ]
+        actual = [field.field_info.default for field in collect_fields(ParentSettings)]
+        assert actual == expected
+
+    @pytest.mark.parametrize(
+        ("var_name", "default_value", "description", "extra_args", "expected_outcome"),
+        [
+            pytest.param(
+                "var_name",
+                "default_value",
+                "description",
+                (),
+                "var_name,default_value,False,description",
+                id="String desc",
+            ),
+            pytest.param(
+                "var_name",
+                "default_value",
+                ["description", " description"],
+                (),
+                "var_name,default_value,False,description description",
+                id="List desc",
+            ),
+            pytest.param(
+                "var_name",
+                "default_value",
+                "description",
+                ("extra arg 1", "extra arg 2"),
+                "var_name,default_value,False,description,extra arg 1,extra arg 2",
+                id="Extra args",
+            ),
+            pytest.param(
+                "var_name",
+                "default_value",
+                None,
+                (),
+                "var_name,default_value,False,",
+                id="No desc",
+            ),
+        ],
+    )
+    def test_csv_append_env_var(
+        self,
+        tmp_path: Path,
+        var_name: str,
+        default_value: Any,
+        description: str | list[str],
+        extra_args: tuple,
+        expected_outcome: str,
+    ):
+        target = tmp_path / "target.csv"
+        csv_append_env_var(target, var_name, default_value, description, *extra_args)
+        with target.open() as t:
+            assert (
+                t.read().replace("\r\n", "\n").replace("\r", "\n")
+                == expected_outcome + "\n"
+            )
+
+    def test_write_title_to_dotenv_file(self, tmp_path: Path):
+        target = tmp_path / "target.ENV"
+        write_title_to_dotenv_file(target, "title", "description of length 72" * 3)
+        with target.open() as t:
+            assert t.read().replace("\r\n", "\n").replace("\r", "\n") == (
+                "# title\n"
+                "#\n"
+                "# "
+                "description of length 72description of length 72description of"
+                "\n"
+                "# length 72\n#\n"
+            )
+
+    @pytest.mark.parametrize(
+        ("var_name", "default", "required", "description", "setting_name", "expected"),
+        [
+            pytest.param(
+                "NAME",
+                "default",
+                "True",
+                "description",
+                "setting_name",
+                "# setting_name\n# description\nNAME=default\n",
+                id="default exists, required",
+            ),
+            pytest.param(
+                "NAME",
+                "",
+                "True",
+                "description",
+                "setting_name",
+                "# setting_name\n"
+                "# description\n"
+                "NAME # No default value, required\n",
+                id="default not exists, required",
+            ),
+            pytest.param(
+                "NAME",
+                "default",
+                "False",
+                "description",
+                "setting_name",
+                "# setting_name\n# description\nNAME=default\n",
+                id="default exists, not required",
+            ),
+            pytest.param(
+                "NAME",
+                "",
+                "False",
+                "description",
+                "setting_name",
+                "# setting_name\n"
+                "# description\n"
+                "NAME # No default value, not required\n",
+                id="default not exists, not required",
+            ),
+            pytest.param(
+                "NAME",
+                "default",
+                "True",
+                "description",
+                "",
+                "# description\nNAME=default\n",
+                id="no setting name",
+            ),
+        ],
+    )
+    def test_append_csv_to_dotenv_file(
+        self,
+        tmp_path: Path,
+        var_name: str,
+        default: Any,
+        required: str,
+        description: str | list[str],
+        setting_name: str,
+        expected: str,
+    ):
+        source = tmp_path / "source.csv"
+        target = tmp_path / "target.env"
+        csv_record = [var_name, default, required, description]
+        csv_column_names = [
+            EnvVarAttrs.NAME,
+            EnvVarAttrs.DEFAULT_VALUE,
+            EnvVarAttrs.REQUIRED,
+            EnvVarAttrs.DESCRIPTION,
+        ]
+        with source.open("w+", newline="") as f:
+            if setting_name is not None:
+                csv_record.append(setting_name)
+                csv_column_names.append(EnvVarAttrs.CORRESPONDING_SETTING_NAME)
+            f.write(",".join(csv_column_names) + "\n")
+            f.write(",".join(csv_record))
+        append_csv_to_dotenv_file(source, target)
+        with target.open("r", newline="") as f:
+            assert f.read().replace("\r\n", "\n").replace("\r", "\n") == expected
+
+    @pytest.mark.parametrize(
+        ("title", "description", "heading", "expected"),
+        [
+            pytest.param(
+                "title",
+                "description",
+                "###",
+                "### title\n\ndescription\n\n",
+                id="all provided, default heading",
+            ),
+            pytest.param(
+                "title",
+                "description",
+                "##",
+                "## title\n\ndescription\n\n",
+                id="all provided, different heading",
+            ),
+            pytest.param(
+                "title",
+                "description",
+                "",
+                "title\n\ndescription\n\n",
+                id="all provided, heading empty str",
+            ),
+            pytest.param(
+                "title",
+                "description",
+                None,
+                "title\n\ndescription\n\n",
+                id="all provided, heading is None",
+            ),
+            pytest.param(
+                None,
+                "description",
+                "###",
+                "description\n\n",
+                id="no title",
+            ),
+            pytest.param(
+                "title",
+                None,
+                "###",
+                "### title\n\n",
+                id="no description",
+            ),
+        ],
+    )
+    def test_write_csv_to_md_file(
+        self,
+        tmp_path: Path,
+        title: str,
+        description: str,
+        heading: str,
+        expected: str,
+    ):
+        source = tmp_path / "source.csv"
+        target = tmp_path / "target.env"
+        csv_record = ["NAME", "default", "True", "description", "setting_name"]
+        csv_column_names = [
+            EnvVarAttrs.NAME,
+            EnvVarAttrs.DEFAULT_VALUE,
+            EnvVarAttrs.REQUIRED,
+            EnvVarAttrs.DESCRIPTION,
+            EnvVarAttrs.CORRESPONDING_SETTING_NAME,
+        ]
+        with source.open("w+", newline="") as f:
+            f.write(",".join(csv_column_names) + "\n")
+            f.write(",".join(csv_record))
+        write_csv_to_md_file(source, target, title, description, heading)
+        with target.open("r", newline="") as f:
+            assert f.read().replace("\r\n", "\n").replace("\r", "\n") == expected + str(
+                "|Name|Default Value|Required|Description|Setting name|\n"
+                "|----|-------------|--------|-----------|------------|\n"
+                "|NAME|default      |True    |description|setting_name|\n",
+            )