Update Excel-to-Word script to match new format

SANDAG · Nov 18, 2024 · 3d99dc6 · 3d99dc6
1 parent 4bfee26
commit 3d99dc6
Show file tree

Hide file tree

Showing 4 changed files with 47 additions and 48 deletions.
diff --git a/notebooks/make-excel-data-dictionary-from-data-model.ipynb b/notebooks/make-excel-data-dictionary-from-data-model.ipynb
@@ -98,7 +98,7 @@
     "    ]\n",
     "    for computed_var in computed_variables:\n",
     "        description = getattr(model_cls, computed_var).__doc__ or \"Computed property.\"\n",
-    "        datatype = \"Computed\"\n",
+    "        datatype = \"int\"\n",
     "        response_option = \"Computed Value\"\n",
     "        field_details[computed_var] = (datatype, response_option, description, class_name)\n",
     "\n",
@@ -112,27 +112,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "def document_all_models(module, output_file):\n",
     "    # Create a workbook and select the active worksheet\n",
     "    wb = Workbook()\n",
     "    ws = wb.active\n",
-    "    ws.title = \"Pydantic Class Documentation\"\n",
+    "    ws.title = \"Variables\"\n",
     "\n",
     "    # Add headers\n",
     "    ws.append([\"Field\", \"Data Type\", \"Response Option\", \"Description\", \"Market Segment\"])\n",
     "\n",
     "    # Create a new worksheet for enum mappings\n",
-    "    enum_ws = wb.create_sheet(title=\"Enum Mappings\")\n",
+    "    enum_ws = wb.create_sheet(title=\"Response Options (Enums)\")\n",
     "    enum_ws.append([\"Enum\", \"Codes\", \"Labels\"])\n",
     "\n",
     "    enums_set = set()\n",
     "    # Iterate over all classes in the module\n",
     "    for name, obj in inspect.getmembers(module, inspect.isclass):\n",
-    "            if name in ['Respondent']:\n",
+    "            if name in ['Respondent', 'Trip']:\n",
     "                document_pydantic_model(obj, \"All Respondents\", ws, enum_ws, enums_set)\n",
     "\n",
     "    for name, obj in inspect.getmembers(module, inspect.isclass):\n",

diff --git a/notebooks/make-word-document-from-excel-data-dictionary.ipynb b/notebooks/make-word-document-from-excel-data-dictionary.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -27,7 +27,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -46,13 +46,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Load the Excel sheets into \n",
-    "df_fields = pd.read_excel(input_data_dictionary_excel, sheet_name='Pydantic Class Documentation')\n",
-    "df_enums = pd.read_excel(input_data_dictionary_excel, sheet_name='Enum Mappings')"
+    "df_fields = pd.read_excel(input_data_dictionary_excel, sheet_name='Variables')\n",
+    "df_enums = pd.read_excel(input_data_dictionary_excel, sheet_name='Response Options (Enums)')"
    ]
   },
   {
@@ -64,56 +64,55 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Create a new Word document\n",
     "doc = Document()\n",
     "doc.add_heading('Data Dictionary', 0)\n",
     "\n",
+    "market_segments = {\"All Respondents\", \"AirPassenger\", \"Employee\"}\n",
     "\n",
     "# Loop through each row in the fields DataFrame\n",
-    "for index, row in df_fields.iterrows():\n",
-    "    field_name = row['Field']\n",
-    "    data_type = row['Data Type']\n",
-    "    description = row['Description']\n",
-    "    \n",
-    "    # Skip rows where the Field (variable name) is blank\n",
-    "    if pd.isna(field_name):\n",
-    "        continue\n",
+    "for segment in market_segments:\n",
+    "    doc.add_heading(f'Market Segment: {segment}', level=2)\n",
+    "    for index, row in df_fields.iterrows():\n",
+    "        field_name = row['Field']\n",
+    "        data_type = row['Data Type']\n",
+    "        response_option = row['Response Option']\n",
+    "        description = row['Description']\n",
+    "        market_segment = row['Market Segment']\n",
+    "\n",
+    "        if market_segment == segment:\n",
+    "                doc.add_heading(f'{field_name}', level=3)\n",
+    "                doc.add_paragraph(f'Description: {description}')\n",
+    "                doc.add_paragraph(f'Data Type: {data_type}')\n",
+    "                doc.add_paragraph(f'Response Type: {response_option}')\n",
     "\n",
-    "    # Handle 'Data Model' differently\n",
-    "    if data_type == 'Data Model':\n",
-    "        doc.add_heading(f'Data Model: {field_name}', level=2)\n",
-    "        doc.add_paragraph(f'Description: {description}')\n",
-    "    else:\n",
-    "        doc.add_heading(f'{field_name}', level=3)\n",
-    "        doc.add_paragraph(f'Description: {description}')\n",
-    "        doc.add_paragraph(f'Response Type: {data_type}')\n",
-    "        \n",
-    "        # Check if the data type is not int, str, bool, or float, and handle enums\n",
-    "        if data_type not in ['int', 'str', 'bool', 'float']:\n",
-    "            doc.add_paragraph('Response Options:')\n",
-    "            \n",
-    "            # Filter the enums for the specific Data Type\n",
-    "            enum_rows = df_enums[df_enums['Enum'] == data_type]\n",
     "\n",
-    "            # Create a table for enum mappings/ response options if there are any\n",
-    "            if not enum_rows.empty:\n",
-    "                table = doc.add_table(rows=1, cols=2)\n",
-    "                table.style = 'Table Grid'\n",
+    "                # If Response Options exist\n",
+    "                if response_option != 'Actual Value':\n",
+    "                    doc.add_paragraph('Response Options:')\n",
     "\n",
-    "                # Add header row\n",
-    "                hdr_cells = table.rows[0].cells\n",
-    "                hdr_cells[0].text = 'Code'\n",
-    "                hdr_cells[1].text = 'Label'\n",
+    "                    # Filter the enums for the specific Data Type\n",
+    "                    enum_rows = df_enums[df_enums['Enum'] == response_option]\n",
     "\n",
-    "                # Add enum mappings to the table\n",
-    "                for _, enum_row in enum_rows.iterrows():\n",
-    "                    row_cells = table.add_row().cells\n",
-    "                    row_cells[0].text = str(enum_row['Codes'])\n",
-    "                    row_cells[1].text = str(enum_row['Labels'])"
+    "                    # Create a table for enum mappings/ response options if there are any\n",
+    "                    if not enum_rows.empty:\n",
+    "                        table = doc.add_table(rows=1, cols=2)\n",
+    "                        table.style = 'Table Grid'\n",
+    "\n",
+    "                        # Add header row\n",
+    "                        hdr_cells = table.rows[0].cells\n",
+    "                        hdr_cells[0].text = 'Code'\n",
+    "                        hdr_cells[1].text = 'Label'\n",
+    "\n",
+    "                        # Add enum mappings to the table\n",
+    "                        for _, enum_row in enum_rows.iterrows():\n",
+    "                            row_cells = table.add_row().cells\n",
+    "                            row_cells[0].text = str(enum_row['Codes'])\n",
+    "                            row_cells[1].text = str(enum_row['Labels'])"
    ]
   },
   {
@@ -125,7 +124,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [

diff --git a/reports/data_dictionary.docx b/reports/data_dictionary.docx
diff --git a/reports/data_dictionary.xlsx b/reports/data_dictionary.xlsx