Skip to content

Commit

Permalink
Update excel to word script to match new format
Browse files Browse the repository at this point in the history
  • Loading branch information
vivverma9 committed Nov 18, 2024
1 parent 4bfee26 commit 3d99dc6
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 48 deletions.
10 changes: 5 additions & 5 deletions notebooks/make-excel-data-dictionary-from-data-model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
" ]\n",
" for computed_var in computed_variables:\n",
" description = getattr(model_cls, computed_var).__doc__ or \"Computed property.\"\n",
" datatype = \"Computed\"\n",
" datatype = \"int\"\n",
" response_option = \"Computed Value\"\n",
" field_details[computed_var] = (datatype, response_option, description, class_name)\n",
"\n",
Expand All @@ -112,27 +112,27 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def document_all_models(module, output_file):\n",
" # Create a workbook and select the active worksheet\n",
" wb = Workbook()\n",
" ws = wb.active\n",
" ws.title = \"Pydantic Class Documentation\"\n",
" ws.title = \"Variables\"\n",
"\n",
" # Add headers\n",
" ws.append([\"Field\", \"Data Type\", \"Response Option\", \"Description\", \"Market Segment\"])\n",
"\n",
" # Create a new worksheet for enum mappings\n",
" enum_ws = wb.create_sheet(title=\"Enum Mappings\")\n",
" enum_ws = wb.create_sheet(title=\"Response Options (Enums)\")\n",
" enum_ws.append([\"Enum\", \"Codes\", \"Labels\"])\n",
"\n",
" enums_set = set()\n",
" # Iterate over all classes in the module\n",
" for name, obj in inspect.getmembers(module, inspect.isclass):\n",
" if name in ['Respondent']:\n",
" if name in ['Respondent', 'Trip']:\n",
" document_pydantic_model(obj, \"All Respondents\", ws, enum_ws, enums_set)\n",
"\n",
" for name, obj in inspect.getmembers(module, inspect.isclass):\n",
Expand Down
85 changes: 42 additions & 43 deletions notebooks/make-word-document-from-excel-data-dictionary.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -27,7 +27,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -46,13 +46,13 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Load the Excel sheets into DataFrames\n",
"df_fields = pd.read_excel(input_data_dictionary_excel, sheet_name='Pydantic Class Documentation')\n",
"df_enums = pd.read_excel(input_data_dictionary_excel, sheet_name='Enum Mappings')"
"df_fields = pd.read_excel(input_data_dictionary_excel, sheet_name='Variables')\n",
"df_enums = pd.read_excel(input_data_dictionary_excel, sheet_name='Response Options (Enums)')"
]
},
{
Expand All @@ -64,56 +64,55 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create a new Word document\n",
"doc = Document()\n",
"doc.add_heading('Data Dictionary', 0)\n",
"\n",
"market_segments = {\"All Respondents\", \"AirPassenger\", \"Employee\"}\n",
"\n",
"# Loop through each row in the fields DataFrame\n",
"for index, row in df_fields.iterrows():\n",
" field_name = row['Field']\n",
" data_type = row['Data Type']\n",
" description = row['Description']\n",
" \n",
" # Skip rows where the Field (variable name) is blank\n",
" if pd.isna(field_name):\n",
" continue\n",
"for segment in market_segments:\n",
" doc.add_heading(f'Market Segment: {segment}', level=2)\n",
" for index, row in df_fields.iterrows():\n",
" field_name = row['Field']\n",
" data_type = row['Data Type']\n",
" response_option = row['Response Option']\n",
" description = row['Description']\n",
" market_segment = row['Market Segment']\n",
"\n",
" if market_segment == segment:\n",
" doc.add_heading(f'{field_name}', level=3)\n",
" doc.add_paragraph(f'Description: {description}')\n",
" doc.add_paragraph(f'Data Type: {data_type}')\n",
" doc.add_paragraph(f'Response Type: {response_option}')\n",
"\n",
" # Handle 'Data Model' differently\n",
" if data_type == 'Data Model':\n",
" doc.add_heading(f'Data Model: {field_name}', level=2)\n",
" doc.add_paragraph(f'Description: {description}')\n",
" else:\n",
" doc.add_heading(f'{field_name}', level=3)\n",
" doc.add_paragraph(f'Description: {description}')\n",
" doc.add_paragraph(f'Response Type: {data_type}')\n",
" \n",
" # Check if the data type is not int, str, bool, or float, and handle enums\n",
" if data_type not in ['int', 'str', 'bool', 'float']:\n",
" doc.add_paragraph('Response Options:')\n",
" \n",
" # Filter the enums for the specific Data Type\n",
" enum_rows = df_enums[df_enums['Enum'] == data_type]\n",
"\n",
" # Create a table for enum mappings/ response options if there are any\n",
" if not enum_rows.empty:\n",
" table = doc.add_table(rows=1, cols=2)\n",
" table.style = 'Table Grid'\n",
" # If Response Options exist\n",
" if response_option != 'Actual Value':\n",
" doc.add_paragraph('Response Options:')\n",
"\n",
" # Add header row\n",
" hdr_cells = table.rows[0].cells\n",
" hdr_cells[0].text = 'Code'\n",
" hdr_cells[1].text = 'Label'\n",
" # Filter the enums for the specific Response Option\n",
" enum_rows = df_enums[df_enums['Enum'] == response_option]\n",
"\n",
" # Add enum mappings to the table\n",
" for _, enum_row in enum_rows.iterrows():\n",
" row_cells = table.add_row().cells\n",
" row_cells[0].text = str(enum_row['Codes'])\n",
" row_cells[1].text = str(enum_row['Labels'])"
" # Create a table for enum mappings/ response options if there are any\n",
" if not enum_rows.empty:\n",
" table = doc.add_table(rows=1, cols=2)\n",
" table.style = 'Table Grid'\n",
"\n",
" # Add header row\n",
" hdr_cells = table.rows[0].cells\n",
" hdr_cells[0].text = 'Code'\n",
" hdr_cells[1].text = 'Label'\n",
"\n",
" # Add enum mappings to the table\n",
" for _, enum_row in enum_rows.iterrows():\n",
" row_cells = table.add_row().cells\n",
" row_cells[0].text = str(enum_row['Codes'])\n",
" row_cells[1].text = str(enum_row['Labels'])"
]
},
{
Expand All @@ -125,7 +124,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand Down
Binary file modified reports/data_dictionary.docx
Binary file not shown.
Binary file modified reports/data_dictionary.xlsx
Binary file not shown.

0 comments on commit 3d99dc6

Please sign in to comment.