Skip to content

Commit

Permalink
(feat):added missing docstrings and made improvements to all package …
Browse files Browse the repository at this point in the history
…modules
  • Loading branch information
erik-ingwersen-ey committed Aug 9, 2024
1 parent e22c627 commit 57756cf
Show file tree
Hide file tree
Showing 164 changed files with 27,620 additions and 779,239 deletions.
Binary file removed .DS_Store
Binary file not shown.
40 changes: 40 additions & 0 deletions .github/workflows/deploy-docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# .github/workflows/deploy-docs.yml
#
# Build the Sphinx documentation and publish it to GitHub Pages
# whenever changes land on the main or dev branches.

name: Deploy Documentation to GitHub Pages

on:
  push:
    branches:
      - main
      - dev

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout the repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -e .
          pip install sphinx sphinx-rtd-theme  # Add other Sphinx extensions here

      - name: Build documentation
        # Output directory must match 'publish_dir' in the deploy step below.
        # (Previously built into 'docs/docs/html', which the deploy step never
        # picked up, so the published site would have been empty or stale.)
        run: |
          sphinx-build -b html docs/source docs/build/html

      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: docs/build/html
          keep_files: true
41 changes: 41 additions & 0 deletions .github/workflows/test-code.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# .github/workflows/test-code.yml
#
# Run the test suite with coverage on pushes and pull requests
# targeting the main or dev branches, and archive the HTML
# coverage report as a build artifact.

name: CI Test Pipeline

on:
  push:
    branches:
      - main
      - dev
  pull_request:
    branches:
      - main
      - dev

jobs:
  test:

    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -e .

      - name: Run tests
        # --cov-report=html is required so that ./htmlcov/ actually exists
        # for the upload step below (plain `pytest --cov=.` writes no HTML
        # report, and the artifact upload would find nothing to archive).
        run: |
          pytest --cov=. --cov-report=html --cov-report=term

      - name: Upload coverage report
        uses: actions/upload-artifact@v3
        with:
          name: coverage-report
          path: ./htmlcov/
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
*.json
/.idea
39 changes: 30 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,38 @@
# Python image to use.
FROM python:3.12-alpine
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set the working directory to /app
# Set the working directory
WORKDIR /app

# copy the requirements file used for dependencies
COPY requirements.txt .
# Copy the current directory contents into the container at /app
COPY . .

# Copy the credentials file into the container
COPY ./iowa-sales-forecast-service-account.json /gcloud/application_default_credentials.json

# Update pip
RUN pip install --upgrade pip

# Install any needed packages specified in requirements.txt
RUN pip install --trusted-host pypi.python.org -r requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Install the iowa_forecast package to the container.
RUN pip install -e .

# Make port 8080 available to the world outside this container
EXPOSE 8080

# Define environment variables

# Project ID where the tables and models will be saved to inside BigQuery
ENV PROJECT_ID "iowa-liquor-sales-forecast-v2"

# Dataset name where the tables and models will be stored
ENV DATASET_NAME "bqmlforecast"

# Set the environment variable to point to the credentials file
ENV GOOGLE_APPLICATION_CREDENTIALS="/gcloud/application_default_credentials.json"

# Copy the rest of the working directory contents into the container at /app
COPY . .

# Run app.py when the container launches
ENTRYPOINT ["python", "app.py"]
ENTRYPOINT ["python", "pipelines/execute_load_data.py"]
95 changes: 95 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Developer convenience targets for the iowa_forecast project.
# Run `make help` (the default goal) to list the available targets.
.PHONY: clean clean-test clean-pyc clean-build docs servedocs lint test test-all coverage install install-dev update-requirements help
.DEFAULT_GOAL := help

# Small Python helper that opens a local file in the default web browser.
define BROWSER_PYSCRIPT
import os, webbrowser, sys

from urllib.request import pathname2url

webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
endef
export BROWSER_PYSCRIPT

# Python helper that scans this Makefile for `target: ## description`
# lines and prints them as the `help` output. Note: only `## ` (double
# hash, then a space) comments are picked up by the regex below.
define PRINT_HELP_PYSCRIPT
import re, sys

for line in sys.stdin:
    match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
    if match:
        target, help = match.groups()
        print("%-20s %s" % (target, help))
endef
export PRINT_HELP_PYSCRIPT

BROWSER := python -c "$$BROWSER_PYSCRIPT"

help:  ## show this help message
	@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)

clean: clean-build clean-pyc clean-test  ## remove all build, test, coverage and Python artifacts

clean-build:  ## remove build artifacts
	rm -fr build/
	rm -fr dist/
	rm -fr .eggs/
	find . -name '*.egg-info' -exec rm -fr {} +
	find . -name '*.egg' -exec rm -f {} +

clean-pyc:  ## remove Python file artifacts
	find . -name '*.DS_Store' -exec rm -f {} +
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +
	find . -name '.ipynb_checkpoints' -exec rm -fr {} +

clean-test:  ## remove test and coverage artifacts
	rm -fr .tox/
	rm -f .coverage
	rm -fr htmlcov/
	rm -fr .pytest_cache

lint:  ## check style with flake8
	flake8 iowa_forecast tests

test:  ## run tests quickly with the default Python
	pytest

test-all:  ## run tests on every Python version with tox
	tox

coverage:  ## check code coverage quickly with the default Python
	coverage run --source iowa_forecast -m pytest
	coverage report -m
	coverage html
	$(BROWSER) htmlcov/index.html

docs:  ## generate Sphinx HTML documentation, including API docs
	rm -f docs/iowa_forecast.rst
	rm -f docs/modules.rst
	sphinx-apidoc -o docs/ iowa_forecast
	$(MAKE) -C docs clean
	$(MAKE) -C docs html
	$(BROWSER) docs/_build/html/index.html

servedocs: docs  ## compile the docs watching for changes
	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .

install:  ## install the package to the active Python's site-packages
	pip install --upgrade pip
	pip install -r requirements.txt
	pip install .
	$(MAKE) set-env  # NOTE(review): no 'set-env' rule exists in this Makefile — confirm where it is defined

install-dev:  ## install the package in development (editable) mode
	pip install --upgrade pip
	pip install -r requirements.txt
	pip install -e .
	$(MAKE) set-env  # NOTE(review): no 'set-env' rule exists in this Makefile — confirm where it is defined

update-requirements:  ## regenerate requirements.txt from poetry's lock file
	poetry export --without-hashes --ansi -o ./requirements.txt
122 changes: 81 additions & 41 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,63 +1,103 @@
# Cloud Run Hello World with Cloud Code
# Iowa Liquor Sales Forecast

"Hello World" is a [Cloud Run](https://cloud.google.com/run/docs) application that renders a simple webpage.
This repository contains the functions created to generate a sales forecasting
model that predicts sales based on the historical data of liquor purchases from
the state of Iowa.

For details on how to use this sample as a template in Cloud Code, read the documentation for Cloud Code for [VS Code](https://cloud.google.com/code/docs/vscode/quickstart-cloud-run?utm_source=ext&utm_medium=partner&utm_campaign=CDR_kri_gcp_cloudcodereadmes_012521&utm_content=-) or [IntelliJ](https://cloud.google.com/code/docs/intellij/quickstart-cloud-run?utm_source=ext&utm_medium=partner&utm_campaign=CDR_kri_gcp_cloudcodereadmes_012521&utm_content=-).
The created model consists of a multivariate ARIMA model that includes
relevant features such as moving averages of key columns from the dataset,
lag columns and weather forecast information.

### Table of Contents
* [Getting Started with VS Code](#getting-started-with-vs-code)
* [Getting Started with IntelliJ](#getting-started-with-intellij)
* [Sign up for User Research](#sign-up-for-user-research)
All data used to train the model was obtained from the library
of `BigQuery` public datasets.

---
## Getting Started with VS Code
All the datasets and models created are stored inside BigQuery. Therefore,
to run this solution and generate the sales forecasts, you need to register an
account in Google Cloud. Then you have to create a new project, enable the
BigQuery service to your account and configure your credentials.

### Run the application locally with the Cloud Run Emulator
1. Click on the Cloud Code status bar and select 'Run on Cloud Run Emulator'.
![image](./img/status-bar.png)
## Forecast Results

2. Use the Cloud Run Emulator dialog to specify your [builder option](https://cloud.google.com/code/docs/vscode/deploying-a-cloud-run-app#deploying_a_cloud_run_service). Cloud Code supports `Docker`, `Jib`, and `Buildpacks`.
See the skaffold documentation on [builders](https://skaffold.dev/docs/pipeline-stages/builders/)
for more information about build artifact types.
![image](./img/build-config.png)
A report with the latest forecast results can be found at:
[Iowa Liquor Sales Forecast Report](https://lookerstudio.google.com/reporting/df348e6b-5d25-47bd-ae51-d7d40906a73b)

3. Click `Run`. Cloud Code begins building your image.
## Code Walkthrough

4. View the build progress in the _OUTPUT_ window.
Once the build has finished, click on the URL in the _OUTPUT_ window to view
your live application.
![image](./img/cloud-run-url.png)
You can find a step-by-step walkthrough of the entire solution, including
the data extraction, feature engineering, and transformation, model training
and evaluation, as well as forecasting future sales at:
[notebooks/Walkthrough.ipynb](./notebooks/Walkthrough.ipynb)

5. To stop the application, click the stop icon on the Debug Toolbar.
## Pipelines

---
The [pipelines](./pipelines) folder contains scripts that can be used as
entrypoints to perform several tasks related to the solution.

## Getting Started with IntelliJ
## Additional Information

### Run the application locally with the Cloud Run Emulator
### Docker Container

#### Define run configuration
The [Dockerfile](./Dockerfile) defines the Docker container configuration to
replicate the environment used to develop and run the forecasting model.
By using this Docker container, you can ensure that the code runs consistently
across different environments.

1. Click the Run/Debug configurations dropdown on the top taskbar and select 'Edit Configurations'.
![image](./img/edit-config.png)
To build and run the Docker container, you can use the following commands:

2. Select 'Cloud Run: Run Locally' and specify your [builder option](https://cloud.google.com/code/docs/intellij/developing-a-cloud-run-app#defining_your_run_configuration). Cloud Code supports Docker, Jib, and Buildpacks. See the skaffold documentation on [builders](https://skaffold.dev/docs/pipeline-stages/builders/) for more information about build artifact types.
![image](./img/local-build-config.png)
* **Build the Docker image:**

```bash
docker build -t iowa-liquor-sales-forecast .
```

#### Run the application
1. Click the Run/Debug configurations dropdown and select 'Cloud Run: Run Locally'. Click the run icon.
![image](./img/config-run-locally.png)
* **Run the Docker container:**

```bash
docker run -it --rm iowa-liquor-sales-forecast
```

2. View the build process in the output window. Once the build has finished, you will receive a notification from the Event Log. Click 'View' to access the local URLs for your deployed services.
![image](./img/local-success.png)
### Environment Variables

---
## Sign up for User Research
The solution relies on a few environment variables that need to be set up for proper operation.
These include:

We want to hear your feedback!
- `GOOGLE_APPLICATION_CREDENTIALS`: Path to the JSON file that contains your Google Cloud service account credentials.
- `PROJECT_ID`: The ID of your Google Cloud project.
- `DATASET_NAME`: The name of the BigQuery dataset where the data is stored (this matches the `DATASET_NAME` variable set in the Dockerfile).

The Cloud Code team is inviting our user community to sign-up to participate in Google User Experience Research.
You can set these environment variables in your shell or define them in a `.env` file,
which will be automatically loaded when running the Docker container or scripts.

If you’re invited to join a study, you may try out a new product or tell us what you think about the products you use every day. At this time, Google is only sending invitations for upcoming remote studies. Once a study is complete, you’ll receive a token of thanks for your participation such as a gift card or some Google swag.
### Testing

The repository includes a suite of tests to ensure that the code behaves as expected.
You can run the tests using `pytest`:

```bash
# Run tests
pytest tests/
```

### Continuous Integration

This repository is set up with a Continuous Integration (CI) pipeline using GitHub Actions.
The CI pipeline is configured to run the tests automatically whenever code is pushed to the
repository or a pull request is created. This helps to ensure that new changes do not break existing
functionality.


### License

This project is licensed under the MIT License. See the [LICENSE](./LICENSE) file for more details.


### Codebase Static Test Results

The `iowa_forecast` package received the following pylint scores:

* `iowa_forecast/ml_train.py`: 10.0
* `iowa_forecast/plots.py`: 9.8
* `iowa_forecast/utils.py`: 9.65
* `iowa_forecast/load_data.py`: 9.28
* `iowa_forecast/ml_eval.py`: 8.41

[Sign up using this link](https://google.qualtrics.com/jfe/form/SV_4Me7SiMewdvVYhL?reserved=1&utm_source=In-product&Q_Language=en&utm_medium=own_prd&utm_campaign=Q1&productTag=clou&campaignDate=January2021&referral_code=UXbT481079) and answer a few questions about yourself, as this will help our research team match you to studies that are a great fit.
Loading

0 comments on commit 57756cf

Please sign in to comment.