eugene · Madu01 · Jun 28, 2023 · Jun 28, 2023 · Jun 28, 2023 · Jun 28, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,41 @@
+name: CI
+
+on:
+  pull_request:  # triggered only by pull requests that target master
+    branches: [master]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10"]  # quoted so YAML does not read it as the float 3.1
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies  # pytest plus the pinned packages in datasets/requirements.txt
+        run: |
+          pip3 install --upgrade pip
+          pip3 install pytest
+          pip3 install -r datasets/requirements.txt
+
+      - name: Install build   
+        run: python3 -m pip install --upgrade build
+
+      - name: Build a binary wheel and a source tarball  # both artifacts are written to dist/
+        run: >-
+          python -m
+          build
+          --sdist
+          --wheel
+          --outdir dist/
+
+      - name: Test with pytest  # runs the tests collected from test.py
+        run: |
+          pytest test.py
+
diff --git a/.gitignore b/.gitignore
@@ -1 +1,3 @@
 __pycache__/
+.pytest_cache
+
diff --git a/cccp-spngp.py b/cccp-spngp.py
@@ -1,9 +1,7 @@
 import numpy as np
 import pandas as pd
-import matplotlib.pyplot as plt
 from learnspngp import build, query, build_bins
 from spngp import structure
-import sys
 
 np.random.seed(58)
 

diff --git a/concrete-spngp.py b/concrete-spngp.py
@@ -2,7 +2,6 @@
 import pandas as pd
 from learnspngp import build, query, build_bins
 from spngp import structure
-import sys
 
 np.random.seed(58)
 

diff --git a/datasets/requirements.txt b/datasets/requirements.txt
@@ -0,0 +1,3 @@
+numpy==1.23
+pandas==1.5
+torch==2.0.1
diff --git a/dockerfile b/dockerfile
@@ -0,0 +1,21 @@
+FROM ubuntu:20.04
+
+WORKDIR /app
+
+ENV TZ="America/Sao_Paulo"
+# Preset the timezone files before any package is installed (presumably so tzdata does not prompt).
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+# apt-get is the script-stable front end; python3-dev matches the python3-pip interpreter.
+RUN apt-get update -y \
+    && dpkg --configure -a && apt-get install -y python3-pip libpq-dev python3-dev ant make
+
+COPY makefile makefile
+COPY datasets datasets
+COPY cccp-spngp.py .
+COPY concrete-spngp.py .
+COPY energy-spngp.py .
+COPY learnspngp.py .
+COPY spngp.py .
+
+RUN pip3 install --upgrade pip \
+    && pip3 install -r datasets/requirements.txt
diff --git a/energy-spngp.py b/energy-spngp.py
@@ -1,10 +1,7 @@
 import numpy as np
 import pandas as pd
-import matplotlib.pyplot as plt
 from learnspngp import build, query, build_bins
 from spngp import structure
-import sklearn.preprocessing as preprocessing
-import sys
 
 np.random.seed(58)
 

diff --git a/makefile b/makefile
@@ -0,0 +1,14 @@
+start:  # build the image, then start a detached container that stays up for "make init"
+	docker build -t image-spngp .
+	docker run -dit --name container-spngp image-spngp
+
+init:  # greet on the host terminal, then open an interactive shell inside the running container
+	@clear
+	@echo "Welcome to container"
+	docker exec -it container-spngp bash
+
+results:  # run the three experiment scripts in sequence
+	python3 cccp-spngp.py
+	python3 energy-spngp.py
+	python3 concrete-spngp.py
+
diff --git a/readme.md b/readme.md
@@ -28,6 +28,29 @@ Following is a summary of the results:
 | concrete  | 8     | 1030 | **4.84**          | 6.25         |
 | ccpp      | 4     | 9568 | **3.68**          | 4.11         |
 
+## Run the project with docker
+
+### first steps
+
+- You need to download and install Docker
+
+### start docker container
+
+Run the docker container 
+
+``` make start ```
+
+### Install dependencies and exec docker
+
+Open a shell inside the running container (the dependencies are already installed when the image is built)
+
+``` make init ```
+### Results
+
+Run the experiment scripts to produce the results
+
+```make results```
+
 ## :mortar_board: Credits
 This code is a part of a MSc thesis written by Yevgen "Eugene" Zainchkovskyy at DTU Compute, department of Applied Mathematics and Computer Science at the Technical University of Denmark with an industrial partner Alipes Capital ApS. The work was carried out under supervision of Ole Winther, Professor at Section for Cognitive Systems, DTU Compute and Carsten Stahlhut, PhD, Principal Data Scientist, Novo Nordisk A/S (former Head of Quants at Alipes Capital). 
 

diff --git a/setup.py b/setup.py
@@ -0,0 +1,20 @@
+from setuptools import setup, find_packages
+
+VERSION = '0.0.1' 
+DESCRIPTION = 'First package of the SPNGP project'
+# Package metadata handed to setuptools
+setup(
+       # 'name' must match the name of the folder 'verysimplemodule' - NOTE(review): looks like a tutorial leftover, confirm
+        name="projectspngp", 
+        version=VERSION,
+        author="Maria Eduarda Barbosa",
+        author_email="[email protected]",
+        description=DESCRIPTION,
+        packages=find_packages(),        
+        keywords=['python', 'first package'],
+        classifiers= [
+            "Development Status :: 3 - Alpha",
+            "Intended Audience :: Developers",
+            "Programming Language :: Python :: 3.10",
+        ],
+)
diff --git a/src/__init__.py b/src/__init__.py
diff --git a/src/energy-spngp.py b/src/energy-spngp.py
@@ -0,0 +1,49 @@
+import numpy as np
+import pandas as pd
+from learnspngp import build, query, build_bins
+from spngp import structure
+
+np.random.seed(58)
+
+data = pd.read_csv('datasets/energy/energy.csv')
+data = pd.DataFrame(data).dropna()
+dmean, dstd = data.mean(), data.std()
+data = (data-dmean)/dstd
+
+target_index = 9 # 8 or 9
+
+# GPSPN on full data
+train = data.sample(frac=0.8, random_state=58)
+test  = data.drop(train.index)
+x, y  = train.iloc[:, :-2].values, train.iloc[:, target_index].values.reshape(-1,1)
+
+opts = {
+    'min_samples':          0,
+    'X':                    x, 
+    'qd':                   3, 
+    'max_depth':            3, 
+    'max_samples':     10**10, 
+    'log':               True,
+    'min_samples':          0,
+    'jump':              True,
+    'reduce_branching':  True
+}
+root_region, gps_ = build_bins(**opts)
+#sys.exit()
+root, gps         = structure(root_region, gp_types=['rbf'])
+
+for i, gp in enumerate(gps):
+    idx = query(x, gp.mins, gp.maxs)
+    gp.x, gp.y = x[idx], y[idx]
+    print(f"Training GP {i+1}/{len(gps)} ({len(idx)})")
+    gp.init(steps=30, cuda=True)
+
+root.update()
+
+for smudge in np.arange(0, 0.5, 0.05):
+    mu_s, cov_s = root.forward(test.iloc[:, 0:-2].values, smudge=smudge)
+    mu_s = (mu_s.ravel() * dstd.iloc[target_index]) + dmean.iloc[target_index]
+    mu_t = (test.iloc[:, target_index]*dstd.iloc[target_index]) + dmean.iloc[target_index]
+    sqe = (mu_s - mu_t.values)**2
+    rmse = np.sqrt(sqe.sum()/len(test))
+    print(f"SPN-GP (smudge={round(smudge, 4)}) \t RMSE: {rmse}")