Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ketterer #5

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 71 additions & 5 deletions app/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,84 @@


class Database:
    """Create and manage a MongoDB collection that stores Monsters.

    The class wraps a single collection of the MongoDB database named
    "Database" and offers helpers to seed it with Monster records, reset it,
    count its records and export its contents.

    Attributes:
        database: Class-level handle to the MongoDB database named "Database",
            opened once at class-definition time from the DB_URL environment
            variable.
        collection: The collection of ``database`` this instance works on.

    Methods:
        - __init__(self, collection: str): Binds the instance to the named collection.
        - seed(self, amount: int) -> dict: Inserts ``amount`` Monster records.
        - reset(self) -> dict: Deletes every record in the collection.
        - count(self) -> int: Returns the number of records in the collection.
        - dataframe(self) -> DataFrame: Returns the collection as a pandas DataFrame.
        - html_table(self) -> str: Returns the collection as an HTML table.

    Example:
        db = Database('Monsters')
        db.seed(1000)
        print(db.count())  # Output : 1000
        db.reset()
        print(db.count())  # Output : 0
    """
    # Runs once when the class is defined; presumably loads DB_URL from a
    # local .env file before the client below reads it -- confirm.
    load_dotenv()
    # One client/database handle shared by every Database instance.
    database = MongoClient(getenv("DB_URL"), tlsCAFile=where())["Database"]

    def __init__(self, collection: str):
        """Bind this instance to the named collection.

        Args:
            collection (str): The name of the collection to work with.
        """
        self.collection = self.database[collection]

    def seed(self, amount: int) -> dict:
        """Insert ``amount`` freshly generated Monster records.

        Args:
            amount (int): The number of Monster records to generate and insert.

        Returns:
            dict: ``{'Seed successful': 'True'}`` or ``'False'``; the value is
            the insert's ``acknowledged`` flag rendered as a string.
        """
        # NOTE(review): pymongo's insert_many presumably rejects an empty
        # document list, so amount=0 would raise -- confirm against callers.
        documents = [Monster().to_dict() for _ in range(amount)]
        result = self.collection.insert_many(documents)
        return {'Seed successful': f"{result.acknowledged}"}

    def reset(self) -> dict:
        """Remove every record from the collection.

        Returns:
            dict: ``{'Collection reset successful?': 'True'}`` or ``'False'``;
            the value is the delete's ``acknowledged`` flag as a string.
        """
        # An empty filter matches every document.
        result = self.collection.delete_many(filter={})
        return {'Collection reset successful?': f'{result.acknowledged}'}

    def count(self) -> int:
        """Count the records in the collection.

        Returns:
            int: Number of records in the collection.
        """
        return self.collection.count_documents(filter={})

    def dataframe(self) -> DataFrame:
        """Return the collection's records as a pandas DataFrame.

        Returns:
            DataFrame: One row per record, with the ``_id`` field excluded
            by the query projection.
        """
        return DataFrame(self.collection.find({}, {"_id": False}))

    def html_table(self) -> str:
        """Render the collection as an HTML table.

        Returns:
            str: The HTML table (without the index column), or the literal
            string ``'None'`` when the collection is empty.
        """
        if self.count() > 0:
            return self.dataframe().to_html(index=False)
        return 'None'
42 changes: 41 additions & 1 deletion app/graph.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,45 @@
from altair import Chart
import altair as alt


def chart(df, x, y, target) -> Chart:
    """Build an interactive Altair circle-mark chart of ``y`` against ``x``.

    Parameters:
    - df (pd.DataFrame): The data to plot; all of its columns are shown in the tooltip.
    - x (str): The column name in 'df' encoded on the x-axis.
    - y (str): The column name in 'df' encoded on the y-axis.
    - target (str): The column name in 'df' used for the color encoding.

    Returns:
    - Chart: The configured Altair chart.

    Features:
    - The title is generated as "{y} by {x} for {target}" (font size 30, black).
    - Gray background with 50px padding on every side.
    - ``.interactive()`` plus two interval selections bound to the scales:
      ``zoom`` with default triggers, and ``pan`` on the x encoding that is
      driven by dragging while the alt key is held.
    - Axis grid opacity 0.3 and axis title font size 20.
    - View is 500x500 with a gray fill and no stroke.
    """
    result = (
        Chart(df, title=f"{y} by {x} for {target}")
        .mark_circle()
        .encode(x=x, y=y, tooltip=df.columns.to_list(), color=target)
        .interactive()
    )
    result = result.configure(
        background='gray',
        padding={"left": 50, "top": 50, "right": 50, "bottom": 50},
    )

    # selection_interval is the non-deprecated spelling of
    # alt.selection(type='interval', ...) in the Altair 5 API that
    # add_params below already relies on.
    zoom = alt.selection_interval(bind='scales')
    pan = alt.selection_interval(
        bind='scales',
        # Only react to drags that started with the alt key pressed.
        on="[mousedown[event.altKey], window:mouseup] > window:mousemove!",
        encodings=['x'],
    )
    result = result.add_params(zoom, pan)

    result = result.configure_axis(gridOpacity=0.3, titleFontSize=20)
    result = result.configure_view(
        continuousWidth=500, continuousHeight=500, fill='gray', stroke=None
    )
    result = result.configure_title(fontSize=30, color='black')

    # Filter the data by both selections.
    result = result.transform_filter(zoom).transform_filter(pan)
    return result
87 changes: 82 additions & 5 deletions app/machine.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,94 @@
from sklearn.ensemble import RandomForestClassifier
from joblib import load, dump
from datetime import datetime


class Machine:
    """A Random Forest Classifier wrapper that predicts the 'Rarity' column.

    Attributes:
    -----------
    name : str
        A descriptor for the classifier, always "Random Forest Classifier".
    model : RandomForestClassifier
        The fitted Random Forest model.
    timestamp : str
        The local date and time ('%Y-%m-%d %H:%M:%S') at which fitting finished.
    """

    def __init__(self, df):
        """
        Fit a Random Forest Classifier to the given dataset.

        The timestamp is recorded after the model has been fit.

        Parameters:
        -----------
        df : pd.DataFrame
            A dataframe where 'Rarity' is the target column and the rest are
            feature columns.
        """
        self.name = "Random Forest Classifier"
        target = df["Rarity"]
        features = df.drop(columns=["Rarity"])
        self.model = RandomForestClassifier(
            n_estimators=100,
            n_jobs=-1,
            max_depth=30,
            bootstrap=False,
            criterion='gini',
            max_features='log2',
            min_samples_leaf=1,
            min_samples_split=2,
        )
        self.model.fit(features, target)
        self.timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def __call__(self, feature_basis):
        """
        Predict the rarity and its probability for the given features.

        Only the first row of ``feature_basis`` is reported; predictions for
        any further rows are discarded.

        Parameters:
        -----------
        feature_basis : pd.DataFrame
            A dataframe containing the features to predict from.

        Returns:
        --------
        prediction
            The predicted class for the first row.
        probability
            The highest class probability for the first row.
        """
        # Star-unpacking keeps the first row's result and drops the rest.
        prediction, *_ = self.model.predict(feature_basis)
        probas, *_ = self.model.predict_proba(feature_basis)
        return prediction, max(probas)

    def save(self, filepath):
        """
        Save the current instance to the specified filepath.

        Parameters:
        -----------
        filepath : str
            The path where the current instance should be saved.
        """
        dump(self, filepath)

    @staticmethod
    def open(filepath):
        """
        Load and return a saved instance from the given filepath.

        Parameters:
        -----------
        filepath : str
            The path from which the instance should be loaded.

        Returns:
        --------
        The object stored at ``filepath``.
        """
        # NOTE(review): joblib persistence is presumably pickle-based, so
        # only load files this application wrote itself -- confirm.
        return load(filepath)

    def info(self) -> str:
        """
        Describe the classifier together with its training timestamp.

        Returns:
        --------
        str
            A string formatted as: "Currently running {name}, from {timestamp}".
        """
        return f'Currently running {self.name}, from {self.timestamp}'
8 changes: 4 additions & 4 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from app.graph import chart
from app.machine import Machine

SPRINT = 0
SPRINT = 3
APP = Flask(__name__)


Expand All @@ -28,7 +28,7 @@ def home():
def data():
if SPRINT < 1:
return render_template("data.html")
db = Database()
db = Database('Monsters')
return render_template(
"data.html",
count=db.count(),
Expand All @@ -40,7 +40,7 @@ def data():
def view():
if SPRINT < 2:
return render_template("view.html")
db = Database()
db = Database('Monsters')
options = ["Level", "Health", "Energy", "Sanity", "Rarity"]
x_axis = request.values.get("x_axis") or options[1]
y_axis = request.values.get("y_axis") or options[2]
Expand All @@ -66,7 +66,7 @@ def view():
def model():
if SPRINT < 3:
return render_template("model.html")
db = Database()
db = Database('Monsters')
options = ["Level", "Health", "Energy", "Sanity", "Rarity"]
filepath = os.path.join("app", "model.joblib")
if not os.path.exists(filepath):
Expand Down
Empty file modified install.sh
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion run.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
python3 -m gunicorn app.main:APP
python3 -m gunicorn app.main:APP --reload