From 8cd549bae05da838ea84fd0ece9f819b59f49b8b Mon Sep 17 00:00:00 2001 From: kettererdavid Date: Thu, 12 Oct 2023 17:30:58 -0600 Subject: [PATCH 1/4] Ticket 1 first commit --- app/data.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++++---- app/main.py | 8 +++--- 2 files changed, 75 insertions(+), 9 deletions(-) diff --git a/app/data.py b/app/data.py index 61e69e5..8b513e4 100644 --- a/app/data.py +++ b/app/data.py @@ -8,18 +8,84 @@ class Database: + """ Class to create and manage a Database that stores Monsters with their attributes. + + This class stores and manages Monster data in a MongoDB database. The class provides methods to + seed the database with Monsters, reset the database, count the instances of monsters and extract + data in different formats. + + Attributes: + collection: A collection in a MongoDB database where you want the data stored. + + Methods: + - __init__(self, collection: str): Initializes the Database class, with the collection name gives as a string. + - seed(self, amount: int): Seeds the collection with specified amount of Monster instances + - count(self) -> int: Counts and returns the number of monsters in the collection + - dataframe(self) -> DataFrame: Returns data in the collection as a pandas DataFrame + - html_table(self) -> str: Returns html table of the data for display + + Example: + db = Database('Monsters') + db.seed(1000) + print(db.count()) # Output : 1000 + db.reset() + print(db.count()) # Output : 0 + + """ + load_dotenv() + database = MongoClient(getenv("DB_URL"), tlsCAFile=where())["Database"] + + def __init__(self, collection: str): + """Initializes the Database class with the specified collection. + + Args: + collection (str): The name of the collection to work with, passed as string. + """ + self.collection = self.database[collection] def seed(self, amount): - pass + """Seeds the collection with a specified number of random Monster records. + + Args: + amount (int): The number of Monster records to generate and insert. + + Returns: + Formatted string to report if seed was successful. + """ + return {'Seed successful': + f"{self.collection.insert_many([Monster().to_dict() for _ in range(amount)]).acknowledged}"} def reset(self): - pass + """Resets the collection by removing all records. + + Returns: + Formatted string providing boolean about reset success. + """ + return {'Collection reset successful?': f'{self.collection.delete_many(filter={}).acknowledged}'} def count(self) -> int: - pass + """Counts the number of records in the collection. + + Returns: + int: Number of records in the collection. + """ + return self.collection.count_documents(filter={}) def dataframe(self) -> DataFrame: - pass + """Retrieves data from the collection and returns it as a pandas DataFrame. + + Returns: + DataFrame: A pandas DataFrame containing the collection data. + """ + return DataFrame(self.collection.find({}, {"_id": False})) def html_table(self) -> str: - pass + """Generates an HTML table from the data for display in flask app. + + Returns: + str: An HTML table as a string, or 'None' if the collection is empty. + """ + if self.count() > 0: + return self.dataframe().to_html(index=False) + else: + return 'None' diff --git a/app/main.py b/app/main.py index 1f9e0b0..6f5704f 100644 --- a/app/main.py +++ b/app/main.py @@ -10,7 +10,7 @@ from app.graph import chart from app.machine import Machine -SPRINT = 0 +SPRINT = 1 APP = Flask(__name__) @@ -28,7 +28,7 @@ def home(): def data(): if SPRINT < 1: return render_template("data.html") - db = Database() + db = Database('Monsters') return render_template( "data.html", count=db.count(), @@ -40,7 +40,7 @@ def data(): def view(): if SPRINT < 2: return render_template("view.html") - db = Database() + db = Database('Monsters') options = ["Level", "Health", "Energy", "Sanity", "Rarity"] x_axis = request.values.get("x_axis") or options[1] y_axis = request.values.get("y_axis") or options[2] @@ -66,7 +66,7 @@ def view(): def model(): if SPRINT < 3: return render_template("model.html") - db = Database() + db = Database('Monsters') options = ["Level", "Health", "Energy", "Sanity", "Rarity"] filepath = os.path.join("app", "model.joblib") if not os.path.exists(filepath): From 358540e030768b16b0fbfb7961ee3dbdb30af34b Mon Sep 17 00:00:00 2001 From: kettererdavid Date: Mon, 23 Oct 2023 09:26:18 -0600 Subject: [PATCH 2/4] Sprint 2 commit --- app/graph.py | 21 ++++++++++++++++++++- app/main.py | 2 +- install.sh | 0 3 files changed, 21 insertions(+), 2 deletions(-) mode change 100644 => 100755 install.sh diff --git a/app/graph.py b/app/graph.py index 7fb68f1..435d568 100644 --- a/app/graph.py +++ b/app/graph.py @@ -1,5 +1,24 @@ from altair import Chart +import altair as alt def chart(df, x, y, target) -> Chart: - pass + + result = (Chart(df, title=f"{y} by {x} for {target}"). + mark_circle().encode(x=x, y=y, tooltip=df.columns.to_list(), color=target).interactive()) + result = result.configure(background='gray', padding={"left": 50, "top": 50, "right": 50, "bottom": 50}) + + zoom = alt.selection(type='interval', bind='scales') + pan = alt.selection(type='interval', bind='scales', + on="[mousedown[event.altKey], window:mouseup] > window:mousemove!", encodings=['x']) + result = result.add_params(zoom, pan) + + result = result.configure_axis(gridOpacity=0.3, titleFontSize=20) + result = result.configure_view(continuousWidth=500, continuousHeight=500, fill='gray', stroke=None) + result = result.configure_title(fontSize=30, color='black') + + result = result.transform_filter( + zoom + ).transform_filter( + pan) + return result diff --git a/app/main.py b/app/main.py index 6f5704f..b0cf64c 100644 --- a/app/main.py +++ b/app/main.py @@ -10,7 +10,7 @@ from app.graph import chart from app.machine import Machine -SPRINT = 1 +SPRINT = 2 APP = Flask(__name__) diff --git a/install.sh b/install.sh old mode 100644 new mode 100755 From 03a118d7acc9a61988d7b08b9a9e0559798621fa Mon Sep 17 00:00:00 2001 From: kettererdavid Date: Mon, 23 Oct 2023 10:16:15 -0600 Subject: [PATCH 3/4] Sprint2 commit, updated docstring --- app/graph.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/app/graph.py b/app/graph.py index 435d568..e62cfcc 100644 --- a/app/graph.py +++ b/app/graph.py @@ -3,6 +3,27 @@ def chart(df, x, y, target) -> Chart: + """ + Generates an interactive chart with configurable appearance and interaction controls. + + Parameters: + - df (pd.DataFrame): The input DataFrame containing the data to be plotted. + - x (str): The column name in 'df' to be used for the x-axis of the plot. + - y (str): The column name in 'df' to be used for the y-axis of the plot. + - target (str): The column name in 'df' to be used for coloring the scatter plot points. + + Returns: + - Chart: An Altair chart object representing the interactive scatter plot. + + Features: + - The chart's title is dynamically generated based on the 'y', 'x', and 'target' values. + - The chart's background is gray and padding is added around the chart. + - The chart has interactive zoom and pan functionality: + - Zoom: Users can drag to create a rectangle to zoom into a specific area. + - Pan: Users can hold the 'alt' key and drag to pan across the chart. + - Axes and titles have customized font sizes and appearances. + - The chart's view size is set to a fixed width and height with a gray fill and no stroke. + """ result = (Chart(df, title=f"{y} by {x} for {target}"). mark_circle().encode(x=x, y=y, tooltip=df.columns.to_list(), color=target).interactive()) From 17f53f65f9179246406d81beecbf60e50d98ea54 Mon Sep 17 00:00:00 2001 From: kettererdavid Date: Thu, 26 Oct 2023 17:08:54 -0600 Subject: [PATCH 4/4] Sprint3 submission --- app/machine.py | 87 +++++++++++++++++++++++++++++++++++++++++++++++--- app/main.py | 2 +- run.sh | 2 +- 3 files changed, 84 insertions(+), 7 deletions(-) diff --git a/app/machine.py b/app/machine.py index 1785a57..60bf9ad 100644 --- a/app/machine.py +++ b/app/machine.py @@ -1,17 +1,94 @@ +from sklearn.ensemble import RandomForestClassifier +from joblib import load, dump +from datetime import datetime + + class Machine: + """A customized Random Forest Classifier tailored for working with our dataset. + + Attributes: + ----------- + name : str + A descriptor for the classifier, defaulted to "Random Forest Classifier". + model : RandomForestClassifier + The trained and tuned Random Forest model. + timestamp : str + The date and time when the model was trained. + """ def __init__(self, df): - pass + """ + Initializes the Random Forest Classifier model with the given dataset and fits it to the data. + Creates timestamp after model has been fit to the data. + + Parameters: + ----------- + df : pd.DataFrame + A dataframe where 'Rarity' is the target column and the rest are feature columns. + """ + self.name = "Random Forest Classifier" + target = df["Rarity"] + features = df.drop(columns=["Rarity"]) + self.model = RandomForestClassifier(n_estimators=100, n_jobs=-1, max_depth=30, bootstrap=False, + criterion='gini', max_features='log2', min_samples_leaf=1, + min_samples_split=2) + self.model.fit(features, target) + self.timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') def __call__(self, feature_basis): - pass + """ + Predicts the rarity and its probability for the given features. + + Parameters: + ----------- + feature_basis : pd.DataFrame + A dataframe containing the features for which predictions need to be made. + + Returns: + -------- + prediction : str + The predicted class. + probability : float + The probability of the predicted class. + """ + prediction, *_ = self.model.predict(feature_basis) + probas, *_ = self.model.predict_proba(feature_basis) + return prediction, max(probas) def save(self, filepath): - pass + """ + Saves the current instance to the specified filepath. + + Parameters: + ----------- + filepath : str + The path where the current instance should be saved. + """ + dump(self, filepath) @staticmethod def open(filepath): - pass + """ + Loads and returns a saved model from the given filepath. + + Parameters: + ----------- + filepath : str + The path from which the instance should be loaded. + + Returns: + -------- + The loaded instance of the class. + """ + return load(filepath) def info(self): - pass + """ + Provides a string description of the classifier with its timestamp. + + Returns: + -------- + str + A string formatted as: "Currently running {name}, from {timestamp}". + """ + return f'Currently running {self.name}, from {self.timestamp}' diff --git a/app/main.py b/app/main.py index b0cf64c..5259f11 100644 --- a/app/main.py +++ b/app/main.py @@ -10,7 +10,7 @@ from app.graph import chart from app.machine import Machine -SPRINT = 2 +SPRINT = 3 APP = Flask(__name__) diff --git a/run.sh b/run.sh index bdddf39..a541173 100644 --- a/run.sh +++ b/run.sh @@ -1 +1 @@ -python3 -m gunicorn app.main:APP +python3 -m gunicorn app.main:APP --reload