BloomTech-Labs · kettererdavid · Oct 12, 2023 · Oct 23, 2023 · Oct 23, 2023 · Oct 26, 2023
diff --git a/app/data.py b/app/data.py
@@ -8,18 +8,84 @@
 
 
 class Database:
+    """ Class to create and manage a Database that stores Monsters with their attributes.
+
+    This class stores and manages Monster data in a MongoDB database. The class provides methods to
+    seed the database with Monsters, reset the database, count the instances of monsters and extract
+    data in different formats.
+
+    Attributes:
+        collection: A collection in a MongoDB database where you want the data stored.
+
+    Methods:
+        - __init__(self, collection: str): Initializes the Database class, with the collection name given as a string.
+        - seed(self, amount: int): Seeds the collection with specified amount of Monster instances
+        - count(self) -> int: Counts and returns the number of monsters in the collection
+        - dataframe(self) -> DataFrame: Returns data in the collection as a pandas DataFrame
+        - html_table(self) -> str: Returns html table of the data for display
+
+    Example:
+        db = Database('Monsters')
+        db.seed(1000)
+        print(db.count())  # Output : 1000
+        db.reset()
+        print(db.count())  # Output : 0
+
+    """
+    load_dotenv()
+    database = MongoClient(getenv("DB_URL"), tlsCAFile=where())["Database"]
+
+    def __init__(self, collection: str):
+        """Initializes the Database class with the specified collection.
+
+        Args:
+            collection (str): The name of the collection to work with, passed as string.
+        """
+        self.collection = self.database[collection]
 
     def seed(self, amount):
-        pass
+        """Seeds the collection with a specified number of random Monster records.
+
+        Args:
+            amount (int): The number of Monster records to generate and insert.
+
+        Returns:
+            dict: Single-entry dict mapping 'Seed successful' to the insert's acknowledged flag as a string.
+        """
+        return {'Seed successful':
+                f"{self.collection.insert_many([Monster().to_dict() for _ in range(amount)]).acknowledged}"}
 
     def reset(self):
-        pass
+        """Resets the collection by removing all records.
+
+        Returns:
+            dict: Single-entry dict mapping 'Collection reset successful?' to the delete's acknowledged flag as a string.
+        """
+        return {'Collection reset successful?': f'{self.collection.delete_many(filter={}).acknowledged}'}
 
     def count(self) -> int:
-        pass
+        """Counts the number of records in the collection.
+
+        Returns:
+            int: Number of records in the collection.
+        """
+        return self.collection.count_documents(filter={})
 
     def dataframe(self) -> DataFrame:
-        pass
+        """Retrieves data from the collection and returns it as a pandas DataFrame.
+
+        Returns:
+            DataFrame: A pandas DataFrame containing the collection data.
+        """
+        return DataFrame(self.collection.find({}, {"_id": False}))
 
     def html_table(self) -> str:
-        pass
+        """Generates an HTML table from the data for display in flask app.
+
+        Returns:
+            str: An HTML table as a string, or 'None' if the collection is empty.
+        """
+        if self.count() > 0:
+            return self.dataframe().to_html(index=False)
+        else:
+            return 'None'
diff --git a/app/graph.py b/app/graph.py
@@ -1,5 +1,45 @@
 from altair import Chart
+import altair as alt
 
 
 def chart(df, x, y, target) -> Chart:
-    pass
+    """
+    Generates an interactive chart with configurable appearance and interaction controls.
+
+    Parameters:
+    - df (pd.DataFrame): The input DataFrame containing the data to be plotted.
+    - x (str): The column name in 'df' to be used for the x-axis of the plot.
+    - y (str): The column name in 'df' to be used for the y-axis of the plot.
+    - target (str): The column name in 'df' to be used for coloring the scatter plot points.
+
+    Returns:
+    - Chart: An Altair chart object representing the interactive scatter plot.
+
+    Features:
+    - The chart's title is dynamically generated based on the 'y', 'x', and 'target' values.
+    - The chart's background is gray and padding is added around the chart.
+    - The chart has interactive zoom and pan functionality:
+      - Zoom: Users can drag to create a rectangle to zoom into a specific area.
+      - Pan: Users can hold the 'alt' key and drag to pan across the chart.
+    - Axes and titles have customized font sizes and appearances.
+    - The chart's view size is set to a fixed width and height with a gray fill and no stroke.
+    """
+
+    result = (Chart(df, title=f"{y} by {x} for {target}").
+              mark_circle().encode(x=x, y=y, tooltip=df.columns.to_list(), color=target).interactive())
+    result = result.configure(background='gray', padding={"left": 50, "top": 50, "right": 50, "bottom": 50})
+
+    zoom = alt.selection(type='interval', bind='scales')
+    pan = alt.selection(type='interval', bind='scales',
+                        on="[mousedown[event.altKey], window:mouseup] > window:mousemove!", encodings=['x'])
+    result = result.add_params(zoom, pan)
+
+    result = result.configure_axis(gridOpacity=0.3, titleFontSize=20)
+    result = result.configure_view(continuousWidth=500, continuousHeight=500, fill='gray', stroke=None)
+    result = result.configure_title(fontSize=30, color='black')
+
+    result = result.transform_filter(
+        zoom
+    ).transform_filter(
+        pan)
+    return result
diff --git a/app/machine.py b/app/machine.py
@@ -1,17 +1,94 @@
+from sklearn.ensemble import RandomForestClassifier
+from joblib import load, dump
+from datetime import datetime
+
+
 class Machine:
+    """A customized Random Forest Classifier tailored for working with our dataset.
+
+        Attributes:
+        -----------
+        name : str
+            A descriptor for the classifier, defaulted to "Random Forest Classifier".
+        model : RandomForestClassifier
+            The trained and tuned Random Forest model.
+        timestamp : str
+            The date and time when the model was trained.
 
+        """
     def __init__(self, df):
-        pass
+        """
+               Initializes the Random Forest Classifier model with the given dataset and fits it to the data.
+                Creates timestamp after model has been fit to the data.
+
+               Parameters:
+               -----------
+               df : pd.DataFrame
+                   A dataframe where 'Rarity' is the target column and the rest are feature columns.
+               """
+        self.name = "Random Forest Classifier"
+        target = df["Rarity"]
+        features = df.drop(columns=["Rarity"])
+        self.model = RandomForestClassifier(n_estimators=100, n_jobs=-1, max_depth=30, bootstrap=False,
+                                            criterion='gini', max_features='log2', min_samples_leaf=1,
+                                            min_samples_split=2)
+        self.model.fit(features, target)
+        self.timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
 
     def __call__(self, feature_basis):
-        pass
+        """
+                Predicts the rarity and its probability for the given features.
+
+                Parameters:
+                -----------
+                feature_basis : pd.DataFrame
+                    A dataframe containing the features for which predictions need to be made.
+
+                Returns:
+                --------
+                prediction : str
+                    The predicted class.
+                probability : float
+                    The probability of the predicted class.
+                """
+        prediction, *_ = self.model.predict(feature_basis)
+        probas, *_ = self.model.predict_proba(feature_basis)
+        return prediction, max(probas)
 
     def save(self, filepath):
-        pass
+        """
+                Saves the current instance to the specified filepath.
+
+                Parameters:
+                -----------
+                filepath : str
+                    The path where the current instance should be saved.
+                """
+        dump(self, filepath)
 
     @staticmethod
     def open(filepath):
-        pass
+        """
+                Loads and returns a saved model from the given filepath.
+
+                Parameters:
+                -----------
+                filepath : str
+                    The path from which the instance should be loaded.
+
+                Returns:
+                --------
+                    The loaded instance of the class.
+                """
+        return load(filepath)
 
     def info(self):
-        pass
+        """
+                Provides a string description of the classifier with its timestamp.
+
+                Returns:
+                --------
+                str
+                    A string formatted as: "Currently running {name}, from {timestamp}".
+                """
+        return f'Currently running {self.name}, from {self.timestamp}'
diff --git a/app/main.py b/app/main.py
@@ -10,7 +10,7 @@
 from app.graph import chart
 from app.machine import Machine
 
-SPRINT = 0
+SPRINT = 3
 APP = Flask(__name__)
 
 
@@ -28,7 +28,7 @@ def home():
 def data():
     if SPRINT < 1:
         return render_template("data.html")
-    db = Database()
+    db = Database('Monsters')
     return render_template(
         "data.html",
         count=db.count(),
@@ -40,7 +40,7 @@ def data():
 def view():
     if SPRINT < 2:
         return render_template("view.html")
-    db = Database()
+    db = Database('Monsters')
     options = ["Level", "Health", "Energy", "Sanity", "Rarity"]
     x_axis = request.values.get("x_axis") or options[1]
     y_axis = request.values.get("y_axis") or options[2]
@@ -66,7 +66,7 @@ def view():
 def model():
     if SPRINT < 3:
         return render_template("model.html")
-    db = Database()
+    db = Database('Monsters')
     options = ["Level", "Health", "Energy", "Sanity", "Rarity"]
     filepath = os.path.join("app", "model.joblib")
     if not os.path.exists(filepath):

diff --git a/install.sh b/install.sh
diff --git a/run.sh b/run.sh
@@ -1 +1 @@
-python3 -m gunicorn app.main:APP
+python3 -m gunicorn app.main:APP --reload