yale-swe · plin349 · Mar 31, 2024 · Mar 28, 2024 · Mar 28, 2024 · Mar 31, 2024
diff --git a/backend/README.md b/backend/README.md
@@ -0,0 +1,55 @@
+# Flask Backend
+
+This is the backend for the project, built using Flask.
+
+## Installation
+
+1. Install the required dependencies:
+
+    ```bash
+    conda env create -f environment.yml
+    conda activate bluebook
+    ```
+
+2. Your IP address must be on the MongoDB cluster's allowlist before you can query the database.
+3. You will also need to create a `.env` file that contains your MongoDB connection string and OpenAI API key:
+
+```
+MONGO_URI="mongodb+srv://xxx"
+OPENAI_API_KEY="sk-xxx"
+```
+
+## Usage
+
+1. Start the Flask server:
+
+    ```bash
+    python app.py
+    ```
+
+2. The server will start running on `http://localhost:8000`.
+
+3. Use your favorite API client (e.g., Postman) to send a POST request to `http://localhost:8000/api/chat`.
+   The JSON payload carries the conversation as a `message` array of `role`/`content` objects:
+
+    ```json
+    {
+        "message": [{"role": "user", "content": "Tell me some courses about personal finance"}]
+    }
+    ```
+
+    You should receive a response with the recommended courses like this (each course object also includes an `areas` field, omitted here):
+
+    ```json
+    {
+        "courses": [
+            {
+                "course_code": "ECON 436",
+                "description": "How much should I be saving at age 35? How much of my portfolio should be invested in stocks at age 50? Which mortgage should I choose, and when should I refinance it? How much can I afford to spend per year in retirement? This course covers prescriptive models of personal saving, asset allocation, borrowing, and spending. The course is designed to answer questions facing anybody who manages their own money or is a manager in an organization that is trying to help clients manage their money.",
+                "title": "Personal Finance"
+            },
+            ...
+        ],
+        "response": "To learn more about personal finance, you can start by taking courses or workshops that focus on financial management, budgeting, investing, and retirement planning. Some universities and educational platforms offer online courses on personal finance, such as ECON 436: Personal Finance and ECON 361: Corporate Finance. Additionally, you can explore resources like books, podcasts, and websites dedicated to personal finance advice and tips. It may also be helpful to consult with a financial advisor or planner for personalized guidance on managing your finances effectively."
+    }
+    ```
diff --git a/backend/__pycache__/lib.cpython-311.pyc b/backend/__pycache__/lib.cpython-311.pyc
diff --git a/backend/__pycache__/lib.cpython-312.pyc b/backend/__pycache__/lib.cpython-312.pyc
diff --git a/backend/app.py b/backend/app.py
@@ -1,100 +1,148 @@
 from flask import Flask, request, jsonify
+from flask_cors import CORS
 import os
 from dotenv import load_dotenv
 from lib import chat_completion_request, create_embedding
 import json
-
 from pymongo.mongo_client import MongoClient
 
-uri = "mongodb+srv://bluebookairoot:<password>@bluebookcluster.0hf4pzi.mongodb.net/?retryWrites=true&w=majority&appName=BluebookCluster"
+COURSE_QUERY_LIMIT = 5
+
+load_dotenv()
 
-# connect to the MongoDB cluster
+# database initialization
+uri = os.getenv('MONGO_URI')
 client = MongoClient(uri)
-db = client['bluebookai']
-collection = db['course-info']
+db = client['course_db']
+collection = db['parsed_courses']
 
+# mongo connection check: ping the deployment; a failure is printed, not raised
 try:
     client.admin.command('ping')
     print("Pinged your deployment. You successfully connected to MongoDB!")
 except Exception as e:
     print(e)
 
+# flask
 app = Flask(__name__)
+CORS(app)
 
-load_dotenv()
-
-@app.route('/chat', methods=['POST'])
+@app.route('/api/chat', methods=['POST'])
 def chat():
+
     data = request.get_json()
-    if 'message' not in data:
-        return jsonify({"error": "Missing 'messages' in request body"}), 400
-
     user_messages = data['message']
-    response = chat_completion_request(messages=user_messages)
-    message = response.choices[0].message
-    print(message)
-    # if message.tool_calls is None:
-    #     return 'success'
-    #     args = json.loads(message.tool_calls[0].function.arguments)
-    #     query_vector = create_embedding(user_messages[-1]['content'])
-    #     database_response = collection.aggregate([
-    #         {
-    #         '$vectorSearch': {
-    #             'index': 'course-rating-index',
-    #             'path': 'embedding',
-    #             'filter': {
-    #                 'rating': {
-    #                     args['operator']: args['rating']
-    #                 }
-    #             },
-    #             'queryVector': query_vector,
-    #             'numCandidates': 5,
-    #             'limit': 5
-    #         }
-    #         }
-    #     ])
-    #     # print(database_response)
-
-    #     top_class = list(database_response)[0]
-    #     json_response = {
-    #         'title': top_class['title'],
-    #         'rating': top_class['rating'],
-    #     }
-    #     return jsonify(json_response)
-
-    # "{\"operator\":\"$gt\",\"rating\":4}"
+
+    # remove id and rename the 'ai' role to 'assistant' before sending to OpenAI
+    for message in user_messages:
+        if 'id' in message:
+            del message['id']
+        if message['role'] == 'ai':
+            message['role'] = 'assistant'
+
+    print(user_messages)
+
+    # for safety check, not to be included in final response
+    user_messages_safety_check = user_messages.copy()
+    user_messages_safety_check.append({
+        'role': 'user',
+        'content': 'Am I asking for help with courses or academics? Answer "yes" or "no".'
+    })
+
+    response_safety_check = chat_completion_request(messages=user_messages_safety_check)
+    response_safety_check = response_safety_check.choices[0].message.content
 
-    # ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_Ub07GeA6kaC2OZ8b8KlVmtZz', function=Function(arguments='{\n  "subject_code": "CPSC",\n  "rating": 3.5,\n  "comparison_operator_rating": "$gte",\n  "workload": 1,\n  "comparison_operator_workload": "$lte"\n}', name='CourseFilter'), type='function')])
+    if 'no' in response_safety_check.lower():
+        response = 'I am sorry, but I can only assist with questions related to courses or academics at this time.'
+        json_response = {
+            'response': response,
+            'courses': []
+        }
+        print('failed safety check')
+        return jsonify(json_response)
+    else:
+        print('passed safety check')
+
+    # prepend the system message if the conversation does not already start with one
+    if user_messages[0]['role'] != 'system':
+        user_messages.insert(0, {
+            'role': 'system',
+            'content': 'Your name is Eli. You are a helpful assistant for Yale University students to ask questions about courses and academics.'
+        })
+
+    # checking if database query is necessary
+    user_messages_database_relevancy_check = user_messages.copy()
+    user_messages_database_relevancy_check.append({
+        'role': 'user',
+        'content': 'Will you be able to better answer my questions with information about specific courses related to the user query at Yale University? You should answer "yes" if you need information about courses at Yale that you don\'t have, otherwise you should answer "no".'
+    })
+
+    user_messages_database_relevancy_check = chat_completion_request(messages=user_messages_database_relevancy_check)
+    response_user_messages_database_relevancy_check = user_messages_database_relevancy_check.choices[0].message.content
 
+    if 'no' in response_user_messages_database_relevancy_check.lower():
+        response = chat_completion_request(messages=user_messages)
+        response = response.choices[0].message.content
+        json_response = {
+            'response': response,
+            'courses': []
+        }
+        print('no need to query database for course information')
+        return jsonify(json_response)
+    else:
+        print('need to query database for course information')
+
+    # create embedding for user message to query against vector index
     query_vector = create_embedding(user_messages[-1]['content'])
 
-    print(user_messages[-1])
     database_response = collection.aggregate([
-            {
+        {
             '$vectorSearch': {
-                'index': 'course-rating-index',
+                'index': 'parsed_courses_title_description_index',
                 'path': 'embedding',
+                # 'filter': {
+                #     'rating': {
+                #         args['operator']: args['rating']
+                #     }
+                # },
                 'queryVector': query_vector,
-                'numCandidates': 5,
-                'limit': 5
-            }
+                'numCandidates': 30,
+                'limit': COURSE_QUERY_LIMIT
             }
-        ])
+        }
+    ])
+
+    database_response = list(database_response)        
+
+    recommended_courses = [
+        {
+            'course_code': course['course_code'],
+            'title': course['title'],
+            'description': course['description'],
+            'areas': course['areas']
+        } for course in database_response
+    ]
+
+    recommendation_prompt = f'Here are some courses that might be relevant to the user request:\n\n'
+    for course in recommended_courses:
+        recommendation_prompt += f'{course["course_code"]}: {course["title"]}\n{course["description"]}\n\n'
+    recommendation_prompt += 'Provide a response to the user. Incorporate specific course information if it is relevant to the user request.'
+
+    user_messages.append({
+        'role': 'system',
+        'content': recommendation_prompt
+    })
+
+    response = chat_completion_request(messages=user_messages)
+    response = response.choices[0].message.content
+
 
-    classes = list(database_response)
-    # top_class = classes[0]
-    print([c['title'] for c in classes])
-    top_class = classes[0]
     json_response = {
-        # 'message': [{
-        #     'role': response.choices[0].message.role,
-        #     'content': response.choices[0].message.content,
-        # }]
-        'title': top_class['title'],
-        # 'rating': top_class['rating'],
+        'response': response,
+        'courses': recommended_courses
     }
 
     return jsonify(json_response)
 
 if __name__ == '__main__':
-    app.run(debug=True)
+    app.run(debug=True, port=8000)