AI-Commandos · ErikTromp · Dec 23, 2024 · Nov 21, 2024 · Nov 21, 2024 · Nov 26, 2024
diff --git a/README.md b/README.md
diff --git a/neo4j/.env.template b/neo4j/.env.template
@@ -0,0 +1,4 @@
+ngrok_token='Your_ngrok_token'
+neo4j_location='your_neo4j_location_uri'
+neo4j_user='your_neo4j_user_name'
+neo4j_password='your_neo4j_password_here'
diff --git a/neo4j/requirements.txt b/neo4j/requirements.txt
@@ -0,0 +1,3 @@
+neo4j
+flask 
+pyngrok 
diff --git a/neo4j/server.py b/neo4j/server.py
@@ -0,0 +1,196 @@
+import csv
+from neo4j import GraphDatabase
+from flask import Flask, jsonify, request
+from pyngrok import ngrok
+import os
+
+# Define the Graph_whisperer class to interact with Neo4j
+class Graph_whisperer:
+    """Small wrapper around a Neo4j driver that backs the Flask routes in this file.
+
+    Writes are executed through ``session.execute_write``; schema inspection
+    and ad-hoc queries use ``session.run`` directly.
+    """
+
+    # Cypher executed for every CSV row: link a Quote node to its Topic node.
+    _QUOTE_TOPIC_MERGE = (
+        "MERGE (q:Quote {text: $quoteText}) "
+        "MERGE (t:Topic {name: $topicName}) "
+        "MERGE (q)-[:IS_PART_OF]->(t)"
+    )
+
+    def __init__(self, uri, user, password):
+        """Create a driver for ``uri`` authenticated with ``user`` / ``password``."""
+        self.driver = GraphDatabase.driver(uri, auth=(user, password))
+
+    def close(self):
+        """Close the underlying driver."""
+        self.driver.close()
+
+    def create_instance(self, payload):
+        """
+        Run every statement in ``payload`` inside a single write transaction.
+
+        Args:
+            payload (list): Dicts with a ``"query"`` Cypher string and an
+                optional ``"parameters"`` mapping.
+
+        Returns:
+            dict | None: The last entry of ``payload``, or ``None`` if it is empty.
+        """
+        with self.driver.session() as session:
+            return session.execute_write(self._create_instance, payload)
+
+    def add_document(self, payload):
+        """
+        Import a CSV file into Neo4j inside a single write transaction.
+
+        Args:
+            payload (str): Path of the CSV file, forwarded to ``_add_document``.
+
+        Returns:
+            dict: ``{"message": ...}`` on success or ``{"error": ...}`` on failure.
+        """
+        with self.driver.session() as session:
+            return session.execute_write(self._add_document, payload)
+
+    def get_meta_schema(self):
+        """
+        Retrieve detailed schema information, including node labels, properties, and relationship types.
+
+        Returns:
+            dict: ``{"nodes": {label: [property, ...]}, "relationships": {type: [property, ...]}}``
+            with every property list sorted alphabetically.
+        """
+        with self.driver.session() as session:
+            # Retrieve node labels and their properties
+            nodes_query = """
+            MATCH (n)
+            UNWIND labels(n) AS label
+            RETURN label, collect(DISTINCT keys(n)) AS properties
+            """
+            nodes = self._collect_properties(session.run(nodes_query), "label")
+
+            # Retrieve relationship types and their properties
+            rels_query = """
+            MATCH ()-[r]->()
+            RETURN type(r) AS type, collect(DISTINCT keys(r)) AS properties
+            """
+            relationships = self._collect_properties(session.run(rels_query), "type")
+
+            return {"nodes": nodes, "relationships": relationships}
+
+    def run_query(self, query, parameters=None):
+        """
+        Executes a Cypher query against the Neo4j database.
+
+        The query text is sent to the database unchanged, so callers that
+        accept queries from untrusted clients must restrict access themselves.
+        Pass user-supplied values through ``parameters`` instead of formatting
+        them into ``query``.
+
+        Args:
+            query (str): The Cypher query to execute.
+            parameters (dict, optional): Values for ``$name`` placeholders in ``query``.
+
+        Returns:
+            list: A list of query results, where each result is a dictionary.
+        """
+        with self.driver.session() as session:
+            result = session.run(query, parameters)
+            return [record.data() for record in result]
+
+    @staticmethod
+    def _collect_properties(records, name_field):
+        """Map each record's ``name_field`` to the sorted union of its property-key lists."""
+        collected = {}
+        for record in records:
+            merged = set()
+            # "properties" is a list of key lists (one per distinct key set).
+            for key_list in record["properties"]:
+                merged.update(key_list)
+            # Sorted so the reported schema is stable between calls.
+            collected[record[name_field]] = sorted(merged)
+        return collected
+
+    @staticmethod
+    def _create_instance(tx, payload):
+        """
+        Transaction function: run each payload entry's query on ``tx``.
+
+        Args:
+            tx: Transaction object supplied by ``session.execute_write``.
+            payload (list): Dicts with ``"query"`` and optional ``"parameters"``.
+
+        Returns:
+            dict | None: The last entry that was executed, or ``None`` for an
+            empty payload (previously this raised ``UnboundLocalError``).
+        """
+        last_instance = None
+        for instance in payload:
+            tx.run(instance["query"], instance.get("parameters"))
+            last_instance = instance
+        return last_instance
+
+    @staticmethod
+    def _add_document(tx, csv_file_path):
+        """
+        Loads a CSV file into Neo4j by constructing and executing queries for each row.
+
+        This is a transaction function: ``session.execute_write`` passes the
+        transaction as the first argument, so it is named ``tx`` (not ``self``).
+
+        Args:
+            tx: Transaction object supplied by ``session.execute_write``.
+            csv_file_path (str): The path to the CSV file to be loaded. The
+                file is expected to have ``quoteText`` and ``topicName`` columns.
+
+        Returns:
+            dict: A summary of the import process, including the number of records processed,
+            or ``{"error": ...}`` if reading the file or running a query failed.
+        """
+        try:
+            # newline="" lets the csv module handle embedded line breaks itself.
+            with open(csv_file_path, mode="r", encoding="utf-8", newline="") as csvfile:
+                payloads = [
+                    {
+                        "query": Graph_whisperer._QUOTE_TOPIC_MERGE,
+                        "parameters": {
+                            "quoteText": row.get("quoteText"),
+                            "topicName": row.get("topicName"),
+                        },
+                    }
+                    for row in csv.DictReader(csvfile)
+                ]
+            # Execute all queries in the payload on the current transaction
+            Graph_whisperer._create_instance(tx, payloads)
+            return {
+                "message": f"Successfully loaded {len(payloads)} records into Neo4j."
+            }
+        except Exception as e:
+            # Callers expect failures to be reported as a dict rather than raised.
+            return {"error": str(e)}
+
+
+# Flask application object that all routes in this file register on
+app = Flask(__name__)
+
+
+# Connection settings are read from the process environment; nothing in this
+# file loads a .env file, so the variables must already be exported.
+neo4j_location, neo4j_user, neo4j_password = (
+    os.getenv(setting)
+    for setting in ("neo4j_location", "neo4j_user", "neo4j_password")
+)
+# Shared Neo4j wrapper used by every route
+neo4j_db = Graph_whisperer(neo4j_location, neo4j_user, neo4j_password)
+
+
+@app.route("/add_instances", methods=["POST"])
+def add_instance():
+    """Insert the statements from the JSON request body into Neo4j.
+
+    The parsed body is passed unchanged to ``neo4j_db.create_instance``.
+    Responds with ``{"last inserted instance": <create_instance result>}``,
+    or ``{"error": ...}`` and HTTP 500 if anything raises.
+    """
+    statements = request.get_json()
+    try:
+        # Hand the request payload straight to the Neo4j wrapper
+        last_instance = neo4j_db.create_instance(statements)
+        response_body = {"last inserted instance": last_instance}
+        return jsonify(response_body)
+    except Exception as exc:
+        return jsonify({"error": str(exc)}), 500
+
+
+@app.route("/add_csv", methods=["POST"])
+def add_csv():
+    """Import a CSV file into Neo4j.
+
+    The parsed JSON request body is passed unchanged to
+    ``neo4j_db.add_document``.
+    NOTE(review): the import helper documents its argument as a CSV file
+    path string, so clients presumably post a bare JSON string - confirm.
+    NOTE(security): the path is opened on the server, so any client that can
+    reach this endpoint chooses which server-side file is read.
+
+    Responds with ``{"last inserted instance": <import summary>}`` on
+    success, or ``{"error": ...}`` and HTTP 500 on failure.
+    """
+    json_data = request.get_json()
+    try:
+        insert_result = neo4j_db.add_document(json_data)
+        # add_document reports failures as {"error": ...} instead of raising;
+        # surface those as a server error rather than a 200 "success".
+        if isinstance(insert_result, dict) and "error" in insert_result:
+            return jsonify(insert_result), 500
+        return jsonify({"last inserted instance": insert_result})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+@app.route("/close_db")
+def close_db():
+    """Close the shared Neo4j driver and confirm with a JSON message.
+
+    Responds with ``{"error": ...}`` and HTTP 500 if closing raises.
+    """
+    try:
+        neo4j_db.close()
+        confirmation = {"message": "Database connection closed."}
+        return jsonify(confirmation)
+    except Exception as exc:
+        failure = {"error": str(exc)}
+        return jsonify(failure), 500
+
+@app.route("/schema", methods=["GET"])
+def get_meta_schema():
+    """Return the schema reported by ``neo4j_db.get_meta_schema`` as JSON.
+
+    The schema (or the failure) is written to the Flask app logger; failures
+    produce ``{"error": ...}`` with HTTP 500.
+    """
+    try:
+        meta_schema = neo4j_db.get_meta_schema()
+        app.logger.info(f"Retrieved schema: {meta_schema}")
+        return jsonify(meta_schema)
+    except Exception as exc:
+        app.logger.error(f"Error retrieving schema: {exc}")
+        failure = {"error": str(exc)}
+        return jsonify(failure), 500
+
+@app.route("/run_query", methods=["POST"])
+def run_query():
+    """Execute the Cypher query given in the JSON request body.
+
+    Expects a JSON object with a non-empty ``"query"`` entry.
+    NOTE(security): the query is executed unchanged through
+    ``neo4j_db.run_query``, so any client that can reach this endpoint can
+    run arbitrary Cypher with the configured database user's privileges.
+
+    Responds with ``{"results": [...]}`` on success, HTTP 400 for a missing
+    or malformed request body, and HTTP 500 if executing the query fails.
+    """
+    # silent=True yields None instead of raising on a non-JSON body, so bad
+    # client input is reported as 400 rather than falling through to 500.
+    body = request.get_json(silent=True)
+    if not isinstance(body, dict):
+        return jsonify({"error": "Request body must be a JSON object"}), 400
+
+    # Extract the Cypher query from the request body
+    query = body.get("query")
+    if not query:
+        return jsonify({"error": "No query provided"}), 400
+
+    try:
+        # Execute the query
+        results = neo4j_db.run_query(query)
+        return jsonify({"results": results})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+
+
+if __name__ == "__main__":
+    # One port value shared by the ngrok tunnel and Flask so they cannot drift apart
+    port = 4000
+
+    # Register the ngrok auth token from the environment (skipped when it is
+    # not set), then expose the local Flask port through a public tunnel
+    ngrok_token = os.getenv('ngrok_token')
+    if ngrok_token:
+        ngrok.set_auth_token(ngrok_token)
+    public_url = ngrok.connect(port)
+    print(f"ngrok tunnel available at: {public_url}")
+
+    # Start Flask app, listening on all interfaces
+    app.run(host="0.0.0.0", port=port)
diff --git a/server/.env.template b/server/.env.template
@@ -23,18 +23,22 @@ The source document that you need to score is the following:
 {context}"
 
 data_directory='data'
-file_types="pdf,json,docx,pptx,xslx,csv,xml"
+file_types="pdf,json,docx,pptx,xslx,csv,xml,txt"
 json_schema="."
 json_text_content=False
 xml_xpath="//"
 
+max_document_limit=10
+neo4j_location='URL_to_neo4j_server'
 vector_store=milvus
 vector_store_uri='data.db'
 vector_store_collection=ragmeup_documents
 vector_store_sparse_uri=bm25_db.pickle
 vector_store_initial_load=True
 vector_store_k=10
 document_chunks_pickle=rag_chunks.pickle
+file_upload_using_llm=True
+dynamic_neo4j_schema=False
 rerank=True
 rerank_k=3
 rerank_model=flashrank
@@ -76,8 +80,9 @@ number_of_chunks=None
 
 use_openai=False
 openai_model_name='gpt-4o-mini'
-use_gemini=False
+use_gemini=True
 gemini_model_name='gemini-pro'
+GOOGLE_API_KEY='Your_API_key'
 use_azure=False
 use_ollama=False
 ollama_model='llama3.1'
@@ -95,4 +100,50 @@ ragas_answer_instruction="You are a digital librarian and need to answer questio
 {context}"
 ragas_answer_query="Answer the following question, never give any explanation or other output than the generated article itself:
 
-{question}"
+{question}"
+
+rag_retrieval_instruction="Instruction: You are a graph database query assistant. Based on the graph schema below, generate a Cypher query to search for the answer to the user's question. If the schema does not support the query, respond with 'None'.
+Schema:
+{schema}"
+retrieval_few_shot="Few-shot examples:
+Example 1:
+User query: \'What topics are available?\'
+Output: MATCH (t:Topic) RETURN t.name
+
+Example 2:
+User query: \'What is the size of an elephant?\'
+Output: None"
+rag_retrieval_question="The user question is:
+
+{question}
+
+Please generate a Cypher query to answer it, or return None if it does not fit the schema"
+
+neo4j_insert_instruction= "You are a Neo4j database assistant. Your task is to generate Cypher queries for inserting data into the Neo4j graph database. Use only the nodes, properties, and relationships specified in the provided schema. Ensure that all generated queries are valid Cypher and JSON format, and conform to the schema. Make a maximum of 9 additions. If the input data cannot be mapped to the schema, return 'None' and do not generate any invalid query. "
+neo4j_insert_schema= "Instruction: You are tasked with generating Cypher queries to insert data into the Neo4j graph database. Use only the nodes, properties, and relationships defined in the following schema. Ensure the queries are valid and align with the schema. If the input data cannot be mapped to the schema, return 'None'.
+
+Schema:
+{schema}
+
+Input data:
+{data}
+
+Output: "
+
+
+neo4j_insert_data_only= "Instruction: You are tasked with generating Cypher queries to insert data into the Neo4j graph database. Use only the nodes, properties, and relationships defined in the following schema. Ensure the queries are valid and align with the schema. If the input data cannot be mapped to the schema, return 'None'.
+
+Schema:
+Nodes:
+- Topic: name
+- Fact: name
+Relationships:
+- IS_PART_OF: No properties
+
+Input data:
+{data}
+
+Output: "
+
+neo4j_insert_few_shot="Few-shot examples:Example 1: Schema:  Nodes: - Quote: text - Topic: name Relationships:  - IS_PART_OF: None  Input data:Course block 4 Pitching Tools you need (all available on Canvas > Files): • A series of short videos on pitching by Nathalie Mangelaars (links available on Canvas) • Pitch Toolkit by Pitch Academy • Example Pitch Deck by Horseplay Ventures Expected deliverables: • Pitch script • Slide deck • A Minimum Viable Product (MVP) (also see here and here) Notes: • You are strongly encouraged to already draft a pitch script and create a preliminary slide deck before the pitch training takes place (i.e., on Wednesday November 22). If you come prepared, then Cyrille van Hoof and Nathalie Mangelaars can focus on important opportunities for improvement instead of starting from scratch, which saves us valuable time. In case you do so, include both your draft and final versions to your portfolio. Output:[{\"query\": \"MERGE (q:Quote {text: $quoteText}) MERGE (t:Topic {name: $topicName}) MERGE (q)-[:IS_PART_OF]->(t)\",\"parameters\": { \"quoteText\": \"Pitch Toolkit by Pitch Academy\",\"topicName\": \"Needed tools\"}},{\"query\": \"MERGE (q:Quote {text: $quoteText}) MERGE (t:Topic {name: $topicName}) MERGE (q)-[:IS_PART_OF]->(t)\",\"parameters\": {\"quoteText\": \"A Minimum Viable Product (MVP) (also see here and here) \",\"topicName\": \"Deliverables\"}}]"
+
-Original file line number
+Diff line change
@@ -0,0 +1,3 @@
+    neo4j
+    flask
+    pyngrok