migration for link suggestion and less verbose alembic-check (#258)

* migration for link suggestion and less verbose alembic-check
* removing Dapp model and related bits
* removing Dapp class, alembic file, updated fetch_app_info
* improved link suggestion prompt
yieldprotocol · Sep 28, 2023 · 3c7ee70 · 3c7ee70
1 parent cc84ec3
commit 3c7ee70
Show file tree

Hide file tree

Showing 6 changed files with 24 additions and 70 deletions.
diff --git a/.github/workflows/alembic-check.yaml b/.github/workflows/alembic-check.yaml
@@ -73,9 +73,9 @@ jobs:
       run: |
         GCP_SSH_CMD="gcloud compute ssh cacti-bastion-server --zone us-east1-b --ssh-key-file /tmp/gcp/google_compute_engine --quiet --tunnel-through-iap --ssh-flag"
         if [[ ${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}} == 'master' ]]; then
-          $GCP_SSH_CMD '-vvv -fN -L 5432:${{ secrets.PROD_CHATDB_INTERNAL_IP }}'
+          $GCP_SSH_CMD '-fN -L 5432:${{ secrets.PROD_CHATDB_INTERNAL_IP }}'
         else
-          $GCP_SSH_CMD '-vvv -fN -L 5432:${{ secrets.DEV_CHATDB_INTERNAL_IP }}'
+          $GCP_SSH_CMD '-fN -L 5432:${{ secrets.DEV_CHATDB_INTERNAL_IP }}'
         fi
 
         cd backend

diff --git a/index/dapps.py b/index/dapps.py
@@ -68,7 +68,7 @@ def backfill():
         with open('./knowledge_base/dapps_ranked_unique.json') as f: 
             dapp_list = json.load(f)
 
-        documents = [d.pop("description") for d in dapp_list]
+        documents = [d.get("description") for d in dapp_list]
 
         metadatas = dapp_list
 

diff --git a/knowledge_base/widgets.yaml b/knowledge_base/widgets.yaml
@@ -427,11 +427,14 @@
     type: object
   return_value_description: ''
 - _name_: fetch_app_info
-  description: Used when we need to handle common questions and answers about the
-    chat assistant app, what it can do, how to interact with it, at a high-level.
-    Only useful for questions about the chat app experience. It does not know specific
-    information about the web3 ecosystem, of tokens or NFTs or contracts, or access
-    to live data and APIs.
+  description: This function is invoked exclusively to address questions pertaining to the capabilities, 
+    features, and interactions related to the chat assistant application itself. Users may 
+    inquire about the chat app's high-level functionalities, its operational attributes, or 
+    best practices for engagement. However, this function should NOT be triggered for queries 
+    related to specific details of the web3 ecosystem, such as tokens, NFTs, contracts, or any 
+    live data and API access. Ensure that the function responds solely to inquiries about 
+    the chat assistant app's core capabilities and user experience.
+
   parameters:
     properties:
       query:

diff --git a/scrape/dapp_scraper.py b/scrape/dapp_scraper.py
@@ -8,11 +8,6 @@
 import json
 from typing import List
 
-from scrape.models import (
-    db_session,
-    Dapp
-)
-
 BROWSERLESS_API_KEY = os.getenv('BROWSERLESS_API_KEY', '')
 SCRAPE_API_URL = f'https://chrome.browserless.io/scrape?token={BROWSERLESS_API_KEY}'
 
@@ -172,44 +167,6 @@ def clean_payload_data(original_data):
 
     return reduced_data
 
-
-def load_data_from_json_to_db(session=db_session, json_path=dapps_json_path):
-    print("Loading data from JSON to DB")
-    # 1. Setup
-    # If the table doesn't exist, create it
-    # Base.metadata.create_all(session.bind) Dont need this - jacob b
-
-    # 2. Data Loading
-
-    # Read the JSON data
-    with open(json_path, "r") as file:
-        dapps_data = json.load(file)
-
-    # Loop through the JSON data and insert each entry into the database
-    for dapp in dapps_data:
-        print(f'adding {dapp["name"]}')
-        dapp_instance = Dapp(
-            description=dapp["description"],
-            name=dapp["name"],
-            url=dapp["url"],
-            twitter_handle=dapp["twitterHandle"],
-            blog_links=dapp["blogLinks"],
-            discord=dapp["discord"],
-            facebook=dapp["facebook"],
-            instagram=dapp["instagram"],
-            telegram=dapp["telegram"]
-        )
-        session.add(dapp_instance)
-
-    # 3. Finalization
-
-    # Commit the transactions
-    session.commit()
-
-    print("Finished loading data from JSON to DB")
-
-
-
 if __name__ == "__main__":
 
     # create an ArgumentParser instance

diff --git a/scrape/models.py b/scrape/models.py
@@ -36,18 +36,3 @@ class ScrapedUrl(Base, Timestamp):  # type: ignore
 
     Index('scraped_url_lookup', url, unique=True)
 
-class Dapp(Base, Timestamp):
-    __tablename__ = 'dapp'
-
-    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
-    description = Column(TEXT, nullable=False)
-    name = Column(String(255), nullable=False, unique=True)
-    url = Column(String(255), nullable=False)
-    twitter_handle = Column(String(255), nullable=True)
-    blog_links = Column(ARRAY(String(255)), nullable=True)
-    discord = Column(String(255), nullable=True)
-    facebook = Column(String(255), nullable=True)
-    instagram = Column(String(255), nullable=True)
-    telegram = Column(String(255), nullable=True)
-
-    Index('dapp_by_name', 'name', unique=True)
diff --git a/tools/index_link_suggestion.py b/tools/index_link_suggestion.py
@@ -12,13 +12,22 @@
 import utils.timing as timing
 
 
-TEMPLATE = '''You are a web3 assistant. You help users with answering web3-related questions. Your responses should sound natural, helpful, cheerful, and engaging, and you should use easy to understand language with explanations for jargon.
+TEMPLATE = '''**Immediate Action & Review Needed**: Every time you mention specific platforms, tools, technologies, or any topic deserving of a URL, you **must** incorporate it into the text using markdown-style linking. There are two ways to do this:
 
-Information to help complete your task is below. Only use the information below to answer the question. If you don't know the answer, just say that you don't know. Don't try to make up an answer.
+1. Seamlessly embed the URL into descriptive text.
+2. If you need to specify the exact URL for clarity, make sure it is still formatted in markdown.
 
-When mentioning specific platforms, tools, or technologies, it's crucial to provide a relevant URL. Ensure this URL is seamlessly integrated into the content of the answer using markdown formatting. The link should feel like a natural part of the sentence.
+Here's your blueprint:
 
-For example: One of the leading platforms in the web3 space is [Ethereum](https://www.ethereum.org/), which offers a decentralized platform for building smart contracts and dapps."
+**Correct - Embedded**: Learn more about [Ethereum](https://www.ethereum.org/).
+**Correct - Explicit**: Visit the Ethereum website at [https://www.ethereum.org/](https://www.ethereum.org/).
+**Incorrect**: Learn more at https://www.ethereum.org/ or "Visit the Ethereum website here: https://www.ethereum.org/".
+
+Being a web3 assistant, aim to deliver answers that are clear, engaging, and most importantly, user-friendly. Web3 topics can be intricate, so your goal is to be the bridge to understanding. Always simplify jargon and ensure URLs are user-friendly and clickable.
+
+Before finalizing any response, stop and verify: "Did I format all URLs in markdown?"
+
+If you can't provide an answer, it's perfectly fine to admit it. But regardless of the content of your response, ensure all URLs are **formatted correctly**.
 ---
 {task_info}
 ---