Skip to content

Commit

Permalink
Merge pull request #193 from nulib/deploy/staging
Browse files Browse the repository at this point in the history
Deploy to production
  • Loading branch information
mbklein authored Mar 14, 2024
2 parents e29ddc1 + b0f8d86 commit 0c7f290
Show file tree
Hide file tree
Showing 16 changed files with 194 additions and 62 deletions.
18 changes: 17 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ The `env.json` file contains environment variable values for the lambda function
Some of the values can be found as follows:

- `API_TOKEN_SECRET` - already defined; value has to exist but doesn't matter in dev mode
- `ELASTICSEARCH_ENDPOINT` - run the following command:
- `OPENSEARCH_ENDPOINT` - run the following command:
```
aws secretsmanager get-secret-value \
--secret-id dev-environment/config/meadow --query SecretString \
Expand Down Expand Up @@ -132,6 +132,22 @@ bin/start-with-step
aws stepfunctions create-state-machine --endpoint http://localhost:8083 --definition file://state_machines/av_download.json --name "hlsStitcherStepFunction" --role-arn arn:aws:iam::012345678901:role/DummyRole
```

## Deploying a development branch

```
# sam sync --watch hot-deploys your changes as you save them. If you don't want that, use `sam sync` (without --watch) or `sam deploy` instead.
export STACK_NAME=dc-api-yourdevprefix
export CONFIG_ENV=staging
sam sync --watch --stack-name $STACK_NAME \
--config-env $CONFIG_ENV \
--config-file ./samconfig.toml \
--parameter-overrides $(while IFS='=' read -r key value; do params+=" $key=$value"; done < ./$CONFIG_ENV.parameters && echo "$params CustomDomainHost=$STACK_NAME")
```

This will give you API routes like: `https://dc-api-yourdevprefix.rdc-staging.library.northwestern.edu/chat-endpoint`

## Deploying the API manually

- Symlink the `*.parameters` file you need from `tfvars/dc-api/` to the application root
Expand Down
3 changes: 1 addition & 2 deletions chat/src/event_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
K_VALUE = 5
MAX_K = 100
TEMPERATURE = 0.2
TEXT_KEY = "title"
TEXT_KEY = "id"
VERSION = "2023-07-01-preview"

@dataclass
Expand Down Expand Up @@ -63,7 +63,6 @@ def __post_init__(self):
self.attributes = self._get_attributes()
self.azure_endpoint = self._get_azure_endpoint()
self.azure_resource_name = self._get_azure_resource_name()
self.azure_endpoint = self._get_azure_endpoint()
self.debug_mode = self._is_debug_mode_enabled()
self.deployment_name = self._get_deployment_name()
self.is_logged_in = self.api_token.is_logged_in()
Expand Down
88 changes: 88 additions & 0 deletions chat/src/handlers/opensearch_neural_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from langchain_core.documents import Document
from langchain_core.vectorstores import VectorStore
from opensearchpy import OpenSearch
from typing import Any, List, Optional, Tuple


class OpenSearchNeuralSearch(VectorStore):
    """Read-only OpenSearch vectorstore using server-side neural search.

    Embeddings are computed inside OpenSearch by the deployed model
    (``model_id``) via a ``neural`` query, so no embedding function is
    needed client-side. Write operations (``add_texts``/``from_texts``)
    are deliberate no-ops.
    """

    def __init__(
        self,
        client: Any = None,
        endpoint: Optional[str] = None,
        index: Optional[str] = None,
        model_id: Optional[str] = None,
        vector_field: str = "embedding",
        search_pipeline: Optional[str] = None,
        text_field: str = "id",
        **kwargs: Any,
    ):
        """Create the store.

        Args:
            client: Pre-built OpenSearch client (e.g. a mock in tests). When
                omitted, a client is constructed from ``endpoint`` over HTTPS,
                with ``**kwargs`` (auth, connection class, ...) passed through.
            endpoint: OpenSearch host name; required when ``client`` is None.
            index: Index to search.
            model_id: ID of the OpenSearch-hosted embedding model.
            vector_field: Name of the kNN vector field in the index.
            search_pipeline: Optional search pipeline (e.g. hybrid score
                normalization) applied to every query.
            text_field: ``_source`` field used as ``Document.page_content``.
        """
        # client must default to None: callers such as setup.py construct
        # this class without a client and expect one to be built here.
        self.client = client or OpenSearch(
            hosts=[{"host": endpoint, "port": "443", "use_ssl": True}], **kwargs
        )
        self.index = index
        self.model_id = model_id
        self.vector_field = vector_field
        self.search_pipeline = search_pipeline
        self.text_field = text_field

    def similarity_search(
        self, query: str, k: int = 10, subquery: Any = None, **kwargs: Any
    ) -> List[Document]:
        """Return the ``k`` docs most similar to ``query`` (scores discarded)."""
        docs_with_scores = self.similarity_search_with_score(
            query, k, subquery, **kwargs
        )
        return [doc for doc, _score in docs_with_scores]

    def similarity_search_with_score(
        self, query: str, k: int = 10, subquery: Any = None, **kwargs: Any
    ) -> List[Tuple[Document, float]]:
        """Return ``(Document, score)`` pairs for docs most similar to ``query``.

        Args:
            query: Natural-language query text, embedded server-side.
            k: Number of results to request.
            subquery: Optional extra query clause appended to the hybrid
                ``queries`` list (e.g. a keyword ``match``).
            **kwargs: Merged into the top level of the request body
                (e.g. ``_source={"excludes": [...]}``, ``size=...``).
        """
        dsl = {
            "size": k,
            "query": {
                "hybrid": {
                    "queries": [
                        {
                            "neural": {
                                self.vector_field: {
                                    "query_text": query,
                                    "model_id": self.model_id,
                                    "k": k,
                                }
                            }
                        }
                    ]
                }
            },
        }

        if subquery:
            dsl["query"]["hybrid"]["queries"].append(subquery)

        # Extra kwargs become top-level request-body keys.
        for key, value in kwargs.items():
            dsl[key] = value

        response = self.client.search(
            index=self.index,
            body=dsl,
            params=(
                {"search_pipeline": self.search_pipeline}
                if self.search_pipeline
                else None
            ),
        )

        documents_with_scores = [
            (
                Document(
                    page_content=hit["_source"][self.text_field],
                    metadata=hit["_source"],
                ),
                hit["_score"],
            )
            for hit in response["hits"]["hits"]
        ]

        return documents_with_scores

    def add_texts(self, texts: List[str], metadatas: List[dict], **kwargs: Any) -> None:
        """No-op: this vectorstore is read-only."""
        pass

    @classmethod
    def from_texts(cls, texts: List[str], metadatas: List[dict], **kwargs: Any) -> None:
        """No-op: this vectorstore is read-only."""
        pass
3 changes: 2 additions & 1 deletion chat/src/helpers/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@


def prompt_template() -> str:
return """Please answer the question based on the documents provided, and include some details about why the documents might be relevant to the particular question:
return """Please answer the question based on the documents provided, and include some details about why the documents might be relevant to the particular question. The 'title' field is the document title, and the 'source' field is a UUID that uniquely identifies each document:
Documents:
{context}
Format the answer as raw markdown. Insert links when referencing documents by title using it's UUID, as in the following guide: [title](https://dc.library.northwestern.edu/items/UUID). Example: [Judy Collins, Jackson Hole Folk Festival](https://dc.library.northwestern.edu/items/f1ca513b-7d13-4af6-ad7b-8c7ffd1d3a37). Suggest keywords searches using the following guide (example: [jazz musicians](https://dc.library.northwestern.edu/search?q=Jazz+musicians)). Offer search terms that vary in scope, highlight specific individuals or groups, or delve deeper into a topic. Remember to include as many direct links to Digital Collections searches as needed for comprehensive study. The `collection` field contains information about the collection the document belongs to. When many of the documents are from the same collection, mention the collection and link to the collection using the collection title and id: [collection['title']](https://dc.library.northwestern.edu/collections/collection['id']), for example [World War II Poster Collection](https://dc.library.northwestern.edu/collections/faf4f60e-78e0-4fbf-96ce-4ca8b4df597a):
Question:
{question}
Expand Down
3 changes: 2 additions & 1 deletion chat/src/helpers/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ def extract_prompt_value(v):

def prepare_response(config):
try:
subquery = {"match": {"all_text": {"query": config.question}}}
docs = config.opensearch.similarity_search(
config.question, k=config.k, vector_field="embedding", text_field="id"
query=config.question, k=config.k, subquery=subquery, _source={"excludes": ["embedding"]}
)
original_question = get_and_send_original_question(config, docs)
response = config.chain({"question": config.question, "input_documents": docs})
Expand Down
24 changes: 8 additions & 16 deletions chat/src/setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from content_handler import ContentHandler
from langchain_community.chat_models import AzureChatOpenAI
from langchain_community.embeddings import SagemakerEndpointEmbeddings
from langchain_community.vectorstores import OpenSearchVectorSearch
from handlers.opensearch_neural_search import OpenSearchNeuralSearch
from opensearchpy import OpenSearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth
import os
Expand All @@ -22,7 +20,7 @@ def opensearch_client(region_name=os.getenv("AWS_REGION")):
print(region_name)
session = boto3.Session(region_name=region_name)
awsauth = AWS4Auth(region=region_name, service="es", refreshable_credentials=session.get_credentials())
endpoint = os.getenv("ELASTICSEARCH_ENDPOINT")
endpoint = os.getenv("OPENSEARCH_ENDPOINT")

return OpenSearch(
hosts=[{'host': endpoint, 'port': 443}],
Expand All @@ -35,20 +33,14 @@ def opensearch_vector_store(region_name=os.getenv("AWS_REGION")):
session = boto3.Session(region_name=region_name)
awsauth = AWS4Auth(region=region_name, service="es", refreshable_credentials=session.get_credentials())

sagemaker_client = session.client(service_name="sagemaker-runtime", region_name=session.region_name)
embeddings = SagemakerEndpointEmbeddings(
client=sagemaker_client,
region_name=session.region_name,
endpoint_name=os.getenv("EMBEDDING_ENDPOINT"),
content_handler=ContentHandler()
)

docsearch = OpenSearchVectorSearch(
index_name=prefix("dc-v2-work"),
embedding_function=embeddings,
opensearch_url="https://" + os.getenv("ELASTICSEARCH_ENDPOINT"),
docsearch = OpenSearchNeuralSearch(
index=prefix("dc-v2-work"),
model_id=os.getenv("OPENSEARCH_MODEL_ID"),
endpoint=os.getenv("OPENSEARCH_ENDPOINT"),
connection_class=RequestsHttpConnection,
http_auth=awsauth,
search_pipeline=prefix("dc-v2-work-pipeline"),
text_field= "id"
)
return docsearch

Expand Down
27 changes: 11 additions & 16 deletions chat/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,22 @@ Parameters:
AzureOpenaiApiKey:
Type: String
Description: Azure OpenAI API Key
AzureOpenaiEmbeddingDeploymentId:
Type: String
Description: Azure OpenAI Embedding Deployment ID
AzureOpenaiLlmDeploymentId:
Type: String
Description: Azure OpenAI LLM Deployment ID
AzureOpenaiResourceName:
Type: String
Description: Azure OpenAI Resource Name
ElasticsearchEndpoint:
EnvironmentPrefix:
Type: String
Description: Prefix for Index names
Default: ""
OpenSearchEndpoint:
Type: String
Description: Elasticsearch URL
EmbeddingEndpoint:
Description: OpenSearch Endpoint
OpenSearchModelId:
Type: String
Description: Sagemaker Inference Endpoint
Description: OpenSearch Model ID
Resources:
ApiGwAccountConfig:
Type: "AWS::ApiGateway::Account"
Expand Down Expand Up @@ -199,11 +200,11 @@ Resources:
Variables:
API_TOKEN_SECRET: !Ref ApiTokenSecret
AZURE_OPENAI_API_KEY: !Ref AzureOpenaiApiKey
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_ID: !Ref AzureOpenaiEmbeddingDeploymentId
AZURE_OPENAI_LLM_DEPLOYMENT_ID: !Ref AzureOpenaiLlmDeploymentId
AZURE_OPENAI_RESOURCE_NAME: !Ref AzureOpenaiResourceName
ELASTICSEARCH_ENDPOINT: !Ref ElasticsearchEndpoint
EMBEDDING_ENDPOINT: !Ref EmbeddingEndpoint
ENV_PREFIX: !Ref EnvironmentPrefix
OPENSEARCH_ENDPOINT: !Ref OpenSearchEndpoint
OPENSEARCH_MODEL_ID: !Ref OpenSearchModelId
Policies:
- Statement:
- Effect: Allow
Expand All @@ -217,12 +218,6 @@ Resources:
- 'es:ESHttpGet'
- 'es:ESHttpPost'
Resource: '*'
- Statement:
- Effect: Allow
Action:
- 'sagemaker:InvokeEndpoint'
- 'sagemaker:InvokeEndpointAsync'
Resource: !Sub 'arn:aws:sagemaker:${AWS::Region}:${AWS::AccountId}:endpoint/${EmbeddingEndpoint}'
Metadata:
BuildMethod: nodejs18.x
Deployment:
Expand Down
43 changes: 43 additions & 0 deletions chat/test/handlers/test_opensearch_neural_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# ruff: noqa: E402
import sys
sys.path.append('./src')

from unittest import TestCase
from handlers.opensearch_neural_search import OpenSearchNeuralSearch
from langchain_core.documents import Document

class MockClient():
    """Stand-in for an OpenSearch client that returns a single canned hit."""

    def search(self, index, body, params):
        # One hit whose _source carries only the "id" text field.
        hit = {"_source": {"id": "test"}, "_score": 0.12345}
        return {"hits": {"hits": [hit]}}

class TestOpenSearchNeuralSearch(TestCase):
    """Exercises the read-only neural search wrapper against a mocked client."""

    def test_similarity_search(self):
        # subquery and extra kwargs are passed through; the mock ignores them.
        docs = OpenSearchNeuralSearch(
            client=MockClient(), endpoint="test", index="test", model_id="test"
        ).similarity_search(
            query="test", subquery={"_source": {"excludes": ["embedding"]}}, size=10
        )
        self.assertEqual(docs, [Document(page_content="test", metadata={"id": "test"})])

    def test_similarity_search_with_score(self):
        docs = OpenSearchNeuralSearch(
            client=MockClient(), endpoint="test", index="test", model_id="test"
        ).similarity_search_with_score(query="test")
        self.assertEqual(
            docs, [(Document(page_content="test", metadata={"id": "test"}), 0.12345)]
        )

    def test_add_texts(self):
        # add_texts is a deliberate no-op on this read-only store; it must not raise.
        try:
            OpenSearchNeuralSearch(
                client=MockClient(), endpoint="test", index="test", model_id="test"
            ).add_texts(texts=["test"], metadatas=[{"id": "test"}])
        except Exception as e:
            self.fail(f"add_texts raised an exception: {e}")

    def test_from_texts(self):
        # from_texts is also a no-op; the stray `clas="test"` kwarg is removed.
        try:
            OpenSearchNeuralSearch.from_texts(texts=["test"], metadatas=[{"id": "test"}])
        except Exception as e:
            self.fail(f"from_texts raised an exception: {e}")
2 changes: 1 addition & 1 deletion chat/test/helpers/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_token_usage(self):

expected_result = {
"answer": 6,
"prompt": 36,
"prompt": 328,
"question": 15,
"source_documents": 1,
}
Expand Down
2 changes: 1 addition & 1 deletion chat/test/test_event_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_attempt_override_without_superuser_status(self):
"question": "test question",
"ref": "test ref",
"temperature": 0.2,
"text_key": "title",
"text_key": "id",
}
self.assertEqual(actual.azure_endpoint, expected_output["azure_endpoint"])
self.assertEqual(actual.attributes, expected_output["attributes"])
Expand Down
2 changes: 1 addition & 1 deletion dev/env.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"Parameters": {
"API_TOKEN_SECRET": "DEVELOPMENT_SECRET",
"ELASTICSEARCH_ENDPOINT": "",
"OPENSEARCH_ENDPOINT": "",
"ENV_PREFIX": "",
"DC_URL": ""
}
Expand Down
10 changes: 5 additions & 5 deletions node/src/api/opensearch.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
const { HttpRequest } = require("@aws-sdk/protocol-http");
const { awsFetch } = require("../aws/fetch");
const { elasticsearchEndpoint, prefix } = require("../environment");
const { openSearchEndpoint, prefix } = require("../environment");
const Honeybadger = require("../honeybadger-setup");

async function getCollection(id, opts) {
Expand Down Expand Up @@ -65,7 +65,7 @@ function isVisible(doc, { allowPrivate, allowUnpublished }) {
}

function initRequest(path) {
const endpoint = elasticsearchEndpoint();
const endpoint = openSearchEndpoint();

return new HttpRequest({
method: "GET",
Expand All @@ -80,7 +80,7 @@ function initRequest(path) {

async function search(targets, body, optionsQuery = {}) {
Honeybadger.addBreadcrumb("Searching", { metadata: { targets, body } });
const endpoint = elasticsearchEndpoint();
const endpoint = openSearchEndpoint();

const request = new HttpRequest({
method: "POST",
Expand All @@ -98,7 +98,7 @@ async function search(targets, body, optionsQuery = {}) {
}

async function scroll(scrollId) {
const endpoint = elasticsearchEndpoint();
const endpoint = openSearchEndpoint();

const request = new HttpRequest({
method: "POST",
Expand All @@ -114,7 +114,7 @@ async function scroll(scrollId) {
}

async function deleteScroll(scrollId) {
const endpoint = elasticsearchEndpoint();
const endpoint = openSearchEndpoint();

const request = new HttpRequest({
method: "DELETE",
Expand Down
Loading

0 comments on commit 0c7f290

Please sign in to comment.