Merge pull request #4 from AutonomousResearchGroup/postgres
init postgres, half of tests are passing
lalalune authored Aug 9, 2023
2 parents d4decac + f6234ca commit 8e24899
Showing 15 changed files with 507 additions and 198 deletions.
29 changes: 19 additions & 10 deletions README.md
@@ -1,6 +1,6 @@
# agentmemory <a href="https://discord.gg/qetWd7J9De"><img style="float: right" src="https://dcbadge.vercel.app/api/server/qetWd7J9De" alt=""></a> <a href="https://github.com/AutonomousResearchGroup/agentmemory/stargazers"><img style="float: right; padding: 5px;" src="https://img.shields.io/github/stars/AutonomousResearchGroup/agentmemory?style=social" alt=""></a>

-Easy-to-use agent memory, powered by chromadb
+Easy-to-use memory for agents, document search, knowledge graphing and more.

<img src="resources/image.jpg">

@@ -50,6 +50,15 @@ You can enable debugging by passing `debug=True` to most functions, or by settin
```python
create_memory("conversation", "I can't do that, Dave.", debug=True)
```

# Deployment

```
CLIENT_TYPE='POSTGRES' | 'CHROMA'
POSTGRES_CONNECTION_STRING=postgres://postgres:MagickDatabase123!@db.xnzvmluhwpbngdufsbrd.supabase.co:6543/postgres
```

By default, agentmemory will use a local ChromaDB instance. If you want to use a Postgres instance, you can set the environment variable `CLIENT_TYPE` to `POSTGRES` and set the `POSTGRES_CONNECTION_STRING` environment variable to your Postgres connection string.

You can deploy an agentmemory-based application to the cloud in minutes using Supabase. Here is a [tutorial](https://supabase.com/blog/openai-embeddings-postgres-vector) and an explanation of [pgvector](https://supabase.com/docs/guides/database/extensions/pgvector).
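
As a minimal sketch (the connection string is a placeholder; set the variables before importing, since `agentmemory/client.py` reads them at import time):

```python
import os

# Placeholder credentials -- substitute your own connection string
os.environ["CLIENT_TYPE"] = "POSTGRES"
os.environ["POSTGRES_CONNECTION_STRING"] = "postgres://user:password@host:6543/postgres"

from agentmemory import create_memory, search_memory

create_memory("conversation", "I can't do that, Dave.")
print(search_memory("conversation", "pod bay doors"))
```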

# Basic Usage Guide

## Importing into your project
@@ -178,7 +187,7 @@ embedding (array): Embedding of the document. Defaults to None. Use if you alrea

#### `create_unique_memory(category, content, metadata={}, similarity=0.95)`

-Create a new memory only if there aren't any that are very similar to it. If a similar memory is found, the new memory's "unique" metadata field is set to "False" and it is linked to the existing memory.
+Create a new memory only if there aren't any that are very similar to it. If a similar memory is found, the new memory's "novel" metadata field is set to "False" and it is linked to the existing memory.
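
A hedged usage sketch (the second call is similar enough to the first that it should be flagged rather than stored as novel):

```python
from agentmemory import create_unique_memory

# Stored normally; nothing similar exists yet
create_unique_memory("conversation", "Open the pod bay doors, HAL.")

# Near-duplicate: its "novel" metadata field is set to "False" and it is
# linked to the memory above
create_unique_memory("conversation", "Open the pod bay doors HAL.", similarity=0.95)
```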

##### Parameters

@@ -193,7 +202,7 @@ None

## Search Memory

-#### `search_memory(category, search_text, n_results=5, min_distance=None, max_distance=None, filter_metadata=None, contains_text=None, include_embeddings=True, unique=False)`
+#### `search_memory(category, search_text, n_results=5, min_distance=None, max_distance=None, filter_metadata=None, contains_text=None, include_embeddings=True, novel=False)`

Search a collection with given query texts.

@@ -216,7 +225,7 @@
```
max_distance (float): Only include memories with this distance threshold maximum
0.1 = most memories will be excluded, 1.0 = no memories will be excluded
min_distance (float): Only include memories that are at least this distance
0.0 = No memories will be excluded, 0.9 = most memories will be excluded
-unique (bool): Whether to return only unique memories.
+novel (bool): Whether to return only novel memories.
```
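
A usage sketch with illustrative values:

```python
from agentmemory import search_memory

memories = search_memory(
    "conversation",
    "pod bay doors",
    n_results=5,
    max_distance=0.3,  # illustrative threshold; tune for your embedding space
    novel=False,
)
for memory in memories:
    print(memory["id"], memory["document"])
```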

##### Returns
@@ -263,7 +272,7 @@ dict: The retrieved memory.

## Get Memories

-#### `get_memories(category, sort_order="desc", filter_metadata=None, n_results=20, include_embeddings=True, unique=False)`
+#### `get_memories(category, sort_order="desc", filter_metadata=None, n_results=20, include_embeddings=True, novel=False)`

Retrieve a list of memories from a given category, sorted by ID, with optional filtering. `sort_order` controls whether you get from the beginning or end of the list.

@@ -278,7 +287,7 @@
```
sort_order (str): The sorting order of the memories. Can be 'asc' or 'desc'. Def
filter_metadata (dict): Filter to apply on metadata. Defaults to None.
n_results (int): The number of results to return. Defaults to 20.
include_embeddings (bool): Whether to include the embeddings. Defaults to True.
-unique (bool): Whether to return only unique memories. Defaults to False.
+novel (bool): Whether to return only novel memories. Defaults to False.
```
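
For instance (the filter values are illustrative):

```python
from agentmemory import get_memories

# The ten most recent memories from HAL, without embeddings
recent = get_memories(
    "conversation",
    sort_order="desc",
    filter_metadata={"speaker": "HAL"},
    n_results=10,
    include_embeddings=False,
)
```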

##### Returns
@@ -661,7 +670,7 @@ The `cluster` function in `agentmemory.clustering` provides an implementation of
## Function Signature

```python
-def cluster(epsilon, min_samples, category, filter_metadata=None, unique=False)
+def cluster(epsilon, min_samples, category, filter_metadata=None, novel=False)
```

## Parameters
@@ -670,7 +679,7 @@ def cluster(epsilon, min_samples, category, filter_metadata=None, unique=False)
- `min_samples` (int): The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
- `category` (str): The category of the collection to be clustered.
- `filter_metadata` (dict, optional): Additional metadata for filtering the memories before clustering. Defaults to None.
-- `unique` (bool, optional): Whether to return only unique memories. Defaults to False.
+- `novel` (bool, optional): Whether to return only novel memories. Defaults to False.

## Memory Clustering

@@ -693,9 +702,9 @@
```python
epsilon = 0.1
min_samples = 3
category = "conversation"
filter_metadata = {"speaker": "HAL"} # Optional metadata filter
-unique = False # Whether to return only unique memories
+novel = False # Whether to return only novel memories

-cluster(epsilon, min_samples, category, filter_metadata=filter_metadata, unique=unique)
+cluster(epsilon, min_samples, category, filter_metadata=filter_metadata, novel=novel)
```

## Note
6 changes: 2 additions & 4 deletions agentmemory/__init__.py
@@ -35,8 +35,7 @@
)

from .client import (
-    get_chroma_client,
-    set_chroma_client,
+    get_client,
)

from .clustering import (
@@ -62,8 +61,7 @@
    "export_memory_to_file",
    "import_json_to_memory",
    "import_file_to_memory",
-    "get_chroma_client",
-    "set_chroma_client",
+    "get_client",
    "get_persistent_directory",
    "create_event",
    "get_epoch",
76 changes: 30 additions & 46 deletions agentmemory/client.py
@@ -1,55 +1,39 @@
-import chromadb
-
-from agentmemory.helpers import debug_log
-
-storage_path = "./memory"
-client = chromadb.PersistentClient(storage_path)
-
-
-def check_client_initialized():
-    """
-    Check if the client has been initialized, and initialize it if not.
+import os
+import json

-    Example:
-    >>> check_client_initialized()
-    """
-    if get_chroma_client() is None:
-        set_chroma_client(chromadb.PersistentClient(storage_path))
-
-
-def get_chroma_client():
-    """
-    Get the chromadb client.
+import chromadb
+import psycopg2
+from sentence_transformers import SentenceTransformer
+from dotenv import load_dotenv

-    Returns:
-    chromadb.Client: Chromadb client.
+from agentmemory.postgres import PostgresClient

-    Example:
-    >>> get_chroma_client()
-    <chromadb.client.Client object at 0x7f7b9c2f0d00>
-    """
-    global client
-    global storage_path
-    if client is None:
-        client = chromadb.PersistentClient(path=storage_path)
-    return client
+load_dotenv()


-def set_chroma_client(data_storage_path=storage_path):
-    """
-    Set the chromadb client.
+DEFAULT_CLIENT_TYPE = "CHROMA"
+CLIENT_TYPE = os.environ.get("CLIENT_TYPE", DEFAULT_CLIENT_TYPE)
+STORAGE_PATH = os.environ.get("STORAGE_PATH", "./memory")
+POSTGRES_CONNECTION_STRING = os.environ.get("POSTGRES_CONNECTION_STRING")

-    Args:
-    storage_path (string): The path to the new directory.
+client = None

-    Returns:
-    None
-
-    Example:
-    >>> set_chroma_client(new_client)
-    """
+
+def get_client(client_type=None, *args, **kwargs):
    global client
-    global storage_path
-    storage_path = data_storage_path
-    client = chromadb.PersistentClient(storage_path)
-    debug_log("Set chroma client", {"storage_path": storage_path}, "system")
+    if client is not None:
+        return client
+
+    if client_type is None:
+        client_type = CLIENT_TYPE
+
+    if client_type == "POSTGRES":
+        if POSTGRES_CONNECTION_STRING is None:
+            raise EnvironmentError(
+                "Postgres connection string not set in environment variables!"
+            )
+        client = PostgresClient(POSTGRES_CONNECTION_STRING)
+    else:
+        client = chromadb.PersistentClient(path=STORAGE_PATH, *args, **kwargs)
+
+    return client
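
A quick sketch of how this entry point behaves. Note that the module-level cache means the first call decides the backend for the rest of the process:

```python
from agentmemory.client import get_client

# With CLIENT_TYPE unset this defaults to ChromaDB, persisted under
# STORAGE_PATH ("./memory" unless overridden).
client = get_client()

# Later calls return the cached instance, even with a different
# client_type argument, because of the module-level `client` global.
same_client = get_client(client_type="POSTGRES")
assert client is same_client
```
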
12 changes: 6 additions & 6 deletions agentmemory/clustering.py
@@ -1,11 +1,11 @@
from agentmemory import search_memory, update_memory

-def cluster(epsilon, min_samples, category, filter_metadata=None, unique=False):
+def cluster(epsilon, min_samples, category, filter_metadata=None, novel=False):
    """
    DBScan clustering. Updates memories directly with their cluster id.
    """
    # Mark all memories as unvisited
-    memories = search_memory(category, "", n_results=float("inf"), filter_metadata=filter_metadata, unique=unique)
+    memories = search_memory(category, "", n_results=float("inf"), filter_metadata=filter_metadata, novel=novel)
    visited = {memory["id"]: False for memory in memories}

    cluster_id = 0
@@ -16,7 +16,7 @@ def cluster(epsilon, min_samples, category, filter_metadata=None, unique=False):
        visited[memory_id] = True

        # Finding neighboring memories based on the epsilon distance threshold
-        neighbors = search_memory(category, memory["document"], n_results=float("inf"), max_distance=epsilon, filter_metadata=filter_metadata, unique=unique)
+        neighbors = search_memory(category, memory["document"], n_results=float("inf"), max_distance=epsilon, filter_metadata=filter_metadata, novel=novel)

        # get the current metadata
        metadata = memory.get("metadata", {})
@@ -31,10 +31,10 @@
            metadata["cluster"] = str(cluster_id)
            # Mark the current memory as part of the new cluster
            update_memory(category, memory_id, metadata=metadata)
-            _expand_cluster(memory, neighbors, cluster_id, visited, epsilon, min_samples, category, filter_metadata, unique)
+            _expand_cluster(memory, neighbors, cluster_id, visited, epsilon, min_samples, category, filter_metadata, novel)


-def _expand_cluster(memory, neighbors, cluster_id, visited, epsilon, min_samples, category, filter_metadata, unique):
+def _expand_cluster(memory, neighbors, cluster_id, visited, epsilon, min_samples, category, filter_metadata, novel):
    """
    Helper function to expand the clusters.
    """
@@ -45,7 +45,7 @@ def _expand_cluster(memory, neighbors, cluster_id, visited, epsilon, min_samples

        if not visited[neighbor_id]:
            visited[neighbor_id] = True
-            next_neighbors = search_memory(category, neighbor_memory["document"], n_results=float("inf"), max_distance=epsilon, filter_metadata=filter_metadata, unique=unique)
+            next_neighbors = search_memory(category, neighbor_memory["document"], n_results=float("inf"), max_distance=epsilon, filter_metadata=filter_metadata, novel=novel)
            if len(next_neighbors) >= min_samples:
                neighbors += next_neighbors
31 changes: 21 additions & 10 deletions agentmemory/helpers.py
@@ -50,24 +50,29 @@ def chroma_collection_to_list(collection):
        collection (dict): Dictionary to be converted.

    Returns:
-        list: Converted list of dictionaries.
+        dict_list: Converted list of dictionaries.

    Example:
        >>> chroma_collection_to_list(collection)
        [{'metadata': '...', 'document': '...', 'id': '...'}]
    """

-    list = []
+    dict_list = []
+
+    # check if collection is a list
+    if isinstance(collection, list):
+        return collection

    # If there are no embeddings, zip metadatas, documents and ids together
    if collection.get("embeddings", None) is None:
        for metadata, document, id in zip(
            collection["metadatas"], collection["documents"], collection["ids"]
        ):
            # append the zipped data as dictionary to the list
-            list.append({"metadata": metadata, "document": document, "id": id})
+            dict_list.append({"metadata": metadata, "document": document, "id": id})

-        return list
+        return dict_list

    # if distance is none, zip metadatas, documents, ids and embeddings together
    if collection.get("distances", None) is None:
@@ -78,7 +83,7 @@
            collection["embeddings"],
        ):
            # append the zipped data as dictionary to the list
-            list.append(
+            dict_list.append(
                {
                    "metadata": metadata,
                    "document": document,
@@ -87,7 +92,7 @@
                }
            )

-        return list
+        return dict_list

    # if embeddings are present, zip all data including embeddings and distances
    for metadata, document, id, embedding, distance in zip(
@@ -98,7 +103,7 @@
        collection.get("distances"),
    ):
        # append the zipped data as dictionary to the list
-        list.append(
+        dict_list.append(
            {
                "metadata": metadata,
                "document": document,
@@ -107,8 +112,8 @@
                "id": id,
            }
        )
-    debug_log("Collection to list", {"collection": collection, "list": list})
-    return list
+    debug_log("Collection to list", {"collection": collection, "list": dict_list})
+    return dict_list


def list_to_chroma_collection(list):
@@ -177,7 +182,13 @@ def flatten_arrays(collection):
        # If no values, continue to next iteration
        if collection[key] is None:
            continue
-        # Flatten the arrays into a single array for each key
+
+        if not isinstance(collection[key], list):
+            continue
+
+        if not any(isinstance(el, list) for el in collection[key]):
+            continue
+
        collection[key] = [item for sublist in collection[key] for item in sublist]

    return collection
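
The new guards in `flatten_arrays` skip keys whose values are not nested lists instead of flattening unconditionally. A small sketch of the resulting behavior (values are illustrative):

```python
from agentmemory.helpers import flatten_arrays

collection = {
    "ids": [["1", "2"], ["3"]],    # nested lists are flattened
    "documents": ["a", "b", "c"],  # already flat: skipped by the new guards
    "embeddings": None,            # None values are skipped, as before
}
print(flatten_arrays(collection))
# {'ids': ['1', '2', '3'], 'documents': ['a', 'b', 'c'], 'embeddings': None}
```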