From 6b545c6edb4313ed575dda2a7268801ec300a7bc Mon Sep 17 00:00:00 2001 From: Noble Varghese Date: Fri, 15 Sep 2023 00:30:33 +0530 Subject: [PATCH] feat: enhancements and updates for portkey (#7669) --- CHANGELOG.md | 3 + docs/examples/llm/portkey.ipynb | 165 ++++++------ examples/portkey_demo/fallback_demo.py | 27 +- .../portkey_demo/fallback_streaming_demo.py | 31 ++- examples/portkey_demo/loadbalancing_demo.py | 30 ++- .../loadbalancing_streaming_demo.py | 29 ++- llama_index/llms/portkey.py | 236 ++++++------------ llama_index/llms/portkey_utils.py | 32 +-- 8 files changed, 269 insertions(+), 284 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cde673309ce54..931148879b732 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## Unreleased +### New Features +- Simplified portkey LLM interface (#7669) + ### Bug Fixes / Nits - Avoid `NotImplementedError` for async langchain embeddings (#7668) diff --git a/docs/examples/llm/portkey.ipynb b/docs/examples/llm/portkey.ipynb index 0432443856ace..dba725084f59b 100644 --- a/docs/examples/llm/portkey.ipynb +++ b/docs/examples/llm/portkey.ipynb @@ -33,19 +33,14 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "5Z933R9wuZ4z" + "id": "4Q8L0IDau60f" }, - "outputs": [], "source": [ - "# Installing the Rubeus AI gateway developed by the Portkey team\n", - "!pip install rubeus\n", + "#### **Step 1: Get your Portkey API key**\n", "\n", - "# Importing necessary libraries and modules\n", - "from llama_index.llms import Portkey, ChatMessage\n", - "from rubeus import LLMBase" + "Log into [Portkey here](https://app.portkey.ai/), then click on the profile icon on top right and \"Copy API Key\". Let's also set OpenAI & Anthropic API keys." ] }, { @@ -60,29 +55,35 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": { - "id": "4Q8L0IDau60f" + "id": "Vd7elJZHu_jF" }, + "outputs": [], "source": [ - "#### **Step 1: Get your Portkey API key**\n", + "# !pip install portkey-ai -U\n", "\n", - "Log into [Portkey here](https://app.portkey.ai/), then click on the profile icon on top right and \"Copy API Key\". Let's also set OpenAI & Anthropic API keys." 
+ "# Set the portkey api key as environment variable.\n", + "import os\n", + "\n", + "os.environ[\"PORTKEY_API_KEY\"] = \"PORTKEY_API_KEY\"\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "os.environ[\"ANTHROPIC_API_KEY\"] = \"\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "Vd7elJZHu_jF" + "id": "5Z933R9wuZ4z" }, "outputs": [], "source": [ - "import os\n", - "\n", - "os.environ[\"PORTKEY_API_KEY\"] = \"\"\n", - "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", - "os.environ[\"ANTHROPIC_API_KEY\"] = \"\"" + "# Installing the Portkey's AI gateway SDK developed by the Portkey team\n", + "# Importing necessary libraries and modules\n", + "from llama_index.llms import Portkey, ChatMessage\n", + "import portkey as pk" ] }, { @@ -118,21 +119,8 @@ }, "outputs": [], "source": [ - "metadata = {\n", - " \"_environment\": \"production\",\n", - " \"_prompt\": \"test\",\n", - " \"_user\": \"user\",\n", - " \"_organisation\": \"acme\",\n", - "}\n", - "\n", - "pk_llm = Portkey(\n", + "portkey_client = Portkey(\n", " mode=\"single\",\n", - " cache_status=\"semantic\",\n", - " cache_force_refresh=True,\n", - " cache_age=1000,\n", - " trace_id=\"portkey_llamaindex\",\n", - " retry=5,\n", - " metadata=metadata,\n", ")\n", "\n", "# Since we have defined the Portkey API Key with os.environ, we do not need to set api_key again here" @@ -146,7 +134,7 @@ "source": [ "#### **Step 3: Constructing the LLM**\n", "\n", - "With the Portkey integration, constructing an LLM is simplified. Use the `LLMBase` function for all providers, with the exact same keys you're accustomed to in your OpenAI or Anthropic constructors. The only new key is `weight`, essential for the load balancing feature." + "With the Portkey integration, constructing an LLM is simplified. Use the `LLMOptions` function for all providers, with the exact same keys you're accustomed to in your OpenAI or Anthropic constructors. The only new key is `weight`, essential for the load balancing feature." ] }, { @@ -157,7 +145,9 @@ }, "outputs": [], "source": [ - "openai_llm = LLMBase(provider=\"openai\", model=\"gpt-4\")" + "openai_llm = pk.LLMOptions(\n", + " provider=\"openai\", model=\"gpt-4\", virtual_key=\"open-ai-key-66ah788\"\n", + ")" ] }, { @@ -166,7 +156,7 @@ "id": "7foKVwDdwBgG" }, "source": [ - "The above code illustrates how to utilize the `LLMBase` function to set up an LLM with the OpenAI provider and the GPT-4 model. This same function can be used for other providers as well, making the integration process streamlined and consistent across various providers." + "The above code illustrates how to utilize the `LLMOptions` function to set up an LLM with the OpenAI provider and the GPT-4 model. This same function can be used for other providers as well, making the integration process streamlined and consistent across various providers." ] }, { @@ -177,7 +167,7 @@ "source": [ "#### **Step 4: Activate the Portkey LLM**\n", "\n", - "Once you've constructed the LLM using the `LLMBase` function, the next step is to activate it with Portkey. This step is essential to ensure that all the Portkey features are available for your LLM." + "Once you've constructed the LLM using the `LLMOptions` function, the next step is to activate it with Portkey. This step is essential to ensure that all the Portkey features are available for your LLM." 
] }, { @@ -188,7 +178,7 @@ }, "outputs": [], "source": [ - "pk_llm.add_llms(openai_llm)" + "portkey_client.add_llms(openai_llm)" ] }, { @@ -224,7 +214,7 @@ " ChatMessage(role=\"user\", content=\"What can you do?\"),\n", "]\n", "print(\"Testing Portkey Llamaindex integration:\")\n", - "response = pk_llm.chat(messages)\n", + "response = portkey_client.chat(messages)\n", "print(response)" ] }, @@ -238,17 +228,17 @@ "\n", "Congratulations! You've successfully set up and tested the Portkey integration with Llamaindex. To recap:\n", "\n", - "1. pip install rubeus\n", + "1. pip install portkey-ai\n", "2. Import Portkey from llama_index.llms.\n", "3. Grab your Portkey API Key from [here](https://app.portkey.ai/).\n", - "4. Construct your Portkey LLM with `pk_llm=Portkey(mode=\"fallback\")` and any other Portkey features you want\n", - "5. Construct your provider LLM with opneai_llm=PortkeyBase(provider=\"openai\",model=\"gpt-4\")\n", - "6. Add the provider LLM to Portkey LLM with `pk_llm.add_llms(openai_llm)`\n", - "7. Call the Portkey LLM regularly like you would any other LLM, with `pk_llm.chat(messages)`\n", + "4. Construct your Portkey client with `portkey_client=Portkey(mode=\"fallback\")` and any other Portkey modes you want\n", + "5. Construct your provider LLM with `opneai_llm=PortkeyBase(provider=\"openai\",model=\"gpt-4\")`\n", + "6. Add the provider LLM to Portkey LLM with `portkey_client.add_llms(openai_llm)`\n", + "7. Call the Portkey methods regularly like you would any other LLM, with `portkey_client.chat(messages)`\n", "\n", "Here's the guide to all the functions and their params:\n", "- [Portkey LLM Constructor](#step-2-add-all-the-portkey-features-you-want-as-illustrated-below-by-calling-the-portkey-class)\n", - "- [LLMBase Constructor](https://github.com/Portkey-AI/rubeus-python-sdk/blob/4cf3e17b847225123e92f8e8467b41d082186d60/rubeus/api_resources/utils.py#L179)\n", + "- [LLMOptions Constructor](https://github.com/Portkey-AI/rubeus-python-sdk/blob/4cf3e17b847225123e92f8e8467b41d082186d60/rubeus/api_resources/utils.py#L179)\n", "- [List of Portkey + Llamaindex Features](#portkeys-integration-with-llamaindex-adds-the-following-production-capabilities-to-your-apps-out-of-the-box)\n" ] }, @@ -276,15 +266,26 @@ }, "outputs": [], "source": [ - "pk_llm = Portkey(mode=\"fallback\", retry=5)\n", + "portkey_client = Portkey(mode=\"fallback\")\n", + "messages = [\n", + " ChatMessage(role=\"system\", content=\"You are a helpful assistant\"),\n", + " ChatMessage(role=\"user\", content=\"What can you do?\"),\n", + "]\n", "\n", - "llm1 = LLMBase(provider=\"openai\", model=\"gpt-4\")\n", - "llm2 = LLMBase(provider=\"openai\", model=\"gpt-3.5-turbo\")\n", + "llm1 = pk.LLMOptions(\n", + " provider=\"openai\",\n", + " model=\"gpt-4\",\n", + " retry_settings={\"on_status_codes\": [429, 500], \"attempts\": 2},\n", + " virtual_key=\"open-ai-key-66ah788\",\n", + ")\n", + "llm2 = pk.LLMOptions(\n", + " provider=\"openai\", model=\"gpt-3.5-turbo\", virtual_key=\"open-ai-key-66ah788\"\n", + ")\n", "\n", - "pk_llm.add_llms(llm_params=[llm1, llm2])\n", + "portkey_client.add_llms(llm_params=[llm1])\n", "\n", "print(\"Testing Fallback & Retry functionality:\")\n", - "response = pk_llm.chat(messages)\n", + "response = portkey_client.chat(messages)\n", "print(response)" ] }, @@ -314,15 +315,26 @@ }, "outputs": [], "source": [ - "pk_llm = Portkey(mode=\"loadbalance\")\n", + "portkey_client = Portkey(mode=\"ab_test\")\n", + "messages = [\n", + " ChatMessage(role=\"system\", content=\"You are a helpful 
assistant\"),\n", + " ChatMessage(role=\"user\", content=\"What can you do?\"),\n", + "]\n", "\n", - "llm1 = LLMBase(provider=\"openai\", model=\"gpt-4\", weight=0.2)\n", - "llm2 = LLMBase(provider=\"openai\", model=\"gpt-3.5-turbo\", weight=0.8)\n", + "llm1 = pk.LLMOptions(\n", + " provider=\"openai\", virtual_key=\"open-ai-key-66ah788\", model=\"gpt-4\", weight=0.2\n", + ")\n", + "llm2 = pk.LLMOptions(\n", + " provider=\"openai\",\n", + " virtual_key=\"open-ai-key-66ah788\",\n", + " model=\"gpt-3.5-turbo\",\n", + " weight=0.8,\n", + ")\n", "\n", - "pk_llm.add_llms(llm_params=[llm1, llm2])\n", + "portkey_client.add_llms(llm_params=[llm1, llm2])\n", "\n", "print(\"Testing Loadbalance functionality:\")\n", - "response = pk_llm.chat(messages)\n", + "response = portkey_client.chat(messages)\n", "print(response)" ] }, @@ -349,8 +361,15 @@ "source": [ "import time\n", "\n", - "pk_llm = Portkey(mode=\"single\", cache_status=\"semantic\")\n", - "pk_llm.add_llms(llm1)\n", + "portkey_client = Portkey(mode=\"single\")\n", + "\n", + "openai_llm = pk.LLMOptions(\n", + " provider=\"openai\",\n", + " virtual_key=\"open-ai-key-66ah788\",\n", + " model=\"gpt-3.5-turbo\",\n", + " cache_status=\"semantic\",\n", + ")\n", + "portkey_client.add_llms(openai_llm)\n", "\n", "current_messages = [\n", " ChatMessage(role=\"system\", content=\"You are a helpful assistant\"),\n", @@ -360,7 +379,7 @@ "print(\"Testing Portkey Semantic Cache:\")\n", "\n", "start = time.time()\n", - "response = pk_llm.chat(current_messages)\n", + "response = portkey_client.chat(current_messages)\n", "end = time.time() - start\n", "\n", "print(response)\n", @@ -377,7 +396,7 @@ "print(\"Testing Portkey Semantic Cache:\")\n", "\n", "start = time.time()\n", - "response = pk_llm.chat(new_messages)\n", + "response = portkey_client.chat(new_messages)\n", "end = time.time() - start\n", "\n", "print(response)\n", @@ -408,8 +427,13 @@ }, "outputs": [], "source": [ - "pk_llm = Portkey(\n", - " mode=\"single\", cache_status=\"semantic\", cache_age=1000, cache_force_refresh=True\n", + "# Setting the cache status as `semantic` and cache_age as 60s.\n", + "openai_llm = pk.LLMOptions(\n", + " provider=\"openai\",\n", + " virtual_key=\"open-ai-key-66ah788\",\n", + " model=\"gpt-3.5-turbo\",\n", + " cache_force_refresh=True,\n", + " cache_age=50,\n", ")" ] }, @@ -441,14 +465,11 @@ " \"_organisation\": \"acme\",\n", "}\n", "\n", - "pk_llm = Portkey(\n", - " mode=\"single\",\n", - " trace_id=\"portkey_llamaindex_test\",\n", - ")\n", - "pk_llm.add_llms(llm1)\n", + "portkey_client = Portkey(mode=\"single\")\n", + "portkey_client.add_llms(openai_llm)\n", "\n", "print(\"Testing Observability functionality:\")\n", - "response = pk_llm.chat(messages)\n", + "response = portkey_client.chat(messages)\n", "print(response)" ] }, @@ -458,14 +479,14 @@ "id": "KeOQL-9uxxQz" }, "source": [ - "#### **AI Gateway with Rubeus**\n", + "#### **AI Gateway with Portkey**\n", "\n", - "Rubeus is an open-source AI gateway that powers features like load balancing and fallbacks in Portkey. It acts as an intermediary, ensuring that your requests are processed optimally. One of the advantages of using Rubeus is its flexibility. You can easily customize its behavior, redirect requests to different providers, or even bypass logging to Portkey.\n", + "Portkey is an open-source AI gateway that powers features like load balancing and fallbacks. It acts as an intermediary, ensuring that your requests are processed optimally. One of the advantages of using Portkey is its flexibility. 
You can easily customize its behavior, redirect requests to different providers, or even bypass logging to Portkey.\n", "\n", - "Here's an example of customizing the behavior with Rubeus:\n", + "Here's an example of customizing the behavior with Portkey:\n", "\n", "```py\n", - "pk_llm.base_url=None\n", + "portkey_client.base_url=None\n", "```" ] }, @@ -550,7 +571,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.2" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/portkey_demo/fallback_demo.py b/examples/portkey_demo/fallback_demo.py index c82ceef495bfc..2dbc874daa3bb 100644 --- a/examples/portkey_demo/fallback_demo.py +++ b/examples/portkey_demo/fallback_demo.py @@ -1,7 +1,7 @@ import os from llama_index.llms import Portkey from llama_index.llms import ChatMessage # We'll use this later -from rubeus import LLMBase +from portkey import LLMOptions os.environ["PORTKEY_API_KEY"] = "" os.environ["OPENAI_API_KEY"] = "" @@ -16,13 +16,26 @@ } # Define the Portkey interface. -pk_llm = Portkey(mode="fallback", trace_id="portkey_llamaindex", metadata=metadata) +pk_client = Portkey(mode="fallback") # Define the provider spec in the LLMBase spec. Customise the LLMs as per requirement. -openai_llm = LLMBase(provider="openai", model="gpt-4") -anthropic_llm = LLMBase(provider="openai", model="claude-2", max_tokens=256) - -pk_llm.add_llms([openai_llm, anthropic_llm]) +openai_llm = LLMOptions( + provider="openai", + model="gpt-4", + trace_id="portkey_llamaindex", + metadata=metadata, + virtual_key="open-ai-key-66ah788", +) +anthropic_llm = LLMOptions( + provider="openai", + model="claude-2", + max_tokens=256, + trace_id="portkey_llamaindex", + metadata=metadata, + virtual_key="anthropic-key-351feb", +) + +pk_client.add_llms([openai_llm, anthropic_llm]) messages = [ ChatMessage(role="system", content="You are a helpful assistant"), @@ -30,5 +43,5 @@ ] print("Testing Portkey Llamaindex integration:") -response = pk_llm.chat(messages) +response = pk_client.chat(messages) print(response) diff --git a/examples/portkey_demo/fallback_streaming_demo.py b/examples/portkey_demo/fallback_streaming_demo.py index 4c7d249f4fa34..e7e91b313b6f3 100644 --- a/examples/portkey_demo/fallback_streaming_demo.py +++ b/examples/portkey_demo/fallback_streaming_demo.py @@ -1,7 +1,7 @@ import os from llama_index.llms import Portkey from llama_index.llms import ChatMessage # We'll use this later -from rubeus import LLMBase +from portkey import LLMOptions os.environ["PORTKEY_API_KEY"] = "" os.environ["OPENAI_API_KEY"] = "" @@ -16,13 +16,26 @@ } # Define the Portkey interface. -pk_llm = Portkey(mode="fallback", trace_id="portkey_llamaindex", metadata=metadata) - -# Define the provider spec in the LLMBase spec. Customise the LLMs as per requirement. -openai_llm = LLMBase(provider="openai", model="gpt-4") -anthropic_llm = LLMBase(provider="openai", model="claude-2", max_tokens=256) - -pk_llm.add_llms([openai_llm, anthropic_llm]) +pk_client = Portkey(mode="fallback") + +# Define the provider spec in the LLMOptions spec. Customise the LLMs as per requirement. 
+openai_llm = LLMOptions( + provider="openai", + model="gpt-4", + trace_id="portkey_llamaindex", + metadata=metadata, + virtual_key="open-ai-key-66ah788", +) +anthropic_llm = LLMOptions( + provider="openai", + model="claude-2", + max_tokens=256, + trace_id="portkey_llamaindex", + metadata=metadata, + virtual_key="anthropic-key-351feb", +) + +pk_client.add_llms([openai_llm, anthropic_llm]) messages = [ ChatMessage(role="system", content="You are a helpful assistant"), @@ -30,7 +43,7 @@ ] print("Testing Portkey Llamaindex integration:") -response = pk_llm.stream_chat(messages) +response = pk_client.stream_chat(messages) for i in response: print(i.delta, end="", flush=True) diff --git a/examples/portkey_demo/loadbalancing_demo.py b/examples/portkey_demo/loadbalancing_demo.py index 91bff8caa29aa..8a6b4baeac960 100644 --- a/examples/portkey_demo/loadbalancing_demo.py +++ b/examples/portkey_demo/loadbalancing_demo.py @@ -1,7 +1,7 @@ import os from llama_index.llms import Portkey from llama_index.llms import ChatMessage # We'll use this later -from rubeus import LLMBase +from portkey import LLMOptions os.environ["PORTKEY_API_KEY"] = "" os.environ["OPENAI_API_KEY"] = "" @@ -16,14 +16,28 @@ } # Define the Portkey interface. -pk_llm = Portkey(mode="loadbalance", trace_id="portkey_llamaindex", metadata=metadata) +pk_client = Portkey(mode="ab_test") -# Define the provider spec in the LLMBase spec. Customise the LLMs as per requirement. +# Define the provider spec in the LLMOptions spec. Customise the LLMs as per requirement. # Added here are the weights that specify a 40:60 split in the requests. -openai_llm = LLMBase(provider="openai", model="gpt-4", weight=0.4) -anthropic_llm = LLMBase(provider="openai", model="claude-2", max_tokens=256, weight=0.6) - -pk_llm.add_llms([openai_llm, anthropic_llm]) +openai_llm = LLMOptions( + provider="openai", + model="gpt-4", + weight=0.4, + metadata=metadata, + virtual_key="open-ai-key-66ah788", +) +anthropic_llm = LLMOptions( + provider="openai", + model="claude-2", + max_tokens=256, + weight=0.6, + trace_id="portkey_llamaindex", + metadata=metadata, + virtual_key="anthropic-key-351feb", +) + +pk_client.add_llms([openai_llm, anthropic_llm]) messages = [ ChatMessage(role="system", content="You are a helpful assistant"), @@ -31,5 +45,5 @@ ] print("Testing Portkey Llamaindex integration:") -response = pk_llm.chat(messages) +response = pk_client.chat(messages) print(response) diff --git a/examples/portkey_demo/loadbalancing_streaming_demo.py b/examples/portkey_demo/loadbalancing_streaming_demo.py index bae0b8f80b68d..674f5095f161e 100644 --- a/examples/portkey_demo/loadbalancing_streaming_demo.py +++ b/examples/portkey_demo/loadbalancing_streaming_demo.py @@ -1,7 +1,7 @@ import os from llama_index.llms import Portkey from llama_index.llms import ChatMessage # We'll use this later -from rubeus import LLMBase +from portkey import LLMOptions os.environ["PORTKEY_API_KEY"] = "" os.environ["OPENAI_API_KEY"] = "" @@ -16,16 +16,29 @@ } # Define the Portkey interface. -pk_llm = Portkey(mode="loadbalance", trace_id="portkey_llamaindex", metadata=metadata) +pk_client = Portkey(mode="ab_test") -# Define the provider spec in the LLMBase spec. Customise the LLMs as per requirement. +# Define the provider spec in the LLMOptions spec. Customise the LLMs as per requirement. # Added here are the weights that specify a 40:60 split in the requests. 
-openai_llm = LLMBase(provider="openai", model="gpt-4", weight=0.4) -anthropic_llm = LLMBase( - provider="anthropic", model="claude-2", max_tokens=256, weight=0.6 +openai_llm = LLMOptions( + provider="openai", + model="gpt-4", + weight=0.4, + trace_id="portkey_llamaindex", + metadata=metadata, + virtual_key="open-ai-key-66ah788", +) +anthropic_llm = LLMOptions( + provider="anthropic", + model="claude-2", + max_tokens=256, + weight=0.6, + trace_id="portkey_llamaindex", + metadata=metadata, + virtual_key="anthropic-key-351feb", ) -pk_llm.add_llms([openai_llm, anthropic_llm]) +pk_client.add_llms([openai_llm, anthropic_llm]) messages = [ ChatMessage(role="system", content="You are a helpful assistant"), @@ -33,6 +46,6 @@ ] print("Testing Portkey Llamaindex integration:") -response = pk_llm.stream_chat(messages) +response = pk_client.stream_chat(messages) for i in response: print(i.delta, end="", flush=True) diff --git a/llama_index/llms/portkey.py b/llama_index/llms/portkey.py index f1d1e2492bd42..a934c8f79dc62 100644 --- a/llama_index/llms/portkey.py +++ b/llama_index/llms/portkey.py @@ -1,7 +1,7 @@ """ Portkey intergation with Llama_index for enchanced monitoring """ -from typing import Any, Optional, Sequence, Dict, Union, List, TYPE_CHECKING +from typing import Any, Optional, Sequence, Union, List, TYPE_CHECKING, cast from llama_index.llms.custom import CustomLLM from llama_index.llms.base import ( @@ -30,14 +30,11 @@ from llama_index.bridge.pydantic import Field, PrivateAttr if TYPE_CHECKING: - from rubeus import ( - Rubeus, - LLMBase, - RubeusModes, - RubeusCacheType, - RubeusCacheLiteral, - RubeusResponse, - RubeusModesLiteral, + from portkey import ( + LLMOptions, + ModesLiteral, + Modes, + PortkeyResponse, ) @@ -48,91 +45,52 @@ class Portkey(CustomLLM): LLM (_type_): _description_ """ - mode: Optional[Union["RubeusModes", "RubeusModesLiteral"]] = Field( + mode: Optional[Union["Modes", "ModesLiteral"]] = Field( description="The mode for using the Portkey integration" ) model: Optional[str] = Field(default="gpt-3.5-turbo") - llm: "LLMBase" = Field(description="LLM parameter", default_factory=dict) + llm: "LLMOptions" = Field(description="LLM parameter", default_factory=dict) - llms: List["LLMBase"] = Field(description="LLM parameters", default_factory=list) + llms: List["LLMOptions"] = Field(description="LLM parameters", default_factory=list) - _client: "Rubeus" = PrivateAttr() + _client: Any = PrivateAttr() def __init__( self, *, - mode: Optional[Union["RubeusModes", "RubeusModesLiteral"]] = None, - api_key: str = "", - cache_status: Optional[Union["RubeusCacheType", "RubeusCacheLiteral"]] = None, - trace_id: Optional[str] = "", - cache_age: Optional[int] = None, - cache_force_refresh: Optional[bool] = None, - metadata: Optional[Dict[str, Any]] = None, - retry: Optional[int] = 3, + mode: Union["Modes", "ModesLiteral"], + api_key: Optional[str] = None, base_url: Optional[str] = None, ) -> None: """ Initialize a Portkey instance. Args: + mode (Optional[Modes]): The mode for using the Portkey integration + (default: Modes.SINGLE). api_key (Optional[str]): The API key to authenticate with Portkey. - mode (Optional[RubeusModes]): The mode for using the Portkey integration - (default: RubeusModes.SINGLE). - provider (Optional[ProviderTypes]): The LLM provider to be used for the - Portkey integration. - Eg: openai, anthropic etc. - NOTE: Check the ProviderTypes to see the supported list - of LLMs. - model (str): The name of the language model to use - (default: "gpt-3.5-turbo"). 
- model_api_key (Optional[str]): The api key of the provider being used. - Eg: api key of openai. - temperature (float): The temperature parameter for text generation - (default: 0.1). - max_tokens (Optional[int]): The maximum number of tokens in the generated - text. - max_retries (int): The maximum number of retries for failed requests - (default: 5). - trace_id (Optional[str]): A unique identifier for tracing requests. - cache_status (Optional[RubeusCacheType]): The type of cache to use - (default: ""). - If cache_status is set, then cache is automatically set to True - cache (Optional[bool]): Whether to use caching (default: False). - metadata (Optional[Dict[str, Any]]): Metadata associated with the - request (default: {}). - weight (Optional[float]): The weight of the LLM in the ensemble - (default: 1.0). - **kwargs (Any): Additional keyword arguments. - - Raises: - ValueError: If neither 'llm' nor 'llms' are provided during - Portkey initialization. + base_url (Optional[str]): The Base url to the self hosted rubeus \ + (the opensource version of portkey) or any other self hosted server. """ try: - from rubeus import Rubeus + import portkey except ImportError as exc: raise ImportError(IMPORT_ERROR_MESSAGE) from exc - self._client = Rubeus( + super().__init__( base_url=base_url, api_key=api_key, - default_headers={ - "trace-id": trace_id, - "cache": cache_status, - "metadata": metadata, - "cache-force-refresh": cache_force_refresh, - "cache-age": f"max-age={cache_age}", - "retry-count": retry, - }, - ) - super().__init__( - trace_id=trace_id, - cache_status=cache_status, - metadata=metadata, - cache_force_refresh=cache_force_refresh, - cache_age=cache_age, ) + if api_key is not None: + portkey.api_key = api_key + + if base_url is not None: + portkey.base_url = base_url + + portkey.mode = mode + + self._client = portkey self.model = None self.mode = mode @@ -141,14 +99,17 @@ def metadata(self) -> LLMMetadata: """LLM metadata.""" return generate_llm_metadata(self.llms[0]) - def add_llms(self, llm_params: Union["LLMBase", List["LLMBase"]]) -> "Portkey": + def add_llms( + self, llm_params: Union["LLMOptions", List["LLMOptions"]] + ) -> "Portkey": """ Adds the specified LLM parameters to the list of LLMs. This may be used for fallbacks or load-balancing as specified in the mode. Args: - llm_params (Union[LLMBase, List[LLMBase]]): A single LLM parameter set or - a list of LLM parameter sets. Each set should be an instance of LLMBase with + llm_params (Union[LLMOptions, List[LLMOptions]]): A single LLM parameter \ + set or a list of LLM parameter sets. Each set should be an instance of \ + LLMOptions with the specified attributes. > provider: Optional[ProviderTypes] > model: str @@ -156,20 +117,22 @@ def add_llms(self, llm_params: Union["LLMBase", List["LLMBase"]]) -> "Portkey": > max_tokens: Optional[int] > max_retries: int > trace_id: Optional[str] - > cache_status: Optional[RubeusCacheType] + > cache_status: Optional[CacheType] > cache: Optional[bool] > metadata: Dict[str, Any] > weight: Optional[float] + > **kwargs : Other additional parameters that are supported by \ + LLMOptions in portkey-ai NOTE: User may choose to pass additional params as well. 
Returns: self """ try: - from rubeus import LLMBase + from portkey import LLMOptions except ImportError as exc: raise ImportError(IMPORT_ERROR_MESSAGE) from exc - if isinstance(llm_params, LLMBase): + if isinstance(llm_params, LLMOptions): llm_params = [llm_params] self.llms.extend(llm_params) if self.model is None: @@ -214,81 +177,56 @@ def stream_chat( def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: try: - from rubeus import RubeusModes, Message + from portkey import Message, Config except ImportError as exc: raise ImportError(IMPORT_ERROR_MESSAGE) from exc - messages_dict = [{"role": i.role.value, "content": i.content} for i in messages] - self._client.default_params["messages"] = messages_dict # type: ignore - if self.mode == RubeusModes.FALLBACK: - response = self._client.chat_completion.with_fallbacks(llms=self.llms) - self.llm = self._get_llm(response) - - elif self.mode == RubeusModes.LOADBALANCE: - response = self._client.chat_completion.with_loadbalancing(self.llms) - self.llm = self._get_llm(response) - else: - # Single mode - messages_input = [ - Message(role=i.role.value, content=i.content or "") for i in messages - ] - response = self._client.chat_completion.create( - messages=messages_input, **kwargs - ) - - message = response.choices[0]["message"] - raw = response.raw_body - return ChatResponse(message=message, raw=raw) + _messages = cast( + List[Message], + [{"role": i.role.value, "content": i.content} for i in messages], + ) + config = Config(llms=self.llms) + response = self._client.ChatCompletions.create( + messages=_messages, config=config + ) + self.llm = self._get_llm(response) + + message = response.choices[0].message + return ChatResponse(message=message, raw=response) def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: try: - from rubeus import RubeusModes + from portkey import Config except ImportError as exc: raise ImportError(IMPORT_ERROR_MESSAGE) from exc - self._client.default_params["prompt"] = prompt # type: ignore - if self.mode == RubeusModes.FALLBACK: - response = self._client.completion.with_fallbacks(llms=self.llms) - self.llm = self._get_llm(response) - elif self.mode == RubeusModes.LOADBALANCE: - response = self._client.completion.with_loadbalancing(self.llms) - self.llm = self._get_llm(response) - else: - # Single mode - response = self._client.completion.single(llms=self.llms) - text = response.choices[0]["text"] - raw = response.raw_body - return CompletionResponse(text=text, raw=raw) + config = Config(llms=self.llms) + response = self._client.Completions.create(prompt=prompt, config=config) + text = response.choices[0].text + return CompletionResponse(text=text, raw=response) def _stream_chat( self, messages: Sequence[ChatMessage], **kwargs: Any ) -> ChatResponseGen: try: - from rubeus import RubeusModes + from portkey import Message, Config except ImportError as exc: raise ImportError(IMPORT_ERROR_MESSAGE) from exc - messages_dict = [{"role": i.role.value, "content": i.content} for i in messages] - self._client.default_params["messages"] = messages_dict # type: ignore - self._client.default_params["stream"] = True - if self.mode == RubeusModes.FALLBACK: - response = self._client.chat_completion.with_fallbacks( - llms=self.llms, stream=True - ) - - elif self.mode == RubeusModes.LOADBALANCE: - response = self._client.chat_completion.with_loadbalancing( - self.llms, stream=True - ) - else: - # Single mode - response = self._client.chat_completion.single(llms=self.llms, stream=True) + _messages = 
cast( + List[Message], + [{"role": i.role.value, "content": i.content} for i in messages], + ) + config = Config(llms=self.llms) + response = self._client.ChatCompletions.create( + messages=_messages, config=config, stream=True, **kwargs + ) def gen() -> ChatResponseGen: content = "" function_call: Optional[dict] = {} for resp in response: - if resp.choices == [{}]: + if resp.choices is None: continue - delta = resp.choices[0]["delta"] + delta = resp.choices[0].delta role = delta.get("role", "assistant") content_delta = delta.get("content", "") or "" content += content_delta @@ -297,7 +235,6 @@ def gen() -> ChatResponseGen: if function_call_delta is not None: if function_call is None: function_call = function_call_delta - # ensure we do not add a blank function call if ( function_call @@ -324,30 +261,20 @@ def gen() -> ChatResponseGen: return gen() def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: - self._client.default_params["prompt"] = prompt # type: ignore - self._client.default_params["stream"] = True try: - from rubeus import RubeusModes + from portkey import Config except ImportError as exc: raise ImportError(IMPORT_ERROR_MESSAGE) from exc - if self.mode == RubeusModes.FALLBACK: - response = self._client.completion.with_fallbacks( - llms=self.llms, stream=True - ) - elif self.mode == RubeusModes.LOADBALANCE: - response = self._client.completion.with_loadbalancing( - self.llms, stream=True - ) - else: - # Single mode - response = self._client.completion.create( - prompt=prompt, stream=True, **kwargs - ) + + config = Config(llms=self.llms) + response = self._client.Completions.create( + prompt=prompt, config=config, stream=True, **kwargs + ) def gen() -> CompletionResponseGen: text = "" for resp in response: - delta = resp.choices[0]["text"] + delta = resp.choices[0].text or "" text += delta yield CompletionResponse( delta=delta, @@ -367,18 +294,5 @@ def _is_chat_model(self) -> bool: """ return is_chat_model(self.model or "") - @property - def _is_fallback_mode(self) -> bool: - """Check if the suggested mode is fallback or not. - - Returns: - bool: True if the provided mode is fallback type, False otherwise. - """ - try: - from rubeus import RubeusModes - except ImportError as exc: - raise ImportError(IMPORT_ERROR_MESSAGE) from exc - return self.mode == RubeusModes.FALLBACK - - def _get_llm(self, response: "RubeusResponse") -> "LLMBase": + def _get_llm(self, response: "PortkeyResponse") -> "LLMOptions": return get_llm(response, self.llms) diff --git a/llama_index/llms/portkey_utils.py b/llama_index/llms/portkey_utils.py index 880a6808dc334..dd7aa15bde13b 100644 --- a/llama_index/llms/portkey_utils.py +++ b/llama_index/llms/portkey_utils.py @@ -5,7 +5,6 @@ the functionality and usability of the Portkey class """ from typing import List, TYPE_CHECKING -from enum import Enum from llama_index.llms.base import LLMMetadata from llama_index.llms.openai import OpenAI from llama_index.llms.anthropic import Anthropic @@ -19,14 +18,14 @@ from llama_index.llms.anthropic_utils import CLAUDE_MODELS if TYPE_CHECKING: - from rubeus import ( - LLMBase, - RubeusResponse, + from portkey import ( + LLMOptions, + PortkeyResponse, ) IMPORT_ERROR_MESSAGE = ( - "Rubeus is not installed.Please install it with `pip install rubeus`." + "Portkey is not installed.Please install it with `pip install portkey-ai`." 
) @@ -115,7 +114,7 @@ def modelname_to_contextsize(modelname: str) -> int: return context_size -def generate_llm_metadata(llm: "LLMBase") -> LLMMetadata: +def generate_llm_metadata(llm: "LLMOptions") -> LLMMetadata: """ Generate metadata for a Language Model (LLM) instance. @@ -138,26 +137,26 @@ def generate_llm_metadata(llm: "LLMBase") -> LLMMetadata: llama_index.llms.base.LLM. """ try: - from rubeus import LLMBase + from portkey import LLMOptions except ImportError as exc: raise ImportError(IMPORT_ERROR_MESSAGE) from exc - if not isinstance(llm, LLMBase): - raise ValueError("llm must be an instance of rubeus.LLMBase") + if not isinstance(llm, LLMOptions): + raise ValueError("llm must be an instance of portkey.LLMOptions") return LLMMetadata( - _context_window=modelname_to_contextsize(llm.model), - is_chat_model=is_chat_model(llm.model), + _context_window=modelname_to_contextsize(llm.model or ""), + is_chat_model=is_chat_model(llm.model or ""), model_name=llm.model, ) -def get_llm(response: "RubeusResponse", llms: List["LLMBase"]) -> "LLMBase": +def get_llm(response: "PortkeyResponse", llms: List["LLMOptions"]) -> "LLMOptions": # TODO: Update this logic over here. try: - from rubeus import LLMBase + from portkey import LLMOptions except ImportError as exc: raise ImportError(IMPORT_ERROR_MESSAGE) from exc - fallback_llm = LLMBase.construct() + fallback_llm = LLMOptions.construct() for llm in llms: model = llm.model @@ -168,8 +167,3 @@ def get_llm(response: "RubeusResponse", llms: List["LLMBase"]) -> "LLMBase": if fallback_llm is None: raise ValueError("Failed to get the fallback LLM") return fallback_llm - - -class RubeusApiPaths(str, Enum): - CHAT_COMPLETION = "/v1/chatComplete" - COMPLETION = "/v1/complete"
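
For quick reference, below is a minimal end-to-end sketch of the simplified interface this patch introduces, assembled from the notebook and example scripts above. It is not part of the patch itself: the API key, virtual keys, and model names are placeholder values taken from the examples and should be replaced with your own Portkey account's values.

```py
import os

from llama_index.llms import ChatMessage, Portkey
from portkey import LLMOptions

# Portkey reads its API key from the environment (placeholder value shown).
os.environ["PORTKEY_API_KEY"] = "PORTKEY_API_KEY"

# One Portkey client; the mode ("single", "fallback", "ab_test") controls routing.
portkey_client = Portkey(mode="fallback")

# Provider options reuse the keys of the native OpenAI/Anthropic constructors,
# plus Portkey-specific fields such as virtual_key, weight, retry_settings, etc.
# The virtual_key values below are placeholders from the examples in this patch.
gpt4_llm = LLMOptions(
    provider="openai", model="gpt-4", virtual_key="open-ai-key-66ah788"
)
gpt35_llm = LLMOptions(
    provider="openai", model="gpt-3.5-turbo", virtual_key="open-ai-key-66ah788"
)

# Register the provider LLMs; with mode="fallback" they are tried in order.
portkey_client.add_llms(llm_params=[gpt4_llm, gpt35_llm])

messages = [
    ChatMessage(role="system", content="You are a helpful assistant"),
    ChatMessage(role="user", content="What can you do?"),
]
print(portkey_client.chat(messages))
```

The same client also exposes `stream_chat(messages)` for streaming responses, as used in the `*_streaming_demo.py` examples above.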