From 22192ffcfef9fb54283e9fe58136e48001b57a51 Mon Sep 17 00:00:00 2001 From: Noble Varghese Date: Fri, 8 Dec 2023 21:30:47 +0530 Subject: [PATCH] Feat: New updates on the SDK (#58) * fea: New updated on the sdks * feat: new updates on the SDK - releasing version v1.0.0 * feat: updating the name of the package to portkey_ai * feat: adding the CI pipeline for publishing to NPM on release * fix: reverting the changes on previos commit * fix: adding the conventional-commit-check * fix: adding metadata in feedback routes * fix: updated feedbacks response, added streaming in post and added over ride function in utils * fix: feedbacks and test cases * fix: fixing the url on the prompt completions API * fix: adding completions method to prompt api * fix: lint issues * fix: renaming the post method * feat: adding the get_headers function to retrieve the headers * fix: linting error fixes * fix: fixing the last chunk in streaming mode * feat: changes * fix: fixing llama_index imports * fix: fixing on the LLMmetadata on llamaindex. Assingned a default value to context window * fix: removing headers when parsing the dict * fix: adding types into llama_index and langchain * fix: formating * fix: removed the unused methods on the integrations and removed headers from the generic response * fix: lint fixes * doc: Udpating the readme file with the latest changes * doc: URL updated --- .../workflows/verify-conventional-commits.yml | 11 + Makefile | 3 +- README.md | 144 +----- examples/azure_fallback_loadbalance.ipynb | 254 ---------- examples/completions.ipynb | 187 -------- examples/demo.py | 29 ++ examples/fallback_loadbalance.ipynb | 249 ---------- examples/loadbalance_two_api_keys.ipynb | 313 ------------ portkey/api_resources/apis.py | 311 ------------ portkey/version.py | 1 - {portkey => portkey_ai}/__init__.py | 25 +- {portkey => portkey_ai}/_portkey_scripts.py | 2 +- .../api_resources/__init__.py | 20 +- portkey_ai/api_resources/apis/__init__.py | 18 + portkey_ai/api_resources/apis/api_resource.py | 19 + .../api_resources/apis/chat_complete.py | 109 +++++ portkey_ai/api_resources/apis/complete.py | 87 ++++ .../api_resources/apis/create_headers.py | 29 ++ portkey_ai/api_resources/apis/embeddings.py | 21 + portkey_ai/api_resources/apis/feedback.py | 41 ++ portkey_ai/api_resources/apis/generation.py | 140 ++++++ portkey_ai/api_resources/apis/post.py | 59 +++ .../api_resources/base_client.py | 156 +++--- portkey_ai/api_resources/client.py | 72 +++ .../api_resources/common_types.py | 6 +- .../api_resources/exceptions.py | 0 .../api_resources/global_constants.py | 4 +- .../api_resources/streaming.py | 12 +- .../api_resources/utils.py | 94 ++-- .../client.py => portkey_ai/llms/__init__.py | 0 portkey_ai/llms/langchain/__init__.py | 4 + portkey_ai/llms/langchain/chat.py | 214 +++++++++ portkey_ai/llms/langchain/completion.py | 139 ++++++ portkey_ai/llms/llama_index/__init__.py | 3 + portkey_ai/llms/llama_index/completions.py | 215 +++++++++ portkey_ai/llms/llama_index/utils.py | 128 +++++ portkey_ai/llms/mypy.ini | 2 + {portkey => portkey_ai}/py.typed | 0 portkey_ai/version.py | 1 + setup.cfg | 9 +- tests/__init__.py | 0 ...test_anyscale_CodeLlama-34b-Instruct-hf.py | 204 -------- .../test_anyscale_Llama-2-13b-chat-hf.py | 204 -------- .../test_anyscale_Llama-2-70b-chat-hf.py | 204 -------- .../test_anyscale_Llama-2-7b-chat-hf.py | 204 -------- .../anthropic_n_openai.json | 28 ++ .../anyscale_n_openai.json | 28 ++ .../azure_n_openai.json | 28 ++ .../cohere_n_openai.json | 27 ++ 
.../loadbalance_with_two_apikeys.json | 15 + .../single_provider/single_provider.json | 4 + .../single_provider_with_vk_retry_cache.json | 13 + .../single_with_basic_config.json | 3 + .../anthropic_n_openai.json | 28 ++ .../anyscale_n_openai.json | 28 ++ .../azure_n_openai.json | 28 ++ .../cohere_n_openai.json | 27 ++ .../loadbalance_with_two_apikeys.json | 15 + .../single_provider/single_provider.json | 4 + .../single_provider_with_vk_retry_cache.json | 13 + .../single_with_basic_config.json | 3 + tests/models.json | 97 ++++ tests/test_anthropic.py | 218 --------- tests/test_azure_openai.py | 135 ------ tests/test_chat_complete.py | 454 ++++++++++++++++++ tests/test_cohere.py | 127 ----- tests/test_complete.py | 416 ++++++++++++++++ tests/test_openai.py | 218 --------- tests/utils.py | 6 + 69 files changed, 2816 insertions(+), 3094 deletions(-) create mode 100644 .github/workflows/verify-conventional-commits.yml delete mode 100644 examples/azure_fallback_loadbalance.ipynb delete mode 100644 examples/completions.ipynb create mode 100644 examples/demo.py delete mode 100644 examples/fallback_loadbalance.ipynb delete mode 100644 examples/loadbalance_two_api_keys.ipynb delete mode 100644 portkey/api_resources/apis.py delete mode 100644 portkey/version.py rename {portkey => portkey_ai}/__init__.py (70%) rename {portkey => portkey_ai}/_portkey_scripts.py (89%) rename {portkey => portkey_ai}/api_resources/__init__.py (72%) create mode 100644 portkey_ai/api_resources/apis/__init__.py create mode 100644 portkey_ai/api_resources/apis/api_resource.py create mode 100644 portkey_ai/api_resources/apis/chat_complete.py create mode 100644 portkey_ai/api_resources/apis/complete.py create mode 100644 portkey_ai/api_resources/apis/create_headers.py create mode 100644 portkey_ai/api_resources/apis/embeddings.py create mode 100644 portkey_ai/api_resources/apis/feedback.py create mode 100644 portkey_ai/api_resources/apis/generation.py create mode 100644 portkey_ai/api_resources/apis/post.py rename {portkey => portkey_ai}/api_resources/base_client.py (76%) create mode 100644 portkey_ai/api_resources/client.py rename {portkey => portkey_ai}/api_resources/common_types.py (58%) rename {portkey => portkey_ai}/api_resources/exceptions.py (100%) rename {portkey => portkey_ai}/api_resources/global_constants.py (92%) rename {portkey => portkey_ai}/api_resources/streaming.py (95%) rename {portkey => portkey_ai}/api_resources/utils.py (84%) rename portkey/api_resources/client.py => portkey_ai/llms/__init__.py (100%) create mode 100644 portkey_ai/llms/langchain/__init__.py create mode 100644 portkey_ai/llms/langchain/chat.py create mode 100644 portkey_ai/llms/langchain/completion.py create mode 100644 portkey_ai/llms/llama_index/__init__.py create mode 100644 portkey_ai/llms/llama_index/completions.py create mode 100644 portkey_ai/llms/llama_index/utils.py create mode 100644 portkey_ai/llms/mypy.ini rename {portkey => portkey_ai}/py.typed (100%) create mode 100644 portkey_ai/version.py create mode 100644 tests/__init__.py delete mode 100644 tests/anyscale_tests/test_anyscale_CodeLlama-34b-Instruct-hf.py delete mode 100644 tests/anyscale_tests/test_anyscale_Llama-2-13b-chat-hf.py delete mode 100644 tests/anyscale_tests/test_anyscale_Llama-2-70b-chat-hf.py delete mode 100644 tests/anyscale_tests/test_anyscale_Llama-2-7b-chat-hf.py create mode 100644 tests/configs/chat_completions/loadbalance_and_fallback/anthropic_n_openai.json create mode 100644 tests/configs/chat_completions/loadbalance_and_fallback/anyscale_n_openai.json 
create mode 100644 tests/configs/chat_completions/loadbalance_and_fallback/azure_n_openai.json create mode 100644 tests/configs/chat_completions/loadbalance_and_fallback/cohere_n_openai.json create mode 100644 tests/configs/chat_completions/loadbalance_with_two_apikeys/loadbalance_with_two_apikeys.json create mode 100644 tests/configs/chat_completions/single_provider/single_provider.json create mode 100644 tests/configs/chat_completions/single_provider_with_vk_retry_cache/single_provider_with_vk_retry_cache.json create mode 100644 tests/configs/chat_completions/single_with_basic_config/single_with_basic_config.json create mode 100644 tests/configs/completions/loadbalance_and_fallback/anthropic_n_openai.json create mode 100644 tests/configs/completions/loadbalance_and_fallback/anyscale_n_openai.json create mode 100644 tests/configs/completions/loadbalance_and_fallback/azure_n_openai.json create mode 100644 tests/configs/completions/loadbalance_and_fallback/cohere_n_openai.json create mode 100644 tests/configs/completions/loadbalance_with_two_apikeys/loadbalance_with_two_apikeys.json create mode 100644 tests/configs/completions/single_provider/single_provider.json create mode 100644 tests/configs/completions/single_provider_with_vk_retry_cache/single_provider_with_vk_retry_cache.json create mode 100644 tests/configs/completions/single_with_basic_config/single_with_basic_config.json create mode 100644 tests/models.json delete mode 100644 tests/test_anthropic.py delete mode 100644 tests/test_azure_openai.py create mode 100644 tests/test_chat_complete.py delete mode 100644 tests/test_cohere.py create mode 100644 tests/test_complete.py delete mode 100644 tests/test_openai.py diff --git a/.github/workflows/verify-conventional-commits.yml b/.github/workflows/verify-conventional-commits.yml new file mode 100644 index 00000000..f541ae06 --- /dev/null +++ b/.github/workflows/verify-conventional-commits.yml @@ -0,0 +1,11 @@ +name: verify-conventional-commits + +on: [pull_request] + +jobs: + conventional-commits-checker: + runs-on: ubuntu-latest + steps: + - name: verify conventional commits + uses: taskmedia/action-conventional-commits@v1.1.8 + \ No newline at end of file diff --git a/Makefile b/Makefile index c452d8db..2f482ada 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ help: ## Show all Makefile targets .PHONY: format lint format: ## Run code formatter: black black . + ruff check . --fix lint: ## Run linters: mypy, black, ruff mypy . black . --check @@ -24,7 +25,7 @@ build: upload: python -m pip install twine - python -m twine upload dist/portkey-ai-* + python -m twine upload dist/portkey_ai-* rm -rf dist dev: diff --git a/README.md b/README.md index 4ee25f68..2d0135a7 100644 --- a/README.md +++ b/README.md @@ -35,148 +35,24 @@ $ export PORTKEY_API_KEY=PORTKEY_API_KEY #### Now, let's make a request with GPT-4 ```py -import portkey -from portkey import Config, LLMOptions +from portkey_ai import Portkey -portkey.config = Config( - mode="single", - llms=LLMOptions(provider="openai", api_key="OPENAI_API_KEY") +# Construct a client with a virtual key +portkey = Portkey( + api_key="PORTKEY_API_KEY", + virtual_key="VIRTUAL_KEY" ) -r = portkey.ChatCompletions.create( - model="gpt-4", - messages=[ - {"role": "user","content": "Hello World!"} - ] +completion = portkey.chat.completions.create( + messages = [{ "role": 'user', "content": 'Say this is a test' }], + model = 'gpt-3.5-turbo' ) +print(completion) ``` Portkey fully adheres to the OpenAI SDK signature. 
This means that you can instantly switch to Portkey and start using Portkey's advanced production features right out of the box. -## **πŸͺœ Detailed Integration Guide** - -**4 Steps to Integrate the SDK** -1. Get your virtual key for AI providers. -2. Construct your LLM, add Portkey features, provider features, and prompt. -3. Construct the Portkey client and set your usage mode. -4. Now call Portkey regularly like you would call your OpenAI constructor. - -Let's dive in! If you are an advanced user and want to directly jump to various full-fledged examples, [click here](https://github.com/Portkey-AI/portkey-python-sdk/tree/main/examples). - ---- - -### **Step 1️⃣ : Get your Virtual Keys for AI providers** - -Navigate to the "Virtual Keys" page on [Portkey](https://app.portkey.ai/) and hit the "Add Key" button. Choose your AI provider and assign a unique name to your key. Your virtual key is ready! - -### **Step 2️⃣ : Construct your LLM, add Portkey features, provider features, and prompt** - -**Portkey Features**: -You can find a comprehensive [list of Portkey features here](#πŸ“”-list-of-portkey-features). This includes settings for caching, retries, metadata, and more. - -**Provider Features**: -Portkey is designed to be flexible. All the features you're familiar with from your LLM provider, like `top_p`, `top_k`, and `temperature`, can be used seamlessly. Check out the [complete list of provider features here](https://github.com/Portkey-AI/portkey-python-sdk/blob/af0814ebf4f1961b5dfed438918fe68b26ef5f1e/portkey/api_resources/utils.py#L137). - -**Setting the Prompt Input**: -This param lets you override any prompt that is passed during the completion call - set a model-specific prompt here to optimise the model performance. You can set the input in two ways. For models like Claude and GPT3, use `prompt` = `(str)`, and for models like GPT3.5 & GPT4, use `messages` = `[array]`. - - -Here's how you can combine everything: - -```python -from portkey import LLMOptions - -# Portkey Config -provider = "openai" -virtual_key = "key_a" -trace_id = "portkey_sdk_test" - -# Model Settings -model = "gpt-4" -temperature = 1 - -# User Prompt -messages = [{"role": "user", "content": "Who are you?"}] - -# Construct LLM -llm = LLMOptions(provider=provider, virtual_key=virtual_key, trace_id=trace_id, model=model, temperature=temperature) -``` - -### **Step 3️⃣ : Construct the Portkey Client** - -Portkey client's config takes 3 params: `api_key`, `mode`, `llms`. - -* `api_key`: You can set your Portkey API key here or with `os.ennviron` as done above. -* `mode`: There are **3** modes - Single, Fallback, Loadbalance. - * **Single** - This is the standard mode. Use it if you do not want Fallback OR Loadbalance features. - * **Fallback** - Set this mode if you want to enable the Fallback feature. - * **Loadbalance** - Set this mode if you want to enable the Loadbalance feature. -* `llms`: This is an array where we pass our LLMs constructed using the LLMOptions constructor. - -```py -import portkey -from portkey import Config - -portkey.config = Config(mode="single",llms=[llm]) -``` - -### **Step 4️⃣ : Let's Call the Portkey Client!** - -The Portkey client can do `ChatCompletions` and `Completions`. - -Since our LLM is GPT4, we will use ChatCompletions: - -```py -response = portkey.ChatCompletions.create( - messages=[{ - "role": "user", - "content": "Who are you ?" - }] -) -print(response.choices[0].message) -``` - -You have integrated Portkey's Python SDK in just 4 steps! 
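Since the new v1.0.0 client added in this patch also accepts `stream=True` (its `create` call returns a `Stream[ChatCompletionChunk]`, per the new `chat_complete.py` further below), a minimal streaming sketch is shown here for comparison with the removed guide above. The keys are placeholders, and the per-chunk `choices[0].delta` access is an assumption that the chunks mirror the OpenAI delta format used in the removed notebooks; it is not confirmed by this section.

```py
from portkey_ai import Portkey

# Hypothetical keys; replace with your own Portkey API key and virtual key.
portkey = Portkey(api_key="PORTKEY_API_KEY", virtual_key="VIRTUAL_KEY")

# stream=True returns an iterable Stream[ChatCompletionChunk] instead of a
# single ChatCompletions response.
stream = portkey.chat.completions.create(
    messages=[{"role": "user", "content": "Say this is a test"}],
    model="gpt-3.5-turbo",
    stream=True,
)

for chunk in stream:
    # Assumes OpenAI-style chunks: print partial content as it arrives.
    if chunk.choices:
        print(chunk.choices[0].delta.get("content", ""), end="", flush=True)
```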
- ---- - -## **πŸ” Demo: Implementing GPT4 to GPT3.5 Fallback Using the Portkey SDK** - -```py -import os -os.environ["PORTKEY_API_KEY"] = "PORTKEY_API_KEY" # Setting the Portkey API Key - -import portkey -from portkey import Config, LLMOptions - -# Let's construct our LLMs. -llm1 = LLMOptions(provider="openai", model="gpt-4", virtual_key="key_a"), -llm2 = LLMOptions(provider="openai", model="gpt-3.5-turbo", virtual_key="key_a") - -# Now let's construct the Portkey client where we will set the fallback logic -portkey.config = Config(mode="fallback",llms=[llm1,llm2]) - -# And, that's it! -response = portkey.ChatCompletions.create() -print(response.choices[0].message) -``` - -## **πŸ“” Full List of Portkey Config** - -| Feature | Config Key | Value(Type) | Required | -|---------------------|-------------------------|--------------------------------------------------|-------------| -| Provider Name | `provider` | `string` | βœ… Required | -| Model Name | `model` | `string` | βœ… Required | -| Virtual Key OR API Key | `virtual_key` or `api_key` | `string` | βœ… Required (can be set externally) | -| Cache Type | `cache_status` | `simple`, `semantic` | ❔ Optional | -| Force Cache Refresh | `cache_force_refresh` | `True`, `False` (Boolean) | ❔ Optional | -| Cache Age | `cache_age` | `integer` (in seconds) | ❔ Optional | -| Trace ID | `trace_id` | `string` | ❔ Optional | -| Retries | `retry` | `{dict}` with two required keys: `"attempts"` which expects integers in [0,5] and `"on_status_codes"` which expects array of status codes like [429,502]
`Example`: { "attempts": 5, "on_status_codes":[429,500] } | ❔ Optional | -| Metadata | `metadata` | `json object` [More info](https://docs.portkey.ai/key-features/custom-metadata) | ❔ Optional | - - ## **🀝 Supported Providers** || Provider | Support Status | Supported Endpoints | @@ -190,7 +66,7 @@ print(response.choices[0].message) --- -#### [πŸ“ Full Documentation](https://docs.portkey.ai/) | [πŸ› οΈ Integration Requests](https://github.com/Portkey-AI/portkey-python-sdk/issues) | +#### [πŸ“ Full Documentation](https://docs.portkey.ai/docs) | [πŸ› οΈ Integration Requests](https://github.com/Portkey-AI/portkey-python-sdk/issues) | follow on Twitter Discord diff --git a/examples/azure_fallback_loadbalance.ipynb b/examples/azure_fallback_loadbalance.ipynb deleted file mode 100644 index c469946d..00000000 --- a/examples/azure_fallback_loadbalance.ipynb +++ /dev/null @@ -1,254 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Portkey | Building Resilient LLM Apps\n", - "\n", - "**Portkey** is a full-stack LLMOps platform that productionizes your Gen AI app reliably and securely." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Key Features of Portkey\n", - "\n", - "1. **AI Gateway**:\n", - " - **Automated Fallbacks & Retries**: Ensure your application remains functional even if a primary service fails.\n", - " - **Load Balancing**: Efficiently distribute incoming requests among multiple models.\n", - " - **Semantic Caching**: Reduce costs and latency by intelligently caching results.\n", - " \n", - "2. **Observability**:\n", - " - **Logging**: Keep track of all requests for monitoring and debugging.\n", - " - **Requests Tracing**: Understand the journey of each request for optimization.\n", - " - **Custom Tags**: Segment and categorize requests for better insights.\n", - "\n", - "To harness these features, let's start with the setup:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5Z933R9wuZ4z" - }, - "outputs": [], - "source": [ - "# Installing the Portkey AI python SDK developed by the Portkey team\n", - "!pip install portkey-ai -U\n", - "!portkey --version" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Importing necessary libraries and modules\n", - "import portkey as pk\n", - "from portkey import Config, LLMOptions\n", - "from getpass import getpass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### **Step 1: Get your Portkey API key**\n", - "\n", - "Log into [Portkey here](https://app.portkey.ai/), then click on the profile icon on top right and \"Copy API Key\". Let's also set OpenAI & Anthropic API keys." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Enter the password on the prompt window.\n", - "API_KEY = getpass(\"Enter your PORTKEY_API_KEY \")\n", - "\n", - "# Setting the API key\n", - "pk.api_key = API_KEY\n", - "\n", - "# NOTE: For adding custom url, uncomment this line and add your custom url in a selfhosted version.\n", - "# pk.base_url = \"\"\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### **Step 2: Configure Portkey Features**\n", - "\n", - "To harness the full potential of Portkey, you can configure various features as illustrated above. 
Here's a guide to all Portkey features and the expected values:\n", - "\n", - "| Feature | Config Key | Value(Type) | Required |\n", - "|---------------------|-------------------------|--------------------------------------------------|-------------|\n", - "| API Key | `api_key` | `string` | βœ… Required (can be set externally) |\n", - "| Mode | `mode` | `fallback`, `ab_test`, `single` | βœ… Required |\n", - "| Cache Type | `cache_status` | `simple`, `semantic` | ❔ Optional |\n", - "| Force Cache Refresh | `cache_force_refresh` | `boolean` | ❔ Optional |\n", - "| Cache Age | `cache_age` | `integer` (in seconds) | ❔ Optional |\n", - "| Trace ID | `trace_id` | `string` | ❔ Optional |\n", - "| Retries | `retry` | `integer` [0,5] | ❔ Optional |\n", - "| Metadata | `metadata` | `json object` [More info](https://docs.portkey.ai/key-features/custom-metadata) | ❔ Optional |\n", - "| Base URL | `base_url` | `url` | ❔ Optional |\n", - "\n", - "\n", - "To set up Portkey for different modes and features, refer to the provided IPython Notebook examples in the examples/ directory.\n", - "\n", - "For more information and detailed documentation, please visit [Portkey Documentation](https://docs.portkey.ai/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example-1: Configuring Portkey for Fallback Mode with Azure models\n", - "In this example, we'll demonstrate how to configure Portkey for the Fallback Mode using the sdk. Fallback Mode allows you to define a backup strategy when your primary service is unavailable.\n", - "\n", - "`Note`: The order of definition of LLMOptions is important for fallbacks. Ensure that you define your fallback strategy in the order of preference. This ensures that your fallback logic is in place and ready to be used when needed." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pk.config = Config(\n", - " mode=\"fallback\",\n", - " llms=[\n", - " LLMOptions(api_key=\"\", provider=\"azure-openai\", resource_name=\"\", api_version=\"\", deployment_id=\"\"),\n", - " LLMOptions(api_key=\"\", provider=\"azure-openai\", resource_name=\"\", api_version=\"\", deployment_id=\"\")\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 1: Basic example\n", - "\n", - "response = pk.ChatCompletions.create(\n", - " messages=[{\"role\": \"user\", \"content\": \"Who are you ?\"}]\n", - ")\n", - "\n", - "print(response.choices[0].message)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 3: Streaming results\n", - "\n", - "response3 = pk.ChatCompletions.create(\n", - " messages=[{\"role\": \"user\", \"content\": \"Translate the following English text to French: 'Hello, how are you?'\"}],\n", - " stream=True # Stream back partial progress\n", - ")\n", - "\n", - "for event in response3:\n", - " if len(event.choices) == 0:\n", - " continue\n", - " if event.choices[0].delta is None:\n", - " break\n", - " print(event.choices[0].delta.get(\"content\", \"\"), end=\"\", flush=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example-2: Configuring Portkey for Load Balancing (A/B test) Mode with Azure models\n", - "\n", - "To utilize Portkey's Load Balancing Mode, follow the steps below. 
Load Balancing Mode enables you to distribute incoming requests across multiple services to ensure high availability and scalability.\n", - "\n", - "`NOTE`: Loadbalance is also called A/B test." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pk.config = Config(\n", - " mode=\"ab_test\",\n", - " llms=[\n", - " LLMOptions(api_key=\"\", provider=\"azure-openai\", weight=0.4, resource_name=\"\", api_version=\"\", deployment_id=\"\"),\n", - " LLMOptions(api_key=\"\", provider=\"azure-openai\", weight=0.6, resource_name=\"\", api_version=\"\", deployment_id=\"\")\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 1: Basic example\n", - "\n", - "response = pk.ChatCompletions.create(\n", - " messages=[{\"role\": \"user\", \"content\": \"Summarize the key points from the article titled 'The Impact of Climate Change on Global Biodiversity.'\"}]\n", - ")\n", - "\n", - "print(response.choices[0].message)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 3: Streaming results\n", - "\n", - "response3 = pk.ChatCompletions.create(\n", - " messages=[{\"role\": \"user\", \"content\": \"Generate a creative short story about a detective solving a mysterious case.\"}],\n", - " stream=True # Stream back partial progre|ss\n", - ")\n", - "\n", - "for event in response3:\n", - " if len(event.choices) == 0:\n", - " continue\n", - " if event.choices[0].delta is None:\n", - " break\n", - " print(event.choices[0].delta.get(\"content\", \"\"), end=\"\", flush=True)" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/examples/completions.ipynb b/examples/completions.ipynb deleted file mode 100644 index b1038870..00000000 --- a/examples/completions.ipynb +++ /dev/null @@ -1,187 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5Z933R9wuZ4z" - }, - "outputs": [], - "source": [ - "# Installing the Portkey AI gateway developed by the Portkey team\n", - "!pip install portkey-ai -U\n", - "!portkey --version" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import portkey as pk\n", - "from portkey import Config, LLMOptions\n", - "from getpass import getpass\n", - "\n", - "# Enter the password on the prompt window.\n", - "API_KEY = getpass(\"Enter you PORTKEY_API_KEY \")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Setting the API key\n", - "pk.api_key = API_KEY\n", - "\n", - "# NOTE: For adding custom url, uncomment this line and add your custom url.\n", - "# pk.base_url = \"\"\n", - "\n", - "# Setting the config for portkey\n", - "pk.config = Config(\n", - " mode=\"single\",\n", - " llms=LLMOptions(virtual_key=\"open-ai-key-66a67d\", provider=\"openai\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 1: 
Basic example\n", - "\n", - "response = pk.Completions.create(\n", - " model=\"text-davinci-002\",\n", - " prompt=\"Translate the following English text to French: 'Hello, how are you?'\"\n", - ")\n", - "\n", - "print(response.choices[0].text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 2: Using optional params\n", - "\n", - "response2 = pk.Completions.create(\n", - " model=\"text-davinci-002\",\n", - " prompt=\"Summarize the key points of Albert Einstein's theory of relativity.\",\n", - " max_tokens=100, # Limit the generated text to 100 tokens\n", - " temperature=0.7, # Use a lower temperature for deterministic output\n", - " n=3, # Generate 3 completions for the same prompt\n", - " presence_penalty=0.5, # Penalize new tokens based on their presence in the text\n", - " frequency_penalty=0.2, # Penalize new tokens based on their frequency in the text\n", - " logit_bias={\"50256\": -100} # Prevent a specific token from being generated\n", - ")\n", - "\n", - "for i, choice in enumerate(response2.choices):\n", - " print(f\"\\nCompletion {i + 1}: {choice.text}\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 3: Streaming results\n", - "\n", - "response3 = pk.Completions.create(\n", - " model=\"text-davinci-002\",\n", - " prompt=\"Once upon a time\",\n", - " max_tokens=50,\n", - " stream=True # Stream back partial progress\n", - ")\n", - "\n", - "for event in response3:\n", - " print(event.choices[0].text, end=\"\", flush=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 4: Echo back the prompt\n", - "response4 = pk.Completions.create(\n", - " model=\"text-davinci-002\",\n", - " prompt=\"Echo this prompt: 'Hello, World!'\",\n", - " echo=True # Echo back the prompt in addition to the completion\n", - ")\n", - "print(response4.choices[0].text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 5: Best-of completions\n", - "response5 = pk.Completions.create(\n", - " model=\"text-davinci-002\",\n", - " prompt=\"Complete this sentence: 'The quick brown fox'\",\n", - " best_of=3 # Generate 3 completions server-side and return the best one\n", - ")\n", - "print(response5.choices[0].text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 6: Using logprobs parameter\n", - "response6 = pk.Completions.create(\n", - " model=\"text-davinci-003\",\n", - " prompt=\"Generate text about the solar system.\",\n", - " max_tokens=50,\n", - " logprobs=5 # Include log probabilities for the top 5 tokens\n", - ")\n", - "\n", - "generated_text = response6.choices[0].text\n", - "print(\"Generated Text:\")\n", - "print(generated_text)\n", - "\n", - "# Print log probabilities for the top 5 tokens\n", - "logprobs = response6.choices[0].logprobs # Log probabilities for the sampled tokens\n", - "print(\"\\nLog Probabilities for Top 5 Tokens:\")\n", - "for token, logprob in zip(logprobs['tokens'], logprobs['token_logprobs']):\n", - " print(f\"Token: {token}, Log Probability: {logprob}\")" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - 
"mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.2" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/examples/demo.py b/examples/demo.py new file mode 100644 index 00000000..62ee4ca9 --- /dev/null +++ b/examples/demo.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +import os +from portkey_ai import Portkey +from dotenv import load_dotenv + +# from tests.utils import assert_matches_type +load_dotenv(override=True) +base_url = os.environ.get("PORTKEY_BASE_URL") +api_key = os.environ.get("PORTKEY_API_KEY") +virtual_api_key = os.environ.get("COHERE_VIRTUAL_KEY") + +print("starting the tests....") +portkey = Portkey( + base_url=base_url, + api_key=api_key, + virtual_key=virtual_api_key, +) + +print("starting the creation phase.") + +completion = portkey.chat.completions.create( + messages=[ + {"role": "system", "content": "You are an assistant"}, + {"role": "user", "content": "Hello!"}, + ] +) + +print("completion :: ", completion) diff --git a/examples/fallback_loadbalance.ipynb b/examples/fallback_loadbalance.ipynb deleted file mode 100644 index c7d49139..00000000 --- a/examples/fallback_loadbalance.ipynb +++ /dev/null @@ -1,249 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Portkey | Building Resilient LLM Apps\n", - "\n", - "**Portkey** is a full-stack LLMOps platform that productionizes your Gen AI app reliably and securely." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Key Features of Portkey\n", - "\n", - "1. **AI Gateway**:\n", - " - **Automated Fallbacks & Retries**: Ensure your application remains functional even if a primary service fails.\n", - " - **Load Balancing**: Efficiently distribute incoming requests among multiple models.\n", - " - **Semantic Caching**: Reduce costs and latency by intelligently caching results.\n", - " \n", - "2. **Observability**:\n", - " - **Logging**: Keep track of all requests for monitoring and debugging.\n", - " - **Requests Tracing**: Understand the journey of each request for optimization.\n", - " - **Custom Tags**: Segment and categorize requests for better insights.\n", - "\n", - "To harness these features, let's start with the setup:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5Z933R9wuZ4z" - }, - "outputs": [], - "source": [ - "# Installing the Portkey AI python SDK developed by the Portkey team\n", - "!pip install portkey-ai -U\n", - "!portkey --version" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Importing necessary libraries and modules\n", - "import portkey as pk\n", - "from portkey import Config, LLMOptions\n", - "from getpass import getpass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### **Step 1: Get your Portkey API key**\n", - "\n", - "Log into [Portkey here](https://app.portkey.ai/), then click on the profile icon on top right and \"Copy API Key\". Let's also set OpenAI & Anthropic API keys." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Enter the password on the prompt window.\n", - "API_KEY = getpass(\"Enter your PORTKEY_API_KEY \")\n", - "\n", - "# Setting the API key\n", - "pk.api_key = API_KEY\n", - "\n", - "# NOTE: For adding custom url, uncomment this line and add your custom url in a selfhosted version.\n", - "# pk.base_url = \"\"\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### **Step 2: Configure Portkey Features**\n", - "\n", - "To harness the full potential of Portkey, you can configure various features as illustrated above. Here's a guide to all Portkey features and the expected values:\n", - "\n", - "| Feature | Config Key | Value(Type) | Required |\n", - "|---------------------|-------------------------|--------------------------------------------------|-------------|\n", - "| API Key | `api_key` | `string` | βœ… Required (can be set externally) |\n", - "| Mode | `mode` | `fallback`, `ab_test`, `single` | βœ… Required |\n", - "| Cache Type | `cache_status` | `simple`, `semantic` | ❔ Optional |\n", - "| Force Cache Refresh | `cache_force_refresh` | `boolean` | ❔ Optional |\n", - "| Cache Age | `cache_age` | `integer` (in seconds) | ❔ Optional |\n", - "| Trace ID | `trace_id` | `string` | ❔ Optional |\n", - "| Retries | `retry` | `integer` [0,5] | ❔ Optional |\n", - "| Metadata | `metadata` | `json object` [More info](https://docs.portkey.ai/key-features/custom-metadata) | ❔ Optional |\n", - "| Base URL | `base_url` | `url` | ❔ Optional |\n", - "\n", - "\n", - "To set up Portkey for different modes and features, refer to the provided IPython Notebook examples in the examples/ directory.\n", - "\n", - "For more information and detailed documentation, please visit [Portkey Documentation](https://docs.portkey.ai/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example-1: Configuring Portkey for Fallback Mode\n", - "In this example, we'll demonstrate how to configure Portkey for the Fallback Mode using the sdk. Fallback Mode allows you to define a backup strategy when your primary service is unavailable.\n", - "\n", - "`Note`: The order of definition of LLMOptions is important for fallbacks. Ensure that you define your fallback strategy in the order of preference. This ensures that your fallback logic is in place and ready to be used when needed." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pk.config = Config(\n", - " mode=\"fallback\",\n", - " llms=[\n", - " LLMOptions(model=\"text-davinci-002\", virtual_key=\"open-ai-key-66a67d\", provider=\"openai\"),\n", - " LLMOptions(model=\"claude-2\", virtual_key=\"anthropic-key-351feb\", provider=\"anthropic\", max_tokens=250)\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 1: Basic example\n", - "\n", - "response = pk.Completions.create(\n", - " prompt=\"Who are you ?\"\n", - ")\n", - "\n", - "print(response.choices[0].text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 3: Streaming results\n", - "\n", - "response3 = pk.Completions.create(\n", - " prompt=\"Translate the following English text to French: 'Hello, how are you?'\",\n", - " stream=True # Stream back partial progress\n", - ")\n", - "\n", - "for event in response3:\n", - " if event.choices[0].text:\n", - " print(event.choices[0].text, end=\"\", flush=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example-2: Configuring Portkey for Load Balancing (A/B test) Mode\n", - "\n", - "To utilize Portkey's Load Balancing Mode, follow the steps below. Load Balancing Mode enables you to distribute incoming requests across multiple services to ensure high availability and scalability.\n", - "\n", - "`NOTE`: Loadbalance is also called A/B test." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pk.config = Config(\n", - " mode=\"ab_test\",\n", - " llms=[\n", - " LLMOptions(model=\"text-davinci-002\", virtual_key=\"open-ai-key-66a67d\", provider=\"openai\", weight=0.4),\n", - " LLMOptions(model=\"claude-2\", virtual_key=\"anthropic-key-351feb\", provider=\"anthropic\", max_tokens=250, weight=0.6)\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 1: Basic example\n", - "\n", - "response = pk.Completions.create(\n", - " prompt=\"Summarize the key points from the article titled 'The Impact of Climate Change on Global Biodiversity.'\"\n", - ")\n", - "\n", - "print(response.choices[0].text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example 3: Streaming results\n", - "\n", - "response3 = pk.Completions.create(\n", - " prompt=\"Generate a creative short story about a detective solving a mysterious case.\",\n", - " stream=True # Stream back partial progress\n", - ")\n", - "\n", - "for event in response3:\n", - " if event.choices[0].text is None:\n", - " break\n", - " print(event.choices[0].text, end=\"\", flush=True)" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/examples/loadbalance_two_api_keys.ipynb b/examples/loadbalance_two_api_keys.ipynb deleted file mode 100644 index 19c63071..00000000 --- a/examples/loadbalance_two_api_keys.ipynb 
+++ /dev/null @@ -1,313 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Loadbalance Between Two OpenAI Accounts\n", - "\n", - "Simple code example on implementing load balancing with Portkey." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Create **`two LLM objects`** with **`two different API keys`**\n", - "\n", - "We will also set the *weight* for both the LLMs here - this weight determines how requests are distributed among the LLMs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "# Install the Portkey SDK\n", - "!pip install portkey-ai\n", - "\n", - "# Import necessary modules from Portkey\n", - "import portkey\n", - "from portkey import Config, LLMOptions\n", - "\n", - "# Create two LLM objects with different account keys and set their weights\n", - "llm_a = LLMOptions(provider = \"openai\", api_key = \"OPENAI_ACCOUNT_1_KEY\", weight = 0.5)\n", - "llm_b = LLMOptions(provider = \"openai\", api_key = \"OPENAI_ACCOUNT_2_KEY\", weight = 0.5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Implement **`loadbalance`** mode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "# Construct the Porktey client and pass the LLM objects\n", - "\n", - "portkey.config = Config(\n", - " api_key = \"PORTKEY_API_KEY\", \n", - " mode = \"loadbalance\", \n", - " llms = [llm_a, llm_b]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Make the **`ChatComplete`** call\n", - "\n", - "**Portkey SDK follows OpenAI SDK signature** - model params **including the model name**, can be passed at the completion call.\n", - "\n", - "Just replce **`openai.ChatCompletion.create`** to **`portkey.ChatCompletion.create`** - **Everything else remains the same**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "response = portkey.ChatCompletions.create(\n", - " model = \"gpt-4-0613\", \n", - " messages = [\n", - " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", - " {\"role\": \"user\", \"content\": \"What is the meaning of life, universe and everything?\"},\n", - " ]\n", - ")\n", - "\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "### More Production Features\n", - "\n", - "**⏩ Streaming**\n", - "\n", - "* Just set `stream=True` while making your call" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "stream_response = portkey.ChatCompletions.create(\n", - " model = \"gpt-4-0613\", \n", - " messages = [\n", - " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", - " {\"role\": \"user\", \"content\": \"What is the meaning of life, universe and everything?\"},\n", - " ],\n", - " stream = True\n", - ")\n", - "\n", - "for i in stream_response:\n", - " print(i.delta, end=\"\", flush=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**πŸ”‘ Virtual Keys**\n", - "\n", - "Safeguard your original OpenAI API keys and only expose dynamic virtual keys in the Portkey ecosystem\n", - 
"\n", - "1. Go to the β€œVirtual Keys” page on Portkey dashboard and hit the β€œAdd Key” button on the top right corner.\n", - "2. Choose your AI provider (OpenAI in this case), assign a unique name to your key, and, and add notes. Your virtual key is ready!\n", - "\n", - "* While constructing your LLM object with Portkey, you can pass the virtual keys instead of API keys with `virtual_key` parameter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "llm_a = LLMOptions(provider = \"openai\", virtual_key = \"openai-xxxx\", weight = 0.5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**πŸ‘£ Tracing**\n", - "\n", - "Monitor your apps throughout the lifecycle of a request with a singular `trace id`.\n", - "\n", - "* You can set the `trace_id` while constructing your LLM object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "llm_a = LLMOptions(provider = \"openai\", virtual_key = \"openai-xxxx\", weight = 0.5, trace_id = \"loadbalance_accounts_test\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**πŸ“ Live Feedback**\n", - "\n", - "Portkey’s Feedback API offers a straightforward way to gather weighted feedback from users, allowing you to refine and improve your app.\n", - "\n", - "* Append feedback to any `trace_id` through Portkey's feedback endpoint:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "import requests\n", - "import json\n", - "\n", - "headers = { \"x-portkey-api-key\" : \"PORTKEY_API_KEY\", \"Content-Type\" : \"application/json\" }\n", - "data = { \"trace_id\" : \"loadbalance_accounts_test\", \"value\" : 1 }\n", - "\n", - "send_feedback = requests.post(\"https://api.portkey.ai/v1/feedback\", headers=headers, data=json.dumps(data))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**πŸ’ͺ Fallbacks & Retries**\n", - "\n", - "Portkey helps you build resilient apps with automatic fallbacks & retries:\n", - "\n", - "**Fallbacks**: If a primary service or model fails, Portkey will automatically switch to a backup model.
\n", - "**Retries**: If a request fails, Portkey can be configured to retry the request multiple times.\n", - "\n", - "* Fallbacks is set while construcing your Portkey client by setting `mode=\"fallback\"`\n", - "* Retry is set while constructing your LLM object `retry={\"attempts\": 5}`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "llm_a = LLMOptions(provider = \"openai\", virtual_key = \"openai-xxx\", retry={\"attempts\": 5})\n", - "llm_b = LLMOptions(provider = \"anthropic\", virtual_key = \"anthropic-xxx\", retry={\"attempts\": 5})\n", - "\n", - "portkey.config = Config( api_key = \"PORTKEY_API_KEY\", mode = \"fallback\", llms = [llm_a,llm_b])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**πŸ› οΈ Integrations**\n", - "\n", - "You can also use Portkey with the OpenAI SDK, Langchain, Llamaindex, and more.\n", - "\n", - "Check out [**Portkey docs**](https://docs.portkey.ai/portkey-docs/integrations) for more info.\n", - "\n", - "* Here's a quick example of using Portkey with the OpenAI SDK:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "import openai\n", - "\n", - "# Set Portkey proxy as the base path\n", - "openai.api_base = \"https://api.portkey.ai/v1/proxy\"\n", - "\n", - "# Set Portkey headers\n", - "portkey_headers = {\n", - " \"x-portkey-api-key\": \"PORTKEY_API_KEY\",\n", - " \"x-portkey-mode\": \"proxy openai\"\n", - "}\n", - "\n", - "response = openai.Completion.create(\n", - " model = \"text-davinci-003\",\n", - " prompt = \"Translate the following English text to French: '{}'\",\n", - " headers = portkey_headers\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Join Discord**\n", - "\n", - "Collaborate with industry practitioners building LLM apps and get first-class Portkey support: [**Join here**](https://discord.com/invite/DD7vgKK299)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/portkey/api_resources/apis.py b/portkey/api_resources/apis.py deleted file mode 100644 index beb91529..00000000 --- a/portkey/api_resources/apis.py +++ /dev/null @@ -1,311 +0,0 @@ -from typing import Optional, Union, overload, Literal, List, Mapping, Any -from portkey.api_resources.base_client import APIClient -from .utils import ( - Modes, - Config, - ConfigSlug, - retrieve_config, - Params, - Message, - ChatCompletionChunk, - ChatCompletion, - TextCompletion, - TextCompletionChunk, - GenericResponse, -) - -from .streaming import Stream - -__all__ = ["Completions", "ChatCompletions"] - - -class APIResource: - _client: APIClient - # _get: Any - # _patch: Any - # _put: Any - # _delete: Any - - def __init__(self, client: APIClient) -> None: - self._client = client - # self._get = client.get - # self._patch = client.patch - # self._put = client.put - # self._delete = client.delete - - def _post(self, *args, **kwargs): - return self._client.post(*args, **kwargs) - - -class Completions(APIResource): - @classmethod - @overload - def create( - cls, - *, - prompt: Optional[str] = None, - config: Optional[Union[Config, str]] = None, - stream: Literal[True], - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, 
- top_k: Optional[int] = None, - top_p: Optional[float] = None, - **kwargs, - ) -> Stream[TextCompletionChunk]: - ... - - @classmethod - @overload - def create( - cls, - *, - prompt: Optional[str] = None, - config: Optional[Union[Config, str]] = None, - stream: Literal[False] = False, - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - **kwargs, - ) -> TextCompletion: - ... - - @classmethod - @overload - def create( - cls, - *, - prompt: Optional[str] = None, - config: Optional[Union[Config, str]] = None, - stream: bool = False, - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - **kwargs, - ) -> Union[TextCompletion, Stream[TextCompletionChunk]]: - ... - - @classmethod - def create( - cls, - *, - prompt: Optional[str] = None, - config: Optional[Union[Config, str]] = None, - stream: bool = False, - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - **kwargs, - ) -> Union[TextCompletion, Stream[TextCompletionChunk]]: - if config is None: - config = retrieve_config() - params = Params( - prompt=prompt, - temperature=temperature, - max_tokens=max_tokens, - top_k=top_k, - top_p=top_p, - **kwargs, - ) - _client = ( - APIClient() - if isinstance(config, str) - else APIClient(api_key=config.api_key, base_url=config.base_url) - ) - - if isinstance(config, str): - body = ConfigSlug(config=config) - return cls(_client)._post( - "/v1/complete", - body=body, - params=params, - cast_to=ChatCompletion, - stream_cls=Stream[TextCompletionChunk], - stream=stream, - mode="", - ) - - if config.mode == Modes.SINGLE.value: - return cls(_client)._post( - "/v1/complete", - body=config.llms, - mode=Modes.SINGLE.value, - params=params, - cast_to=TextCompletion, - stream_cls=Stream[TextCompletionChunk], - stream=stream, - ) - if config.mode == Modes.FALLBACK.value: - return cls(_client)._post( - "/v1/complete", - body=config.llms, - mode=Modes.FALLBACK, - params=params, - cast_to=TextCompletion, - stream_cls=Stream[TextCompletionChunk], - stream=stream, - ) - if config.mode == Modes.AB_TEST.value: - return cls(_client)._post( - "/v1/complete", - body=config.llms, - mode=Modes.AB_TEST, - params=params, - cast_to=TextCompletion, - stream_cls=Stream[TextCompletionChunk], - stream=stream, - ) - raise NotImplementedError("Mode not implemented.") - - -class ChatCompletions(APIResource): - @classmethod - @overload - def create( - cls, - *, - messages: Optional[List[Message]] = None, - config: Optional[Union[Config, str]] = None, - stream: Literal[True], - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - **kwargs, - ) -> Stream[ChatCompletionChunk]: - ... - - @classmethod - @overload - def create( - cls, - *, - messages: Optional[List[Message]] = None, - config: Optional[Union[Config, str]] = None, - stream: Literal[False] = False, - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - **kwargs, - ) -> ChatCompletion: - ... 
- - @classmethod - @overload - def create( - cls, - *, - messages: Optional[List[Message]] = None, - config: Optional[Union[Config, str]] = None, - stream: bool = False, - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - **kwargs, - ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]: - ... - - @classmethod - def create( - cls, - *, - messages: Optional[List[Message]] = None, - config: Optional[Union[Config, str]] = None, - stream: bool = False, - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - **kwargs, - ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]: - if config is None: - config = retrieve_config() - params = Params( - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - top_k=top_k, - top_p=top_p, - **kwargs, - ) - _client = ( - APIClient() - if isinstance(config, str) - else APIClient(api_key=config.api_key, base_url=config.base_url) - ) - - if isinstance(config, str): - body = ConfigSlug(config=config) - return cls(_client)._post( - "/v1/chatComplete", - body=body, - params=params, - cast_to=ChatCompletion, - stream_cls=Stream[ChatCompletionChunk], - stream=stream, - mode="", - ) - - if config.mode == Modes.SINGLE.value: - return cls(_client)._post( - "/v1/chatComplete", - body=config.llms, - mode=Modes.SINGLE.value, - params=params, - cast_to=ChatCompletion, - stream_cls=Stream[ChatCompletionChunk], - stream=stream, - ) - if config.mode == Modes.FALLBACK.value: - return cls(_client)._post( - "/v1/chatComplete", - body=config.llms, - mode=Modes.FALLBACK, - params=params, - cast_to=ChatCompletion, - stream_cls=Stream[ChatCompletionChunk], - stream=stream, - ) - if config.mode == Modes.AB_TEST.value: - return cls(_client)._post( - "/v1/chatComplete", - body=config.llms, - mode=Modes.AB_TEST, - params=params, - cast_to=ChatCompletion, - stream_cls=Stream[ChatCompletionChunk], - stream=stream, - ) - raise NotImplementedError("Mode not implemented.") - - -class Generations(APIResource): - @classmethod - def create( - cls, - *, - prompt_id: str, - config: Optional[Union[Config, str]] = None, - variables: Optional[Mapping[str, Any]] = None, - ) -> Union[GenericResponse, Stream[GenericResponse]]: - if config is None: - config = retrieve_config() - _client = ( - APIClient() - if isinstance(config, str) - else APIClient(api_key=config.api_key, base_url=config.base_url) - ) - body = {"variables": variables} - return cls(_client)._post( - f"/v1/prompts/{prompt_id}/generate", - body=body, - mode=None, - params=None, - cast_to=GenericResponse, - stream_cls=Stream[GenericResponse], - stream=False, - ) diff --git a/portkey/version.py b/portkey/version.py deleted file mode 100644 index dc178869..00000000 --- a/portkey/version.py +++ /dev/null @@ -1 +0,0 @@ -VERSION = "0.1.53" diff --git a/portkey/__init__.py b/portkey_ai/__init__.py similarity index 70% rename from portkey/__init__.py rename to portkey_ai/__init__.py index af71e053..43696d10 100644 --- a/portkey/__init__.py +++ b/portkey_ai/__init__.py @@ -1,6 +1,6 @@ import os -from typing import Optional, Union -from portkey.api_resources import ( +from typing import Mapping, Optional, Union +from portkey_ai.api_resources import ( LLMOptions, Modes, ModesLiteral, @@ -11,7 +11,7 @@ Message, PortkeyResponse, ChatCompletions, - Completions, + Completion, Params, Config, RetrySettings, @@ -19,19 +19,23 @@ ChatCompletionChunk, TextCompletion, 
TextCompletionChunk, - Generations, + createHeaders, + Prompt, + Portkey, ) -from portkey.version import VERSION -from portkey.api_resources.global_constants import ( +from portkey_ai.version import VERSION +from portkey_ai.api_resources.global_constants import ( PORTKEY_BASE_URL, PORTKEY_API_KEY_ENV, PORTKEY_PROXY_ENV, + PORTKEY_GATEWAY_URL, ) api_key = os.environ.get(PORTKEY_API_KEY_ENV) base_url = os.environ.get(PORTKEY_PROXY_ENV, PORTKEY_BASE_URL) -config: Optional[Union[Config, str]] = None +config: Optional[Union[Mapping, str]] = None mode: Optional[Union[Modes, ModesLiteral]] = None + __version__ = VERSION __all__ = [ "LLMOptions", @@ -44,15 +48,18 @@ "CacheLiteral", "Message", "ChatCompletions", - "Completions", + "Completion", "Params", "RetrySettings", "ChatCompletion", "ChatCompletionChunk", "TextCompletion", "TextCompletionChunk", - "Generations", "Config", "api_key", "base_url", + "PORTKEY_GATEWAY_URL", + "createHeaders", + "Prompt", + "Portkey", ] diff --git a/portkey/_portkey_scripts.py b/portkey_ai/_portkey_scripts.py similarity index 89% rename from portkey/_portkey_scripts.py rename to portkey_ai/_portkey_scripts.py index 3c5f597c..14107e72 100644 --- a/portkey/_portkey_scripts.py +++ b/portkey_ai/_portkey_scripts.py @@ -10,7 +10,7 @@ def main(): "--version", "-v", action="version", - version=f"portkey {VERSION}", + version=f"portkey_ai {VERSION}", help="Print version and exit.", ) diff --git a/portkey/api_resources/__init__.py b/portkey_ai/api_resources/__init__.py similarity index 72% rename from portkey/api_resources/__init__.py rename to portkey_ai/api_resources/__init__.py index 8e7b5160..76d994e2 100644 --- a/portkey/api_resources/__init__.py +++ b/portkey_ai/api_resources/__init__.py @@ -1,5 +1,12 @@ """""" -from .apis import ChatCompletions, Completions, Generations +from .apis import ( + Completion, + ChatCompletion, + Generations, + Prompt, + Feedback, + createHeaders, +) from .utils import ( Modes, ModesLiteral, @@ -13,13 +20,14 @@ Params, Config, RetrySettings, - ChatCompletion, + ChatCompletions, ChatCompletionChunk, TextCompletion, TextCompletionChunk, ) +from .client import Portkey -from portkey.version import VERSION +from portkey_ai.version import VERSION __version__ = VERSION __all__ = [ @@ -33,7 +41,7 @@ "CacheLiteral", "Message", "ChatCompletions", - "Completions", + "Completion", "Params", "Config", "RetrySettings", @@ -42,4 +50,8 @@ "TextCompletion", "TextCompletionChunk", "Generations", + "Prompt", + "Feedback", + "createHeaders", + "Portkey", ] diff --git a/portkey_ai/api_resources/apis/__init__.py b/portkey_ai/api_resources/apis/__init__.py new file mode 100644 index 00000000..4de65ec8 --- /dev/null +++ b/portkey_ai/api_resources/apis/__init__.py @@ -0,0 +1,18 @@ +from .chat_complete import ChatCompletion +from .complete import Completion +from .generation import Generations, Prompt +from .feedback import Feedback +from .create_headers import createHeaders +from .post import Post +from .embeddings import Embeddings + +__all__ = [ + "Completion", + "ChatCompletion", + "Generations", + "Feedback", + "Prompt", + "createHeaders", + "Post", + "Embeddings", +] diff --git a/portkey_ai/api_resources/apis/api_resource.py b/portkey_ai/api_resources/apis/api_resource.py new file mode 100644 index 00000000..3ccf9a26 --- /dev/null +++ b/portkey_ai/api_resources/apis/api_resource.py @@ -0,0 +1,19 @@ +from portkey_ai.api_resources.base_client import APIClient + + +class APIResource: + _client: APIClient + # _get: Any + # _patch: Any + # _put: Any + # _delete: 
Any + + def __init__(self, client: APIClient) -> None: + self._client = client + # self._get = client.get + # self._patch = client.patch + # self._put = client.put + # self._delete = client.delete + + def _post(self, *args, **kwargs): + return self._client._post(*args, **kwargs) diff --git a/portkey_ai/api_resources/apis/chat_complete.py b/portkey_ai/api_resources/apis/chat_complete.py new file mode 100644 index 00000000..0f4ae2f6 --- /dev/null +++ b/portkey_ai/api_resources/apis/chat_complete.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import json +from typing import Mapping, Optional, Union, overload, Literal, List +from portkey_ai.api_resources.base_client import APIClient +from portkey_ai.api_resources.utils import ( + PortkeyApiPaths, + Message, + ChatCompletionChunk, + ChatCompletions, +) + +from portkey_ai.api_resources.streaming import Stream +from portkey_ai.api_resources.apis.api_resource import APIResource + + +__all__ = ["ChatCompletion"] + + +class ChatCompletion(APIResource): + completions: Completions + + def __init__(self, client: APIClient) -> None: + super().__init__(client) + self.completions = Completions(client) + + +class Completions(APIResource): + def __init__(self, client: APIClient) -> None: + super().__init__(client) + + @overload + def create( + self, + *, + messages: Optional[List[Message]] = None, + config: Optional[Union[Mapping, str]] = None, + stream: Literal[True], + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> Stream[ChatCompletionChunk]: + ... + + @overload + def create( + self, + *, + messages: Optional[List[Message]] = None, + config: Optional[Union[Mapping, str]] = None, + stream: Literal[False] = False, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> ChatCompletions: + ... + + @overload + def create( + self, + *, + messages: Optional[List[Message]] = None, + config: Optional[Union[Mapping, str]] = None, + stream: bool = False, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> Union[ChatCompletions, Stream[ChatCompletionChunk]]: + ... 
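# --- Hypothetical usage sketch (not part of this patch) ---
# A minimal example of how the ChatCompletion/Completions resource above is
# expected to be called through the Portkey client added later in client.py.
# The API key, virtual key and model name are placeholder assumptions, not
# values taken from this diff.
from portkey_ai import Portkey

client = Portkey(api_key="PORTKEY_API_KEY", virtual_key="openai-virtual-key")
completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "Say this is a test"}],
    model="gpt-3.5-turbo",  # forwarded via **kwargs into the request body
    max_tokens=64,
)
print(completion.choices[0].message)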
+ + def create( + self, + *, + messages: Optional[List[Message]] = None, + stream: bool = False, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> Union[ChatCompletions, Stream[ChatCompletionChunk]]: + body = dict( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + top_k=top_k, + top_p=top_p, + stream=stream, + **kwargs, + ) + + return self._post( + PortkeyApiPaths.CHAT_COMPLETE_API, + body=body, + params=None, + cast_to=ChatCompletions, + stream_cls=Stream[ChatCompletionChunk], + stream=stream, + headers={}, + ) + + def _get_config_string(self, config: Union[Mapping, str]) -> str: + return config if isinstance(config, str) else json.dumps(config) diff --git a/portkey_ai/api_resources/apis/complete.py b/portkey_ai/api_resources/apis/complete.py new file mode 100644 index 00000000..8e8e503c --- /dev/null +++ b/portkey_ai/api_resources/apis/complete.py @@ -0,0 +1,87 @@ +from typing import Optional, Union, overload, Literal +from portkey_ai.api_resources.base_client import APIClient +from portkey_ai.api_resources.utils import ( + PortkeyApiPaths, + TextCompletion, + TextCompletionChunk, +) + +from portkey_ai.api_resources.streaming import Stream +from portkey_ai.api_resources.apis.api_resource import APIResource + + +class Completion(APIResource): + def __init__(self, client: APIClient) -> None: + super().__init__(client) + + @overload + def create( + self, + *, + prompt: Optional[str] = None, + stream: Literal[True], + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> Stream[TextCompletionChunk]: + ... + + @overload + def create( + self, + *, + prompt: Optional[str] = None, + stream: Literal[False] = False, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> TextCompletion: + ... + + @overload + def create( + self, + *, + prompt: Optional[str] = None, + stream: bool = False, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> Union[TextCompletion, Stream[TextCompletionChunk]]: + ... 
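# --- Hypothetical usage sketch (not part of this patch) ---
# Non-streaming and streaming calls against the text Completion resource
# above, again via the Portkey client. Key values and the prompt text are
# placeholder assumptions.
from portkey_ai import Portkey

client = Portkey(api_key="PORTKEY_API_KEY", virtual_key="openai-virtual-key")

# Non-streaming: returns a TextCompletion
text = client.completions.create(prompt="Why is the sky blue?", max_tokens=64)
print(text.choices[0].text)

# Streaming: returns a Stream[TextCompletionChunk] that can be iterated
for chunk in client.completions.create(prompt="Why is the sky blue?", stream=True):
    print(chunk.choices[0].text or "", end="")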
+ + def create( + self, + *, + prompt: Optional[str] = None, + stream: bool = False, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> Union[TextCompletion, Stream[TextCompletionChunk]]: + body = dict( + prompt=prompt, + temperature=temperature, + max_tokens=max_tokens, + top_k=top_k, + top_p=top_p, + stream=stream, + **kwargs, + ) + return self._post( + PortkeyApiPaths.TEXT_COMPLETE_API, + body=body, + params=None, + cast_to=TextCompletion, + stream_cls=Stream[TextCompletionChunk], + stream=stream, + headers={}, + ) diff --git a/portkey_ai/api_resources/apis/create_headers.py b/portkey_ai/api_resources/apis/create_headers.py new file mode 100644 index 00000000..84a682f9 --- /dev/null +++ b/portkey_ai/api_resources/apis/create_headers.py @@ -0,0 +1,29 @@ +from typing import Mapping +import json +from portkey_ai.api_resources.utils import get_portkey_header + +__all__ = ["createHeaders"] + + +class CreateHeaders: + def __init__(self, **kwargs) -> None: # type: ignore + self.kwargs = kwargs + + def json(self) -> Mapping: + headers = {} + for k, v in self.kwargs.items(): + if k == "mode" and "proxy" not in v: + v = f"proxy {v}" + k = "-".join(k.split("_")) + if isinstance(v, Mapping): + v = json.dumps(v) + if v: + if k.lower() != "authorization": + headers[get_portkey_header(k)] = str(v) + else: + headers[k] = str(v) + return headers + + +def createHeaders(**kwargs): + return CreateHeaders(**kwargs).json() diff --git a/portkey_ai/api_resources/apis/embeddings.py b/portkey_ai/api_resources/apis/embeddings.py new file mode 100644 index 00000000..dc22a21d --- /dev/null +++ b/portkey_ai/api_resources/apis/embeddings.py @@ -0,0 +1,21 @@ +from typing import Optional +from portkey_ai.api_resources.apis.api_resource import APIResource +from portkey_ai.api_resources.base_client import APIClient +from portkey_ai.api_resources.utils import PortkeyApiPaths, GenericResponse + + +class Embeddings(APIResource): + def __init__(self, client: APIClient) -> None: + super().__init__(client) + + def create(self, *, input: str, model: Optional[str] = None) -> GenericResponse: + body = {"input": input, "model": model} + return self._post( + PortkeyApiPaths.EMBEDDING_API, + body=body, + params=None, + cast_to=GenericResponse, + stream_cls=None, + stream=False, + headers={}, + ) diff --git a/portkey_ai/api_resources/apis/feedback.py b/portkey_ai/api_resources/apis/feedback.py new file mode 100644 index 00000000..8038ad5e --- /dev/null +++ b/portkey_ai/api_resources/apis/feedback.py @@ -0,0 +1,41 @@ +from typing import Optional, Dict, Any, List +from portkey_ai.api_resources.apis.api_resource import APIResource +from portkey_ai.api_resources.base_client import APIClient +from portkey_ai.api_resources.streaming import Stream +from portkey_ai.api_resources.utils import GenericResponse, PortkeyApiPaths + + +class Feedback(APIResource): + def __init__(self, client: APIClient) -> None: + super().__init__(client) + + def create( + self, + *, + trace_id: Optional[str] = None, + value: Optional[int] = None, + weight: Optional[float] = None, + metadata: Optional[Dict[str, Any]] = None + ) -> GenericResponse: + body = dict(trace_id=trace_id, value=value, weight=weight, metadata=metadata) + return self._post( + PortkeyApiPaths.FEEDBACK_API, + body=body, + params=None, + cast_to=GenericResponse, + stream_cls=Stream[GenericResponse], + stream=False, + headers={}, + ) + + def bulk_create(self, *, feedbacks: List[Dict[str, 
Any]]) -> GenericResponse: + body = feedbacks + return self._post( + PortkeyApiPaths.FEEDBACK_API, + body=body, + params=None, + cast_to=GenericResponse, + stream_cls=Stream[GenericResponse], + stream=False, + headers={}, + ) diff --git a/portkey_ai/api_resources/apis/generation.py b/portkey_ai/api_resources/apis/generation.py new file mode 100644 index 00000000..f2b61bc9 --- /dev/null +++ b/portkey_ai/api_resources/apis/generation.py @@ -0,0 +1,140 @@ +from __future__ import annotations +import warnings +from typing import Literal, Optional, Union, Mapping, Any, overload +from portkey_ai.api_resources.base_client import APIClient +from portkey_ai.api_resources.utils import ( + retrieve_config, + GenericResponse, +) + +from portkey_ai.api_resources.streaming import Stream +from portkey_ai.api_resources.apis.api_resource import APIResource + + +class Generations(APIResource): + def __init__(self, client: APIClient) -> None: + super().__init__(client) + + def create( + self, + *, + prompt_id: str, + config: Optional[Union[Mapping, str]] = None, + variables: Optional[Mapping[str, Any]] = None, + ) -> Union[GenericResponse, Stream[GenericResponse]]: + warning_message = "This API has been deprecated. Please use the Prompt API for the saved prompt." # noqa: E501 + warnings.warn( + warning_message, + DeprecationWarning, + stacklevel=2, + ) + if config is None: + config = retrieve_config() + body = {"variables": variables} + response = self._post( + f"/v1/prompts/{prompt_id}/generate", + body=body, + mode=None, + params=None, + cast_to=GenericResponse, + stream_cls=Stream[GenericResponse], + stream=False, + ) + response["warning"] = warning_message + return response + + +class Prompt(APIResource): + completions: Completions + + def __init__(self, client: APIClient) -> None: + super().__init__(client) + self.completions = Completions(client) + + +class Completions(APIResource): + def __init__(self, client: APIClient) -> None: + super().__init__(client) + + @overload + def create( + self, + *, + prompt_id: str, + variables: Optional[Mapping[str, Any]] = None, + config: Optional[Union[Mapping, str]] = None, + stream: Literal[True], + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> Stream[GenericResponse]: + ... + + @overload + def create( + self, + *, + prompt_id: str, + variables: Optional[Mapping[str, Any]] = None, + config: Optional[Union[Mapping, str]] = None, + stream: Literal[False] = False, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> GenericResponse: + ... + + @overload + def create( + self, + *, + prompt_id: str, + variables: Optional[Mapping[str, Any]] = None, + config: Optional[Union[Mapping, str]] = None, + stream: bool = False, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> Union[GenericResponse, Stream[GenericResponse]]: + ... 
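# --- Hypothetical usage sketch (not part of this patch) ---
# Calling a saved prompt through the new Prompt -> Completions resource above.
# The prompt_id and variable values are placeholders; the client attribute
# name follows the `self.prompts = apis.Prompt(self)` assignment in client.py.
from portkey_ai import Portkey

client = Portkey(api_key="PORTKEY_API_KEY")
response = client.prompts.completions.create(
    prompt_id="pp-example-prompt-id",
    variables={"user_input": "Tell me a joke"},
    max_tokens=64,
)
print(response)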
+ + def create( + self, + *, + prompt_id: str, + variables: Optional[Mapping[str, Any]] = None, + config: Optional[Union[Mapping, str]] = None, + stream: bool = False, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs, + ) -> Union[GenericResponse, Stream[GenericResponse]]: + """Prompt completions Method""" + if config is None: + config = retrieve_config() + body = { + "variables": variables, + "temperature": temperature, + "max_tokens": max_tokens, + "top_k": top_k, + "top_p": top_p, + "stream": stream, + **kwargs, + } + return self._post( + f"/prompts/{prompt_id}/completions", + body=body, + params=None, + cast_to=GenericResponse, + stream_cls=Stream[GenericResponse], + stream=stream, + headers={}, + ) diff --git a/portkey_ai/api_resources/apis/post.py b/portkey_ai/api_resources/apis/post.py new file mode 100644 index 00000000..00901e01 --- /dev/null +++ b/portkey_ai/api_resources/apis/post.py @@ -0,0 +1,59 @@ +from typing import Union, overload, Literal + +from portkey_ai.api_resources.base_client import APIClient + +from portkey_ai.api_resources.streaming import Stream +from portkey_ai.api_resources.apis.api_resource import APIResource +from portkey_ai.api_resources.utils import GenericResponse + + +class Post(APIResource): + def __init__(self, client: APIClient) -> None: + super().__init__(client) + + @overload + def create( + self, + *, + url: str, + stream: Literal[True], + **kwargs, + ) -> Stream[GenericResponse]: + ... + + @overload + def create( + self, + *, + url: str, + stream: Literal[False] = False, + **kwargs, + ) -> GenericResponse: + ... + + @overload + def create( + self, + *, + url: str, + stream: bool = False, + **kwargs, + ) -> Union[GenericResponse, Stream[GenericResponse]]: + ... 
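# --- Hypothetical usage sketch (not part of this patch) ---
# The Post resource above backs the generic Portkey.post() passthrough defined
# in client.py, useful for endpoints without a dedicated method. The URL and
# body fields below are placeholder assumptions.
from portkey_ai import Portkey

client = Portkey(api_key="PORTKEY_API_KEY", virtual_key="openai-virtual-key")
resp = client.post(
    "/chat/completions",
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=32,
)
print(resp)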
+ + def create( + self, + *, + url: str, + stream: bool = False, + **kwargs, + ) -> Union[GenericResponse, Stream[GenericResponse]]: + return self._post( + url, + body=kwargs, + params=None, + cast_to=GenericResponse, + stream_cls=Stream[GenericResponse], + stream=stream, + headers={}, + ) diff --git a/portkey/api_resources/base_client.py b/portkey_ai/api_resources/base_client.py similarity index 76% rename from portkey/api_resources/base_client.py rename to portkey_ai/api_resources/base_client.py index a5f1826c..0403ebc2 100644 --- a/portkey/api_resources/base_client.py +++ b/portkey_ai/api_resources/base_client.py @@ -8,7 +8,6 @@ Union, Mapping, cast, - List, Optional, Type, overload, @@ -17,25 +16,16 @@ ) import httpx import platform + +from portkey_ai.api_resources.apis.create_headers import createHeaders from .global_constants import PORTKEY_HEADER_PREFIX -from .utils import ( - remove_empty_values, - Body, - ConfigSlug, - Options, - RequestConfig, - OverrideParams, - ProviderOptions, - Params, - Constructs, - PortkeyApiPaths, -) +from .utils import remove_empty_values, Options from .exceptions import ( APIStatusError, APITimeoutError, APIConnectionError, ) -from portkey.version import VERSION +from portkey_ai.version import VERSION from .utils import ResponseT, make_status_error, default_api_key, default_base_url from .common_types import StreamT from .streaming import Stream @@ -58,14 +48,39 @@ def __init__( *, base_url: Optional[str] = None, api_key: Optional[str] = None, + virtual_key: Optional[str] = None, + config: Optional[Union[Mapping, str]] = None, + provider: Optional[str] = None, + trace_id: Optional[str] = None, + metadata: Optional[str] = None, + **kwargs, ) -> None: self.api_key = api_key or default_api_key() self.base_url = base_url or default_base_url() + self.virtual_key = virtual_key + self.config = config + self.provider = provider + self.trace_id = trace_id + self.metadata = metadata + self.kwargs = kwargs + + self.custom_headers = createHeaders( + virtual_key=virtual_key, + config=config, + provider=provider, + trace_id=trace_id, + metadata=metadata, + **kwargs, + ) self._client = httpx.Client( base_url=self.base_url, - headers={"Accept": "application/json"}, + headers={ + "Accept": "application/json", + }, ) + self.response_headers: httpx.Headers | None = None + def _serialize_header_values( self, headers: Optional[Mapping[str, Any]] ) -> Dict[str, str]: @@ -83,79 +98,76 @@ def custom_auth(self) -> Optional[httpx.Auth]: return None @overload - def post( + def _post( self, path: str, *, - body: List[Body], - mode: str, + body: Mapping[str, Any], cast_to: Type[ResponseT], stream: Literal[True], stream_cls: type[StreamT], - params: Params, + params: Mapping[str, str], + headers: Mapping[str, str], ) -> StreamT: ... @overload - def post( + def _post( self, path: str, *, - body: List[Body], - mode: str, + body: Mapping[str, Any], cast_to: Type[ResponseT], stream: Literal[False], stream_cls: type[StreamT], - params: Params, + params: Mapping[str, str], + headers: Mapping[str, str], ) -> ResponseT: ... @overload - def post( + def _post( self, path: str, *, - body: List[Body], - mode: str, + body: Mapping[str, Any], cast_to: Type[ResponseT], stream: bool, stream_cls: type[StreamT], - params: Params, + params: Mapping[str, str], + headers: Mapping[str, str], ) -> Union[ResponseT, StreamT]: ... 
- def post( + def _post( self, path: str, *, - body: Union[List[Body], Any, ConfigSlug], - mode: str, + body: Mapping[str, Any], cast_to: Type[ResponseT], stream: bool, stream_cls: type[StreamT], - params: Params, + params: Mapping[str, str], + headers: Mapping[str, str], ) -> Union[ResponseT, StreamT]: - if path in [PortkeyApiPaths.CHAT_COMPLETION, PortkeyApiPaths.COMPLETION]: - body = cast(List[Body], body) - opts = self._construct( + if path.endswith("/generate"): + opts = self._construct_generate_options( method="post", url=path, body=body, - mode=mode, stream=stream, params=params, + headers=headers, ) - elif path.endswith("/generate"): - opts = self._construct_generate_options( + else: + opts = self._construct( method="post", url=path, body=body, - mode=mode, stream=stream, params=params, + headers=headers, ) - else: - raise NotImplementedError(f"This API path `{path}` is not implemented.") res = self._request( options=opts, @@ -171,16 +183,16 @@ def _construct_generate_options( method: str, url: str, body: Any, - mode: str, stream: bool, - params: Params, + params: Mapping[str, str], + headers: Mapping[str, str], ) -> Options: opts = Options.construct() opts.method = method opts.url = url json_body = body opts.json_body = remove_empty_values(json_body) - opts.headers = None + opts.headers = remove_empty_values(headers) return opts def _construct( @@ -188,39 +200,18 @@ def _construct( *, method: str, url: str, - body: Union[List[Body], ConfigSlug], - mode: str, + body: Mapping[str, Any], stream: bool, - params: Params, + params: Mapping[str, str], + headers: Mapping[str, str], ) -> Options: opts = Options.construct() opts.method = method opts.url = url - params_dict = {} if params is None else params.dict() - config = ( - body.config - if isinstance(body, ConfigSlug) - else self._config(mode, body).dict() - ) - json_body = { - "config": config, - "params": {**params_dict, "stream": stream}, - } - opts.json_body = remove_empty_values(json_body) - opts.headers = None + opts.json_body = remove_empty_values(body) + opts.headers = remove_empty_values(headers) return opts - def _config(self, mode: str, body: List[Body]) -> RequestConfig: - config = RequestConfig(mode=mode, options=[]) - for i in body: - override_params = cast(OverrideParams, i) - constructs = cast(Constructs, i) - options = ProviderOptions( - override_params=override_params, **constructs.dict() - ) - config.options.append(options) - return config - @property def _default_headers(self) -> Mapping[str, str]: return { @@ -232,20 +223,24 @@ def _default_headers(self) -> Mapping[str, str]: } def _build_headers(self, options: Options) -> httpx.Headers: - custom_headers = options.headers or {} - headers_dict = self._merge_mappings(self._default_headers, custom_headers) + option_headers = options.headers or {} + headers_dict = self._merge_mappings( + self._default_headers, option_headers, self.custom_headers + ) headers = httpx.Headers(headers_dict) return headers def _merge_mappings( self, - obj1: Mapping[str, Any], - obj2: Mapping[str, Any], + *args, ) -> Dict[str, Any]: """Merge two mappings of the given type In cases with duplicate keys the second mapping takes precedence. 
""" - return {**obj1, **obj2} + mapped_headers = {} + for i in args: + mapped_headers.update(i) + return mapped_headers def is_closed(self) -> bool: return self._client.is_closed @@ -336,6 +331,8 @@ def _request( raise APITimeoutError(request=request) from err except Exception as err: raise APIConnectionError(request=request) from err + + self.response_headers = res.headers if stream or res.headers["content-type"] == "text/event-stream": if stream_cls is None: raise MissingStreamClassError() @@ -344,10 +341,15 @@ def _request( ) return stream_response - response = cast( - ResponseT, - cast_to(**res.json()), + response = ( + cast( + ResponseT, + cast_to(**res.json()), + ) + if not isinstance(cast_to, httpx.Response) + else cast(ResponseT, res) ) + response._headers = res.headers # type: ignore return response def _extract_stream_chunk_type(self, stream_cls: Type) -> type: diff --git a/portkey_ai/api_resources/client.py b/portkey_ai/api_resources/client.py new file mode 100644 index 00000000..7bfd3be7 --- /dev/null +++ b/portkey_ai/api_resources/client.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +from typing import Mapping, Optional, Union +from portkey_ai.api_resources import apis +from portkey_ai.api_resources.base_client import APIClient + + +class Portkey(APIClient): + completions: apis.Completion + chat: apis.ChatCompletion + generations: apis.Generations + prompt: apis.Prompt + embeddings: apis.Embeddings + + def __init__( + self, + *, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + virtual_key: Optional[str] = None, + config: Optional[Union[Mapping, str]] = None, + provider: Optional[str] = None, + trace_id: Optional[str] = None, + metadata: Optional[str] = None, + **kwargs, + ) -> None: + super().__init__( + api_key=api_key, + base_url=base_url, + virtual_key=virtual_key, + config=config, + provider=provider, + trace_id=trace_id, + metadata=metadata, + **kwargs, + ) + + self.completions = apis.Completion(self) + self.chat = apis.ChatCompletion(self) + self.generations = apis.Generations(self) + self.prompts = apis.Prompt(self) + self.embeddings = apis.Embeddings(self) + self.feedback = apis.Feedback(self) + + def copy( + self, + *, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + virtual_key: Optional[str] = None, + config: Optional[Union[Mapping, str]] = None, + provider: Optional[str] = None, + trace_id: Optional[str] = None, + metadata: Optional[str] = None, + **kwargs, + ) -> Portkey: + return self.__class__( + api_key=api_key or self.api_key, + base_url=base_url or self.base_url, + virtual_key=virtual_key or self.virtual_key, + config=config or self.config, + provider=provider or self.provider, + trace_id=trace_id or self.trace_id, + metadata=metadata or self.metadata, + **self.kwargs, + **kwargs, + ) + + def post(self, url: str, **kwargs): + return apis.Post(self).create(url=url, **kwargs) + + with_options = copy diff --git a/portkey/api_resources/common_types.py b/portkey_ai/api_resources/common_types.py similarity index 58% rename from portkey/api_resources/common_types.py rename to portkey_ai/api_resources/common_types.py index fe071744..7c0e5652 100644 --- a/portkey/api_resources/common_types.py +++ b/portkey_ai/api_resources/common_types.py @@ -1,8 +1,12 @@ from typing import TypeVar, Union + +import httpx from .streaming import Stream from .utils import ChatCompletionChunk, TextCompletionChunk, GenericResponse StreamT = TypeVar( "StreamT", - bound=Stream[Union[ChatCompletionChunk, TextCompletionChunk, 
GenericResponse]], + bound=Stream[ + Union[ChatCompletionChunk, TextCompletionChunk, GenericResponse, httpx.Response] + ], ) diff --git a/portkey/api_resources/exceptions.py b/portkey_ai/api_resources/exceptions.py similarity index 100% rename from portkey/api_resources/exceptions.py rename to portkey_ai/api_resources/exceptions.py diff --git a/portkey/api_resources/global_constants.py b/portkey_ai/api_resources/global_constants.py similarity index 92% rename from portkey/api_resources/global_constants.py rename to portkey_ai/api_resources/global_constants.py index 0faf50c1..68501dba 100644 --- a/portkey/api_resources/global_constants.py +++ b/portkey_ai/api_resources/global_constants.py @@ -29,7 +29,7 @@ VERSION = "0.1.0" DEFAULT_TIMEOUT = 60 PORTKEY_HEADER_PREFIX = "x-portkey-" -PORTKEY_BASE_URL = "https://api.portkey.ai" - +PORTKEY_BASE_URL = "https://api.portkey.ai/v1" +PORTKEY_GATEWAY_URL = PORTKEY_BASE_URL PORTKEY_API_KEY_ENV = "PORTKEY_API_KEY" PORTKEY_PROXY_ENV = "PORTKEY_PROXY" diff --git a/portkey/api_resources/streaming.py b/portkey_ai/api_resources/streaming.py similarity index 95% rename from portkey/api_resources/streaming.py rename to portkey_ai/api_resources/streaming.py index 34bd6874..1e551a98 100644 --- a/portkey/api_resources/streaming.py +++ b/portkey_ai/api_resources/streaming.py @@ -45,11 +45,7 @@ def data(self) -> str: return self._data def json(self) -> Any: - return ( - {"model": "", "choices": [{}]} - if self.data == "[DONE]" - else json.loads(self.data) - ) + return json.loads(self.data) def __repr__(self) -> str: return f"ServerSentEvent(event={self.event}, data={self.data}, id={self.id},\ @@ -156,8 +152,12 @@ def _iter_events(self) -> Iterator[ServerSentEvent]: def __stream__(self) -> Iterator[ResponseT]: response = self.response for sse in self._iter_events(): + if sse.data.startswith("[DONE]"): + break if sse.event is None: - yield cast(ResponseT, self._cast_to(**sse.json())) + yield cast(ResponseT, self._cast_to(**sse.json())) if not isinstance( + self._cast_to, httpx.Response + ) else cast(ResponseT, sse) if sse.event == "ping": continue diff --git a/portkey/api_resources/utils.py b/portkey_ai/api_resources/utils.py similarity index 84% rename from portkey/api_resources/utils.py rename to portkey_ai/api_resources/utils.py index 90442969..a4825b5b 100644 --- a/portkey/api_resources/utils.py +++ b/portkey_ai/api_resources/utils.py @@ -4,7 +4,7 @@ from enum import Enum, EnumMeta from typing_extensions import TypedDict import httpx -import portkey +import portkey_ai from pydantic import BaseModel, validator from .exceptions import ( APIStatusError, @@ -20,10 +20,10 @@ from .global_constants import ( MISSING_API_KEY_ERROR_MESSAGE, MISSING_BASE_URL, - MISSING_CONFIG_MESSAGE, MISSING_MODE_MESSAGE, PORTKEY_BASE_URL, PORTKEY_API_KEY_ENV, + PORTKEY_HEADER_PREFIX, PORTKEY_PROXY_ENV, ) @@ -47,7 +47,7 @@ class CacheType(str, Enum, metaclass=MetaEnum): ResponseT = TypeVar( "ResponseT", - bound="Union[ChatCompletionChunk, ChatCompletion, TextCompletionChunk, TextCompletion, GenericResponse]", # noqa: E501 + bound="Union[ChatCompletionChunk, ChatCompletions, TextCompletionChunk, TextCompletion, GenericResponse, httpx.Response]", # noqa: E501 ) @@ -95,9 +95,12 @@ class ApiType(str, Enum, metaclass=MetaEnum): class PortkeyApiPaths(str, Enum, metaclass=MetaEnum): - CHAT_COMPLETION = "/v1/chatComplete" - COMPLETION = "/v1/complete" - GENERATION = "/v1/prompts/{prompt_id}/generate" + GENERATION = "/prompts/{prompt_id}/generate" + CHAT_COMPLETE_API = "/chat/completions" + 
TEXT_COMPLETE_API = "/completions" + PROMPT_API = "/prompt/complete" + FEEDBACK_API = "/feedback" + EMBEDDING_API = "/embeddings" class Options(BaseModel): @@ -236,7 +239,7 @@ class ConfigSlug(BaseModel): config: str -class Params(ConversationInput, ModelParams, extra="forbid"): +class Params(Constructs, ConversationInput, ModelParams, extra="forbid"): ... @@ -255,7 +258,7 @@ def __str__(self): # Models for Chat Stream -class Delta(BaseModel): +class Delta(BaseModel, extra="allow"): role: Optional[str] = None content: Optional[str] = "" @@ -269,7 +272,7 @@ def get(self, key: str, default: Optional[Any] = None): return getattr(self, key, None) or default -class StreamChoice(BaseModel): +class StreamChoice(BaseModel, extra="allow"): index: Optional[int] = None delta: Union[Delta, Dict[Any, Any]] = {} finish_reason: Optional[str] = None @@ -284,7 +287,7 @@ def __getitem__(self, key): return getattr(self, key, None) -class ChatCompletionChunk(BaseModel): +class ChatCompletionChunk(BaseModel, extra="allow"): id: Optional[str] = None object: Optional[str] = None created: Optional[int] = None @@ -302,7 +305,7 @@ def get(self, key: str, default: Optional[Any] = None): # Models for Chat Non-stream -class ChatChoice(BaseModel): +class ChatChoice(BaseModel, extra="allow"): index: Optional[int] = None message: Optional[Message] = None finish_reason: Optional[str] = None @@ -317,7 +320,7 @@ def get(self, key: str, default: Optional[Any] = None): return getattr(self, key, None) or default -class Usage(BaseModel): +class Usage(BaseModel, extra="allow"): prompt_tokens: Optional[int] = None completion_tokens: Optional[int] = None total_tokens: Optional[int] = None @@ -332,15 +335,17 @@ def get(self, key: str, default: Optional[Any] = None): return getattr(self, key, None) or default -class ChatCompletion(BaseModel): +class ChatCompletions(BaseModel, extra="allow"): id: Optional[str] = None object: Optional[str] = None created: Optional[int] = None model: Optional[str] = None choices: Union[List[ChatChoice], Dict[Any, Any]] = {} usage: Optional[Usage] = None + _headers: Optional[httpx.Headers] = None def __str__(self): + del self._headers return json.dumps(self.dict(), indent=4) def __getitem__(self, key): @@ -349,9 +354,12 @@ def __getitem__(self, key): def get(self, key: str, default: Optional[Any] = None): return getattr(self, key, None) or default + def get_headers(self) -> Optional[Dict[str, str]]: + return parse_headers(self._headers) + # Models for text completion Non-stream -class TextChoice(BaseModel): +class TextChoice(BaseModel, extra="allow"): index: Optional[int] = None text: Optional[str] = None logprobs: Any @@ -367,15 +375,17 @@ def get(self, key: str, default: Optional[Any] = None): return getattr(self, key, None) or default -class TextCompletion(BaseModel): +class TextCompletion(BaseModel, extra="allow"): id: Optional[str] = None object: Optional[str] = None created: Optional[int] = None model: Optional[str] = None choices: Union[List[TextChoice], Dict[Any, Any]] = {} usage: Optional[Usage] = None + _headers: Optional[httpx.Headers] = None def __str__(self): + del self._headers return json.dumps(self.dict(), indent=4) def __getitem__(self, key): @@ -384,9 +394,12 @@ def __getitem__(self, key): def get(self, key: str, default: Optional[Any] = None): return getattr(self, key, None) or default + def get_headers(self) -> Optional[Dict[str, str]]: + return parse_headers(self._headers) + # Models for text completion stream -class TextCompletionChunk(BaseModel): +class 
TextCompletionChunk(BaseModel, extra="allow"): id: Optional[str] = None object: Optional[str] = None created: Optional[int] = None @@ -405,7 +418,16 @@ def get(self, key: str, default: Optional[Any] = None): class GenericResponse(BaseModel, extra="allow"): success: Optional[bool] - data: Optional[Mapping[str, Any]] + data: Optional[Any] + warning: Optional[str] + _headers: Optional[httpx.Headers] = None + + def __str__(self): + del self._headers + return json.dumps(self.dict(), indent=4) + + def get_headers(self) -> Optional[Dict[str, str]]: + return parse_headers(self._headers) def apikey_from_env(provider: Union[ProviderTypes, ProviderTypesLiteral, str]) -> str: @@ -480,8 +502,8 @@ def parse_llms(cls, llms): def default_api_key() -> str: - if portkey.api_key: - return portkey.api_key + if portkey_ai.api_key: + return portkey_ai.api_key env_api_key = os.environ.get(PORTKEY_API_KEY_ENV, "") if env_api_key: return env_api_key @@ -489,8 +511,8 @@ def default_api_key() -> str: def default_base_url() -> str: - if portkey.base_url: - return portkey.base_url + if portkey_ai.base_url: + return portkey_ai.base_url env_base_url = os.environ.get(PORTKEY_PROXY_ENV, PORTKEY_BASE_URL) if env_base_url: @@ -498,13 +520,31 @@ def default_base_url() -> str: raise ValueError(MISSING_BASE_URL) -def retrieve_config() -> Union[Config, str]: - if portkey.config: - return portkey.config - raise ValueError(MISSING_CONFIG_MESSAGE) +def retrieve_config() -> Union[Mapping, str]: + if portkey_ai.config: + return portkey_ai.config + # raise ValueError(MISSING_CONFIG_MESSAGE) + return {} def retrieve_mode() -> Union[Modes, ModesLiteral, str]: - if portkey.mode: - return portkey.mode + if portkey_ai.mode: + return portkey_ai.mode raise ValueError(MISSING_MODE_MESSAGE) + + +def get_portkey_header(key: str) -> str: + return f"{PORTKEY_HEADER_PREFIX}{key}" + + +def parse_headers(headers: Optional[httpx.Headers]) -> dict: + if headers is None: + return {} + + _headers = {} + for k, v in headers.items(): + if k.startswith(PORTKEY_HEADER_PREFIX): + k = k.replace(PORTKEY_HEADER_PREFIX, "") + _headers[k] = v + + return _headers diff --git a/portkey/api_resources/client.py b/portkey_ai/llms/__init__.py similarity index 100% rename from portkey/api_resources/client.py rename to portkey_ai/llms/__init__.py diff --git a/portkey_ai/llms/langchain/__init__.py b/portkey_ai/llms/langchain/__init__.py new file mode 100644 index 00000000..45770de5 --- /dev/null +++ b/portkey_ai/llms/langchain/__init__.py @@ -0,0 +1,4 @@ +from .chat import ChatPortkey +from .completion import PortkeyLLM + +__all__ = ["ChatPortkey", "PortkeyLLM"] diff --git a/portkey_ai/llms/langchain/chat.py b/portkey_ai/llms/langchain/chat.py new file mode 100644 index 00000000..314168de --- /dev/null +++ b/portkey_ai/llms/langchain/chat.py @@ -0,0 +1,214 @@ +from __future__ import annotations +from portkey_ai import Portkey +import logging +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterator, + List, + Mapping, + Optional, + TypedDict, + Union, + cast, +) + +try: + from langchain.callbacks.manager import CallbackManagerForLLMRun + from langchain.chat_models.base import SimpleChatModel + from langchain.pydantic_v1 import Field, PrivateAttr + from langchain.schema.messages import ( + AIMessage, + AIMessageChunk, + BaseMessage, + ChatMessage, + FunctionMessage, + HumanMessage, + SystemMessage, + ) + from langchain.schema.output import ( + ChatGenerationChunk, + ) +except ImportError as exc: + raise Exception( + "Langchain is not installed.Please install it with 
`pip install langchain`." + ) from exc + + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + pass + + +IMPORT_ERROR_MESSAGE = ( + "Portkey is not installed.Please install it with `pip install portkey-ai`." +) + + +def convert_message_to_dict(message: BaseMessage) -> dict: + message_dict: Dict[str, Any] + if isinstance(message, ChatMessage): + message_dict = {"role": message.role, "content": message.content} + elif isinstance(message, HumanMessage): + message_dict = {"role": "user", "content": message.content} + elif isinstance(message, AIMessage): + message_dict = {"role": "assistant", "content": message.content} + if "function_call" in message.additional_kwargs: + message_dict["function_call"] = message.additional_kwargs["function_call"] + # If function call only, content is None not empty string + if message_dict["content"] == "": + message_dict["content"] = None + elif isinstance(message, SystemMessage): + message_dict = {"role": "system", "content": message.content} + elif isinstance(message, FunctionMessage): + message_dict = { + "role": "function", + "content": message.content, + "name": message.name, + } + else: + raise TypeError(f"Got unknown type {message}") + if "name" in message.additional_kwargs: + message_dict["name"] = message.additional_kwargs["name"] + return message_dict + + +class Message(TypedDict): + role: str + content: str + + +class ChatPortkey(SimpleChatModel): + """`Portkey` Chat large language models. + To use, you should have the ``portkey-ai`` python package installed, and the + environment variable ``PORTKEY_API_KEY``, set with your API key, or pass + it as a named parameter to the `Portkey` constructor. + NOTE: You can install portkey using ``pip install portkey-ai`` + Example: + .. code-block:: python + import portkey + from langchain.chat_models import ChatPortkey + # Simplest invocation for an openai provider. Can be extended to + # others as well + llm_option = portkey.LLMOptions( + provider="openai", + # Checkout the docs for the virtual-api-key + virtual_key="openai-virtual-key", + model="text-davinci-003" + ) + # Initialise the client + client = ChatPortkey( + api_key="PORTKEY_API_KEY", + mode="single" + ).add_llms(llm_params=llm_option) + response = client("What are the biggest risks facing humanity?") + """ + + model: Optional[str] = Field(default="gpt-3.5-turbo") + + _client: Any = PrivateAttr() + api_key: Optional[str] = None + base_url: Optional[str] = None + virtual_key: Optional[str] = None + config: Optional[Union[Mapping, str]] = None + provider: Optional[str] = None + trace_id: Optional[str] = None + custom_metadata: Optional[str] = None + + def __init__( + self, + *, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + virtual_key: Optional[str] = None, + config: Optional[Union[Mapping, str]] = None, + provider: Optional[str] = None, + trace_id: Optional[str] = None, + custom_metadata: Optional[str] = None, + **kwargs, + ) -> None: + super().__init__() + + self._client = Portkey( + api_key=api_key, + base_url=base_url, + virtual_key=virtual_key, + config=config, + provider=provider, + trace_id=trace_id, + metadata=custom_metadata, + **kwargs, + ) + self.model = None + + def _call( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """Call Portkey's chatCompletions endpoint. + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. 
+ Returns: + The string generated by the provider set in the initialisation of the LLM. + Example: + .. code-block:: python + message = [{ + "role": "user", + "content": "Tell me a joke." + }] + response = portkey(message) + """ + _messages = cast(Message, self._create_message_dicts(messages)) + response = self._client.chat.completions.create( + messages=_messages, stream=False, stop=stop, **kwargs + ) + message = response.choices[0].message + return message.get("content", "") if message else "" + + def _create_message_dicts( + self, messages: List[BaseMessage] + ) -> List[Dict[str, Any]]: + message_dicts = [convert_message_to_dict(m) for m in messages] + return message_dicts + + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + """Call Portkey completion_stream and return the resulting generator. + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + Returns: + A generator representing the stream of tokens from Portkey. + Example: + .. code-block:: python + prompt = "Write a poem about a stream." + generator = portkey.stream(prompt) + for token in generator: + yield token + """ + _messages = cast(Message, self._create_message_dicts(messages)) + response = self._client.chat.completions.create( + messages=_messages, stream=True, stop=stop, **kwargs + ) + for token in response: + _content = token.choices[0].delta.get("content") or "" + chunk = ChatGenerationChunk(message=AIMessageChunk(content=_content)) + yield chunk + if run_manager: + run_manager.on_llm_new_token(chunk.text, chunk=chunk) + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "portkey-ai-gateway" diff --git a/portkey_ai/llms/langchain/completion.py b/portkey_ai/llms/langchain/completion.py new file mode 100644 index 00000000..22b712cc --- /dev/null +++ b/portkey_ai/llms/langchain/completion.py @@ -0,0 +1,139 @@ +from __future__ import annotations +from portkey_ai import Portkey + +import logging +from typing import Any, Iterator, List, Mapping, Optional, Union + +try: + from langchain.callbacks.manager import CallbackManagerForLLMRun + from langchain.llms.base import LLM + from langchain.pydantic_v1 import Field, PrivateAttr + from langchain.schema.output import GenerationChunk +except ImportError as exc: + raise Exception( + "Langchain is not installed.Please install it with `pip install langchain`." + ) from exc + + +logger = logging.getLogger(__name__) + + +class PortkeyLLM(LLM): + """Portkey Service models + To use, you should have the ``portkey-ai`` python package installed, and the + environment variable ``PORTKEY_API_KEY``, set with your API key, or pass + it as a named parameter to the `Portkey` constructor. + NOTE: You can install portkey using ``pip install portkey-ai`` + Example: + .. code-block:: python + import portkey + from langchain.llms import Portkey + # Simplest invocation for an openai provider. 
Can be extended to + # others as well + llm_option = portkey.LLMOptions( + provider="openai", + # Checkout the docs for the virtual-api-key + virtual_key="openai-virtual-key", + model="text-davinci-003" + ) + # Initialise the client + client = Portkey( + api_key="PORTKEY_API_KEY", + mode="single" + ).add_llms(llm_params=llm_option) + response = client("What are the biggest risks facing humanity?") + """ + + model: Optional[str] = Field(default="gpt-3.5-turbo") + _client: Any = PrivateAttr() + + api_key: Optional[str] = None + base_url: Optional[str] = None + virtual_key: Optional[str] = None + config: Optional[Union[Mapping, str]] = None + provider: Optional[str] = None + trace_id: Optional[str] = None + custom_metadata: Optional[str] = None + + def __init__( + self, + *, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + virtual_key: Optional[str] = None, + config: Optional[Union[Mapping, str]] = None, + provider: Optional[str] = None, + trace_id: Optional[str] = None, + custom_metadata: Optional[str] = None, + **kwargs, + ) -> None: + super().__init__() + + self._client = Portkey( + api_key=api_key, + base_url=base_url, + virtual_key=virtual_key, + config=config, + provider=provider, + trace_id=trace_id, + metadata=custom_metadata, + **kwargs, + ) + self.model = None + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """Call Portkey's completions endpoint. + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + Returns: + The string generated by the provider set in the initialisation of the LLM. + Example: + .. code-block:: python + response = portkey("Tell me a joke.") + """ + response = self._client.completions.create( + prompt=prompt, stream=False, stop=stop, **kwargs + ) + text = response.choices[0].text + return text or "" + + def _stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[GenerationChunk]: + """Call Portkey completion_stream and return the resulting generator. + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + Returns: + A generator representing the stream of tokens from Portkey. + Example: + .. code-block:: python + prompt = "Write a poem about a stream." 
+ generator = portkey.stream(prompt) + for token in generator: + yield token + """ + response = self._client.completions.create( + stream=True, prompt=prompt, stop=stop, **kwargs + ) + for token in response: + chunk = GenerationChunk(text=token.choices[0].text or "") + yield chunk + if run_manager: + run_manager.on_llm_new_token(chunk.text, chunk=chunk) + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "portkey-ai-gateway" diff --git a/portkey_ai/llms/llama_index/__init__.py b/portkey_ai/llms/llama_index/__init__.py new file mode 100644 index 00000000..63c23036 --- /dev/null +++ b/portkey_ai/llms/llama_index/__init__.py @@ -0,0 +1,3 @@ +from .completions import PortkeyLLM + +__all__ = ["PortkeyLLM"] diff --git a/portkey_ai/llms/llama_index/completions.py b/portkey_ai/llms/llama_index/completions.py new file mode 100644 index 00000000..1e7196f3 --- /dev/null +++ b/portkey_ai/llms/llama_index/completions.py @@ -0,0 +1,215 @@ +from portkey_ai import Message, Portkey +from typing import Optional, Union, List, Any, Mapping, cast, Sequence +from portkey_ai.api_resources.utils import PortkeyResponse + +from portkey_ai.llms.llama_index.utils import ( + IMPORT_ERROR_MESSAGE, + is_chat_model, + modelname_to_contextsize, +) + +try: + from llama_index.llms.custom import CustomLLM + from llama_index.bridge.pydantic import PrivateAttr + from llama_index.llms.base import ( + ChatMessage, + ChatResponse, + ChatResponseGen, + CompletionResponse, + CompletionResponseGen, + LLMMetadata, + llm_chat_callback, + llm_completion_callback, + ) +except ImportError as exc: + raise ImportError(IMPORT_ERROR_MESSAGE) from exc + + +class PortkeyLLM(CustomLLM): + """_summary_. + + Args: + LLM (_type_): _description_ + """ + + _client: Any = PrivateAttr() + model: str = "" + + api_key: Optional[str] = None + base_url: Optional[str] = None + virtual_key: Optional[str] = None + config: Optional[Union[Mapping, str]] = None + provider: Optional[str] = None + trace_id: Optional[str] = None + custom_metadata: Optional[str] = None + + def __init__( + self, + *, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + virtual_key: Optional[str] = None, + config: Optional[Union[Mapping, str]] = None, + provider: Optional[str] = None, + trace_id: Optional[str] = None, + custom_metadata: Optional[str] = None, + **kwargs, + ) -> None: + """ + Initialize a Portkey instance. + + Args: + api_key (Optional[str]): The API key to authenticate with Portkey. + base_url (Optional[str]): The Base url to the self hosted rubeus \ + (the opensource version of portkey) or any other self hosted server. 
+ """ + super().__init__( + base_url=base_url, + api_key=api_key, + ) + self._client = Portkey( + api_key=api_key, + base_url=base_url, + virtual_key=virtual_key, + config=config, + provider=provider, + trace_id=trace_id, + metadata=custom_metadata, + **kwargs, + ) + self.model = "" + + @property + def metadata(self) -> LLMMetadata: + """LLM metadata.""" + try: + from llama_index.llms.base import ( + LLMMetadata, + ) + except ImportError as exc: + raise ImportError(IMPORT_ERROR_MESSAGE) from exc + return LLMMetadata( + _context_window=modelname_to_contextsize(self.model) if self.model else 0, + is_chat_model=is_chat_model(self.model), + model_name=self.model, + ) + + @llm_completion_callback() + def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: + """Completion endpoint for LLM.""" + complete_fn = self._complete + return complete_fn(prompt, **kwargs) + + @llm_chat_callback() + def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: + chat_fn = self._chat + return chat_fn(messages, **kwargs) + + @llm_completion_callback() + def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: + """Completion endpoint for LLM.""" + complete_fn = self._stream_complete + return complete_fn(prompt, **kwargs) + + @llm_chat_callback() + def stream_chat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponseGen: + stream_chat_fn = self._stream_chat + return stream_chat_fn(messages, **kwargs) + + def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: + _messages = cast( + List[Message], + [{"role": i.role.value, "content": i.content} for i in messages], + ) + response = self._client.chat.completions.create(messages=_messages, **kwargs) + self.model = self._get_model(response) + + message = response.choices[0].message + return ChatResponse(message=message, raw=response) + + def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: + response = self._client.completions.create(prompt=prompt, **kwargs) + text = response.choices[0].text + return CompletionResponse(text=text, raw=response) + + def _stream_chat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponseGen: + _messages = cast( + List[Message], + [{"role": i.role.value, "content": i.content} for i in messages], + ) + response = self._client.chat.completions.create( + messages=_messages, stream=True, **kwargs + ) + + def gen() -> ChatResponseGen: + content = "" + function_call: Optional[dict] = {} + for resp in response: + if resp.choices is None: + continue + delta = resp.choices[0].delta + role = delta.get("role", "assistant") + content_delta = delta.get("content", "") or "" + content += content_delta + + function_call_delta = delta.get("function_call", None) + if function_call_delta is not None: + if function_call is None: + function_call = function_call_delta + # ensure we do not add a blank function call + if ( + function_call + and function_call.get("function_name", "") is None + ): + del function_call["function_name"] + else: + function_call["arguments"] += function_call_delta["arguments"] + + additional_kwargs = {} + if function_call is not None: + additional_kwargs["function_call"] = function_call + + yield ChatResponse( + message=ChatMessage( + role=role, + content=content, + additional_kwargs=additional_kwargs, + ), + delta=content_delta, + raw=resp, + ) + + return gen() + + def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: + response = 
self._client.completions.create(prompt=prompt, stream=True, **kwargs) + + def gen() -> CompletionResponseGen: + text = "" + for resp in response: + delta = resp.choices[0].text or "" + text += delta + yield CompletionResponse( + delta=delta, + text=text, + raw=resp, + ) + + return gen() + + @property + def _is_chat_model(self) -> bool: + """Check if a given model is a chat-based language model. + + Returns: + bool: True if the provided model is a chat-based language model, + False otherwise. + """ + return is_chat_model(self.model or "") + + def _get_model(self, response: PortkeyResponse) -> str: + return response.model diff --git a/portkey_ai/llms/llama_index/utils.py b/portkey_ai/llms/llama_index/utils.py new file mode 100644 index 00000000..b893563a --- /dev/null +++ b/portkey_ai/llms/llama_index/utils.py @@ -0,0 +1,128 @@ +""" +Utility Tools for the Portkey Class. + +This file module contains a collection of utility functions designed to enhance +the functionality and usability of the Portkey class +""" +from typing import TYPE_CHECKING + + +if TYPE_CHECKING: + pass + +IMPORT_ERROR_MESSAGE = ( + "Llama-Index is not installed.Please install it with `pip install llama-index`." +) + + +def all_available_models(): + try: + from llama_index.llms.anthropic_utils import CLAUDE_MODELS + from llama_index.llms.openai_utils import ( + AZURE_TURBO_MODELS, + GPT3_5_MODELS, + GPT3_MODELS, + GPT4_MODELS, + TURBO_MODELS, + ) + + return { + **GPT4_MODELS, + **TURBO_MODELS, + **GPT3_5_MODELS, + **GPT3_MODELS, + **AZURE_TURBO_MODELS, + **CLAUDE_MODELS, + } + except ImportError as exc: + raise Exception(IMPORT_ERROR_MESSAGE) from exc + + +def chat_models(): + try: + from llama_index.llms.openai_utils import ( + AZURE_TURBO_MODELS, + GPT4_MODELS, + TURBO_MODELS, + ) + + return { + **GPT4_MODELS, + **TURBO_MODELS, + **AZURE_TURBO_MODELS, + } + except ImportError as exc: + raise Exception(IMPORT_ERROR_MESSAGE) from exc + + +DISCONTINUED_MODELS = { + "code-davinci-002": 8001, + "code-davinci-001": 8001, + "code-cushman-002": 2048, + "code-cushman-001": 2048, +} + +DEFAULT_MODEL = "gpt-3.5-turbo" + + +CLUADE_MODEL_FULLVERSION_MAP = { + "claude-instant-1": "claude-instant-1.2", + "claude-2": "claude-2.0", +} + +ALL_AVAILABLE_MODELS = all_available_models() + +CHAT_MODELS = chat_models() + + +def is_chat_model(model: str) -> bool: + """Check if a given model is a chat-based language model. + + This function takes a model name or identifier as input and determines whether + the model is designed for chat-based language generation, conversation, or + interaction. + + Args: + model (str): The name or identifier of the model to be checked. + + Returns: + bool: True if the provided model is a chat-based language model, + False otherwise. + """ + return model in CHAT_MODELS + + +def modelname_to_contextsize(modelname: str) -> int: + """Calculate the maximum number of tokens possible to generate for a model. + + Args: + modelname: The modelname we want to know the context size for. + + Returns: + The maximum context size + + Example: + .. code-block:: python + + max_tokens = modelname_to_contextsize("text-davinci-003") + """ + # handling finetuned models + if "ft-" in modelname: # legacy fine-tuning + modelname = modelname.split(":")[0] + elif modelname.startswith("ft:"): + modelname = modelname.split(":")[1] + + if modelname in DISCONTINUED_MODELS: + raise ValueError( + f"Model {modelname} has been discontinued. " "Please choose another model." 
+ ) + + context_size = ALL_AVAILABLE_MODELS.get(modelname, None) + + if context_size is None: + raise ValueError( + f"Unknown model: {modelname}. Please provide a valid model name." + "Known models are: " + ", ".join(ALL_AVAILABLE_MODELS.keys()) + ) + + return context_size diff --git a/portkey_ai/llms/mypy.ini b/portkey_ai/llms/mypy.ini new file mode 100644 index 00000000..976ba029 --- /dev/null +++ b/portkey_ai/llms/mypy.ini @@ -0,0 +1,2 @@ +[mypy] +ignore_missing_imports = True diff --git a/portkey/py.typed b/portkey_ai/py.typed similarity index 100% rename from portkey/py.typed rename to portkey_ai/py.typed diff --git a/portkey_ai/version.py b/portkey_ai/version.py new file mode 100644 index 00000000..3277f64c --- /dev/null +++ b/portkey_ai/version.py @@ -0,0 +1 @@ +VERSION = "1.0.0" diff --git a/setup.cfg b/setup.cfg index 202bf11b..444cd2a4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = portkey-ai -version = attr: portkey.version.VERSION +version = attr: portkey_ai.version.VERSION description = Python client library for the Portkey API long_description = file: README.md long_description_content_type = text/markdown @@ -27,10 +27,10 @@ install_requires = [options.entry_points] console_scripts = - portkey = portkey._portkey_scripts:main + portkey_ai = portkey_ai._portkey_scripts:main [options.package_data] - portkey = py.typed + portkey_ai = py.typed [options.extras_require] dev = @@ -42,6 +42,9 @@ dev = python-dotenv==1.0.0 ruff==0.0.292 +[mypy] +ignore_missing_imports = true + [options.packages.find] exclude = tests/ diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/anyscale_tests/test_anyscale_CodeLlama-34b-Instruct-hf.py b/tests/anyscale_tests/test_anyscale_CodeLlama-34b-Instruct-hf.py deleted file mode 100644 index 648c9177..00000000 --- a/tests/anyscale_tests/test_anyscale_CodeLlama-34b-Instruct-hf.py +++ /dev/null @@ -1,204 +0,0 @@ -from __future__ import annotations - -import os -from typing import Any -import pytest -import portkey -from portkey import Config, LLMOptions -from dotenv import load_dotenv - -# from tests.utils import assert_matches_type -load_dotenv() -base_url = os.environ.get("PORTKEY_BASE_URL") -api_key = os.environ.get("PORTKEY_API_KEY") -anyscale_api_key = os.environ.get("ANYSCALE_API_KEY") - - -class TestAnyscaleCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="codellama/CodeLlama-34b-Instruct-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="codellama/CodeLlama-34b-Instruct-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", 
"True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="codellama/CodeLlama-34b-Instruct-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="codellama/CodeLlama-34b-Instruct-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - - -class TestAnyscaleChatCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="codellama/CodeLlama-34b-Instruct-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="codellama/CodeLlama-34b-Instruct-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="codellama/CodeLlama-34b-Instruct-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="codellama/CodeLlama-34b-Instruct-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": 
"why is the sky blue ?"}], - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") diff --git a/tests/anyscale_tests/test_anyscale_Llama-2-13b-chat-hf.py b/tests/anyscale_tests/test_anyscale_Llama-2-13b-chat-hf.py deleted file mode 100644 index 503fc0db..00000000 --- a/tests/anyscale_tests/test_anyscale_Llama-2-13b-chat-hf.py +++ /dev/null @@ -1,204 +0,0 @@ -from __future__ import annotations - -import os -from typing import Any -import pytest -import portkey -from portkey import Config, LLMOptions -from dotenv import load_dotenv - -# from tests.utils import assert_matches_type -load_dotenv() -base_url = os.environ.get("PORTKEY_BASE_URL") -api_key = os.environ.get("PORTKEY_API_KEY") -anyscale_api_key = os.environ.get("ANYSCALE_API_KEY") - - -class TestAnyscaleCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-13b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-13b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-13b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-13b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - - -class TestAnyscaleChatCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - 
metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-13b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-13b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-13b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-13b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") diff --git a/tests/anyscale_tests/test_anyscale_Llama-2-70b-chat-hf.py b/tests/anyscale_tests/test_anyscale_Llama-2-70b-chat-hf.py deleted file mode 100644 index 68d9acb0..00000000 --- a/tests/anyscale_tests/test_anyscale_Llama-2-70b-chat-hf.py +++ /dev/null @@ -1,204 +0,0 @@ -from __future__ import annotations - -import os -from typing import Any -import pytest -import portkey -from portkey import Config, LLMOptions -from dotenv import load_dotenv - -# from tests.utils import assert_matches_type -load_dotenv() -base_url = os.environ.get("PORTKEY_BASE_URL") -api_key = os.environ.get("PORTKEY_API_KEY") -anyscale_api_key = os.environ.get("ANYSCALE_API_KEY") - - -class TestAnyscaleCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-70b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def 
test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-70b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-70b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-70b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - - -class TestAnyscaleChatCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-70b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-70b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-70b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # 
assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-70b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") diff --git a/tests/anyscale_tests/test_anyscale_Llama-2-7b-chat-hf.py b/tests/anyscale_tests/test_anyscale_Llama-2-7b-chat-hf.py deleted file mode 100644 index cc1a7b0d..00000000 --- a/tests/anyscale_tests/test_anyscale_Llama-2-7b-chat-hf.py +++ /dev/null @@ -1,204 +0,0 @@ -from __future__ import annotations - -import os -from typing import Any -import pytest -import portkey -from portkey import Config, LLMOptions -from dotenv import load_dotenv - -# from tests.utils import assert_matches_type -load_dotenv() -base_url = os.environ.get("PORTKEY_BASE_URL") -api_key = os.environ.get("PORTKEY_API_KEY") -anyscale_api_key = os.environ.get("ANYSCALE_API_KEY") - - -class TestAnyscaleCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-7b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-7b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-7b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-7b-chat-hf", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - 
stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - - -class TestAnyscaleChatCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-7b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-7b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-7b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - api_key=anyscale_api_key, - provider="anyscale", - metadata={"_user": "portkey-python-sdk"}, - model="meta-llama/Llama-2-7b-chat-hf", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") diff --git a/tests/configs/chat_completions/loadbalance_and_fallback/anthropic_n_openai.json b/tests/configs/chat_completions/loadbalance_and_fallback/anthropic_n_openai.json new file mode 100644 index 00000000..e3d8a401 --- /dev/null +++ b/tests/configs/chat_completions/loadbalance_and_fallback/anthropic_n_openai.json @@ -0,0 +1,28 @@ +{ + "strategy": { + "mode": "loadbalance" + }, + "targets": [ + { + "provider": "openai", + "virtual_key": "open-ai-apikey-3368e0" + }, + { + "strategy": { + "mode": "fallback", + "on_status_codes": [ + 429, + 241 + ] + }, + "targets": [ + { + "virtual_key": "anthropic-419f08" + }, + { + "virtual_key": "open-ai-apikey-3368e0" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/configs/chat_completions/loadbalance_and_fallback/anyscale_n_openai.json b/tests/configs/chat_completions/loadbalance_and_fallback/anyscale_n_openai.json new file mode 100644 index 
00000000..8642515e --- /dev/null +++ b/tests/configs/chat_completions/loadbalance_and_fallback/anyscale_n_openai.json @@ -0,0 +1,28 @@ +{ + "strategy": { + "mode": "loadbalance" + }, + "targets": [ + { + "provider": "openai", + "virtual_key": "open-ai-apikey-3368e0" + }, + { + "strategy": { + "mode": "fallback", + "on_status_codes": [ + 429, + 241 + ] + }, + "targets": [ + { + "virtual_key": "anyscale-c24b93" + }, + { + "virtual_key": "open-ai-apikey-3368e0" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/configs/chat_completions/loadbalance_and_fallback/azure_n_openai.json b/tests/configs/chat_completions/loadbalance_and_fallback/azure_n_openai.json new file mode 100644 index 00000000..241183c5 --- /dev/null +++ b/tests/configs/chat_completions/loadbalance_and_fallback/azure_n_openai.json @@ -0,0 +1,28 @@ +{ + "strategy": { + "mode": "loadbalance" + }, + "targets": [ + { + "provider": "openai", + "virtual_key": "open-ai-apikey-3368e0" + }, + { + "strategy": { + "mode": "fallback", + "on_status_codes": [ + 429, + 241 + ] + }, + "targets": [ + { + "virtual_key": "azure-api-key-993da0" + }, + { + "virtual_key": "open-ai-apikey-3368e0" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/configs/chat_completions/loadbalance_and_fallback/cohere_n_openai.json b/tests/configs/chat_completions/loadbalance_and_fallback/cohere_n_openai.json new file mode 100644 index 00000000..79212141 --- /dev/null +++ b/tests/configs/chat_completions/loadbalance_and_fallback/cohere_n_openai.json @@ -0,0 +1,27 @@ +{ + "strategy": { + "mode": "loadbalance" + }, + "targets": [ + { + "virtual_key": "open-ai-apikey-3368e0" + }, + { + "strategy": { + "mode": "fallback", + "on_status_codes": [ + 429, + 241 + ] + }, + "targets": [ + { + "virtual_key": "cohere-api-key-fffe27" + }, + { + "virtual_key": "open-ai-apikey-3368e0" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/configs/chat_completions/loadbalance_with_two_apikeys/loadbalance_with_two_apikeys.json b/tests/configs/chat_completions/loadbalance_with_two_apikeys/loadbalance_with_two_apikeys.json new file mode 100644 index 00000000..14b58beb --- /dev/null +++ b/tests/configs/chat_completions/loadbalance_with_two_apikeys/loadbalance_with_two_apikeys.json @@ -0,0 +1,15 @@ +{ + "strategy": { + "mode": "loadbalance" + }, + "targets": [ + { + "provider": "openai", + "virtual_key": "open-ai-apikey-3368e0" + }, + { + "provider": "anthropic", + "virtual_key": "anthropic-419f08" + } + ] +} \ No newline at end of file diff --git a/tests/configs/chat_completions/single_provider/single_provider.json b/tests/configs/chat_completions/single_provider/single_provider.json new file mode 100644 index 00000000..e15c4916 --- /dev/null +++ b/tests/configs/chat_completions/single_provider/single_provider.json @@ -0,0 +1,4 @@ +{ + "provider": "openai", + "virtual_key": "open-ai-apikey-3368e0" +} \ No newline at end of file diff --git a/tests/configs/chat_completions/single_provider_with_vk_retry_cache/single_provider_with_vk_retry_cache.json b/tests/configs/chat_completions/single_provider_with_vk_retry_cache/single_provider_with_vk_retry_cache.json new file mode 100644 index 00000000..91700f9d --- /dev/null +++ b/tests/configs/chat_completions/single_provider_with_vk_retry_cache/single_provider_with_vk_retry_cache.json @@ -0,0 +1,13 @@ +{ + "virtual_key": "open-ai-apikey-3368e0", + "cache": { + "mode": "semantic", + "max_age": 60 + }, + "retry": { + "attempts": 5, + "on_status_codes": [ + 429 + ] + } +} \ No newline at end of file diff 
--git a/tests/configs/chat_completions/single_with_basic_config/single_with_basic_config.json b/tests/configs/chat_completions/single_with_basic_config/single_with_basic_config.json new file mode 100644 index 00000000..6703a2fd --- /dev/null +++ b/tests/configs/chat_completions/single_with_basic_config/single_with_basic_config.json @@ -0,0 +1,3 @@ +{ + "virtual_key": "open-ai-apikey-3368e0" +} \ No newline at end of file diff --git a/tests/configs/completions/loadbalance_and_fallback/anthropic_n_openai.json b/tests/configs/completions/loadbalance_and_fallback/anthropic_n_openai.json new file mode 100644 index 00000000..e3d8a401 --- /dev/null +++ b/tests/configs/completions/loadbalance_and_fallback/anthropic_n_openai.json @@ -0,0 +1,28 @@ +{ + "strategy": { + "mode": "loadbalance" + }, + "targets": [ + { + "provider": "openai", + "virtual_key": "open-ai-apikey-3368e0" + }, + { + "strategy": { + "mode": "fallback", + "on_status_codes": [ + 429, + 241 + ] + }, + "targets": [ + { + "virtual_key": "anthropic-419f08" + }, + { + "virtual_key": "open-ai-apikey-3368e0" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/configs/completions/loadbalance_and_fallback/anyscale_n_openai.json b/tests/configs/completions/loadbalance_and_fallback/anyscale_n_openai.json new file mode 100644 index 00000000..8642515e --- /dev/null +++ b/tests/configs/completions/loadbalance_and_fallback/anyscale_n_openai.json @@ -0,0 +1,28 @@ +{ + "strategy": { + "mode": "loadbalance" + }, + "targets": [ + { + "provider": "openai", + "virtual_key": "open-ai-apikey-3368e0" + }, + { + "strategy": { + "mode": "fallback", + "on_status_codes": [ + 429, + 241 + ] + }, + "targets": [ + { + "virtual_key": "anyscale-c24b93" + }, + { + "virtual_key": "open-ai-apikey-3368e0" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/configs/completions/loadbalance_and_fallback/azure_n_openai.json b/tests/configs/completions/loadbalance_and_fallback/azure_n_openai.json new file mode 100644 index 00000000..241183c5 --- /dev/null +++ b/tests/configs/completions/loadbalance_and_fallback/azure_n_openai.json @@ -0,0 +1,28 @@ +{ + "strategy": { + "mode": "loadbalance" + }, + "targets": [ + { + "provider": "openai", + "virtual_key": "open-ai-apikey-3368e0" + }, + { + "strategy": { + "mode": "fallback", + "on_status_codes": [ + 429, + 241 + ] + }, + "targets": [ + { + "virtual_key": "azure-api-key-993da0" + }, + { + "virtual_key": "open-ai-apikey-3368e0" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/configs/completions/loadbalance_and_fallback/cohere_n_openai.json b/tests/configs/completions/loadbalance_and_fallback/cohere_n_openai.json new file mode 100644 index 00000000..79212141 --- /dev/null +++ b/tests/configs/completions/loadbalance_and_fallback/cohere_n_openai.json @@ -0,0 +1,27 @@ +{ + "strategy": { + "mode": "loadbalance" + }, + "targets": [ + { + "virtual_key": "open-ai-apikey-3368e0" + }, + { + "strategy": { + "mode": "fallback", + "on_status_codes": [ + 429, + 241 + ] + }, + "targets": [ + { + "virtual_key": "cohere-api-key-fffe27" + }, + { + "virtual_key": "open-ai-apikey-3368e0" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/configs/completions/loadbalance_with_two_apikeys/loadbalance_with_two_apikeys.json b/tests/configs/completions/loadbalance_with_two_apikeys/loadbalance_with_two_apikeys.json new file mode 100644 index 00000000..14b58beb --- /dev/null +++ b/tests/configs/completions/loadbalance_with_two_apikeys/loadbalance_with_two_apikeys.json @@ -0,0 +1,15 @@ +{ + 
"strategy": { + "mode": "loadbalance" + }, + "targets": [ + { + "provider": "openai", + "virtual_key": "open-ai-apikey-3368e0" + }, + { + "provider": "anthropic", + "virtual_key": "anthropic-419f08" + } + ] +} \ No newline at end of file diff --git a/tests/configs/completions/single_provider/single_provider.json b/tests/configs/completions/single_provider/single_provider.json new file mode 100644 index 00000000..e15c4916 --- /dev/null +++ b/tests/configs/completions/single_provider/single_provider.json @@ -0,0 +1,4 @@ +{ + "provider": "openai", + "virtual_key": "open-ai-apikey-3368e0" +} \ No newline at end of file diff --git a/tests/configs/completions/single_provider_with_vk_retry_cache/single_provider_with_vk_retry_cache.json b/tests/configs/completions/single_provider_with_vk_retry_cache/single_provider_with_vk_retry_cache.json new file mode 100644 index 00000000..91700f9d --- /dev/null +++ b/tests/configs/completions/single_provider_with_vk_retry_cache/single_provider_with_vk_retry_cache.json @@ -0,0 +1,13 @@ +{ + "virtual_key": "open-ai-apikey-3368e0", + "cache": { + "mode": "semantic", + "max_age": 60 + }, + "retry": { + "attempts": 5, + "on_status_codes": [ + 429 + ] + } +} \ No newline at end of file diff --git a/tests/configs/completions/single_with_basic_config/single_with_basic_config.json b/tests/configs/completions/single_with_basic_config/single_with_basic_config.json new file mode 100644 index 00000000..6703a2fd --- /dev/null +++ b/tests/configs/completions/single_with_basic_config/single_with_basic_config.json @@ -0,0 +1,3 @@ +{ + "virtual_key": "open-ai-apikey-3368e0" +} \ No newline at end of file diff --git a/tests/models.json b/tests/models.json new file mode 100644 index 00000000..cc50e242 --- /dev/null +++ b/tests/models.json @@ -0,0 +1,97 @@ +{ + "openai": { + "env_variable": "OPENAI_API_KEY", + "chat": [ + "gpt-4-32k-0613", + "gpt-3.5-turbo-0613", + "gpt-4-1106-preview", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-16k", + "gpt-4", + "gpt-4-0314", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo", + "gpt-4-0613" + ], + "text": [ + "gpt-3.5-turbo-instruct", + "text-davinci-003", + "text-davinci-002", + "text-curie-001", + "text-babbage-001", + "text-ada-001", + "babbage-002", + "davinci-002", + "text-davinci-001" + ] + }, + "anyscale": { + "env_variable": "ANYSCALE_API_KEY", + "chat": [ + "meta-llama/Llama-2-7b-chat-hf", + "meta-llama/Llama-2-13b-chat-hf", + "meta-llama/Llama-2-70b-chat-hf", + "codellama/CodeLlama-34b-Instruct-hf", + "mistralai/Mistral-7B-Instruct-v0.1" + ], + "text": [ + "meta-llama/Llama-2-7b-chat-hf", + "meta-llama/Llama-2-13b-chat-hf", + "meta-llama/Llama-2-70b-chat-hf", + "codellama/CodeLlama-34b-Instruct-hf", + "mistralai/Mistral-7B-Instruct-v0.1" + ] + }, + "anthropic": { + "env_variable": "ANTHROPIC_API_KEY", + "chat": [ + "claude-instant-1.2", + "claude-1", + "claude-1-100k", + "claude-instant-1", + "claude-instant-1-100k", + "claude-1.3", + "claude-1.3-100k", + "claude-1.2", + "claude-1.0", + "claude-instant-1.1", + "claude-instant-1.1-100k", + "claude-instant-1.0", + "claude-2" + ], + "text": [ + "claude-instant-1.2", + "claude-1", + "claude-1-100k", + "claude-instant-1", + "claude-instant-1-100k", + "claude-1.3", + "claude-1.3-100k", + "claude-1.2", + "claude-1.0", + "claude-instant-1.1", + "claude-instant-1.1-100k", + "claude-instant-1.0", + "claude-2" + ] + }, + "cohere": { + "env_variable": "COHERE_API_KEY", + "chat": [ + "command-light", + "command", + "base-light", + "base", + "embed-english-v2.0", + 
"embed-english-light-v2.0", + "embed-multilingual-v2.0", + "embed-english-v3.0", + "embed-english-light-v3.0", + "embed-multilingual-v3.0", + "embed-multilingual-light-v3.0" + ], + "text": [] + } +} \ No newline at end of file diff --git a/tests/test_anthropic.py b/tests/test_anthropic.py deleted file mode 100644 index 45f63f62..00000000 --- a/tests/test_anthropic.py +++ /dev/null @@ -1,218 +0,0 @@ -from __future__ import annotations - -import os -from typing import Any -import pytest -import portkey -from portkey import Config, LLMOptions -from dotenv import load_dotenv - -# from tests.utils import assert_matches_type -load_dotenv() -base_url = os.environ.get("PORTKEY_BASE_URL") -api_key = os.environ.get("PORTKEY_API_KEY") -virtual_api_key = os.environ.get("ANTHROPIC_VIRTUAL_KEY") - - -class TestAnthropicCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="anthropic", - metadata={"_user": "portkey-python-sdk"}, - model="claude-2", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="anthropic", - metadata={"_user": "portkey-python-sdk"}, - model="claude-2", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="anthropic", - metadata={"_user": "portkey-python-sdk"}, - model="claude-2", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="anthropic", - metadata={"_user": "portkey-python-sdk"}, - model="claude-2", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - - -class TestAnthropicChatCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="anthropic", - metadata={"_user": "portkey-python-sdk"}, - model="claude-2", - ), - ) - client.config = config - _ = 
client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="anthropic", - metadata={"_user": "portkey-python-sdk"}, - model="claude-2", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="anthropic", - metadata={"_user": "portkey-python-sdk"}, - model="claude-2", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="anthropic", - metadata={"_user": "portkey-python-sdk"}, - model="claude-2", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - - -class TestOpenaiGenerations: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_stream(self, client: Any) -> None: - config = Config(mode="") - client.config = config - _ = client.Generations.create( - prompt_id="22a96a48-95ef-47cd-84a8-fd37c7930313", - ) diff --git a/tests/test_azure_openai.py b/tests/test_azure_openai.py deleted file mode 100644 index 96188d44..00000000 --- a/tests/test_azure_openai.py +++ /dev/null @@ -1,135 +0,0 @@ -from __future__ import annotations - -import os -from typing import Any -import pytest -import portkey -from portkey import Config, LLMOptions -from dotenv import load_dotenv - -# from tests.utils import assert_matches_type -load_dotenv() -base_url = os.environ.get("PORTKEY_BASE_URL") -api_key = os.environ.get("PORTKEY_API_KEY") -virtual_api_key = os.environ.get("AZURE_OPENAI_VIRTUAL_KEY") - - -class TestAzureChatCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="azure-openai", - metadata={"_user": "portkey-python-sdk"}, - api_version="2023-03-15-preview", - resource_name="portkey", - deployment_id="turbo-16k", - retry={"attempts": 5, "on_status_codes": [429]}, - ), - ) - client.config = config - _ = client.ChatCompletions.create( - 
max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="azure-openai", - metadata={"_user": "portkey-python-sdk"}, - api_version="2023-03-15-preview", - resource_name="portkey", - deployment_id="turbo-16k", - retry={"attempts": 5, "on_status_codes": [429]}, - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="azure-openai", - metadata={"_user": "portkey-python-sdk"}, - api_version="2023-03-15-preview", - resource_name="portkey", - deployment_id="turbo-16k", - retry={"attempts": 5, "on_status_codes": [429]}, - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="azure-openai", - metadata={"_user": "portkey-python-sdk"}, - api_version="2023-03-15-preview", - resource_name="portkey", - deployment_id="turbo-16k", - retry={"attempts": 5, "on_status_codes": [429]}, - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - - -class TestOpenaiGenerations: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_stream(self, client: Any) -> None: - config = Config(mode="") - client.config = config - _ = client.Generations.create( - prompt_id="", - ) diff --git a/tests/test_chat_complete.py b/tests/test_chat_complete.py new file mode 100644 index 00000000..8d08ee29 --- /dev/null +++ b/tests/test_chat_complete.py @@ -0,0 +1,454 @@ +from __future__ import annotations +import inspect + +import os +from os import walk +from typing import Any, Dict, List +import pytest +from uuid import uuid4 +from portkey_ai import Portkey +from time import sleep +from dotenv import load_dotenv +from .utils import read_json_file + + +load_dotenv(override=True) +base_url = os.environ.get("PORTKEY_BASE_URL") +api_key = os.environ.get("PORTKEY_API_KEY") +virtual_api_key = os.environ.get("OPENAI_VIRTUAL_KEY") +CONFIGS_PATH = "./tests/configs/chat_completions" + + +def get_configs(folder_path) -> List[Dict[str, Any]]: + config_files = [] + for dirpath, _, file_names in walk(folder_path): + for f in file_names: + 
config_files.append(read_json_file(os.path.join(dirpath, f))) + + return config_files + + +class TestChatCompletions: + client = Portkey + parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) + models = read_json_file("./tests/models.json") + + def get_metadata(self): + return { + "case": "testing", + "function": inspect.currentframe().f_back.f_code.co_name, + "random_id": str(uuid4()), + } + + # -------------------------- + # Test-1 + t1_params = [] + t = [] + for k, v in models.items(): + for i in v["chat"]: + t.append((client, k, os.environ.get(v["env_variable"]), i)) + + t1_params.extend(t) + + @pytest.mark.parametrize("client, provider, auth, model", t1_params) + def test_method_single_with_vk_and_provider( + self, client: Any, provider: str, auth: str, model + ) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + provider=f"{provider}", + Authorization=f"Bearer {auth}", + trace_id=str(uuid4()), + metadata=self.get_metadata(), + ) + + portkey.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + model=model, + max_tokens=245, + ) + + # -------------------------- + # Test-2 + t2_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_with_basic_config"): + t2_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t2_params) + def test_method_single_with_basic_config(self, client: Any, config: Dict) -> None: + """ + Test the creation of a chat completion with a virtual key using the specified + Portkey client. + + This test method performs the following steps: + 1. Creates a Portkey client instance with the provided base URL, API key, trace + ID, and configuration loaded from the 'single_with_basic_config.json' + file. + 2. Calls the Portkey client's chat.completions.create method to generate a + completion. + 3. Prints the choices from the completion. + + Args: + client (Portkey): The Portkey client instance used for the test. + + Raises: + Any exceptions raised during the test. + + Note: + - Ensure that the 'single_with_basic_config.json' file exists and + contains valid configuration data. + - Modify the 'model' parameter and the 'messages' content as needed for your + use case. + """ + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + metadata=self.get_metadata(), + config=config, + ) + + portkey.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + model="gpt-3.5-turbo", + ) + + # print(completion.choices) + # assert("True", "True") + + # assert_matches_type(TextCompletion, completion, path=["response"]) + + # -------------------------- + # Test-3 + t3_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_provider_with_vk_retry_cache"): + t3_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t3_params) + def test_method_single_provider_with_vk_retry_cache( + self, client: Any, config: Dict + ) -> None: + # 1. Make a new entry in the cache. + # 2. Make a cache hit and see if the response contains the data. + random_id = str(uuid4()) + metadata = self.get_metadata() + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=random_id, + virtual_key=virtual_api_key, + metadata=metadata, + config=config, + ) + + portkey.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + model="gpt-3.5-turbo", + ) + # Sleep so the cache entry can propagate across the workers. The cache is + # eventually consistent, not immediately consistent. 
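+ # The second client below repeats the exact same request with the same trace_id, + # virtual key and config; with the semantic cache from the + # single_provider_with_vk_retry_cache config (mode "semantic", max_age 60), the + # repeated request is expected to be served from the cache once the entry has propagated.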
+ sleep(20) + portkey_2 = client( + base_url=base_url, + api_key=api_key, + trace_id=random_id, + virtual_key=virtual_api_key, + metadata=metadata, + config=config, + ) + + portkey_2.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + model="gpt-3.5-turbo", + ) + + # -------------------------- + # Test-4 + t4_params = [] + for i in get_configs(f"{CONFIGS_PATH}/loadbalance_with_two_apikeys"): + t4_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t4_params) + def test_method_loadbalance_with_two_apikeys( + self, client: Any, config: Dict + ) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + # virtual_key=virtual_api_key, + trace_id=str(uuid4()), + metadata=self.get_metadata(), + config=config, + ) + + completion = portkey.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], max_tokens=245 + ) + + print(completion.choices) + + # -------------------------- + # Test-5 + t5_params = [] + for i in get_configs(f"{CONFIGS_PATH}/loadbalance_and_fallback"): + t5_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t5_params) + def test_method_loadbalance_and_fallback(self, client: Any, config: Dict) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + config=config, + ) + + completion = portkey.chat.completions.create( + messages=[ + { + "role": "user", + "content": "Say this is just a loadbalance and fallback test test", + } + ], + ) + + print(completion.choices) + + # -------------------------- + # Test-6 + t6_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_provider"): + t6_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t6_params) + def test_method_single_provider(self, client: Any, config: Dict) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + config=config, + ) + + completion = portkey.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + model="gpt-3.5-turbo", + ) + + print(completion.choices) + + +class TestChatCompletionsStreaming: + client = Portkey + parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) + models = read_json_file("./tests/models.json") + + def get_metadata(self): + return { + "case": "testing", + "function": inspect.currentframe().f_back.f_code.co_name, + "random_id": str(uuid4()), + } + + # -------------------------- + # Test-1 + t1_params = [] + t = [] + for k, v in models.items(): + for i in v["chat"]: + t.append((client, k, os.environ.get(v["env_variable"]), i)) + + t1_params.extend(t) + + @pytest.mark.parametrize("client, provider, auth, model", t1_params) + def test_method_single_with_vk_and_provider( + self, client: Any, provider: str, auth: str, model + ) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + provider=f"{provider}", + Authorization=f"Bearer {auth}", + trace_id=str(uuid4()), + metadata=self.get_metadata(), + ) + + portkey.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + model=model, + max_tokens=245, + stream=True, + ) + + # -------------------------- + # Test -2 + t2_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_with_basic_config"): + t2_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t2_params) + def test_method_single_with_basic_config(self, client: Any, config: Dict) -> None: + """ + Test the creation of a chat completion with a 
virtual key using the specified + Portkey client. + + This test method performs the following steps: + 1. Creates a Portkey client instance with the provided base URL, API key, trace + ID, and configuration loaded from the 'single_provider_with_virtualkey.json' + file. + 2. Calls the Portkey client's chat.completions.create method to generate a + completion. + 3. Prints the choices from the completion. + + Args: + client (Portkey): The Portkey client instance used for the test. + + Raises: + Any exceptions raised during the test. + + Note: + - Ensure that the 'single_provider_with_virtualkey.json' file exists and + contains valid configuration data. + - Modify the 'model' parameter and the 'messages' content as needed for your + use case. + """ + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + metadata=self.get_metadata(), + config=config, + ) + + portkey.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + model="gpt-3.5-turbo", + stream=True, + ) + + # print(completion.choices) + # assert("True", "True") + + # assert_matches_type(TextCompletion, completion, path=["response"]) + + # -------------------------- + # Test-3 + t3_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_provider_with_vk_retry_cache"): + t3_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t3_params) + def test_method_single_provider_with_vk_retry_cache( + self, client: Any, config: Dict + ) -> None: + # 1. Make a new cache the cache + # 2. Make a cache hit and see if the response contains the data. + random_id = str(uuid4()) + metadata = self.get_metadata() + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=random_id, + virtual_key=virtual_api_key, + metadata=metadata, + config=config, + ) + + portkey.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + model="gpt-3.5-turbo", + stream=True, + ) + # Sleeping for the cache to reflect across the workers. The cache has an + # eventual consistency and not immediate consistency. 
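+ # Streaming variant of the same two-step cache check: the identical streaming + # request issued below should be answerable from the semantic cache once the + # first response has been cached and propagated across workers.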
+ sleep(20) + portkey_2 = client( + base_url=base_url, + api_key=api_key, + trace_id=random_id, + virtual_key=virtual_api_key, + metadata=metadata, + config=config, + ) + + portkey_2.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + model="gpt-3.5-turbo", + stream=True, + ) + + # -------------------------- + # Test-4 + t4_params = [] + for i in get_configs(f"{CONFIGS_PATH}/loadbalance_with_two_apikeys"): + t4_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t4_params) + def test_method_loadbalance_with_two_apikeys( + self, client: Any, config: Dict + ) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + # virtual_key=virtual_api_key, + trace_id=str(uuid4()), + metadata=self.get_metadata(), + config=config, + ) + + completion = portkey.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + max_tokens=245, + stream=True, + ) + + print(completion) + + # -------------------------- + # Test-5 + t5_params = [] + for i in get_configs(f"{CONFIGS_PATH}/loadbalance_and_fallback"): + t5_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t5_params) + def test_method_loadbalance_and_fallback(self, client: Any, config: Dict) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + config=config, + ) + + completion = portkey.chat.completions.create( + messages=[ + { + "role": "user", + "content": "Say this is just a loadbalance and fallback test test", + } + ], + stream=True, + ) + + print(completion) + + # -------------------------- + # Test-6 + t6_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_provider"): + t6_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t6_params) + def test_method_single_provider(self, client: Any, config: Dict) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + config=config, + ) + + completion = portkey.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], + model="gpt-3.5-turbo", + stream=True, + ) + + print(completion) diff --git a/tests/test_cohere.py b/tests/test_cohere.py deleted file mode 100644 index e19397e1..00000000 --- a/tests/test_cohere.py +++ /dev/null @@ -1,127 +0,0 @@ -from __future__ import annotations - -import os -from typing import Any -import pytest -import portkey -from portkey import Config, LLMOptions -from dotenv import load_dotenv - -# from tests.utils import assert_matches_type -load_dotenv() -base_url = os.environ.get("PORTKEY_BASE_URL") -api_key = os.environ.get("PORTKEY_API_KEY") -virtual_api_key = os.environ.get("COHERE_VIRTUAL_KEY") - - -class TestCohereCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="cohere", - metadata={"_user": "portkey-python-sdk"}, - model="command-nightly", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - model="text-davinci-003", - prompt="why is the sky blue ?", - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - 
virtual_key=virtual_api_key, - provider="cohere", - metadata={"_user": "portkey-python-sdk"}, - model="command-nightly", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - model="text-davinci-003", - prompt="why is the sky blue ?", - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="cohere", - metadata={"_user": "portkey-python-sdk"}, - model="command-nightly", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - model="text-davinci-003", - prompt="why is the sky blue ?", - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="cohere", - metadata={"_user": "portkey-python-sdk"}, - model="command-nightly", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - model="text-davinci-003", - prompt="why is the sky blue ?", - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - - -class TestOpenaiGenerations: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_stream(self, client: Any) -> None: - config = Config(mode="") - client.config = config - _ = client.Generations.create( - prompt_id="22a96a48-95ef-47cd-84a8-fd37c7930313", - ) diff --git a/tests/test_complete.py b/tests/test_complete.py new file mode 100644 index 00000000..527defba --- /dev/null +++ b/tests/test_complete.py @@ -0,0 +1,416 @@ +from __future__ import annotations +import inspect + +import os +from os import walk +from typing import Any, Dict, List +import pytest +from uuid import uuid4 +from portkey_ai import Portkey +from time import sleep +from dotenv import load_dotenv +from .utils import read_json_file + + +load_dotenv(override=True) +base_url = os.environ.get("PORTKEY_BASE_URL") +api_key = os.environ.get("PORTKEY_API_KEY") +virtual_api_key = os.environ.get("OPENAI_VIRTUAL_KEY") +CONFIGS_PATH = "./tests/configs/completions" + + +def get_configs(folder_path) -> List[Dict[str, Any]]: + config_files = [] + for dirpath, _, file_names in walk(folder_path): + for f in file_names: + config_files.append(read_json_file(os.path.join(dirpath, f))) + + return config_files + + +class TestChatCompletions: + client = Portkey + parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) + models = read_json_file("./tests/models.json") + + def get_metadata(self): + return { + "case": "testing", + "function": inspect.currentframe().f_back.f_code.co_name, + "random_id": str(uuid4()), + } + + # -------------------------- + # Test-1 + t1_params = [] + t = [] + for k, v in models.items(): + for i in v["text"]: + t.append((client, k, os.environ.get(v["env_variable"]), i)) + + t1_params.extend(t) + + @pytest.mark.parametrize("client, provider, auth, model", t1_params) + def test_method_single_with_vk_and_provider( + self, 
client: Any, provider: str, auth: str, model + ) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + provider=f"{provider}", + Authorization=f"Bearer {auth}", + trace_id=str(uuid4()), + metadata=self.get_metadata(), + ) + + portkey.completions.create( + prompt="Say this is a test", + model=model, + max_tokens=245, + ) + + # -------------------------- + # Test-2 + t2_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_with_basic_config"): + t2_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t2_params) + def test_method_single_with_basic_config(self, client: Any, config: Dict) -> None: + """ + Test the creation of a text completion with a virtual key using the specified + Portkey client. + + This test method performs the following steps: + 1. Creates a Portkey client instance with the provided base URL, API key, + trace ID, and configuration loaded from the + 'single_with_basic_config.json' file. + 2. Calls the Portkey client's completions.create method to generate a completion. + 3. Prints the choices from the completion. + + Args: + client (Portkey): The Portkey client instance used for the test. + + Raises: + Any exceptions raised during the test. + + Note: + - Ensure that the 'single_with_basic_config.json' file exists and + contains valid configuration data. + - Modify the 'model' parameter and the 'prompt' content as needed for your + use case. + """ + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + metadata=self.get_metadata(), + config=config, + ) + + portkey.completions.create( + prompt="Say this is a test", + ) + + # print(completion.choices) + # assert("True", "True") + + # assert_matches_type(TextCompletion, completion, path=["response"]) + + # -------------------------- + # Test-3 + t3_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_provider_with_vk_retry_cache"): + t3_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t3_params) + def test_method_single_provider_with_vk_retry_cache( + self, client: Any, config: Dict + ) -> None: + # 1. Make a new entry in the cache. + # 2. Make a cache hit and see if the response contains the data. + random_id = str(uuid4()) + metadata = self.get_metadata() + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=random_id, + virtual_key=virtual_api_key, + metadata=metadata, + config=config, + ) + + portkey.completions.create( + prompt="Say this is a test", + ) + # Sleep so the cache entry can propagate across the workers. The cache is + # eventually consistent, not immediately consistent. 
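+ # As in the chat-completion tests, the follow-up client below replays the same + # prompt with the same trace_id; with retries on 429 and the semantic cache from + # the config, the second call is expected to come back as a cache hit.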
+ sleep(20) + portkey_2 = client( + base_url=base_url, + api_key=api_key, + trace_id=random_id, + virtual_key=virtual_api_key, + metadata=metadata, + config=config, + ) + + portkey_2.completions.create(prompt="Say this is a test") + + # -------------------------- + # Test-4 + t4_params = [] + for i in get_configs(f"{CONFIGS_PATH}/loadbalance_with_two_apikeys"): + t4_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t4_params) + def test_method_loadbalance_with_two_apikeys( + self, client: Any, config: Dict + ) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + # virtual_key=virtual_api_key, + trace_id=str(uuid4()), + metadata=self.get_metadata(), + config=config, + ) + + completion = portkey.completions.create( + prompt="Say this is a test", max_tokens=245 + ) + + print(completion.choices) + + # -------------------------- + # Test-5 + t5_params = [] + for i in get_configs(f"{CONFIGS_PATH}/loadbalance_and_fallback"): + t5_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t5_params) + def test_method_loadbalance_and_fallback(self, client: Any, config: Dict) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + config=config, + ) + + completion = portkey.completions.create( + prompt="Say this is just a loadbalance and fallback test test" + ) + + print(completion.choices) + + # -------------------------- + # Test-6 + t6_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_provider"): + t6_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t6_params) + def test_method_single_provider(self, client: Any, config: Dict) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + config=config, + ) + + completion = portkey.completions.create( + prompt="Say this is a test", + ) + + print(completion.choices) + + +class TestChatCompletionsStreaming: + client = Portkey + parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) + models = read_json_file("./tests/models.json") + + def get_metadata(self): + return { + "case": "testing", + "function": inspect.currentframe().f_back.f_code.co_name, + "random_id": str(uuid4()), + } + + # -------------------------- + # Test-1 + t1_params = [] + t = [] + for k, v in models.items(): + for i in v["text"]: + t.append((client, k, os.environ.get(v["env_variable"]), i)) + + t1_params.extend(t) + + @pytest.mark.parametrize("client, provider, auth, model", t1_params) + def test_method_single_with_vk_and_provider( + self, client: Any, provider: str, auth: str, model + ) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + provider=f"{provider}", + Authorization=f"Bearer {auth}", + trace_id=str(uuid4()), + metadata=self.get_metadata(), + ) + + portkey.completions.create( + prompt="Say this is a test", model=model, max_tokens=245, stream=True + ) + + # -------------------------- + # Test -2 + t2_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_with_basic_config"): + t2_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t2_params) + def test_method_single_with_basic_config(self, client: Any, config: Dict) -> None: + """ + Test the creation of a chat completion with a virtual key using the specified + Portkey client. + + This test method performs the following steps: + 1. Creates a Portkey client instance with the provided base URL, API key, + trace ID,and configuration loaded from the + 'single_provider_with_virtualkey.json' file. + 2. 
Calls the Portkey client's completions.create method to generate a completion + 3. Prints the choices from the completion. + + Args: + client (Portkey): The Portkey client instance used for the test. + + Raises: + Any exceptions raised during the test. + + Note: + - Ensure that the 'single_provider_with_virtualkey.json' file exists and + contains valid configuration data. + - Modify the 'model' parameter and the 'messages' content as needed for your + use case. + """ + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + metadata=self.get_metadata(), + config=config, + ) + + portkey.completions.create(prompt="Say this is a test", stream=True) + + # print(completion.choices) + # assert("True", "True") + + # assert_matches_type(TextCompletion, completion, path=["response"]) + + # -------------------------- + # Test-3 + t3_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_provider_with_vk_retry_cache"): + t3_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t3_params) + def test_method_single_provider_with_vk_retry_cache( + self, client: Any, config: Dict + ) -> None: + # 1. Make a new cache the cache + # 2. Make a cache hit and see if the response contains the data. + random_id = str(uuid4()) + metadata = self.get_metadata() + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=random_id, + virtual_key=virtual_api_key, + metadata=metadata, + config=config, + ) + + portkey.completions.create(prompt="Say this is a test", stream=True) + # Sleeping for the cache to reflect across the workers. The cache has an + # eventual consistency and not immediate consistency. + sleep(20) + portkey_2 = client( + base_url=base_url, + api_key=api_key, + trace_id=random_id, + virtual_key=virtual_api_key, + metadata=metadata, + config=config, + ) + + portkey_2.completions.create(prompt="Say this is a test", stream=True) + + # -------------------------- + # Test-4 + t4_params = [] + for i in get_configs(f"{CONFIGS_PATH}/loadbalance_with_two_apikeys"): + t4_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t4_params) + def test_method_loadbalance_with_two_apikeys( + self, client: Any, config: Dict + ) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + # virtual_key=virtual_api_key, + trace_id=str(uuid4()), + metadata=self.get_metadata(), + config=config, + ) + + completion = portkey.completions.create( + prompt="Say this is a test", max_tokens=245, stream=True + ) + + print(completion) + + # -------------------------- + # Test-5 + t5_params = [] + for i in get_configs(f"{CONFIGS_PATH}/loadbalance_and_fallback"): + t5_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t5_params) + def test_method_loadbalance_and_fallback(self, client: Any, config: Dict) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + config=config, + ) + + completion = portkey.completions.create( + prompt="Say this is just a loadbalance and fallback test test", stream=True + ) + + print(completion) + + # -------------------------- + # Test-6 + t6_params = [] + for i in get_configs(f"{CONFIGS_PATH}/single_provider"): + t6_params.append((client, i)) + + @pytest.mark.parametrize("client, config", t6_params) + def test_method_single_provider(self, client: Any, config: Dict) -> None: + portkey = client( + base_url=base_url, + api_key=api_key, + trace_id=str(uuid4()), + config=config, + ) + + completion = portkey.completions.create( + prompt="Say this is a 
test", stream=True + ) + + print(completion) diff --git a/tests/test_openai.py b/tests/test_openai.py deleted file mode 100644 index f6f95f88..00000000 --- a/tests/test_openai.py +++ /dev/null @@ -1,218 +0,0 @@ -from __future__ import annotations - -import os -from typing import Any -import pytest -import portkey -from portkey import Config, LLMOptions -from dotenv import load_dotenv - -# from tests.utils import assert_matches_type -load_dotenv() -base_url = os.environ.get("PORTKEY_BASE_URL") -api_key = os.environ.get("PORTKEY_API_KEY") -virtual_api_key = os.environ.get("OPENAI_VIRTUAL_KEY") - - -class TestOpenaiCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="openai", - metadata={"_user": "portkey-python-sdk"}, - model="text-davinci-003", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="openai", - metadata={"_user": "portkey-python-sdk"}, - model="text-davinci-003", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="openai", - metadata={"_user": "portkey-python-sdk"}, - model="text-davinci-003", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="openai", - metadata={"_user": "portkey-python-sdk"}, - model="text-davinci-003", - ), - ) - client.config = config - _ = client.Completions.create( - max_tokens=256, - prompt="why is the sky blue ?", - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - - -class TestOpenaiChatCompletions: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="openai", - metadata={"_user": "portkey-python-sdk"}, - model="gpt-3.5-turbo", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - ) - # assert("True", "True") - - # assert_matches_type(TextCompletion, 
completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_non_stream(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="openai", - metadata={"_user": "portkey-python-sdk"}, - model="gpt-3.5-turbo", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stop_sequences=["string", "string", "string"], - stream=False, - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - # assert_matches_type(TextCompletion, completion, path=["response"]) - - @parametrize - def test_method_create_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="openai", - metadata={"_user": "portkey-python-sdk"}, - model="gpt-3.5-turbo", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - ) - # assert("True", "True") - - # for chunk in completion_streaming: - # assert_matches_type(TextCompletionChunk, chunk, path=["response"]) - - @parametrize - def test_method_create_with_all_params_streaming(self, client: Any) -> None: - config = Config( - mode="single", - llms=LLMOptions( - virtual_key=virtual_api_key, - provider="openai", - metadata={"_user": "portkey-python-sdk"}, - model="gpt-3.5-turbo", - ), - ) - client.config = config - _ = client.ChatCompletions.create( - max_tokens=256, - messages=[{"role": "user", "content": "why is the sky blue ?"}], - stream=True, - stop_sequences=["string", "string", "string"], - temperature=1, - top_k=5, - top_p=0.7, - ) - # assert("True", "True") - - -class TestOpenaiGenerations: - client = portkey - client.api_key = api_key - parametrize = pytest.mark.parametrize("client", [client], ids=["strict"]) - - @parametrize - def test_method_create_stream(self, client: Any) -> None: - config = Config(mode="single") - client.config = config - client.Generations.create( - prompt_id="", - ) diff --git a/tests/utils.py b/tests/utils.py index e69de29b..1f9484b1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -0,0 +1,6 @@ +import json +from typing import Any, Dict + + +def read_json_file(path: str) -> Dict[str, Any]: + return json.load(open(path, "r"))
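For quick reference, the client pattern these completion tests exercise can be reproduced outside pytest roughly as follows. This is a minimal sketch, not part of the patch: it assumes the same environment variables the test suite loads via dotenv, and the `_user` metadata value and chunk-by-chunk iteration over the streamed response are illustrative assumptions rather than something the tests assert.

```python
import os
from uuid import uuid4

from dotenv import load_dotenv
from portkey_ai import Portkey

load_dotenv(override=True)

# Same environment variables the test suite reads.
portkey = Portkey(
    base_url=os.environ.get("PORTKEY_BASE_URL"),
    api_key=os.environ.get("PORTKEY_API_KEY"),
    virtual_key=os.environ.get("OPENAI_VIRTUAL_KEY"),
    trace_id=str(uuid4()),
    metadata={"_user": "portkey-python-sdk"},  # illustrative metadata
)

# Non-streaming text completion, mirroring the non-streaming test class.
completion = portkey.completions.create(
    prompt="Say this is a test",
    max_tokens=245,
)
print(completion.choices)

# Streaming variant, mirroring the streaming test class; assumes the
# streamed response can be iterated chunk by chunk.
for chunk in portkey.completions.create(prompt="Say this is a test", stream=True):
    print(chunk)
```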